|
|
@@ -576,10 +576,12 @@ static uint16_t Fp16Mul(uint16_t v_1, uint16_t v_2) { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
int16_t e_a, e_b; |
|
|
|
uint32_t m_a, m_b; |
|
|
|
uint16_t s_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
int16_t e_ret; |
|
|
|
uint32_t mul_m; |
|
|
|
uint16_t m_a_tmp, m_b_tmp; |
|
|
|
uint16_t m_a_tmp; |
|
|
|
uint16_t m_b_tmp; |
|
|
|
// 1.Extract |
|
|
|
ExtractFp16(v_1, s_a, e_a, m_a_tmp); |
|
|
|
ExtractFp16(v_2, s_b, e_b, m_b_tmp); |
|
|
@@ -635,7 +637,8 @@ static uint16_t Fp16Div(uint16_t v_1, uint16_t v_2) { |
|
|
|
uint16_t ret; |
|
|
|
if (FP16_IS_ZERO(v_2)) { // result is inf |
|
|
|
// throw "fp16_t division by zero."; |
|
|
|
uint16_t s_a, s_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
uint16_t s_ret; |
|
|
|
s_a = FP16_EXTRAC_SIGN(v_1); |
|
|
|
s_b = FP16_EXTRAC_SIGN(v_2); |
|
|
@@ -644,11 +647,15 @@ static uint16_t Fp16Div(uint16_t v_1, uint16_t v_2) { |
|
|
|
} else if (FP16_IS_ZERO(v_1)) { |
|
|
|
ret = 0u; |
|
|
|
} else { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
int16_t e_a, e_b; |
|
|
|
uint64_t m_a, m_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
int16_t e_a; |
|
|
|
int16_t e_b; |
|
|
|
uint64_t m_a; |
|
|
|
uint64_t m_b; |
|
|
|
float m_div; |
|
|
|
uint16_t m_a_tmp, m_b_tmp; |
|
|
|
uint16_t m_a_tmp; |
|
|
|
uint16_t m_b_tmp; |
|
|
|
// 1.Extract |
|
|
|
ExtractFp16(v_1, s_a, e_a, m_a_tmp); |
|
|
|
ExtractFp16(v_2, s_b, e_b, m_b_tmp); |
|
|
@@ -742,9 +749,12 @@ bool fp16_t::operator!=(const fp16_t &fp) const { |
|
|
|
return result; |
|
|
|
} |
|
|
|
bool fp16_t::operator>(const fp16_t &fp) const { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
uint16_t e_a, e_b; |
|
|
|
uint16_t m_a, m_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
uint16_t e_a; |
|
|
|
uint16_t e_b; |
|
|
|
uint16_t m_a; |
|
|
|
uint16_t m_b; |
|
|
|
bool result = true; |
|
|
|
|
|
|
|
// 1.Extract |
|
|
@@ -823,9 +833,11 @@ fp16_t &fp16_t::operator=(const fp16_t &fp) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const float &f_val) { |
|
|
|
uint16_t s_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
int16_t e_ret; |
|
|
|
uint32_t e_f, m_f; |
|
|
|
uint32_t e_f; |
|
|
|
uint32_t m_f; |
|
|
|
const uint32_t ui32_v = *(reinterpret_cast<const uint32_t *>(&f_val)); // 1:8:23bit sign:exp:man |
|
|
|
uint32_t m_len_delta; |
|
|
|
|
|
|
@@ -874,7 +886,9 @@ fp16_t &fp16_t::operator=(const float &f_val) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const int8_t &i_val) { |
|
|
|
uint16_t s_ret, e_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t e_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
|
|
|
|
s_ret = static_cast<uint16_t>(((static_cast<uint8_t>(i_val)) & 0x80) >> kDim7); |
|
|
|
m_ret = static_cast<uint16_t>(((static_cast<uint8_t>(i_val)) & kInt8Max)); |
|
|
@@ -898,7 +912,9 @@ fp16_t &fp16_t::operator=(const int8_t &i_val) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const uint8_t &ui_val) { |
|
|
|
uint16_t s_ret, e_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t e_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
s_ret = 0; |
|
|
|
e_ret = 0; |
|
|
|
m_ret = ui_val; |
|
|
|