|
|
@@ -90,7 +90,8 @@ static float Fp16ToFloat(const uint16_t &fp_val) { |
|
|
|
hf_exp--; |
|
|
|
} |
|
|
|
|
|
|
|
uint32_t e_ret, m_ret; |
|
|
|
uint32_t e_ret; |
|
|
|
uint32_t m_ret; |
|
|
|
uint32_t s_ret = hf_sign; |
|
|
|
if (hf_man == 0) { |
|
|
|
e_ret = 0; |
|
|
@@ -573,13 +574,18 @@ static uint16_t Fp16Sub(uint16_t v_1, uint16_t v_2) { |
|
|
|
/// @brief Performs fp16_t multiplication |
|
|
|
/// @return fp16_t result of multiplying v_1 and v_2 |
|
|
|
static uint16_t Fp16Mul(uint16_t v_1, uint16_t v_2) { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
int16_t e_a, e_b; |
|
|
|
uint32_t m_a, m_b; |
|
|
|
uint16_t s_ret, m_ret; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
int16_t e_a; |
|
|
|
int16_t e_b; |
|
|
|
uint32_t m_a; |
|
|
|
uint32_t m_b; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
int16_t e_ret; |
|
|
|
uint32_t mul_m; |
|
|
|
uint16_t m_a_tmp, m_b_tmp; |
|
|
|
uint16_t m_a_tmp; |
|
|
|
uint16_t m_b_tmp; |
|
|
|
// 1.Extract |
|
|
|
ExtractFp16(v_1, s_a, e_a, m_a_tmp); |
|
|
|
ExtractFp16(v_2, s_b, e_b, m_b_tmp); |
|
|
@@ -635,7 +641,8 @@ static uint16_t Fp16Div(uint16_t v_1, uint16_t v_2) { |
|
|
|
uint16_t ret; |
|
|
|
if (FP16_IS_ZERO(v_2)) { // result is inf |
|
|
|
// throw "fp16_t division by zero."; |
|
|
|
uint16_t s_a, s_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
uint16_t s_ret; |
|
|
|
s_a = FP16_EXTRAC_SIGN(v_1); |
|
|
|
s_b = FP16_EXTRAC_SIGN(v_2); |
|
|
@@ -644,11 +651,15 @@ static uint16_t Fp16Div(uint16_t v_1, uint16_t v_2) { |
|
|
|
} else if (FP16_IS_ZERO(v_1)) { |
|
|
|
ret = 0u; |
|
|
|
} else { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
int16_t e_a, e_b; |
|
|
|
uint64_t m_a, m_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
int16_t e_a; |
|
|
|
int16_t e_b; |
|
|
|
uint64_t m_a; |
|
|
|
uint64_t m_b; |
|
|
|
float m_div; |
|
|
|
uint16_t m_a_tmp, m_b_tmp; |
|
|
|
uint16_t m_a_tmp; |
|
|
|
uint16_t m_b_tmp; |
|
|
|
// 1.Extract |
|
|
|
ExtractFp16(v_1, s_a, e_a, m_a_tmp); |
|
|
|
ExtractFp16(v_2, s_b, e_b, m_b_tmp); |
|
|
@@ -742,9 +753,12 @@ bool fp16_t::operator!=(const fp16_t &fp) const { |
|
|
|
return result; |
|
|
|
} |
|
|
|
bool fp16_t::operator>(const fp16_t &fp) const { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
uint16_t e_a, e_b; |
|
|
|
uint16_t m_a, m_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
uint16_t e_a; |
|
|
|
uint16_t e_b; |
|
|
|
uint16_t m_a; |
|
|
|
uint16_t m_b; |
|
|
|
bool result = true; |
|
|
|
|
|
|
|
// 1.Extract |
|
|
@@ -823,9 +837,11 @@ fp16_t &fp16_t::operator=(const fp16_t &fp) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const float &f_val) { |
|
|
|
uint16_t s_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
int16_t e_ret; |
|
|
|
uint32_t e_f, m_f; |
|
|
|
uint32_t e_f; |
|
|
|
uint32_t m_f; |
|
|
|
const uint32_t ui32_v = *(reinterpret_cast<const uint32_t *>(&f_val)); // 1:8:23bit sign:exp:man |
|
|
|
uint32_t m_len_delta; |
|
|
|
|
|
|
@@ -874,7 +890,9 @@ fp16_t &fp16_t::operator=(const float &f_val) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const int8_t &i_val) { |
|
|
|
uint16_t s_ret, e_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t e_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
|
|
|
|
s_ret = static_cast<uint16_t>(((static_cast<uint8_t>(i_val)) & 0x80) >> kDim7); |
|
|
|
m_ret = static_cast<uint16_t>(((static_cast<uint8_t>(i_val)) & kInt8Max)); |
|
|
@@ -898,7 +916,9 @@ fp16_t &fp16_t::operator=(const int8_t &i_val) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const uint8_t &ui_val) { |
|
|
|
uint16_t s_ret, e_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t e_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
s_ret = 0; |
|
|
|
e_ret = 0; |
|
|
|
m_ret = ui_val; |
|
|
|