|
|
@@ -90,7 +90,8 @@ static float Fp16ToFloat(const uint16_t &fp_val) { |
|
|
|
hf_exp--; |
|
|
|
} |
|
|
|
|
|
|
|
uint32_t e_ret, m_ret; |
|
|
|
uint32_t e_ret; |
|
|
|
uint32_t m_ret; |
|
|
|
uint32_t s_ret = hf_sign; |
|
|
|
if (hf_man == 0) { |
|
|
|
e_ret = 0; |
|
|
@@ -573,13 +574,18 @@ static uint16_t Fp16Sub(uint16_t v_1, uint16_t v_2) { |
|
|
|
/// @brief Performing fp16_t multiplication |
|
|
|
/// @return Return fp16_t result of multiplying this and fp |
|
|
|
static uint16_t Fp16Mul(uint16_t v_1, uint16_t v_2) { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
int16_t e_a, e_b; |
|
|
|
uint32_t m_a, m_b; |
|
|
|
uint16_t s_ret, m_ret; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
int16_t e_a; |
|
|
|
int16_t e_b; |
|
|
|
uint32_t m_a; |
|
|
|
uint32_t m_b; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
int16_t e_ret; |
|
|
|
uint32_t mul_m; |
|
|
|
uint16_t m_a_tmp, m_b_tmp; |
|
|
|
uint16_t m_a_tmp; |
|
|
|
uint16_t m_b_tmp; |
|
|
|
// 1.Extract |
|
|
|
ExtractFp16(v_1, s_a, e_a, m_a_tmp); |
|
|
|
ExtractFp16(v_2, s_b, e_b, m_b_tmp); |
|
|
@@ -644,11 +650,15 @@ static uint16_t Fp16Div(uint16_t v_1, uint16_t v_2) { |
|
|
|
} else if (FP16_IS_ZERO(v_1)) { |
|
|
|
ret = 0u; |
|
|
|
} else { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
int16_t e_a, e_b; |
|
|
|
uint64_t m_a, m_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
int16_t e_a; |
|
|
|
int16_t e_b; |
|
|
|
uint64_t m_a; |
|
|
|
uint64_t m_b; |
|
|
|
float m_div; |
|
|
|
uint16_t m_a_tmp, m_b_tmp; |
|
|
|
uint16_t m_a_tmp; |
|
|
|
uint16_t m_b_tmp; |
|
|
|
// 1.Extract |
|
|
|
ExtractFp16(v_1, s_a, e_a, m_a_tmp); |
|
|
|
ExtractFp16(v_2, s_b, e_b, m_b_tmp); |
|
|
@@ -742,9 +752,12 @@ bool fp16_t::operator!=(const fp16_t &fp) const { |
|
|
|
return result; |
|
|
|
} |
|
|
|
bool fp16_t::operator>(const fp16_t &fp) const { |
|
|
|
uint16_t s_a, s_b; |
|
|
|
uint16_t e_a, e_b; |
|
|
|
uint16_t m_a, m_b; |
|
|
|
uint16_t s_a; |
|
|
|
uint16_t s_b; |
|
|
|
uint16_t e_a; |
|
|
|
uint16_t e_b; |
|
|
|
uint16_t m_a; |
|
|
|
uint16_t m_b; |
|
|
|
bool result = true; |
|
|
|
|
|
|
|
// 1.Extract |
|
|
@@ -823,9 +836,11 @@ fp16_t &fp16_t::operator=(const fp16_t &fp) { |
|
|
|
return *this; |
|
|
|
} |
|
|
|
fp16_t &fp16_t::operator=(const float &f_val) { |
|
|
|
uint16_t s_ret, m_ret; |
|
|
|
uint16_t s_ret; |
|
|
|
uint16_t m_ret; |
|
|
|
int16_t e_ret; |
|
|
|
uint32_t e_f, m_f; |
|
|
|
uint32_t e_f; |
|
|
|
uint32_t m_f; |
|
|
|
const uint32_t ui32_v = *(reinterpret_cast<const uint32_t *>(&f_val)); // 1:8:23bit sign:exp:man |
|
|
|
uint32_t m_len_delta; |
|
|
|
|
|
|
@@ -1180,40 +1195,20 @@ fp16_t &fp16_t::operator=(const double &d_val) { |
|
|
|
} |
|
|
|
|
|
|
|
// convert |
|
|
|
fp16_t::operator float() const { |
|
|
|
return Fp16ToFloat(val); |
|
|
|
} |
|
|
|
fp16_t::operator double() const { |
|
|
|
return Fp16ToDouble(val); |
|
|
|
} |
|
|
|
fp16_t::operator int8_t() const { |
|
|
|
return Fp16ToInt8(val); |
|
|
|
} |
|
|
|
fp16_t::operator uint8_t() const { |
|
|
|
return Fp16ToUInt8(val); |
|
|
|
} |
|
|
|
fp16_t::operator int16_t() const { |
|
|
|
return Fp16ToInt16(val); |
|
|
|
} |
|
|
|
fp16_t::operator uint16_t() const { |
|
|
|
return Fp16ToUInt16(val); |
|
|
|
} |
|
|
|
fp16_t::operator int32_t() const { |
|
|
|
return Fp16ToInt32(val); |
|
|
|
} |
|
|
|
fp16_t::operator uint32_t() const { |
|
|
|
return Fp16ToUInt32(val); |
|
|
|
} |
|
|
|
fp16_t::operator float() const { return Fp16ToFloat(val); } |
|
|
|
fp16_t::operator double() const { return Fp16ToDouble(val); } |
|
|
|
fp16_t::operator int8_t() const { return Fp16ToInt8(val); } |
|
|
|
fp16_t::operator uint8_t() const { return Fp16ToUInt8(val); } |
|
|
|
fp16_t::operator int16_t() const { return Fp16ToInt16(val); } |
|
|
|
fp16_t::operator uint16_t() const { return Fp16ToUInt16(val); } |
|
|
|
fp16_t::operator int32_t() const { return Fp16ToInt32(val); } |
|
|
|
fp16_t::operator uint32_t() const { return Fp16ToUInt32(val); } |
|
|
|
// Cannot be used, just in order to solve the compile error |
|
|
|
fp16_t::operator int64_t() const { |
|
|
|
return 0; |
|
|
|
} |
|
|
|
fp16_t::operator int64_t() const { return 0; } |
|
|
|
// Cannot be used, just in order to solve the compile error |
|
|
|
fp16_t::operator uint64_t() const { |
|
|
|
return 0; |
|
|
|
} |
|
|
|
fp16_t::operator uint64_t() const { return 0; } |
|
|
|
|
|
|
|
int fp16_t::IsInf() { |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int fp16_t::IsInf() { |
|
|
|
if ((val & kFp16AbsMax) == kFp16ExpMask) { |
|
|
|
if (val & kFp16SignMask) { |
|
|
|
return -1; |
|
|
@@ -1225,28 +1220,12 @@ int fp16_t::IsInf() { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
float fp16_t::ToFloat() const { |
|
|
|
return Fp16ToFloat(val); |
|
|
|
} |
|
|
|
double fp16_t::ToDouble() const { |
|
|
|
return Fp16ToDouble(val); |
|
|
|
} |
|
|
|
int8_t fp16_t::ToInt8() const { |
|
|
|
return Fp16ToInt8(val); |
|
|
|
} |
|
|
|
uint8_t fp16_t::ToUInt8() const { |
|
|
|
return Fp16ToUInt8(val); |
|
|
|
} |
|
|
|
int16_t fp16_t::ToInt16() const { |
|
|
|
return Fp16ToInt16(val); |
|
|
|
} |
|
|
|
uint16_t fp16_t::ToUInt16() const { |
|
|
|
return Fp16ToUInt16(val); |
|
|
|
} |
|
|
|
int32_t fp16_t::ToInt32() const { |
|
|
|
return Fp16ToInt32(val); |
|
|
|
} |
|
|
|
uint32_t fp16_t::ToUInt32() const { |
|
|
|
return Fp16ToUInt32(val); |
|
|
|
} |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY float fp16_t::ToFloat() const { return Fp16ToFloat(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY double fp16_t::ToDouble() const { return Fp16ToDouble(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int8_t fp16_t::ToInt8() const { return Fp16ToInt8(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint8_t fp16_t::ToUInt8() const { return Fp16ToUInt8(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int16_t fp16_t::ToInt16() const { return Fp16ToInt16(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint16_t fp16_t::ToUInt16() const { return Fp16ToUInt16(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int32_t fp16_t::ToInt32() const { return Fp16ToInt32(val); } |
|
|
|
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t fp16_t::ToUInt32() const { return Fp16ToUInt32(val); } |
|
|
|
} // namespace ge |