|
|
@@ -133,31 +133,49 @@ constexpr uint16_t kFp16MaxValidExp = 0x001E; |
|
|
|
constexpr uint16_t kFp16MaxMan = 0x03FF; |
|
|
|
/// @ingroup fp16 basic operator
/// @brief get sign of fp16
/// @param [in] x raw 16-bit pattern of the fp16 value
/// @return bit 15 of x moved down to bit 0: 1 when that bit is set, 0 otherwise
inline uint16_t FP16_EXTRAC_SIGN(const uint16_t x) {
  // x is promoted to int for the shift; masking with 1 leaves only the former
  // bit 15, so the explicit narrowing back to uint16_t cannot lose information.
  return static_cast<uint16_t>((x >> 15) & 1U);
}
|
|
|
/// @ingroup fp16 basic operator |
|
|
|
/// @brief get exponent of fp16 |
|
|
|
#define FP16_EXTRAC_EXP(x) (((x) >> 10) & kFp16MaxExp) |
|
|
|
inline uint16_t FP16_EXTRAC_EXP(const uint16_t x) { |
|
|
|
return (((x) >> 10) & kFp16MaxExp); |
|
|
|
} |
|
|
|
/// @ingroup fp16 basic operator
/// @brief get mantissa of fp16
/// @param [in] x raw 16-bit pattern of the fp16 value
/// @return the low 10 bits of x, with bit 10 (0x400) additionally set when the
///         5-bit field in bits 10..14 is non-zero
inline uint16_t FP16_EXTRAC_MAN(const uint16_t x) {
  // Stored fraction: the ten least-significant bits.
  const uint16_t fraction = static_cast<uint16_t>(x & 0x3FF);
  // A non-zero exponent field means the hidden leading bit has to be restored.
  const bool has_nonzero_exp = ((x >> 10) & 0x1F) != 0;
  return has_nonzero_exp ? static_cast<uint16_t>(fraction | 0x400) : fraction;
}
|
|
|
/// @ingroup fp16 basic operator |
|
|
|
/// @brief constructor of fp16 from sign exponent and mantissa |
|
|
|
#define FP16_CONSTRUCTOR(s, e, m) (((s) << kFp16SignIndex) | ((e) << kFp16ManLen) | ((m) & kFp16MaxMan)) |
|
|
|
inline uint16_t FP16_CONSTRUCTOR(const uint16_t s, const uint16_t e, const uint16_t m) { |
|
|
|
return (((s) << kFp16SignIndex) | ((e) << kFp16ManLen) | ((m) & kFp16MaxMan)); |
|
|
|
} |
|
|
|
/// @ingroup fp16 special value judgment |
|
|
|
/// @brief whether a fp16 is zero |
|
|
|
#define FP16_IS_ZERO(x) (((x) & kFp16AbsMax) == 0) |
|
|
|
inline bool FP16_IS_ZERO(const uint16_t x) { |
|
|
|
return (((x) & kFp16AbsMax) == 0); |
|
|
|
} |
|
|
|
/// @ingroup fp16 special value judgment |
|
|
|
/// @brief whether a fp16 is a denormalized value |
|
|
|
#define FP16_IS_DENORM(x) ((((x) & kFp16ExpMask) == 0)) |
|
|
|
inline bool FP16_IS_DENORM(const uint16_t x) { |
|
|
|
return ((((x) & kFp16ExpMask) == 0)); |
|
|
|
} |
|
|
|
/// @ingroup fp16 special value judgment |
|
|
|
/// @brief whether a fp16 is infinite |
|
|
|
#define FP16_IS_INF(x) (((x)&kFp16AbsMax) == kFp16ExpMask) |
|
|
|
inline bool FP16_IS_INF(const uint16_t x) { |
|
|
|
return (((x)&kFp16AbsMax) == kFp16ExpMask); |
|
|
|
} |
|
|
|
/// @ingroup fp16 special value judgment |
|
|
|
/// @brief whether a fp16 is NaN |
|
|
|
#define FP16_IS_NAN(x) ((((x) & kFp16ExpMask) == kFp16ExpMask) && ((x) & kFp16ManMask)) |
|
|
|
inline bool FP16_IS_NAN(const uint16_t x) { |
|
|
|
return ((((x) & kFp16ExpMask) == kFp16ExpMask) && ((x) & kFp16ManMask)); |
|
|
|
} |
|
|
|
/// @ingroup fp16 special value judgment |
|
|
|
/// @brief whether a fp16 is invalid |
|
|
|
#define FP16_IS_INVALID(x) (((x) & kFp16ExpMask) == kFp16ExpMask) |
|
|
|
inline bool FP16_IS_INVALID(const uint16_t x) { |
|
|
|
return (((x) & kFp16ExpMask) == kFp16ExpMask); |
|
|
|
} |
|
|
|
/// @ingroup fp32 basic parameter |
|
|
|
/// @brief fp32 exponent bias |
/// @note Held as a 16-bit unsigned compile-time constant with value 127; presumably the single-precision exponent bias - confirm against the fp32 conversion code.
|
|
|
constexpr uint16_t kFp32ExpBias = 127U; |
|
|
@@ -601,7 +619,7 @@ T GetManSum(int16_t e_a, const T &m_a, int16_t e_b, const T &m_b) { |
|
|
|
T sum = 0; |
|
|
|
if (e_a != e_b) { |
|
|
|
T m_tmp = 0; |
|
|
|
int16_t e_tmp = staic_cast<int16_t>(std::abs(e_a - e_b)); |
|
|
|
int16_t e_tmp = std::abs(e_a - e_b); |
|
|
|
if (e_a > e_b) { |
|
|
|
m_tmp = m_b; |
|
|
|
m_tmp = RightShift(m_tmp, e_tmp); |
|
|
|