You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

fp16_t.h 26 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_COMMON_FP16_T_H_
  17. #define GE_COMMON_FP16_T_H_
  18. #include <algorithm>
  19. #include <cmath>
  20. #include <cstdint>
  21. namespace ge {
  /// @ingroup fp16 basic parameter
  /// @brief Named dimension indexes; kDimN has the value N for N in [0, 16]
  using DimIndex = enum {
    kDim0 = 0,
    kDim1 = 1,
    kDim2 = 2,
    kDim3 = 3,
    kDim4 = 4,
    kDim5 = 5,
    kDim6 = 6,
    kDim7 = 7,
    kDim8 = 8,
    kDim9 = 9,
    kDim10 = 10,
    kDim11 = 11,
    kDim12 = 12,
    kDim13 = 13,
    kDim14 = 14,
    kDim15 = 15,
    kDim16 = 16,
  };
  /// @ingroup fp16 basic parameter
  /// @brief Named integer constants; each kBitShiftN enumerator has the value N
  using BitShift = enum {
    // 2 .. 16, every value
    kBitShift2 = 2, kBitShift3 = 3, kBitShift4 = 4, kBitShift5 = 5,
    kBitShift6 = 6, kBitShift7 = 7, kBitShift8 = 8, kBitShift9 = 9,
    kBitShift10 = 10, kBitShift11 = 11, kBitShift12 = 12, kBitShift13 = 13,
    kBitShift14 = 14, kBitShift15 = 15, kBitShift16 = 16,
    // 20 .. 64, selected values
    kBitShift20 = 20, kBitShift24 = 24, kBitShift27 = 27, kBitShift28 = 28,
    kBitShift31 = 31, kBitShift32 = 32, kBitShift36 = 36, kBitShift40 = 40,
    kBitShift44 = 44, kBitShift48 = 48, kBitShift52 = 52, kBitShift56 = 56,
    kBitShift59 = 59, kBitShift60 = 60, kBitShift63 = 63, kBitShift64 = 64,
    // larger values
    kBitShift128 = 128, kBitShift255 = 255, kBitShift256 = 256, kBitShift512 = 512,
    kBitShift768 = 768, kBitShift784 = 784, kBitShift1020 = 1020, kBitShift1024 = 1024,
    kBitShift3136 = 3136, kBitShift4096 = 4096, kBitShift6144 = 6144, kBitShift10240 = 10240,
    kBitShift65536 = 65536
  };
  /// @ingroup fp16 basic parameter
  /// @brief exponent bias of fp16
  constexpr uint16_t kFp16ExpBias = 15U;
  /// @ingroup fp16 basic parameter
  /// @brief number of exponent bits in fp16 (5)
  constexpr uint16_t kFp16ExpLen = 5U;
  /// @ingroup fp16 basic parameter
  /// @brief number of mantissa bits in fp16 (10)
  constexpr uint16_t kFp16ManLen = 10U;
  /// @ingroup fp16 basic parameter
  /// @brief bit index of the sign bit in fp16
  constexpr uint16_t kFp16SignIndex = 15U;
  /// @ingroup fp16 basic parameter
  /// @brief sign mask of fp16, 0x8000 (1 00000 0000000000)
  constexpr uint16_t kFp16SignMask = 1U << kFp16SignIndex;
  /// @ingroup fp16 basic parameter
  /// @brief exponent mask of fp16, 0x7C00 (0 11111 0000000000)
  constexpr uint16_t kFp16ExpMask = ((1U << kFp16ExpLen) - 1U) << kFp16ManLen;
  /// @ingroup fp16 basic parameter
  /// @brief mantissa mask of fp16, 0x03FF (0 00000 1111111111)
  constexpr uint16_t kFp16ManMask = (1U << kFp16ManLen) - 1U;
  /// @ingroup fp16 basic parameter
  /// @brief hidden (implicit) bit of the fp16 mantissa, 0x0400
  constexpr uint16_t kFp16ManHideBit = 1U << kFp16ManLen;
  /// @ingroup fp16 basic parameter
  /// @brief maximum value, 0x7BFF (0111 1011 1111 1111)
  constexpr uint16_t kFp16Max = 0x7BFFU;
  /// @ingroup fp16 basic parameter
  /// @brief minimum value, 0xFBFF (1111 1011 1111 1111): kFp16Max with the sign bit set
  constexpr uint16_t kFp16Min = kFp16SignMask | kFp16Max;
  /// @ingroup fp16 basic parameter
  /// @brief absolute-value mask, 0x7FFF (0111 1111 1111 1111): every bit except the sign
  constexpr uint16_t kFp16AbsMax = kFp16ExpMask | kFp16ManMask;
  /// @ingroup fp16 basic parameter
  /// @brief maximum value of the 5-bit exponent field, 0x1F = 31 (11111)
  constexpr uint16_t kFp16MaxExp = (1U << kFp16ExpLen) - 1U;
  /// @ingroup fp16 basic parameter
  /// @brief one below the maximum exponent field value, 0x1E = 30 (11110)
  constexpr uint16_t kFp16MaxValidExp = kFp16MaxExp - 1U;
  /// @ingroup fp16 basic parameter
  /// @brief maximum mantissa value of fp16, 0x03FF (11111 11111)
  constexpr uint16_t kFp16MaxMan = kFp16ManMask;
  /// @ingroup fp16 basic parameter
  /// @brief absolute minimum normal value of fp16
  /// (E=1,M=0 D=2^(-14)=0.00006103515625)
  /// @note Declared as float: an integer type cannot hold this fractional value
  /// (a uint16_t would truncate it to 0). The divisor is 1 << 14 == 2^14.
  constexpr float kFp16MinNormal = 1.0f / (1 << 14);
  /// @ingroup fp16 basic operator
  /// @brief get sign of fp16 (bit 15)
  #define FP16_EXTRAC_SIGN(x) (((x) >> 15) & 1)
  /// @ingroup fp16 basic operator
  /// @brief get the 5-bit exponent field of fp16 (bits 14-10)
  #define FP16_EXTRAC_EXP(x) (((x) >> 10) & kFp16MaxExp)
  /// @ingroup fp16 basic operator
  /// @brief get mantissa of fp16: the low 10 bits, with the hidden bit (0x400)
  /// added when the exponent field is non-zero. Note: x is evaluated twice.
  #define FP16_EXTRAC_MAN(x) ((((x) >> 0) & 0x3FF) | (((((x) >> 10) & 0x1F) > 0 ? 1 : 0) * 0x400))
  /// @ingroup fp16 basic operator
  /// @brief constructor of fp16 from sign exponent and mantissa
  #define FP16_CONSTRUCTOR(s, e, m) (((s) << kFp16SignIndex) | ((e) << kFp16ManLen) | ((m)&kFp16MaxMan))
  /// @ingroup fp16 special value judgment
  /// @brief whether a fp16 is zero (all bits except the sign are 0)
  #define FP16_IS_ZERO(x) (((x)&kFp16AbsMax) == 0)
  /// @ingroup fp16 special value judgment
  /// @brief whether a fp16 is a denormalized value (exponent field is 0;
  /// this test is also true for zero)
  #define FP16_IS_DENORM(x) ((((x)&kFp16ExpMask) == 0))
  /// @ingroup fp16 special value judgment
  /// @brief whether a fp16 is infinite (exponent all ones, mantissa 0)
  #define FP16_IS_INF(x) (((x)&kFp16AbsMax) == kFp16ExpMask)
  /// @ingroup fp16 special value judgment
  /// @brief whether a fp16 is NaN (exponent all ones, mantissa non-zero).
  /// The argument is parenthesized so expressions such as `a | b` are
  /// handled correctly. Note: x is evaluated twice.
  #define FP16_IS_NAN(x) ((((x)&kFp16ExpMask) == kFp16ExpMask) && ((x)&kFp16ManMask))
  /// @ingroup fp16 special value judgment
  /// @brief whether a fp16 is invalid (exponent all ones, i.e. INF or NaN).
  /// The argument is parenthesized so expressions such as `a | b` are
  /// handled correctly.
  #define FP16_IS_INVALID(x) (((x)&kFp16ExpMask) == kFp16ExpMask)
  /// @ingroup fp32 basic parameter
  /// @brief exponent bias of fp32
  constexpr uint16_t kFp32ExpBias = 127U;
  /// @ingroup fp32 basic parameter
  /// @brief number of exponent bits in float/fp32 (8)
  constexpr uint16_t kFp32ExpLen = 8U;
  /// @ingroup fp32 basic parameter
  /// @brief number of mantissa bits in float/fp32 (23)
  constexpr uint16_t kFp32ManLen = 23U;
  /// @ingroup fp32 basic parameter
  /// @brief bit index of the sign bit in float/fp32
  constexpr uint16_t kFp32SignIndex = 31U;
  /// @ingroup fp32 basic parameter
  /// @brief sign mask of fp32, 0x80000000
  constexpr uint32_t kFp32SignMask = uint32_t{1} << kFp32SignIndex;
  /// @ingroup fp32 basic parameter
  /// @brief exponent mask of fp32, 0x7F800000
  constexpr uint32_t kFp32ExpMask = ((uint32_t{1} << kFp32ExpLen) - 1U) << kFp32ManLen;
  /// @ingroup fp32 basic parameter
  /// @brief mantissa mask of fp32, 0x007FFFFF
  constexpr uint32_t kFp32ManMask = (uint32_t{1} << kFp32ManLen) - 1U;
  /// @ingroup fp32 basic parameter
  /// @brief hidden (implicit) bit of the fp32 mantissa, 0x00800000
  constexpr uint32_t kFp32ManHideBit = uint32_t{1} << kFp32ManLen;
  /// @ingroup fp32 basic parameter
  /// @brief absolute-value mask, 0x7FFFFFFF: every bit except the sign
  constexpr uint32_t kFp32AbsMax = kFp32ExpMask | kFp32ManMask;
  /// @ingroup fp32 basic parameter
  /// @brief maximum value of the 8-bit exponent field, 255 (1111 1111)
  constexpr uint32_t kFp32MaxExp = (uint32_t{1} << kFp32ExpLen) - 1U;
  /// @ingroup fp32 basic parameter
  /// @brief maximum mantissa value of fp32, 0x7FFFFF (23 one bits)
  constexpr uint32_t kFp32MaxMan = kFp32ManMask;
  /// @ingroup fp32 special value judgment
  /// @brief whether a fp32 is NaN (exponent all ones, mantissa non-zero).
  /// The argument is parenthesized so expressions such as `a | b` are
  /// handled correctly. Note: x is evaluated twice.
  #define FP32_IS_NAN(x) ((((x)&kFp32ExpMask) == kFp32ExpMask) && ((x)&kFp32ManMask))
  /// @ingroup fp32 special value judgment
  /// @brief whether a fp32 is infinite (exponent all ones, mantissa 0).
  /// The argument is parenthesized so expressions such as `a | b` are
  /// handled correctly. Note: x is evaluated twice.
  #define FP32_IS_INF(x) ((((x)&kFp32ExpMask) == kFp32ExpMask) && (!((x)&kFp32ManMask)))
  /// @ingroup fp32 special value judgment
  /// @brief whether a fp32 is a denormalized value (exponent field is 0;
  /// this test is also true for zero)
  #define FP32_IS_DENORM(x) ((((x)&kFp32ExpMask) == 0))
  /// @ingroup fp32 basic operator
  /// @brief get sign of fp32
  #define FP32_EXTRAC_SIGN(x) (((x) >> kFp32SignIndex) & 1)
  /// @ingroup fp32 basic operator
  /// @brief get the exponent field of fp32
  #define FP32_EXTRAC_EXP(x) (((x)&kFp32ExpMask) >> kFp32ManLen)
  /// @ingroup fp32 basic operator
  /// @brief get mantissa of fp32: the low 23 bits, with the hidden bit added
  /// when the exponent field is non-zero. Note: x is evaluated twice.
  #define FP32_EXTRAC_MAN(x) (((x)&kFp32ManMask) | (((((x) >> kFp32ManLen) & kFp32MaxExp) > 0 ? 1 : 0) * kFp32ManHideBit))
  /// @ingroup fp32 basic operator
  /// @brief constructor of fp32 from sign exponent and mantissa
  #define FP32_CONSTRUCTOR(s, e, m) (((s) << kFp32SignIndex) | ((e) << kFp32ManLen) | ((m)&kFp32MaxMan))
  /// @ingroup fp64 basic parameter
  /// @brief exponent bias of fp64
  constexpr uint16_t kFp64ExpBias = 1023U;
  /// @ingroup fp64 basic parameter
  /// @brief number of exponent bits in double/fp64 (11)
  constexpr uint16_t kFp64ExpLen = 11U;
  /// @ingroup fp64 basic parameter
  /// @brief number of mantissa bits in double/fp64 (52)
  constexpr uint16_t kFp64ManLen = 52U;
  /// @ingroup fp64 basic parameter
  /// @brief bit index of the sign bit in double/fp64 (63)
  constexpr uint16_t kFp64SignIndex = 63U;
  /// @ingroup fp64 basic parameter
  /// @brief sign mask of fp64, 0x8000000000000000 (1 followed by 63 zero bits)
  constexpr uint64_t kFp64SignMask = uint64_t{1} << kFp64SignIndex;
  /// @ingroup fp64 basic parameter
  /// @brief exponent mask of fp64, 0x7FF0000000000000
  /// (0, then 11 one bits, then 52 zero bits)
  constexpr uint64_t kFp64ExpMask = ((uint64_t{1} << kFp64ExpLen) - 1U) << kFp64ManLen;
  /// @ingroup fp64 basic parameter
  /// @brief mantissa mask of fp64, 0x000FFFFFFFFFFFFF (52 one bits)
  constexpr uint64_t kFp64ManMask = (uint64_t{1} << kFp64ManLen) - 1U;
  /// @ingroup fp64 basic parameter
  /// @brief hidden (implicit) bit of the fp64 mantissa, 0x0010000000000000
  /// (1 followed by 52 zero bits)
  constexpr uint64_t kFp64ManHideBit = uint64_t{1} << kFp64ManLen;
  /// @ingroup fp64 basic parameter
  /// @brief absolute-value mask, 0x7FFFFFFFFFFFFFFF (0 followed by 63 one bits)
  constexpr uint64_t kFp64AbsMax = kFp64ExpMask | kFp64ManMask;
  /// @ingroup fp64 basic parameter
  /// @brief maximum value of the 11-bit exponent field, 2047 (111 1111 1111)
  constexpr uint64_t kFp64MaxExp = (uint64_t{1} << kFp64ExpLen) - 1U;
  /// @ingroup fp64 basic parameter
  /// @brief maximum mantissa value of fp64, 0x000FFFFFFFFFFFFF (52 one bits).
  /// Equals kFp64ManMask; a literal with fewer F digits would cover only
  /// part of the 52-bit mantissa.
  constexpr uint64_t kFp64MaxMan = kFp64ManMask;
  /// @ingroup fp64 special value judgment
  /// @brief whether a fp64 is NaN (exponent all ones, mantissa non-zero).
  /// The argument is parenthesized so expressions such as `a | b` are
  /// handled correctly. Note: x is evaluated twice.
  #define FP64_IS_NAN(x) ((((x)&kFp64ExpMask) == kFp64ExpMask) && ((x)&kFp64ManMask))
  /// @ingroup fp64 special value judgment
  /// @brief whether a fp64 is infinite (exponent all ones, mantissa 0).
  /// The argument is parenthesized so expressions such as `a | b` are
  /// handled correctly. Note: x is evaluated twice.
  #define FP64_IS_INF(x) ((((x)&kFp64ExpMask) == kFp64ExpMask) && (!((x)&kFp64ManMask)))
  /// @ingroup integer special value judgment
  /// @brief maximum positive value of int8_t, 0x7F (0111 1111)
  constexpr int8_t kInt8Max = INT8_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum value of an 8-bit datum, 0xFF (1111 1111)
  constexpr uint8_t kBitLen8Max = UINT8_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum positive value of int16_t, 0x7FFF
  constexpr int16_t kInt16Max = INT16_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum value of a 16-bit datum, 0xFFFF
  constexpr uint16_t kBitLen16Max = UINT16_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum positive value of int32_t, 0x7FFFFFFF
  constexpr int32_t kInt32Max = INT32_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum value of a 32-bit datum, 0xFFFFFFFF
  constexpr uint32_t kBitLen32Max = UINT32_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum positive value of int64_t, 0x7FFFFFFFFFFFFFFF
  constexpr int64_t kInt64Max = INT64_MAX;
  /// @ingroup integer special value judgment
  /// @brief maximum value of a 64-bit datum, 0xFFFFFFFFFFFFFFFF
  constexpr uint64_t kBitLen64Max = UINT64_MAX;
  /// @ingroup fp16_t enum
  /// @brief rounding mode applied to the last valid digit
  enum TagFp16RoundMode {
    /// round to nearest even
    kRoundToNearest = 0,
    /// round by truncation
    kRoundByTruncated = 1,
    /// reserved
    kRoundModeReserved = 2,
  };
  /// @ingroup fp16_t
  /// @brief Half precision float
  ///         bit15:    1 bit SIGN   +---+-----+------------+
  ///         bit14-10: 5 bit EXP    | S |EEEEE|MM MMMM MMMM|
  ///         bit0-9:   10 bit MAN   +---+-----+------------+
  struct TagFp16 {
    /// 16-bit storage for the encoded half precision value
    uint16_t val;

    /// @ingroup fp16_t constructor
    /// @brief Default constructor; sets val to 0
    TagFp16() : val(0x0u) {}
    /// @ingroup fp16_t constructor
    /// @brief Construct from an uint16_t; ui_val is copied into val as-is
    TagFp16(const uint16_t &ui_val) : val(ui_val) {}
    /// @ingroup fp16_t constructor
    /// @brief Copy constructor; copies val from another fp16_t
    TagFp16(const TagFp16 &fp) : val(fp.val) {}

    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t object to be added
    /// @brief Overload of the addition operator for fp16_t
    /// @return fp16_t result of adding this and fp
    TagFp16 operator+(const TagFp16 fp);
    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t object to be subtracted
    /// @brief Overload of the subtraction operator for fp16_t
    /// @return fp16_t result of subtracting fp from this
    TagFp16 operator-(const TagFp16 fp);
    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t object to be multiplied
    /// @brief Overload of the multiplication operator for fp16_t
    /// @return fp16_t result of multiplying this and fp
    TagFp16 operator*(const TagFp16 fp);
    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t divisor
    /// @brief Overload of the division operator for fp16_t
    /// @return fp16_t result of dividing this by fp
    TagFp16 operator/(const TagFp16 fp);

    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t object to be added
    /// @brief Overload of the += operator for fp16_t addition
    /// @return fp16_t result of adding this and fp (returned by value)
    TagFp16 operator+=(const TagFp16 fp);
    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t object to be subtracted
    /// @brief Overload of the -= operator for fp16_t subtraction
    /// @return fp16_t result of subtracting fp from this (returned by value)
    TagFp16 operator-=(const TagFp16 fp);
    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t object to be multiplied
    /// @brief Overload of the *= operator for fp16_t multiplication
    /// @return fp16_t result of multiplying this and fp (returned by value)
    TagFp16 operator*=(const TagFp16 fp);
    /// @ingroup fp16_t math operator
    /// @param [in] fp fp16_t divisor
    /// @brief Overload of the /= operator for fp16_t division
    /// @return fp16_t result of dividing this by fp (returned by value)
    TagFp16 operator/=(const TagFp16 fp);

    /// @ingroup fp16_t math compare operator
    /// @param [in] fp fp16_t object to be compared
    /// @brief Equality comparison of two fp16_t values
    /// @return boolean result of the if-equal comparison of this and fp
    bool operator==(const TagFp16 &fp) const;
    /// @ingroup fp16_t math compare operator
    /// @param [in] fp fp16_t object to be compared
    /// @brief Inequality comparison of two fp16_t values
    /// @return boolean result of the not-equal comparison of this and fp
    bool operator!=(const TagFp16 &fp) const;
    /// @ingroup fp16_t math compare operator
    /// @param [in] fp fp16_t object to be compared
    /// @brief Greater-than comparison of two fp16_t values
    /// @return boolean result of the greater-than comparison of this and fp
    bool operator>(const TagFp16 &fp) const;
    /// @ingroup fp16_t math compare operator
    /// @param [in] fp fp16_t object to be compared
    /// @brief Greater-equal comparison of two fp16_t values
    /// @return boolean result of the greater-equal comparison of this and fp
    bool operator>=(const TagFp16 &fp) const;
    /// @ingroup fp16_t math compare operator
    /// @param [in] fp fp16_t object to be compared
    /// @brief Less-than comparison of two fp16_t values
    /// @return boolean result of the less-than comparison of this and fp
    bool operator<(const TagFp16 &fp) const;
    /// @ingroup fp16_t math compare operator
    /// @param [in] fp fp16_t object to be compared
    /// @brief Less-equal comparison of two fp16_t values
    /// @return boolean result of the less-equal comparison of this and fp
    bool operator<=(const TagFp16 &fp) const;

    /// @ingroup fp16_t math evaluation operator
    /// @param [in] fp fp16_t object to copy from
    /// @brief Copy assignment from another fp16_t
    /// @return reference to an fp16_t holding the value of fp
    TagFp16 &operator=(const TagFp16 &fp);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] f_val float value to be converted to fp16_t
    /// @brief Assignment that converts a float to fp16_t
    /// @return fp16_t result from f_val
    TagFp16 &operator=(const float &f_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] d_val double value to be converted to fp16_t
    /// @brief Assignment that converts a double to fp16_t
    /// @return fp16_t result from d_val
    TagFp16 &operator=(const double &d_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] i_val int8_t value to be converted to fp16_t
    /// @brief Assignment that converts an int8_t to fp16_t
    /// @return fp16_t result from i_val
    TagFp16 &operator=(const int8_t &i_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] ui_val uint8_t value to be converted to fp16_t
    /// @brief Assignment that converts an uint8_t to fp16_t
    /// @return fp16_t result from ui_val
    TagFp16 &operator=(const uint8_t &ui_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] i_val int16_t value to be converted to fp16_t
    /// @brief Assignment that converts an int16_t to fp16_t
    /// @return fp16_t result from i_val
    TagFp16 &operator=(const int16_t &i_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] ui_val uint16_t value to be converted to fp16_t
    /// @brief Assignment that converts an uint16_t to fp16_t
    /// @return fp16_t result from ui_val
    TagFp16 &operator=(const uint16_t &ui_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] i_val int32_t value to be converted to fp16_t
    /// @brief Assignment that converts an int32_t to fp16_t
    /// @return fp16_t result from i_val
    TagFp16 &operator=(const int32_t &i_val);
    /// @ingroup fp16_t math evaluation operator
    /// @param [in] ui_val uint32_t value to be converted to fp16_t
    /// @brief Assignment that converts an uint32_t to fp16_t
    /// @return fp16_t result from ui_val
    TagFp16 &operator=(const uint32_t &ui_val);

    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to float/fp32
    /// @return float/fp32 value of fp16_t
    operator float() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to double/fp64
    /// @return double/fp64 value of fp16_t
    operator double() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to int8_t
    /// @return int8_t value of fp16_t
    operator int8_t() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to uint8_t
    /// @return uint8_t value of fp16_t
    operator uint8_t() const;
    /// @ingroup fp16_t conversion
    /// @brief Conversion operator from fp16_t to int16_t
    /// @return int16_t value of fp16_t
    operator int16_t() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to uint16_t
    /// @return uint16_t value of fp16_t
    operator uint16_t() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to int32_t
    /// @return int32_t value of fp16_t
    operator int32_t() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to uint32_t
    /// @return uint32_t value of fp16_t
    operator uint32_t() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to int64_t
    /// @return int64_t value of fp16_t
    operator int64_t() const;
    /// @ingroup fp16_t math conversion
    /// @brief Conversion operator from fp16_t to uint64_t
    /// @return uint64_t value of fp16_t
    operator uint64_t() const;

    /// @ingroup fp16_t judgment method
    /// @brief whether this fp16_t is infinite
    /// @return 1: +INF, -1: -INF, 0: not INF
    int IsInf();

    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to float/fp32
    /// @return float/fp32 value of fp16_t
    float ToFloat() const;
    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to double/fp64
    /// @return double/fp64 value of fp16_t
    double ToDouble() const;
    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to int8_t
    /// @return int8_t value of fp16_t
    int8_t ToInt8() const;
    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to uint8_t
    /// @return uint8_t value of fp16_t
    uint8_t ToUInt8() const;
    /// @ingroup fp16_t conversion
    /// @brief Convert fp16_t to int16_t
    /// @return int16_t value of fp16_t
    int16_t ToInt16() const;
    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to uint16_t
    /// @return uint16_t value of fp16_t
    uint16_t ToUInt16() const;
    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to int32_t
    /// @return int32_t value of fp16_t
    int32_t ToInt32() const;
    /// @ingroup fp16_t math conversion
    /// @brief Convert fp16_t to uint32_t
    /// @return uint32_t value of fp16_t
    uint32_t ToUInt32() const;
  };
  /// @ingroup fp16_t
  /// @brief Public alias for the half precision float type
  using fp16_t = TagFp16;
  /// @ingroup fp16_t public method
  /// @param [in] val fp16 value to decompose (presumably the raw 16-bit encoding;
  ///                 the definition is not in this header - confirm there)
  /// @param [in|out] s sign of fp16_t object
  /// @param [in|out] e exponent of fp16_t object
  /// @param [in|out] m mantissa of fp16_t object
  /// @brief Extract the sign, exponent and mantissa of a fp16_t object
  void ExtractFp16(const uint16_t &val, uint16_t &s, int16_t &e, uint16_t &m);
  /// @ingroup fp16_t public method
  /// @param [in] negative whether the sign is negative
  /// @param [in|out] man mantissa to be complemented in place
  /// @brief Replace a mantissa by its two's complement (add one to its
  ///        radix-minus-one complement) when negative is true; otherwise
  ///        leave it unchanged
  template <typename T>
  void ReverseMan(bool negative, T &man) {
    if (!negative) {
      return;
    }
    man = ~man + 1;
  }
  /// @ingroup fp16_t public method
  /// @param [in] e_a exponent of one fp16_t/float number
  /// @param [in] m_a mantissa of one fp16_t/float number
  /// @param [in] e_b exponent of another fp16_t/float number
  /// @param [in] m_b mantissa of another fp16_t/float number
  /// @brief Choose the mantissa to be shifted right: the one whose exponent is
  ///        the smaller of the two
  /// @return m_b when e_a is greater than e_b, otherwise m_a (including when
  ///         the exponents are equal)
  template <typename T>
  T MinMan(const int16_t &e_a, T &m_a, const int16_t &e_b, T &m_b) {
    if (e_a > e_b) {
      return m_b;
    }
    return m_a;
  }
  /// @ingroup fp16_t public method
  /// @param [in] man mantissa to operate on
  /// @param [in] shift number of bits to shift right; values <= 0 leave man unchanged
  /// @brief Right shift a mantissa one bit at a time, re-inserting the original
  ///        most significant bit after every step so that it is propagated
  ///        into the vacated positions
  /// @return the right-shifted mantissa
  template <typename T>
  T RightShift(T man, int16_t shift) {
    // Index of the most significant bit of T (one byte has 8 bits).
    const int top_index = static_cast<int>(sizeof(T) * 8) - 1;
    const T top_bit = static_cast<T>(1u) << static_cast<unsigned int>(top_index);
    for (int16_t remaining = shift; remaining > 0; --remaining) {
      man = ((man >> 1) | (man & top_bit));
    }
    return man;
  }
  /// @ingroup fp16_t public method
  /// @param [in] e_a exponent of one temp fp16_t number
  /// @param [in] m_a mantissa of one temp fp16_t number
  /// @param [in] e_b exponent of another temp fp16_t number
  /// @param [in] m_b mantissa of another temp fp16_t number
  /// @brief Get the mantissa sum of two temp fp16_t numbers, T support types:
  ///        uint16_t/uint32_t/uint64_t. When the exponents differ, the mantissa
  ///        belonging to the smaller exponent is first passed through
  ///        RightShift by the exponent difference.
  /// @return the mantissa sum
  template <typename T>
  T GetManSum(int16_t e_a, const T &m_a, int16_t e_b, const T &m_b) {
    if (e_a == e_b) {
      // Same exponent: the mantissas can be added directly.
      return m_a + m_b;
    }
    const int16_t gap = std::abs(e_a - e_b);
    if (e_a > e_b) {
      return m_a + RightShift(m_b, gap);
    }
    return RightShift(m_a, gap) + m_b;
  }
  /// @ingroup fp16_t public method
  /// @param [in] bit0 whether the last preserved bit is 1 before round
  /// @param [in] bit1 whether the highest discarded bit is 1
  /// @param [in] bitLeft whether any discarded bit below the highest one is set
  /// @param [in] man mantissa of a fp16_t or float number, support types: uint16_t/uint32_t/uint64_t
  /// @param [in] shift number of bits to discard (default 0)
  /// @brief Round a fp16_t or float mantissa to the nearest value: shift man
  ///        right by shift bits and add one when bit1 is set and at least one
  ///        of bitLeft or bit0 is set
  /// @return the shifted and rounded mantissa
  template <typename T>
  T ManRoundToNearest(bool bit0, bool bit1, bool bitLeft, T man, uint16_t shift = 0) {
    const bool round_up = bit1 && (bitLeft || bit0);
    return static_cast<T>((man >> shift) + (round_up ? 1 : 0));
  }
  /// @ingroup fp16_t public method
  /// @param [in] man mantissa of a float number, support types: uint16_t/uint32_t/uint64_t
  /// @brief Get the bit length of a mantissa: the number of single-bit right
  ///        shifts needed until the value becomes 0
  /// @return bit length of man (0 when man is 0)
  template <typename T>
  int16_t GetManBitLength(T man) {
    int16_t length = 0;
    for (; man != 0; man >>= 1) {
      ++length;
    }
    return length;
  }
  592. } // namespace ge
  593. #endif // GE_COMMON_FP16_T_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。