
nn_ops.h

/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 * \file nn_ops.h
 * \brief
 */
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

#include "graph/operator_reg.h"
#include "nn_pooling_ops.h"

namespace ge {
/**
 * @brief Says whether the targets are in the top "k" predictions. \n
 * @par Inputs:
 * Three inputs, including:
 * @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
 * @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
 * @li k: A 1D Tensor of the same type as "targets".
 * Specifies the number of top elements to look at for computing precision. \n
 * @par Outputs:
 * precision: A Tensor of type bool. \n
 * @attention Constraints:
 * @li "targets" must be a non-negative tensor.
 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator InTopKV2.
 */
REG_OP(InTopKV2)
    .INPUT(predictions, TensorType({DT_FLOAT}))
    .INPUT(targets, TensorType(IndexNumberType))
    .INPUT(k, TensorType({IndexNumberType}))
    .OUTPUT(precision, TensorType({DT_BOOL}))
    .OP_END_FACTORY_REG(InTopKV2)
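
/*
 * Illustrative sketch (not part of the original header): a host-side reference for the InTopKV2
 * semantics described above, assuming row-major "predictions" of shape [batch_size, classes].
 * All names below are hypothetical; the snippet only documents the math the device operator
 * implements.
 *
 *   #include <cstdint>
 *   #include <vector>
 *
 *   // precision[i] is true iff predictions[i][targets[i]] ranks among the k largest entries of row i.
 *   std::vector<bool> InTopKV2Reference(const std::vector<std::vector<float>> &predictions,
 *                                       const std::vector<int32_t> &targets, int32_t k) {
 *     std::vector<bool> precision(predictions.size(), false);
 *     for (size_t i = 0; i < predictions.size(); ++i) {
 *       const float target_value = predictions[i][targets[i]];
 *       int32_t num_larger = 0;
 *       for (float v : predictions[i]) {
 *         if (v > target_value) {
 *           ++num_larger;
 *         }
 *       }
 *       precision[i] = (num_larger < k);
 *     }
 *     return precision;
 *   }
 */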

/**
 * @brief Performs batch normalization. \n
 * @par Inputs:
 * Five inputs, including: (NHWC, NCHW supported)
 * @li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D.
 * @li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the scaling factor.
 * @li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the offset.
 * @li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the mean used for inference. Must be "None" if the operation is used for training.
 * @li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the variance used for inference. Must be "None" if the operation is used for training. \n
 * @par Attributes:
 * @li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
 * @li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
 * @li is_training: An optional bool, specifying whether the operation is used for training or inference. Defaults to "True". \n
 * @par Outputs:
 * Five outputs, including: (NHWC, NCHW supported)
 * @li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D.
 * @li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the mean of "x".
 * @li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the variance of "x".
 * @li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
 * @li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 * Specifies the variance of "x" for gradient computation. Pass "None" to skip this output. \n
 * @attention Constraints:
 * @li If the operation is used for inference and the outputs "reserve_space_1" and "reserve_space_2" are available,
 * then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
 * @li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. \n
 */
REG_OP(FusedBatchNormV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scale, TensorType({DT_FLOAT}))
    .INPUT(offset, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
    .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
    .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
    .ATTR(epsilon, Float, 0.0001)
    .ATTR(data_format, String, "NHWC")
    .ATTR(is_training, Bool, true)
    .OP_END_FACTORY_REG(FusedBatchNormV2)
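
/*
 * Illustrative sketch (not part of the original header): the per-element arithmetic the
 * documentation above describes, shown for inference mode (is_training = false). The value "x"
 * belongs to one channel of an NHWC/NCHW tensor, and "scale", "offset", "mean" and "variance"
 * are the per-channel values of the corresponding 1D inputs. Names are hypothetical.
 *
 *   #include <cmath>
 *
 *   // y = scale * (x - mean) / sqrt(variance + epsilon) + offset
 *   inline float FusedBatchNormInferElement(float x, float scale, float offset,
 *                                           float mean, float variance, float epsilon = 0.0001f) {
 *     return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
 *   }
 *
 * In training mode, "mean" and "variance" are not taken from the inputs; they are computed per
 * channel over the current batch and returned as "batch_mean" and "batch_variance".
 */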

/**
 * @brief Large amount of data sort. First operator of TopK.
 * @par Inputs:
 * Two inputs, including:
 * @li input_data: A Tensor. Data to be sorted. Supports float16 or float32.
 * @li input_index: A Tensor. Range(0, 2048). Supports float16 or int32.
 * @par Attributes:
 * @li k_num: Int. Number of elements to be sorted.
 * @li largest: An optional bool, controls whether to return the largest or smallest elements. Defaults to true.
 * If "True", the "k" largest elements are returned in descending order.
 * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * One output, including:
 * output_proposal: A Tensor. Data type and format are the same as input_data. Proposal sorted for each channel.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
REG_OP(SegmentSort)
    .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(input_index, TensorType({DT_FLOAT16, DT_INT32}))
    .OUTPUT(output_proposal, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(k_num, Int)
    .ATTR(largest, Bool, true)
    .OP_END_FACTORY_REG(SegmentSort)

/**
 * @brief Large amount of data sort. Second operator of TopK.
 * @par Inputs:
 * One input, including:
 * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16 or float32.
 * @par Attributes:
 * @li k_num: Int. Number of elements to be sorted.
 * @li include_index: Bool. If "include_index" is false, only the proposal is output; if true, data and index are output.
 * @li largest: An optional bool, controls whether to return the largest or smallest elements. Defaults to true.
 * If "True", the "k" largest elements are returned in descending order.
 * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * Two outputs, including:
 * @li output_proposal: A Tensor. Data type and format are the same as input_data. Proposal sorted for each channel.
 * @li output_index: A Tensor. If include_index is true, the index is output.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
REG_OP(MultiMerge)
    .INPUT(input_proposal, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_proposal, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_index, TensorType({DT_INT32}))
    .REQUIRED_ATTR(k_num, Int)
    .ATTR(include_index, Bool, false)
    .ATTR(largest, Bool, true)
    .OP_END_FACTORY_REG(MultiMerge)

/**
 * @brief Large amount of data sort. Third operator of TopK.
 * @par Inputs:
 * One input, including:
 * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16.
 * @par Attributes:
 * @li k_num: Int. Number of elements to be sorted.
 * @li largest: An optional bool, controls whether to return the largest or smallest elements. Defaults to true.
 * If "True", the "k" largest elements are returned in descending order.
 * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * Two outputs, including:
 * @li output_data: A Tensor. Data type and format are the same as input_data. Sorted data.
 * @li output_index: A Tensor of type int32. Data index.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
REG_OP(SingleMerge)
    .INPUT(input_proposal, TensorType({DT_FLOAT16}))
    .OUTPUT(output_data, TensorType({DT_FLOAT16}))
    .OUTPUT(output_index, TensorType({DT_INT32}))
    .REQUIRED_ATTR(k_num, Int)
    .ATTR(largest, Bool, true)
    .OP_END_FACTORY_REG(SingleMerge)
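
/*
 * Illustrative sketch (not part of the original header): SegmentSort, MultiMerge and SingleMerge
 * together implement a large-scale TopK -- SegmentSort sorts each channel independently,
 * MultiMerge merges groups of sorted channels (possibly over several rounds), and SingleMerge
 * produces the final k values and indices. A minimal host-side analogue of the data flow, with
 * hypothetical names and largest = true:
 *
 *   #include <algorithm>
 *   #include <functional>
 *   #include <vector>
 *
 *   std::vector<float> SegmentedTopK(std::vector<std::vector<float>> segments, size_t k_num) {
 *     std::vector<float> merged;
 *     for (auto &seg : segments) {  // SegmentSort: sort each channel in descending order
 *       std::sort(seg.begin(), seg.end(), std::greater<float>());
 *       merged.insert(merged.end(), seg.begin(), seg.begin() + std::min(k_num, seg.size()));
 *     }
 *     // MultiMerge / SingleMerge: merge the per-channel candidates and keep the k_num largest
 *     std::sort(merged.begin(), merged.end(), std::greater<float>());
 *     if (merged.size() > k_num) {
 *       merged.resize(k_num);
 *     }
 *     return merged;
 *   }
 */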

/**
 * @brief MultiHeadAttention.
 * @par Inputs:
 * Thirteen inputs, including:
 * @li query: A Tensor. Query of Attention. Supports float16.
 * @li key: A Tensor. Key of Attention. Supports float16.
 * @li value: A Tensor. Value of Attention. Supports float16.
 * @li query_weight: A Tensor. QueryWeight of Attention. Supports float16.
 * @li key_weight: A Tensor. KeyWeight of Attention. Supports float16.
 * @li value_weight: A Tensor. ValueWeight of Attention. Supports float16.
 * @li attn_mask: A Tensor. AttentionMask of Attention. Supports float16.
 * @li out_proj_weight: A Tensor. OutProjWeight of Attention. Supports float16.
 * @li query_bias: An optional Tensor. QueryBias of Attention. Supports float16.
 * @li key_bias: An optional Tensor. KeyBias of Attention. Supports float16.
 * @li value_bias: An optional Tensor. ValueBias of Attention. Supports float16.
 * @li out_proj_bias: An optional Tensor. OutProjBias of Attention. Supports float16.
 * @li dropout_mask_input: An optional Tensor. DropOutMask of Attention. Supports uint8. \n
 * @par Attributes:
 * @li attn_head_num: Number of attention heads. Supports int.
 * @li attn_dim_per_head: Attention dim of a head. Supports int.
 * @li src_len: Source length. Supports int.
 * @li tgt_len: Target length. Supports int.
 * @li keep_prob: Dropout keep probability. Supports float.
 * @li softmax_use_float: Whether SoftMax uses float32 to keep precision. Supports bool. \n
 * @par Outputs:
 * Eight outputs, including:
 * @li y: A Tensor. Result of Attention. Supports float16.
 * @li dropout_mask: DropOutMask of Attention. Supports uint8.
 * @li query_res: Query result of Attention. Supports float16.
 * @li key_res: Key result of Attention. Supports float16.
 * @li value_res: Value result of Attention. Supports float16.
 * @li attn_scores: Attention scores of SoftMax. Supports float16, float.
 * @li attn_res: Attention result of SoftMax. Supports float16.
 * @li context: Context of Attention. Supports float16.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
REG_OP(MultiHeadAttention)
    .INPUT(query, TensorType({DT_FLOAT16}))
    .INPUT(key, TensorType({DT_FLOAT16}))
    .INPUT(value, TensorType({DT_FLOAT16}))
    .INPUT(query_weight, TensorType({DT_FLOAT16}))
    .INPUT(key_weight, TensorType({DT_FLOAT16}))
    .INPUT(value_weight, TensorType({DT_FLOAT16}))
    .INPUT(attn_mask, TensorType({DT_FLOAT16}))
    .INPUT(out_proj_weight, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(query_bias, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(key_bias, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(value_bias, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(out_proj_bias, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(dropout_mask_input, TensorType({DT_UINT8}))
    .OUTPUT(y, TensorType({DT_FLOAT16}))
    .OUTPUT(dropout_mask, TensorType({DT_UINT8}))
    .OUTPUT(query_res, TensorType({DT_FLOAT16}))
    .OUTPUT(key_res, TensorType({DT_FLOAT16}))
    .OUTPUT(value_res, TensorType({DT_FLOAT16}))
    .OUTPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(attn_res, TensorType({DT_FLOAT16}))
    .OUTPUT(context, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(attn_head_num, Int)
    .REQUIRED_ATTR(attn_dim_per_head, Int)
    .REQUIRED_ATTR(src_len, Int)
    .REQUIRED_ATTR(tgt_len, Int)
    .REQUIRED_ATTR(keep_prob, Float)
    .REQUIRED_ATTR(softmax_use_float, Bool)
    .OP_END_FACTORY_REG(MultiHeadAttention)
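
/*
 * Illustrative sketch (not part of the original header): for each head h, the outputs documented
 * above follow standard scaled dot-product attention over the projected inputs. Written as
 * pseudocode, with hypothetical per-head slices of the weight and bias tensors:
 *
 *   Q_h = query * query_weight_h + query_bias_h   // query_res, shape [tgt_len, attn_dim_per_head]
 *   K_h = key   * key_weight_h   + key_bias_h     // key_res,   shape [src_len, attn_dim_per_head]
 *   V_h = value * value_weight_h + value_bias_h   // value_res, shape [src_len, attn_dim_per_head]
 *   attn_scores_h = softmax(Q_h * K_h^T / sqrt(attn_dim_per_head) + attn_mask)
 *   attn_res_h    = dropout(attn_scores_h, keep_prob)   // random mask emitted as "dropout_mask"
 *   context_h     = attn_res_h * V_h
 *   y = concat_over_heads(context_h) * out_proj_weight + out_proj_bias
 */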

/**
 * @brief MultiHeadAttentionGrad.
 * @par Inputs:
 * Fifteen inputs, including:
 * @li query: A Tensor. Query of Attention. Supports float16.
 * @li key: A Tensor. Key of Attention. Supports float16.
 * @li value: A Tensor. Value of Attention. Supports float16.
 * @li query_weight: A Tensor. QueryWeight of Attention. Supports float16.
 * @li key_weight: A Tensor. KeyWeight of Attention. Supports float16.
 * @li value_weight: A Tensor. ValueWeight of Attention. Supports float16.
 * @li out_proj_weight: A Tensor. OutProjWeight of Attention. Supports float16.
 * @li query_res: A Tensor. Query result of Attention. Supports float16.
 * @li key_res: A Tensor. Key result of Attention. Supports float16.
 * @li value_res: A Tensor. Value result of Attention. Supports float16.
 * @li attn_scores: A Tensor. Attention scores of Attention. Supports float16, float.
 * @li attn_res: A Tensor. Attention result of Attention. Supports float16.
 * @li context: A Tensor. Context of Attention. Supports float16.
 * @li y_grad: A Tensor. Grad of Attention. Supports float16.
 * @li dropout_mask: An optional Tensor. DropOutMask of Attention. Supports uint8. \n
 * @par Attributes:
 * @li attn_head_num: Number of attention heads. Supports int.
 * @li attn_dim_per_head: Attention dim of a head. Supports int.
 * @li src_len: Source length. Supports int.
 * @li tgt_len: Target length. Supports int.
 * @li keep_prob: Dropout keep probability. Supports float.
 * @li softmax_use_float: Whether SoftMax uses float32 to keep precision. Supports bool.
 * @li bias_grad_mask: Mask indicating which attention biases have gradients. Supports list of bool. \n
 * @par Outputs:
 * Eleven outputs, including:
 * @li query_weight_grad: QueryWeight grad of Attention. Supports float16.
 * @li key_weight_grad: KeyWeight grad of Attention. Supports float16.
 * @li value_weight_grad: ValueWeight grad of Attention. Supports float16.
 * @li out_proj_weight_grad: OutProjWeight grad of Attention. Supports float16.
 * @li query_grad: Query grad of Attention. Supports float16.
 * @li key_grad: Key grad of Attention. Supports float16.
 * @li value_grad: Value grad of Attention. Supports float16.
 * @li query_bias_grad: QueryBias grad of Attention. Supports float16.
 * @li key_bias_grad: KeyBias grad of Attention. Supports float16.
 * @li value_bias_grad: ValueBias grad of Attention. Supports float16.
 * @li out_proj_bias_grad: OutProjBias grad of Attention. Supports float16.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
REG_OP(MultiHeadAttentionGrad)
    .INPUT(query, TensorType({DT_FLOAT16}))
    .INPUT(key, TensorType({DT_FLOAT16}))
    .INPUT(value, TensorType({DT_FLOAT16}))
    .INPUT(query_weight, TensorType({DT_FLOAT16}))
    .INPUT(key_weight, TensorType({DT_FLOAT16}))
    .INPUT(value_weight, TensorType({DT_FLOAT16}))
    .INPUT(out_proj_weight, TensorType({DT_FLOAT16}))
    .INPUT(query_res, TensorType({DT_FLOAT16}))
    .INPUT(key_res, TensorType({DT_FLOAT16}))
    .INPUT(value_res, TensorType({DT_FLOAT16}))
    .INPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(attn_res, TensorType({DT_FLOAT16}))
    .INPUT(context, TensorType({DT_FLOAT16}))
    .INPUT(y_grad, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8}))
    .OUTPUT(query_weight_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(key_weight_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(value_weight_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(out_proj_weight_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(query_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(key_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(value_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(query_bias_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(key_bias_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(value_bias_grad, TensorType({DT_FLOAT16}))
    .OUTPUT(out_proj_bias_grad, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(attn_head_num, Int)
    .REQUIRED_ATTR(attn_dim_per_head, Int)
    .REQUIRED_ATTR(src_len, Int)
    .REQUIRED_ATTR(tgt_len, Int)
    .REQUIRED_ATTR(keep_prob, Float)
    .REQUIRED_ATTR(softmax_use_float, Bool)
    .REQUIRED_ATTR(bias_grad_mask, ListBool)
    .OP_END_FACTORY_REG(MultiHeadAttentionGrad)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

The Graph Engine (GE) module is a submodule of MindSpore. Its code is implemented in C++, and it sits between the front-end module (ME) and the underlying hardware, acting as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor to fully exploit its computing power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts: GE API and GE Core.