You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

op_parser_util.h 15 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /**
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_
  17. #define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_
  18. #include <climits>
  19. #include <cmath>
  20. #include <cstdint>
  21. namespace ge {  // Per-operator default values and input/output/workspace counts
  22. // General defaults
  23. const float DEFAULT_ALPHA_VALUE = 1.0;
  24. const float DEFAULT_BETA_VALUE = 0.0;
  25. const uint32_t NORMAL_INPUT_NUM = 1;
  26. const uint32_t NORMAL_OUTPUT_NUM = 1;
  27. const uint32_t NORMAL_WORKSPACE_NUM = 0;
  28. const int32_t NORMAL_1D_DIM_NUM = 1;
  29. const int32_t NORMAL_SCALE_DIM_NUM = 0;
  30. const int32_t NORMAL_TENSOR_SIZE = 4;
  31. const uint32_t DEFAULT_REAL_DIM_CNT = 4;
  32. // Const op
  33. const uint32_t CONST_OP_INPUT_NUM = 0;
  34. const uint32_t CONST_OP_NORMAL_WEIGHT_SIZE = 1;
  35. // MatMul
  36. const uint32_t MATMUL_INPUT_NUM = 2;
  37. // ActivationGrad
  38. const int32_t ACTIVATIONGRAD_INPUT_NUM = 2;
  39. // FusedBatchNorm
  40. const int32_t FUSED_BATCH_NORM_WORKSPACE_NUM = 1;
  41. const int32_t FUSED_BATCH_NORM_INPUT_NUM = 5;
  42. const int32_t FUSED_BATCH_NORM_OUTPUT_NUM = 5;
  43. // FusedBatchNormGrad
  44. const int32_t FUSEDBATCHNORMGRAD_WORKSPACE_NUM = 1;
  45. const int32_t FUSEDBATCHNORMGRAD_INPUT_NUM = 5;
  46. const int32_t FUSEDBATCHNORMGRAD_OUTPUT_NUM = 3;
  47. // Convolution
  48. const uint32_t CONVOLUTION_WORKSPACE_NUM = 1;
  49. const uint32_t CONVOLUTION_PAD_SIZE = 4;
  50. const uint32_t CONVOLUTION_STRIDE_SIZE = 2;
  51. const uint32_t CONVOLUTION_DILATION_SIZE = 2;
  52. const int32_t CONVOLUTION_ADJ_SIZE = 2;
  53. const int32_t CONVOLUTION_TARGET_SHAPE_SIZE = 2;
  54. // ConvGradFilter
  55. const uint32_t CONVGRADFILTER_WORKSPACE_NUM = 1;
  56. const uint32_t CONVGRADFILTER_INPUT_NUM = 3;
  57. // Pooling
  58. const uint32_t POOLING_WINDOW_SIZE = 2;
  59. const uint32_t POOLING_STRIDE_SIZE = 2;
  60. const uint32_t POOLING_PAD_SIZE = 4;
  61. // Add, Sub, Mul, Div; then axis, Reshape, Flatten and Concat defaults
  62. const uint32_t ADD_INPUT_NUM = 2;
  63. const uint32_t SUB_INPUT_NUM = 2;
  64. const uint32_t MUL_INPUT_NUM = 2;
  65. const uint32_t DIV_INPUT_NUM = 2;
  66. const uint32_t ADD_WORKSPACE_NUM = 1;
  67. const uint32_t SUB_WORKSPACE_NUM = 1;
  68. const uint32_t MUL_WORKSPACE_NUM = 1;
  69. const uint32_t DIV_WORKSPACE_NUM = 1;
  70. const int32_t DEFAULT_AXIS_VALUE = -1;
  71. const int32_t RESHAPE_AXIS_DEFAULT_VALUE = 0;
  72. const int32_t RESHAPE_NUM_AXES_DEFAULT_VALUE = -1;
  73. const uint32_t RESHAPE_WORKSPACE_NUM = 1;
  74. const uint32_t FLATTEN_WORKSPACE_NUM = 1;
  75. const int32_t CONCAT_MIN_INPUT_SIZE = 1;
  76. const int32_t CONCAT_DEFAULT_AXIS = 1;
  77. const uint32_t CONCAT_WORKSPACE_NUM = 1;
  78. // Default values for the LRN parameters
  79. const uint32_t LRN_DEFAULT_NORM_REGION = 0;
  80. const float LRN_DEFAULT_K = 1.0;
  81. const uint32_t LRN_DEFAULT_LOCAL_SIZE = 5;
  82. const float LRN_DEFAULT_ALPHA = 1.0;
  83. const float LRN_DEFAULT_BETA = 0.75;
  84. ///
  85. /// @ingroup domi_common
  86. /// @brief ROIPooling default values
  87. ///
  88. const uint32_t ROIPOOLING_DEFAULT_POOLED_H = 0;
  89. const uint32_t ROIPOOLING_DEFAULT_POOLED_W = 0;
  90. const float ROIPOOLING_DEFAULT_SPATIAL_SCALE = 1;
  91. const int32_t ROIPOOLING_DEFAULT_SAMPLING_RATIO = -1;
  92. // DetectionOutput
  93. const int32_t DETECTIONOUTPUT_INPUT_SIZE = 3;
  94. const int32_t DETECTIONOUTPUT_OUTPUT_SIZE = 2;
  95. const int32_t DETECTIONOUTPUT_WORKSPACE_NUM = 1;
  96. const int32_t DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories -- NOTE(review): confirm; NUM_CLASSES default below is 21
  97. const int32_t DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21;
  98. const float DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
  99. const float DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.8;
  100. // Proposal
  101. const int32_t PROPOSAL_INPUT_SIZE = 3;
  102. const int32_t PROPOSAL_OUTPUT_MAX_SIZE = 2;
  103. const int32_t PROPOSAL_WORKSPACE_NUM = 1;
  104. const float PROPOSAL_BASE_SIZE_DEFAULT_VALUE = 16;
  105. const float PROPOSAL_RATIO_DIM_0_DEFAULT_VALUE = 0.5;
  106. const float PROPOSAL_RATIO_DIM_1_DEFAULT_VALUE = 1;
  107. const float PROPOSAL_RATIO_DIM_2_DEFAULT_VALUE = 2;
  108. const float PROPOSAL_SCALE_DIM_0_DEFAULT_VALUE = 8;
  109. const float PROPOSAL_SCALE_DIM_1_DEFAULT_VALUE = 16;
  110. const float PROPOSAL_SCALE_DIM_2_DEFAULT_VALUE = 32;
  111. const float PROPOSAL_MIN_SIZE_DEFAULT_VALUE = 16;
  112. const int32_t PROPOSAL_PRE_NMS_TOPN_DEFAULT_VALUE = 6000;
  113. const int32_t PROPOSAL_POST_NMS_TOPN_DEFAULT_VALUE = 304;
  114. const float PROPOSAL_NMS_THRESH_DEFAULT_VALUE = 0.7;
  115. const float PROPOSAL_FILTER_THRESH_DEFAULT_VALUE = 0;
  116. // TVM OP
  117. const uint32_t DEFAULT_KERNEL_BLOCK_DIM = 1;
  118. // Softmax
  119. const int32_t SOFTMAX_WORKSPACE_NUM = 1;
  120. // SoftmaxCrossEntropy
  121. const int32_t SOFTMAXCROSSENTROPY_INPUT_NUM = 2;
  122. const int32_t SOFTMAXCROSSENTROPY_OUTPUT_NUM = 2;
  123. // Permute
  124. const int32_t PERMUTE_INPUT_NUM = 1;
  125. const int32_t PERMUTE_OUTPUT_NUM = 1;
  126. const int32_t PERMUTE_WORKSPACE_NUM = 1;
  127. const int32_t PERMUTE_ORDER_NUM = 4;
  128. // Ssd normalize
  129. const int32_t SSD_NORMALIZE_INPUT_SIZE = 1;
  130. const float SSD_NORMALIZE_EPS_DEFAULT_VALUE = 2e-7;
  131. // SsdPriorBox
  132. const int32_t SSD_PRIOR_BOX_WORKSPACE_NUM = 1;
  133. const int32_t SSD_PRIOR_BOX_INPUT_NUM = 2;
  134. const bool SSD_PRIOR_BOX_FLIP_VALUE = true;
  135. const bool SSD_PRIOR_BOX_CLIP_VALUE = false;
  136. const double SSD_PRIOR_BOX_ASPECT_OFFSET_VALUE = 0.5;
  137. const double SSD_PRIORBOX_VARIANCE_VALUE = 0.1;
  138. const double SSD_PRIORBOX_VARIANCE_SIZE_ONE = 1;
  139. const double SSD_PRIORBOX_VARIANCE_SIZE_FOUR = 4;
  140. const double SSD_PRIORBOX_ASPECT_RATIO_VALUE = 1.0;
  141. const int32_t SSD_PRIOR_BOX_CODETYPE_CORNER_VALUE = 1;
  142. const int32_t SSD_PRIOR_BOX_CODETYPE_CENTER_SIZE_VALUE = 2;
  143. const int32_t SSD_PRIOR_BOX_CODETYPE_CORNER_SIZE_VALUE = 3;
  144. // Ssd DetectionOutput
  145. const int32_t SSD_DETECTIONOUTPUT_INPUT_SIZE = 3;
  146. const int32_t SSD_DETECTIONOUTPUT_INPUT_SIZE_AFTER_FUSION = 2;
  147. const int32_t SSD_DETECTIONOUTPUT_OUTPUT_SIZE = 2;
  148. const int32_t SSD_DETECTIONOUTPUT_OUTPUT_SIZE_AFTER_FUSION = 3;
  149. const int32_t SSD_DETECTIONOUTPUT_WORKSPACE_NUM = 1;
  150. const int32_t SSD_DETECTIONOUTPUT_WORKSPACE_NUM_AFTER_FUSION = 0;
  151. const bool SSD_DETECTIONOUTPUT_SHARED_LOCATION_DEFAULT_VALUE = true;
  152. const int32_t SSD_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0;
  153. const float SSD_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
  154. const int32_t SSD_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200;
  155. const float SSD_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0;
  156. const int32_t SSD_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200;
  157. const bool SSD_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false;
  158. const float SSD_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1;
  159. // Refinedet DetectionOutput
  160. const int32_t REFINEDET_DETECTIONOUTPUT_INPUT_SIZE = 5;
  161. const int32_t REFINEDET_DETECTIONOUTPUT_INPUT_SIZE_AFTER_FUSION = 2;
  162. const int32_t REFINEDET_DETECTIONOUTPUT_OUTPUT_SIZE = 2;
  163. const int32_t REFINEDET_DETECTIONOUTPUT_OUTPUT_SIZE_AFTER_FUSION = 3;
  164. const int32_t REFINEDET_DETECTIONOUTPUT_WORKSPACE_NUM = 1;
  165. const bool REFINEDET_DETECTIONOUTPUT_SHARED_LOCATION_DEFAULT_VALUE = true;
  166. const int32_t REFINEDET_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0;
  167. const float REFINEDET_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
  168. const int32_t REFINEDET_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200;
  169. const float REFINEDET_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0;
  170. const bool REFINEDET_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false;
  171. const int32_t REFINEDET_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200;
  172. const float REFINEDET_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1;
  173. const float REFINEDET_DETECTIONOUTPUT_OBJECTNESS_SCORE_DEFAULT_VALUE = 0;
  174. // Channel axpy
  175. const int32_t CHANNEL_AXPY_INPUT_NUM = 3;
  176. const int32_t CHANNEL_AXPY_INPUT_DIM_SIZE = 4;
  177. const int32_t CHANNEL_AXPY_WORKSPACE_NUM = 1;
  178. // Psroi pooling
  179. const int32_t PSROI_POOLING_INPUT_COUNT = 2;
  180. const int32_t PSROI_POOLING_WORKSPACE_NUM = 1;
  181. // MaxPoolWithArgmax
  182. const uint32_t MAX_POOL_WITH_ARGMAX_OUTPUT_NUM = 2;
  183. const uint32_t MAX_POOL_GRAD_WITH_ARGMAX_INPUT_NUM = 3;
  184. // AvgPoolGrad
  185. const uint32_t AVG_POOL_GRAD_INPUT_NUM = 2;
  186. // ROIAlign
  187. const int32_t ROIALIGN_INPUT_SIZE = 2;
  188. const int32_t ROIALIGN_WORKSPACE_NUM = 1;
  189. const int32_t ROIALIGN_DEFAULT_POOLED_H = 1;
  190. const int32_t ROIALIGN_DEFAULT_POOLED_W = 1;
  191. // Correlation
  192. const uint32_t CORRELATION_INPUT_NUM = 2;
  193. const int32_t CORRELATION_WORKSPACE_NUM = 1;
  194. // DetectionPostprocess
  195. const int32_t POSTPROCESS_INPUT_SIZE = 4;
  196. const int32_t POSTPROCESS_OUTPUT_SIZE = 2;
  197. const int32_t POSTPROCESS_WORKSPACE_NUM = 1;
  198. const uint32_t POSTPROCESS_CLS_NUM_DEFAULT_VALUE = 12;
  199. const uint32_t POSTPROCESS_POST_NMS_TOPN_DEFAULT_VALUE = 100;
  200. const float POSTPROCESS_NMS_THRESH_DEFAULT_VALUE = 0.3;
  201. const float POSTPROCESS_CONF_THRESH_DEFAULT_VALUE = 0.5;
  202. const float POSTPROCESS_BBOX_REG_WEIGHT_DIM_DEFAULT_VALUE = 1.0;
  203. const int32_t POSTPROCESS_BBOX_REG_WEIGHT_SIZE_DEFAULT_VALUE = 4;
  204. // Split, StridedSlice
  205. const int32_t SPLIT_INPUT_NUM = 2;
  206. const int32_t SPLIT_DEFAULT_AXIS_VALUE = 1;
  207. const int32_t SPLIT_MIN_OUTPUT_SIZE = 1;
  208. const uint32_t STRIDEDSLICE_INPUT_NUM = 4;
  209. // Slice
  210. const int32_t SLICE_INPUT_NUM = 3;
  211. const int32_t SLICE_WEIGHT_NUM = 2;
  212. // GatherNd
  213. const int32_t GATHERND_INPUT_NUM = 2;
  214. // ArgMax
  215. const int32_t ARGMAX_INPUT_NUM = 2;
  216. const int32_t ARGMAX_REAL_INPUT_NUM = 1;
  217. // HighWay
  218. const int32_t HIGHWAY_INPUT_NUM = 4;
  219. const int32_t HIGHWAY_WORKSPACE_NUM = 1;
  220. // RealDiv
  221. const int32_t REALDIV_INPUT_NUM = 2;
  222. // Range
  223. const int32_t RANGE_INPUT_NUM = 3;
  224. const int32_t RANGE_OUTPUT_NUM = 1;
  225. const int32_t RANGE_INPUT_DIM_SIZE = 0;
  226. // Pad
  227. const int32_t PAD_WEIGHT_NUM = 1;
  228. const int32_t PAD_DIM_SIZE = 2;
  229. const int32_t PAD_DIM0 = 4;
  230. const int32_t PAD_DIM1 = 2;
  231. const int32_t PAD_WEIGHT_WITH_CONSTANT_NUM = 2;
  232. const int32_t PAD_CONSTATNT_DEFAULT_VALUE = 0;  // NOTE(review): identifier misspells CONSTANT; renaming needs a coordinated change in all users
  233. const int32_t PAD_PADDINGS_SIZE = 8;
  234. // Tile
  235. const int32_t TILE_WEIGHT_NUM = 1;
  236. const int32_t TILE_MULTIPLES_DIM_SIZE = 1;
  237. // DecodeBbox
  238. const int32_t DECODE_BBOX_INPUT_NUM = 2;
  239. // GenerateRpnProposals
  240. const int32_t GENERATE_RPN_PROPOSAL_INPUT_SIZE = 2;
  241. const int32_t GENERATE_RPN_PROPOSAL_OUTPUT_SIZE = 3;
  242. // Decode_BBox -- NOTE(review): DECODE_BBOX_INPUT_NUM above has the same value; possibly a duplicate, confirm
  243. const int32_t DECODE_BBOX_INPUT_SIZE = 2;
  244. const int32_t DEFAULT_DECODE_CLIP_VALUE = 0;
  245. // FastRcnnPredictions, ClipBoxes, FloorDiv
  246. const int32_t FASTRCNN_PREDICTIONS_INPUT_SIZE = 2;
  247. const int32_t FASTRCNN_PREDICTIONS_OUTPUT_SIZE = 4;
  248. const int32_t CLIP_BOXES_INPUT_NUM = 1;
  249. const int32_t CLIP_BOXES_WEIGHT_SIZE = 1;
  250. const int32_t CLIP_BOXES_WEIGHT_ITEM_SIZE = 2;
  251. const int32_t CLIP_BOXES_OUTPUT_NUM = 1;
  252. const int32_t FLOORDIV_INPUT_NUM = 2;
  253. // Mean
  254. const int32_t MEAN_WEIGHT_SIZE = 1;
  255. const int32_t MEAN_WEIGHT_DIM_SIZE = 1;
  256. const int32_t MEAN_WEIGHT_DIM = 2;
  257. const int32_t MEAN_FIRST_AXIS = 2;
  258. const int32_t MEAN_SECOND_AXIS = 3;
  259. const int32_t MEAN_STRIDE_PLACE_HOLD = 1;
  260. // Switch
  261. const uint32_t SWITCH_INPUT_NUM = 2;
  262. const uint32_t SWITCH_OUTPUT_NUM = 2;
  263. // Merge
  264. const uint32_t MERGE_INPUT_NUM = 2;
  265. // Greater
  266. const uint32_t GREATER_OUTPUT_NUM = 1;
  267. const uint32_t GREATER_INPUT_NUM = 0;
  268. const uint32_t GREATER_WEIGHT_NUM = 2;
  269. // Yolo region
  270. const uint32_t YOLO_REGION_OUTPUT_NUM = 3;
  271. const uint32_t YOLO_REGION_WORKSPACE_NUM = 1;
  272. const uint32_t YOLO_REGION_COORDS = 4;
  273. const uint32_t YOLO_REGION_CLASSES = 20;
  274. const uint32_t YOLO_REGION_BOXES = 1;
  275. const bool YOLO_REGION_BACKGROUND = false;
  276. const bool YOLO_REGION_SOFTMAX = false;
  277. const bool YOLO_REGION_SOFTMAX_TREE = false;
  278. // Yolo DetectionOutput
  279. const uint32_t YOLO_DETECTIONOUTPUT_INPUT_SIZE = 4;
  280. const uint32_t YOLO_DETECTIONOUTPUT_OUTPUT_SIZE = 2;
  281. const uint32_t YOLO_DETECTION_OUTPUT_WORKSPACE_NUM = 1;
  282. const uint32_t YOLO_DETECTION_OUTPUT_CLASSES = 20;
  283. const uint32_t YOLO_DETECTION_OUTPUT_BOXES_V2 = 5;
  284. const uint32_t YOLO_DETECTION_OUTPUT_BOXES_V3 = 3;
  285. const bool YOLO_DETECTION_OUTPUT_RELATIVE = true;
  286. const float YOLO_DETECTION_OUTPUT_OBJECTNESS_THRESHOLD = 0.5;
  287. const float YOLO_DETECTION_OUTPUT_CLASS_THRESHOLD = 0.5;
  288. const uint32_t YOLO_DETECTION_OUTPUT_POST_TOP_K = UINT_MAX;  // UINT_MAX is provided by <climits>
  289. const float YOLO_DETECTION_OUTPUT_NMS_THRESHOLD = 0;
  290. const float YOLO_DETECTION_OUTPUT_IOU_THRESHOLD_DECAY = 1.0;
  291. const float YOLO_DETECTION_OUTPUT_COOR_SCALE_FACTOR = 1.0;
  292. // Reorg
  293. const int32_t REORG_DEFAULT_STRIDE = 2;
  294. const uint32_t REORG_INPUT_COUNT = 1;
  295. // Reshape
  296. const int32_t RESHAPE_INPUT_NUM = 2;
  297. // Maximum
  298. const int32_t MAXIMUM_INPUT_NUM = 2;
  299. // Spatialtf, Reverse
  300. const int32_t SPATIALTF_WORKSPACE_NUM = 1;
  301. const int32_t REVERSE_DEFAULT_AXIS = 1;
  302. // Crop
  303. const int32_t CROP_AXIS = 2;
  304. const int32_t CROP_INPUT_NUM = 2;
  305. // ConvGradInput
  306. const uint32_t CONVGRADINPUT_WORKSPACE_NUM = 1;
  307. const uint32_t CONVGRADINPUT_INPUT_NUM = 3;
  308. // RNN
  309. const uint32_t RNN_WORKSPACE_NUM = 1;
  310. // CropAndResize
  311. const int32_t CROPANDRESIZE_WEIGHT_NUM = 1;
  312. const int32_t CROPANDRESIZE_CROP_DIM_SIZE = 1;
  313. const int32_t CROP_DIM0 = 2;
  314. // Attention decoder weight indices (0..17)
  315. const uint32_t ATTENTION_DECODER_WEIGHT_ATTENW0 = 0;
  316. const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION0_KERNEL = 1;
  317. const uint32_t ATTENTION_DECODER_WEIGHT_ATTNOUTPUTPROJECTION_KERNEL = 2;
  318. const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION_DECODER_KERNEL = 3;
  319. const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_GATES_KERNEL = 4;
  320. const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_CANDIDATE_KERNEL = 5;
  321. const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_GATES_KERNEL = 6;
  322. const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_CANDIDATE_KERNEL = 7;
  323. const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION0_BIAS = 8;
  324. const uint32_t ATTENTION_DECODER_WEIGHT_ATTNOUTPUTPROJECTION_BIAS = 9;
  325. const uint32_t ATTENTION_DECODER_WEIGHT_ATTENTION_DECODER_BIAS = 10;
  326. const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_GATES_BIAS = 11;
  327. const uint32_t ATTENTION_DECODER_WEIGHT_CELL0_CANDIDATE_BIAS = 12;
  328. const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_GATES_BIAS = 13;
  329. const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_CANDIDATE_BIAS = 14;
  330. const uint32_t ATTENTION_DECODER_WEIGHT_EMBEDDING = 15;
  331. const uint32_t ATTENTION_DECODER_WEIGHT_ATTENVA = 16;
  332. const uint32_t ATTENTION_DECODER_WEIGHT_DECODER_INITIAL = 17;
  333. // Attention decoder weight count, followed by the other attention decoder constants
  334. const uint32_t ATTENTION_DECODER_WEIGHT_SIZE = 18;
  335. const uint32_t ATTENTION_DECODER_INPUT_SIZE = 2;
  336. const uint32_t ATTENTION_DECODER_WORKSPACE_NUM = 1;
  337. const uint32_t ATTENTION_DECODER_INPUT_DECODER_INPUTS = 0;
  338. const uint32_t ATTENTION_DECODER_INPUT_DECODER_INITIAL_HIDDEN = 1;
  339. const int32_t ATTENTION_DECODER_ALGO_NORMAL = 0;
  340. const int32_t ATTENTION_DECODER_SYMBOLS = 10000;
  341. const int32_t ATTENTION_DECODER_EMBEDDING_SIZE = 128;
  342. const int32_t ATTENTION_DECODER_ATTENTION_NUM_HIDDEN = 256;
  343. const int32_t ATTENTION_DECODER_DECODER_NUM_HIDDEN = 128;
  344. const int32_t ATTENTION_DECODER_DECODER_NUM_LAYERS = 2;
  345. const int32_t ATTENTION_DECODER_RNN_UNBIDIRECTIONAL = 0;
  346. const int32_t ATTENTION_DECODER_SEQLEN_VALUE = 57;
  347. const int32_t ATTENTION_DECODER_GRU = 3;
  348. // LogicalAnd, Equal
  349. const int32_t LOGICAL_AND_INPUT_NUM = 2;
  350. const int32_t EQUAL_INPUT_NUM = 2;
  351. static const int32_t OP_WEIGHT_MEM_BASE_OFFSET = 512;  // NOTE(review): the only constant here declared `static`; a namespace-scope const already has internal linkage in C++
  352. // MultiShape
  353. const uint32_t MULTI_SHAPE_INPUT_NUM = 2;
  354. // ShuffleChannel
  355. const uint32_t SHUFFLECHANNEL_DEFAULT_GROUP = 1;
  356. } // namespace ge
  357. #endif // INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示