You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dnn_op.h 228 kB

5 years ago
5 years ago
5 years ago

  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DNN_OP_H__
  17. #define DNN_OP_H__
  18. #include "cce/blas_struct.h"
  19. #include "cce/cce.h"
  20. #include "cce/customize.h"
  21. namespace cce {
  22. /**
  23. * @ingroup dnn
  24. * @brief create descriptor of parameters for exponential function
  25. * @param [in] expDesc point to descriptor of parameters for exponential function
  26. * @return ccStatus_t
  27. */
  28. ccStatus_t ccCreateExpDescriptor(ccExpDescriptor_t *expDesc);
  29. /**
  30. * @ingroup dnn
  31. * @brief create descriptor of parameters for logarithmic function
  32. * @param [in] logDesc point to descriptor of parameters for logarithmic function
  33. * @return ccStatus_t
  34. */
  35. ccStatus_t ccCreateLogDescriptor(ccLogDescriptor_t *logDesc);
  36. /**
  37. * @ingroup dnn
  38. * @brief create descriptor of parameters for pow function
  39. * @param [in] powDesc point to descriptor of parameters for pow function
  40. * @return ccStatus_t
  41. */
  42. ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *powDesc);
  43. /**
  44. * @ingroup dnn
  45. * @brief destroy descriptor of parameters for exponential function
  46. * @param [in] expDesc point to descriptor of parameters for exponential function
  47. * @return ccStatus_t
  48. */
  49. ccStatus_t ccDestroyExpDescriptor(ccExpDescriptor_t *expDesc);
  50. /**
  51. * @ingroup dnn
  52. * @brief destroy descriptor of parameters for logarithmic function
  53. * @param [in] logDesc point to descriptor of parameters for logarithmic function
  54. * @return ccStatus_t
  55. */
  56. ccStatus_t ccDestroyLogDescriptor(ccLogDescriptor_t *logDesc);
  57. /**
  58. * @ingroup dnn
  59. * @brief destroy descriptor of parameters for pow function
  60. * @param [in] powDesc point to descriptor of parameters for pow function
  61. * @return ccStatus_t
  62. */
  63. ccStatus_t ccDestroyPowDescriptor(ccPowDescriptor_t *powDesc);
  64. /**
  65. * @ingroup dnn
  66. * @brief create descriptor of parameters for NonMaxSuppression function
  67. * @param [in] nonMaxSuppressionDesc point to descriptor of parameters for NonMaxSuppression function
  68. * @return ccStatus_t
  69. */
  70. ccStatus_t ccCreateNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t *nonMaxSuppressionDesc);
  71. /**
  72. * @ingroup dnn
  73. * @brief destroy descriptor of parameters for NonMaxSuppression function
  74. * @param [in] nonMaxSuppressionDesc point to descriptor of parameters for NonMaxSuppression function
  75. * @return ccStatus_t
  76. */
  77. ccStatus_t ccDestroyNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t *nonMaxSuppressionDesc);
  /**
  * @ingroup dnn
  * @brief NOTE(review): this declaration had no documentation and its behavior is not visible
  * in this header. Presumably transforms tensor x into y while inserting padding ("Incert"
  * looks like a misspelling of "Insert") - confirm against the implementation.
  * @param [in] xDesc descriptor of tensor x
  * @param [in] x source data pointer (const, not written through)
  * @param [in] yDesc descriptor of tensor y
  * @param [in|out] y destination data pointer (non-const)
  * @param [in] ySizeInBytes presumably the allocated size of y in bytes - confirm
  * @param [in] boxTypeNum presumably the number of box types - confirm
  * @param [in] interweave presumably whether the axis is interwoven - confirm
  * @param [in] background meaning not documented - TODO confirm
  * @param [in] boxTypeNumMax optional, defaults to 0; meaning not documented - TODO confirm
  * @param [in] isScaleVec optional, defaults to false; meaning not documented - TODO confirm
  * @return ccStatus_t
  */
  78. ccStatus_t ccTransTensorIncertPads(const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
  79. void *y, uint32_t ySizeInBytes, uint32_t boxTypeNum, bool interweave,
  80. bool background, uint32_t boxTypeNumMax = 0, bool isScaleVec = false);
  /**
  * @ingroup dnn
  * @brief NOTE(review): this declaration had no documentation and its behavior is not visible
  * in this header. Presumably the int32 counterpart of ccTransTensorIncertPads - confirm
  * against the implementation.
  * @param [in] xDesc descriptor of tensor x
  * @param [in] x source data pointer (const, not written through)
  * @param [in] yDesc descriptor of tensor y
  * @param [in|out] y destination data pointer (non-const)
  * @param [in] ySizeInBytes presumably the allocated size of y in bytes - confirm
  * @param [in] boxTypeNum presumably the number of box types - confirm
  * @param [in] interweave presumably whether the axis is interwoven - confirm
  * @param [in] background meaning not documented - TODO confirm
  * @return ccStatus_t
  */
  81. ccStatus_t ccTransTensorIncertPadsInt32(const ccTensorDescriptor_t xDesc, const void *x,
  82. const ccTensorDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
  83. uint32_t boxTypeNum, bool interweave, bool background);
  /**
  * @ingroup dnn
  * @brief NOTE(review): this declaration had no documentation and its behavior is not visible
  * in this header. Presumably transforms Mask R-CNN bounding-box data from x into y - confirm
  * against the implementation.
  * @param [in] xDesc descriptor of tensor x
  * @param [in] x source data pointer (const, not written through)
  * @param [in] yDesc descriptor of tensor y
  * @param [in|out] y destination data pointer (non-const)
  * @param [in] ySizeInBytes presumably the allocated size of y in bytes - confirm
  * @param [in] boxTypeNum presumably the number of box types - confirm
  * @return ccStatus_t
  */
  84. ccStatus_t ccTransMskrcnnBbox(const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
  85. void *y, uint32_t ySizeInBytes, uint32_t boxTypeNum);
  /**
  * @ingroup dnn
  * @brief NOTE(review): this declaration had no documentation. Presumably stores the vector
  * quantize parameters in the tensor descriptor - confirm against the implementation.
  * @param [in|out] tensorDesc descriptor of tensor
  * @param [in] vecQuantizePara point to vector quantize parameters (const, not written through)
  * @return ccStatus_t
  */
  86. ccStatus_t ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensorDesc,
  87. const ccVecQuantizePara_t *vecQuantizePara);
  /**
  * @ingroup dnn
  * @brief NOTE(review): this declaration had no documentation. Presumably reads the vector
  * quantize parameters back from the tensor descriptor - confirm against the implementation.
  * @param [in] tensorDesc descriptor of tensor
  * @param [in|out] vecQuantizePara point to vector quantize parameters (non-const)
  * @return ccStatus_t
  */
  88. ccStatus_t ccGetTensorDescriptorQuantizeParam(const ccTensorDescriptor_t tensorDesc,
  89. ccVecQuantizePara_t *vecQuantizePara);
  90. /**
  91. * @ingroup dnn
  92. * @brief init tensor to 4d filter
  93. * @param [in|out] filterDesc descriptor of filter
  94. * @param [in] format format of filter
  95. * @param [in] dataType data type in device
  96. * @param [in] k number of output feature maps
  97. * @param [in] c number of input feature maps
  98. * @param [in] h height of filter
  99. * @param [in] w width of filter
  100. * @return ccStatus_t
  101. */
  102. ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
  103. int32_t k, int32_t c, int32_t h, int32_t w);
  /**
  * @ingroup dnn
  * @brief NOTE(review): this declaration had no documentation. Presumably inits a descriptor
  * for a 6d filter - confirm against the implementation. Unlike ccSetFilter4dDescriptor, the
  * descriptor parameter is a ccTensorDescriptor_t, not a ccFilterDescriptor_t.
  * @param [in|out] filterDesc descriptor of filter
  * @param [in] format format of filter
  * @param [in] dataType data type in device
  * @param [in] c1 dimension value; meaning not documented - TODO confirm
  * @param [in] h presumably height of filter - confirm
  * @param [in] w presumably width of filter - confirm
  * @param [in] n dimension value; meaning not documented - TODO confirm
  * @param [in] co dimension value; meaning not documented - TODO confirm
  * @param [in] c0 dimension value; meaning not documented - TODO confirm
  * @return ccStatus_t
  */
  104. ccStatus_t ccSetFilter6dDescriptor(ccTensorDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
  105. int32_t c1, int32_t h, int32_t w, int32_t n, int32_t co, int32_t c0);
  106. /**
  107. * @ingroup dnn
  108. * @brief init tensor to Fractal filter
  109. * @param [in|out] filterDesc descriptor of filter
  110. * @param [in] format format of filter
  111. * @param [in] dataType data type in device
  112. * @param [in] k number of output feature maps
  113. * @param [in] c number of input feature maps
  114. * @param [in] h height of filter
  115. * @param [in] w width of filter
  116. * @return ccStatus_t
  117. */
  118. ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
  119. int32_t k, int32_t c, int32_t h, int32_t w);
  120. /**
  121. * @ingroup dnn
  122. * @brief init tensor to int8 4d filter
  123. * @param [in|out] filterDesc descriptor of filter
  124. * @param [in] format format of filter
  125. * @param [in] dataType data type in device
  126. * @param [in] k number of output feature maps
  127. * @param [in] c number of input feature maps
  128. * @param [in] h height of filter
  129. * @param [in] w width of filter
  * @param [in] outputDataType output data type
  130. * @return ccStatus_t
  131. */
  132. ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
  133. int32_t k, int32_t c, int32_t h, int32_t w, ccDataType_t outputDataType);
  134. /**
  135. * @ingroup dnn
  136. * @brief read Fractal filter
  137. * @param [in] filterDesc descriptor of filter
  138. * @param [in|out] format point to format of filter
  139. * @param [in|out] dataType point to data type in device
  140. * @param [in|out] k point to number of output feature maps
  141. * @param [in|out] c point to number of input feature maps
  142. * @param [in|out] h point to height of filter
  143. * @param [in|out] w point to width of filter
  144. * @return ccStatus_t
  145. */
  146. ccStatus_t ccGetFilterFractalDescriptor(const ccFilterDescriptor_t filterDesc, ccTensorFormat_t *format,
  147. ccDataType_t *dataType, int32_t *k, int32_t *c, int32_t *h, int32_t *w);
  148. /**
  149. * @ingroup dnn
  150. * @brief get data size of depthwise conv filter
  151. * @param [in] filterDesc descriptor of filter
  * @param [in] groupNum groupNum of conv
  152. * @param [in|out] size point to data size
  153. * @return ccStatus_t
  154. */
  155. ccStatus_t ccGetDepthWiseConvFilterSizeInBytes(const ccFilterDescriptor_t filterDesc, int32_t groupNum, uint32_t *size);
  156. /**
  157. * @ingroup dnn
  158. * @brief trans group conv filter to fractal format
  159. * @param [in] filterSrcInfo descriptor of input filter
  160. * @param [in] filterSrc input data pointer
  161. * @param [in] filterDstInfo descriptor of output filter
  162. * @param [in|out] filterDst output data pointer
  163. * @param [in] group group size
  * @param [in] dstSize presumably the allocated size of filterDst in bytes - confirm
  164. * @return ccStatus_t
  165. */
  166. ccStatus_t ccTransGroupConvFilter(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
  167. ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group,
  168. uint32_t dstSize);
  169. /**
  170. * @ingroup dnn
  171. * @brief trans conv filter with boxTypeNum to fractal format
  172. * @param [in] xDesc descriptor of input filter
  173. * @param [in] x input data pointer
  174. * @param [in] yDesc descriptor of output filter
  175. * @param [in|out] y output data pointer
  176. * @param [in] ySizeInBytes the malloc memory size
  177. * @param [in] boxTypeNum the num of boxType
  178. * @param [in] interweave whether the axis interweave
  * @param [in] boxTypeNumMax optional, defaults to 0; meaning not documented - TODO confirm
  179. * @return ccStatus_t
  180. */
  181. ccStatus_t ccTransFilterWithBoxTypeNum(const ccFilterDescriptor_t xDesc, const void *x,
  182. const ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
  183. uint32_t boxTypeNum, bool interweave, uint32_t boxTypeNumMax = 0);
  184. /**
  185. * @ingroup dnn
  186. * @brief trans int8 conv filter with boxTypeNum to fractal format
  187. * @param [in] wDesc descriptor of input filter
  188. * @param [in] x input data pointer
  189. * @param [in] yDesc descriptor of output filter
  190. * @param [in|out] y output data pointer
  191. * @param [in] ySizeInBytes the malloc memory size
  192. * @param [in] boxTypeNum the num of boxType
  193. * @param [in] interweave whether the axis interweave
  194. * @param [in] outputDataType output DataType
  195. * @return ccStatus_t
  196. */
  197. ccStatus_t ccTransFilterInt8WithBoxTypeNum(const ccFilterDescriptor_t wDesc, const void *x,
  198. const ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
  199. uint32_t boxTypeNum, bool interweave, ccDataType_t outputDataType);
  200. /**
  201. * @ingroup dnn
  202. * @brief trans depthwise conv filter to fractal format
  203. * @param [in] wDesc descriptor of input filter
  204. * @param [in] w input data pointer
  205. * @param [in] groupNum groupNum of conv
  206. * @param [in] yDesc descriptor of output filter
  207. * @param [in|out] y output data pointer
  208. * @param [in] ySizeInBytes the malloc memory size
  209. * @return ccStatus_t
  210. */
  211. ccStatus_t transDepthWiseConvFilterNCHWToFractalZ(const ccFilterDescriptor_t wDesc, const void *w, int32_t groupNum,
  212. ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes);
  213. /**
  214. * @ingroup dnn
  215. * @brief trans int8 depthwise conv filter to fractal format
  216. * @param [in] wDesc descriptor of input filter
  217. * @param [in] w input data pointer
  218. * @param [in] groupNum groupNum of conv
  219. * @param [in] yDesc descriptor of output filter
  220. * @param [in|out] y output data pointer
  221. * @param [in] ySizeInBytes the malloc memory size
  222. * @return ccStatus_t
  223. */
  224. ccStatus_t transDepthWiseConvFilterInt8NCHWToFractalZ(const ccFilterDescriptor_t wDesc, const void *w, int32_t groupNum,
  225. ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes);
  226. /**
  227. * @ingroup dnn
  228. * @brief trans depthwise conv filter to fractal format, input format CHWN
  229. * @param [in] filterSrcInfo descriptor of input filter
  230. * @param [in] filterSrc input data pointer
  231. * @param [in] group presumably the group number of conv - confirm
  232. * @param [in] filterDstInfo descriptor of output filter
  233. * @param [in|out] filterDst output data pointer
  * @param [in] destSize the malloc memory size
  234. * @return ccStatus_t
  235. */
  236. ccStatus_t transDepthWiseConvFilterCHWNToFractalZ(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
  237. uint32_t group, ccFilterDescriptor_t filterDstInfo, void *filterDst,
  238. uint32_t destSize);
  239. /**
  240. * @ingroup dnn
  241. * @brief check if it is supported by HighPerformance depthwise
  242. * @param [in] inputN,C,H,W input param
  243. * @param [in] filterN,C,H,W filter param
  244. * @param [in] dilationH,W dilation param
  245. * @param [in] padHHead,padHTail,padWHead,padWTail pad param
  246. * @param [in] strideH,W stride param
  247. * @param [in] groupNum Conv groupNum
  248. * @param [in|out] isHighPerformance isHighPerformance flag (passed by reference)
  * @param [in] isquant optional, defaults to false; presumably whether the conv is quantized - confirm
  * @param [in] inputDataType optional, defaults to CC_DATA_HALF
  * @param [in] outputDataType optional, defaults to CC_DATA_HALF
  249. * @return ccStatus_t
  250. */
  251. ccStatus_t ccIsDepthwiseHighPerformance(int32_t inputN, int32_t inputC, int32_t inputH, int32_t inputW, int32_t filterN,
  252. int32_t filterC, int32_t filterH, int32_t filterW, int32_t dilationH,
  253. int32_t dilationW, int32_t padHHead, int32_t padHTail, int32_t padWHead,
  254. int32_t padWTail, int32_t strideH, int32_t strideW, int32_t groupNum,
  255. bool &isHighPerformance, bool isquant = false,
  256. ccDataType_t inputDataType = CC_DATA_HALF,
  257. ccDataType_t outputDataType = CC_DATA_HALF);
  258. /**
  259. * @ingroup dnn
  260. * @brief trans depthwise conv filter to fractal format, input format CHWN
  * NOTE(review): this repeats the identical declaration of transDepthWiseConvFilterCHWNToFractalZ
  * that already appears earlier in this header; the redeclaration is redundant and is a
  * candidate for removal.
  261. * @param [in] filterSrcInfo descriptor of input filter
  262. * @param [in] filterSrc input data pointer
  263. * @param [in] group presumably the group number of conv - confirm
  264. * @param [in] filterDstInfo descriptor of output filter
  265. * @param [in|out] filterDst output data pointer
  * @param [in] destSize the malloc memory size
  266. * @return ccStatus_t
  267. */
  268. ccStatus_t transDepthWiseConvFilterCHWNToFractalZ(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
  269. uint32_t group, ccFilterDescriptor_t filterDstInfo, void *filterDst,
  270. uint32_t destSize);
  271. /**
  272. * @ingroup dnn
  273. * @brief create descriptor of fullconnection operator
  274. * @param [in|out] fcDesc point to descriptor of fullconnection operator
  275. * @return ccStatus_t
  276. */
  277. ccStatus_t ccCreateFullConnectionDescriptor(ccFullConnectionDescriptor_t *fcDesc);
  278. /**
  279. * @ingroup dnn
  280. * @brief destroy descriptor of fullconnection operator
  281. * @param [in] *fcDesc descriptor of fullconnection operator
  282. * @return ccStatus_t
  283. */
  284. ccStatus_t ccDestroyFullConnectionDescriptor(ccFullConnectionDescriptor_t *fcDesc);
  285. /**
  286. * @ingroup dnn
  287. * @brief init conv descriptor to 2d conv, use for beforeHasPad
  288. * @param [in|out] convDesc descriptor of convolution operator
  289. * @param [in] beforepadHHead before padding in height head
  290. * @param [in] beforepadHTail before padding in height tail
  291. * @param [in] beforepadWHead before padding in width head
  292. * @param [in] beforepadWTail before padding in width tail
  293. * @return ccStatus_t
  294. */
  295. ccStatus_t ccSetConvolution2dDescriptorForPad(ccConvolutionDescriptor_t convDesc, int32_t beforepadHHead,
  296. int32_t beforepadHTail, int32_t beforepadWHead, int32_t beforepadWTail);
  297. /**
  298. * @ingroup dnn
  299. * @brief init conv descriptor to 2d conv, use for concat batch size
  300. * @param [in|out] convDesc descriptor of convolution operator
  301. * @param [in] concatBatchSize concat batch size
  302. * @return ccStatus_t
  303. */
  304. ccStatus_t ccSetConvolution2dDescriptorForConcatBatchSize(ccConvolutionDescriptor_t convDesc, int64_t concatBatchSize);
  305. /**
  306. * @ingroup dnn
  307. * @brief append an operation to 2d conv descriptor
  308. * @param [in|out] convDesc descriptor of convolution operator
  309. * @param [in] opType operation type for append at convolution operation
  310. * @param [in] opDesc operation descriptor for the opType
  311. * @return ccStatus_t
  312. */
  313. ccStatus_t ccConvolution2dAppendOp(ccConvolutionDescriptor_t convDesc, ccOpType_t opType, const void *opDesc);
  314. /**
  315. * @ingroup dnn
  316. * @brief read 2d conv beforeHasPad
  317. * @param [in] convDesc descriptor of convolution operator
  318. * @param [in|out] beforepadHHead before padding in height head, default is 0
  319. * @param [in|out] beforepadHTail before padding in height tail, default is 0
  320. * @param [in|out] beforepadWHead before padding in width head, default is 0
  321. * @param [in|out] beforepadWTail before padding in width tail, default is 0
  * @return ccStatus_t
  322. */
  323. ccStatus_t ccGetConvolution2dDescriptorForPad(const ccConvolutionDescriptor_t convDesc, int32_t *beforepadHHead,
  324. int32_t *beforepadHTail, int32_t *beforepadWHead,
  325. int32_t *beforepadWTail);
  326. /**
  327. * @ingroup dnn
  328. * @brief read 2d conv concat batch size
  329. * @param [in] convDesc descriptor of convolution operator
  330. * @param [in|out] concatBatchSize concat batch size, default is 0
  * @return ccStatus_t
  331. */
  332. ccStatus_t ccGetConvolution2dDescriptorForConcatBatchSize(const ccConvolutionDescriptor_t convDesc,
  333. int64_t *concatBatchSize);
  334. /**
  335. * @ingroup dnn
  336. * @brief get the temp space size of convolution forward computation, maybe no need temp space
  337. * @param [in] handle cce handle
  338. * @param [in] convDesc descriptor of convolution operator
  339. * @param [in] xDesc descriptor of input tensor
  340. * @param [in] wDesc descriptor of filter
  341. * @param [in] yDesc descriptor of output tensor
  342. * @param [in] algo algorithm of convolution forward
  343. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  344. * @return ccStatus_t
  345. */
  346. ccStatus_t ccGetConvolutionForwardWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
  347. const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
  348. const ccTensorDescriptor_t yDesc, ccConvolutionFwdAlgo_t algo,
  349. uint32_t *sizeInBytes);
  350. /**
  351. * @ingroup dnn
  352. * @brief get the temp space size of convolution backward computation, maybe no need temp space
  353. * @param [in] handle cce handle
  354. * @param [in] convDesc descriptor of convolution operator
  355. * @param [in] dyDesc descriptor of input tensor
  356. * @param [in] wDesc descriptor of filter
  357. * @param [in] dxDesc descriptor of output tensor
  358. * @param [in] algo algorithm of convolution backward
  359. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  360. * @return ccStatus_t
  361. */
  362. ccStatus_t ccGetConvolutionBackwardDataWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
  363. const ccTensorDescriptor_t dyDesc,
  364. const ccFilterDescriptor_t wDesc,
  365. const ccTensorDescriptor_t dxDesc, ccConvolutionBwdAlgo_t algo,
  366. uint32_t *sizeInBytes);
  367. /**
  368. * @ingroup dnn
  369. * @brief get the temp space size of fc forward computation, maybe no need temp space
  370. * @param [in] handle cce handle
  371. * @param [in] fcDesc descriptor of fc operator
  372. * @param [in] xDesc descriptor of input tensor
  373. * @param [in] wDesc descriptor of filter
  374. * @param [in] yDesc descriptor of output tensor
  375. * @param [in|out] sizeInBytes temp space size need, 0 means no memory needed
  376. * @return ccStatus_t
  377. */
  378. ccStatus_t ccGetFullConnectionForwardWorkspaceSize(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc,
  379. const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
  380. const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
  381. /**
  382. * @ingroup dnn
  383. * @brief convolution forward computation
  384. * @param [in] handle cce handle
  385. * @param [in] convDesc descriptor of convolution operator
  386. * @param [in] alpha scaling factors
  387. * @param [in] xDesc descriptor of input tensor
  388. * @param [in] x input data in device memory
  389. * @param [in] wDesc descriptor of filter
  390. * @param [in] w filter data in device memory
  391. * @param [in] biasDesc descriptor of bias
  392. * @param [in] bias bias data in device memory
  393. * @param [in] algo algorithm of convolution forward
  394. * @param [in] workSpace temp space, maybe NULL if no need temp space
  395. * @param [in] workSpaceSizeInBytes sizeof workspace
  396. * @param [in] beta scaling factors
  397. * @param [in] yDesc descriptor of output tensor
  398. * @param [in|out] y output data in device memory
  399. * @return ccStatus_t
  400. */
  401. ccStatus_t ccConvolutionForward(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, const void *alpha,
  402. const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
  403. const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
  404. ccConvolutionFwdAlgo_t algo, void *workSpace, uint32_t workSpaceSizeInBytes,
  405. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  406. /**
  407. * @ingroup dnn
  408. * @brief full alloc float and reset to 0
  409. * @param [in] handle cce handle
  410. * @param [in] alpha scaling factors
  411. * @param [in] xDesc descriptor of input tensor
  412. * @param [in|out] x output data in device memory
  * NOTE(review): x is declared as const void * although it is documented as output data - confirm
  * which of the two is intended.
  413. * @param [in] beta scaling factors
  414. * @return ccStatus_t
  415. */
  416. ccStatus_t ccAllocFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  417. const void *beta);
  418. /**
  419. * @ingroup dnn
  420. * @brief full get data set by op
  421. * @param [in] handle cce handle
  422. * @param [in] alpha scaling factors
  423. * @param [in] xDesc descriptor of input tensor
  424. * @param [in] x input data in device memory
  425. * @param [in] beta scaling factors
  426. * @param [in] yDesc descriptor of output tensor
  427. * @param [out] y output data in device memory
  * NOTE(review): y is declared as const void * although it is documented as output data - confirm
  * which of the two is intended.
  428. * @return ccStatus_t
  429. */
  430. ccStatus_t ccGetFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  431. const void *beta, const ccTensorDescriptor_t yDesc, const void *y);
  432. /**
  433. * @ingroup dnn
  434. * @brief full clear register
  435. * @param [in] handle cce handle
  436. * @param [in] alpha scaling factors
  437. * @param [in] xDesc descriptor of input tensor
  438. * @param [in] x input data in device memory
  439. * @param [in] beta scaling factors
  440. * @param [in] yDesc descriptor of output tensor
  441. * @param [out] y output data in device memory
  * NOTE(review): y is declared as const void * although it is documented as output data - confirm
  * which of the two is intended.
  442. * @return ccStatus_t
  443. */
  444. ccStatus_t ccClearFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  445. const void *beta, const ccTensorDescriptor_t yDesc, const void *y);
  446. #ifndef DAVINCI_LITE
  447. /**
  448. * @ingroup dnn
  449. * @brief convolution backward data computation
  450. * @param [in] handle cce handle
  451. * @param [in] convDesc descriptor of convolution operator
  452. * @param [in] alpha scaling factors
  453. * @param [in] dyDesc descriptor of input tensor
  454. * @param [in] dy input data in device memory
  455. * @param [in] wDesc descriptor of filter
  456. * @param [in] w filter data in device memory
  457. * @param [in] algo algorithm of convolution backward
  458. * @param [in] workSpace temp space, maybe NULL if no need temp space
  459. * @param [in] workSpaceSizeInBytes sizeof workspace
  460. * @param [in] beta scaling factors
  461. * @param [in] dxDesc descriptor of output tensor
  462. * @param [in|out] dx output data in device memory
  463. * @return ccStatus_t
  464. */
  465. ccStatus_t ccConvolutionBackwardData(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, const void *alpha,
  466. const ccTensorDescriptor_t dyDesc, const void *dy,
  467. const ccFilterDescriptor_t wDesc, const void *w, ccConvolutionBwdAlgo_t algo,
  468. void *workSpace, uint32_t workSpaceSizeInBytes, const void *beta,
  469. const ccTensorDescriptor_t dxDesc, void *dx);
  470. #endif
  471. /**
  472. * @ingroup dnn
  473. * @brief create descriptor of pooling operator
  474. * @param [in|out] poolingDesc point to descriptor of pooling operator
  475. * @return ccStatus_t
  476. */
  477. ccStatus_t ccCreatePoolingDescriptor(ccPoolingDescriptor_t *poolingDesc);
  478. /**
  479. * @ingroup dnn
  480. * @brief destroy descriptor of pooling operator
  481. * @param [in] *poolingDesc descriptor of pooling operator
  482. * @return ccStatus_t
  483. */
  484. ccStatus_t ccDestroyPoolingDescriptor(ccPoolingDescriptor_t *poolingDesc);
  485. /**
  486. * @ingroup dnn
  487. * @brief init pooling descriptor to 2d pooling
  488. * @param [in|out] poolingDesc descriptor of pooling operator
  489. * @param [in] mode mode of pooling
  490. * @param [in] padMode mode of padding
  491. * @param [in] maxpoolingNanOpt Nan propagation mode
  492. * @param [in] windowH height of pooling window
  493. * @param [in] windowW width of pooling window
  494. * @param [in] padHHead zero padding in height head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same
  495. * value.
  496. * @param [in] padHTail zero padding in height tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
  497. * @param [in] padWHead zero padding in width head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same
  498. * value.
  499. * @param [in] padWTail zero padding in width tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
  500. * @param [in] strideH stride in height
  501. * @param [in] strideW stride in width
  502. * @param [in] dataMode data mode; accepted values are not described in this header - TODO confirm
  503. * @param [in] ceilMode 0:Floor 1:Ceil
  * @param [in] algo pooling forward algorithm, optional, defaults to CC_POOLING_FWD_ALGO_HALF
  504. * @return ccStatus_t
  505. */
  506. ccStatus_t ccSetPooling2dDescriptor(ccPoolingDescriptor_t poolingDesc, ccPoolingMode_t mode, ccPaddingMode_t padMode,
  507. ccNanPropagation_t maxpoolingNanOpt, int32_t windowH, int32_t windowW,
  508. int32_t padHHead, int32_t padHTail, int32_t padWHead, int32_t padWTail,
  509. int32_t strideH, int32_t strideW, int32_t dataMode, int32_t ceilMode,
  510. ccPooingFwdAlgo_t algo = CC_POOLING_FWD_ALGO_HALF);
  511. /**
  512. * @ingroup dnn
  513. * @brief get the output dimension info of 2d pooling
  514. * @param [in] poolingDesc descriptor of pooling operator
  515. * @param [in] xDesc descriptor of input tensor
  516. * @param [in|out] n point to batch size
  517. * @param [in|out] c point to channels
  518. * @param [in|out] h point to height of feature map
  519. * @param [in|out] w point to width of feature map
  520. * @return ccStatus_t
  521. */
  522. ccStatus_t ccGetPooling2dForwardOutputDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc,
  523. int32_t *n, int32_t *c, int32_t *h, int32_t *w);
  524. /**
  525. * @ingroup dnn
  526. * @brief pooling forward computation
  527. * @param [in] handle cce handle
  528. * @param [in] poolingDesc descriptor of pooling operator
  529. * @param [in] alpha scaling factors
  530. * @param [in] xDesc descriptor of input tensor
  531. * @param [in] x input data in device memory
  532. * @param [in] beta scaling factors
  533. * @param [in] yDesc descriptor of output tensor
  534. * @param [in|out] y output data in device memory
  535. * @return ccStatus_t
  536. */
  537. ccStatus_t ccPoolingForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
  538. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  539. const ccTensorDescriptor_t yDesc, void *y);
  540. /**
  541. * @ingroup dnn
  542. * @brief pooling backward computation
  543. * @param [in] handle cce handle
  544. * @param [in] poolingDesc descriptor of pooling operator
  545. * @param [in] alpha scaling factors
  546. * @param [in] beta scaling factors
  547. * @param [in] argMaskDesc descriptor of mask tensor
  548. * @param [in] argMask mask data in device memory
  549. * @param [in] dyDesc descriptor of input tensor
  550. * @param [in] dy input data in device memory
  551. * @param [in] dxDesc descriptor of output tensor
  552. * @param [in|out] dx output data in device memory
  553. * @return ccStatus_t
  554. */
  555. ccStatus_t ccMaxPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
  556. const void *beta, const ccTensorDescriptor_t argMaskDesc, const void *argMask,
  557. const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t dxDesc,
  558. void *dx);
  559. /**
  560. * @ingroup dnn
  561. * @brief create descriptor of activation operator
  562. * @param [in|out] activationDesc point to descriptor of activation operator
  563. * @return ccStatus_t
  564. */
  565. ccStatus_t ccCreateActivationDescriptor(ccActivationDescriptor_t *activationDesc);
  566. /**
  567. * @ingroup dnn
  568. * @brief init activation descriptor to 2d activation
  569. * @param [in|out] activationDesc descriptor of activation operator
  570. * @param [in] mode mode of activation
  571. * @param [in] reluNanOpt Nan propagation mode
  572. * @param [in] coef ceiling for clipped RELU, alpha for ELU
  573. * @param [in] activationPara activation parameter union
  574. * @return ccStatus_t
  575. */
  576. ccStatus_t ccSetActivationDescriptor(ccActivationDescriptor_t activationDesc, ccActivationMode_t mode,
  577. ccNanPropagation_t reluNanOpt, double coef,
  578. ccActivationPara_u activationPara = {{0, CC_NAN_NOT_PROPAGATE}});
  579. /**
  580. * @ingroup dnn
  581. * @brief read activation param
  582. * @param [in] activationDesc descriptor of activation operator
  583. * @param [in|out] mode point to mode of activation
  584. * @param [in|out] reluNanOpt point to Nan propagation mode
  585. * @param [in|out] coef point to coef
  586. * @param [in|out] activationPara point to activation parameter union
  587. * @return ccStatus_t
  588. */
  589. ccStatus_t ccGetActivationDescriptor(const ccActivationDescriptor_t activationDesc, ccActivationMode_t *mode,
  590. ccNanPropagation_t *reluNanOpt, double *coef,
  591. ccActivationPara_u *activationPara = NULL);
  592. /**
  593. * @ingroup dnn
  594. * @brief destroy descriptor of activation operator
  595. * @param [in] *activationDesc descriptor of activation operator
  596. * @return ccStatus_t
  597. */
  598. ccStatus_t ccDestroyActivationDescriptor(ccActivationDescriptor_t *activationDesc);
  599. /**
  600. * @ingroup dnn
  601. * @brief activation forward computation
  602. * @param [in] handle cce handle
  603. * @param [in] activationDesc descriptor of activation operator
  604. * @param [in] alpha scaling factors
  605. * @param [in] xDesc descriptor of input tensor
  606. * @param [in] x input data in device memory
  607. * @param [in] beta scaling factors
  608. * @param [in] yDesc descriptor of output tensor
  609. * @param [in|out] y output data in device memory
  610. * @return ccStatus_t
  611. */
  612. ccStatus_t ccActivationForward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
  613. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  614. const ccTensorDescriptor_t yDesc, void *y);
  615. /**
  616. * @ingroup dnn
  617. * @brief Derives a tensor descriptor from layer data descriptor for BatchNormalization
  618. * @param [in|out] derivedBnDesc descriptor of mean, variance, bias, scale tensors
  619. * @param [in] xDesc descriptor of input tensor
  620. * @param [in] mode mode of BatchNormalization
  621. * @return ccStatus_t
  622. */
  623. ccStatus_t ccDeriveBNTensorDescriptor(ccTensorDescriptor_t derivedBnDesc, const ccTensorDescriptor_t xDesc,
  624. ccBatchNormMode_t mode);
  625. /**
  626. * @ingroup dnn
  627. * @brief batchnorm forward computation
  628. * @param [in] handle cce handle
  629. * @param [in] mode mode of batchnorm
  630. * @param [in] alpha scaling factors
  631. * @param [in] beta scaling factors
  632. * @param [in] xDesc descriptor of input tensor
  633. * @param [in] x input data in device memory
  634. * @param [in] yDesc descriptor of output tensor
  635. * @param [in|out] y output data in device memory
  636. * @param [in] bnScaleBiasMeanVarDesc descriptor of scale, bias, mean, variance tensor
  637. * @param [in] bnScale scaling factor
  638. * @param [in] bnBias bias factor
  639. * @param [in] estimatedMean mean
  640. * @param [in] estimatedVariance variance
  641. * @param [in] epsilon epsilon
  642. * @return ccStatus_t
  643. */
  644. ccStatus_t ccBatchNormForwardInference(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha, const void *beta,
  645. const ccTensorDescriptor_t xDesc, const void *x,
  646. const ccTensorDescriptor_t yDesc, void *y,
  647. const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
  648. const void *bnBias, const void *estimatedMean, const void *estimatedVariance,
  649. double epsilon);
  650. /**
  651. * @ingroup dnn
  652. * @brief batchnorm fusion forward computation
  653. * @param [in] handle cce handle
  654. * @param [in] mode mode of batchnorm
  655. * @param [in] bnDesc descriptor of batchnorm operator (see ccBatchNormAppendOp for appended operations)
  656. * @param [in] alpha scaling factors
  657. * @param [in] beta scaling factors
  658. * @param [in] xDesc descriptor of input tensor
  659. * @param [in] x input data in device memory
  660. * @param [in] yDesc descriptor of output tensor
  661. * @param [in|out] y output data in device memory
  662. * @param [in] bnScaleBiasMeanVarDesc descriptor of scale, bias, mean, variance tensor
  663. * @param [in] bnScale scaling factor
  664. * @param [in] bnBias bias factor
  665. * @param [in] estimatedMean mean
  666. * @param [in] estimatedVariance variance
  667. * @param [in] epsilon epsilon
  668. * @return ccStatus_t
  669. */
  670. ccStatus_t ccBatchNormFusionForwardInference(ccHandle_t handle, ccBatchNormMode_t mode, ccBatchNormDescriptor_t bnDesc,
  671. const void *alpha, const void *beta, const ccTensorDescriptor_t xDesc,
  672. const void *x, const ccTensorDescriptor_t yDesc, void *y,
  673. const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
  674. const void *bnBias, const void *estimatedMean,
  675. const void *estimatedVariance, double epsilon);
  676. /**
  677. * @ingroup dnn
  678. * @brief create descriptor of batchnorm operator
  679. * @param [in|out] bnDesc point to descriptor of batchnorm operator
  680. * @return ccStatus_t
  681. */
  682. ccStatus_t ccCreateBatchNormDescriptor(ccBatchNormDescriptor_t *bnDesc);
  683. /**
  684. * @ingroup dnn
  685. * @brief destroy batchnorm descriptor
  686. * @param [in] bnDesc point to descriptor of batchnorm operator
  687. * @return ccStatus_t
  688. */
  689. ccStatus_t ccDestroyBatchNormDescriptor(ccBatchNormDescriptor_t *bnDesc);
  690. /**
  691. * @ingroup dnn
  692. * @brief append operation after batchnorm
  693. * @param [in|out] bnDesc descriptor of batchnorm operator
  694. * @param [in] opType operation type for append at batchnorm operation
  695. * @param [in] opDesc operation descriptor for the opType
  696. * @return ccStatus_t
  697. */
  698. ccStatus_t ccBatchNormAppendOp(ccBatchNormDescriptor_t bnDesc, ccOpType_t opType, const void *opDesc);
  699. /**
  700. * @ingroup dnn
  701. * @brief get the output 4d dimension info of full connection
  702. * @param [in] xDesc descriptor of input tensor
  703. * @param [in] wDesc descriptor of weight tensor
  704. * @param [in|out] n point to batch size
  705. * @param [in|out] c point to channels
  706. * @param [in|out] h point to height of feature map
  707. * @param [in|out] w point to width of feature map
  708. * @return ccStatus_t
  709. */
  710. ccStatus_t ccGetFullConnectionFwdOutputDim(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
  711. int32_t *n, int32_t *c, int32_t *h, int32_t *w);
  712. /**
  713. * @ingroup dnn
  714. * @brief full connection forward computation
  715. * @param [in] handle cce handle
  716. * @param [in] fcDesc descriptor of full connection operator
  717. * @param [in] alpha scaling factors
  718. * @param [in] xDesc descriptor of input tensor
  719. * @param [in] x input data in device memory
  720. * @param [in] wDesc descriptor of weight tensor
  721. * @param [in] w filter data in device memory
  722. * @param [in] biasDesc descriptor of bias tensor
  723. * @param [in] bias bias data in device memory
  724. * @param [in] beta scaling factors
  725. * @param [in] yDesc descriptor of output tensor
  726. * @param [in|out] y output data in device memory
  727. * @return ccStatus_t
  728. */
  729. ccStatus_t ccFullConnectionForwardEx2(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc, const void *alpha,
  730. const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
  731. const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
  732. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  733. /**
  734. * @ingroup dnn
  735. * @brief full connection forward computation with workspace
  736. * @param [in] handle cce handle
  737. * @param [in] fcDesc descriptor of full connection operator
  738. * @param [in] alpha scaling factors
  739. * @param [in] xDesc descriptor of input tensor
  740. * @param [in] x input data in device memory
  741. * @param [in] wDesc descriptor of weight tensor
  742. * @param [in] w filter data in device memory
  743. * @param [in] biasDesc descriptor of bias tensor
  744. * @param [in] bias bias data in device memory
  745. * @param [in] workSpace workSpace in device memory
  746. * @param [in] workSpaceSizeInBytes workSpace size in bytes
  747. * @param [in] beta scaling factors
  748. * @param [in] yDesc descriptor of output tensor
  749. * @param [in|out] y output data in device memory
  750. * @return ccStatus_t
  751. */
  752. ccStatus_t ccFullConnectionForwardWithWorkSpace(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc,
  753. const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  754. const ccFilterDescriptor_t wDesc, const void *w,
  755. const ccTensorDescriptor_t biasDesc, const void *bias, void *workSpace,
  756. uint32_t workSpaceSizeInBytes, const void *beta,
  757. const ccTensorDescriptor_t yDesc, void *y);
  758. /**
  759. * @ingroup dnn
  760. * @brief full softmax forward computation
  761. * @param [in] handle cce handle
  762. * @param [in] algo softmax algorithm
  763. * @param [in] softmaxAxis mode (axis) of softmax
  764. * @param [in] alpha scaling factors
  765. * @param [in] xDesc descriptor of input tensor
  766. * @param [in] x input data in device memory
  767. * @param [in] workSpace workSpace in device memory
  768. * @param [in] workSpaceSizeInBytes workSpace size in bytes
  769. * @param [in] beta scaling factors
  770. * @param [in] yDesc descriptor of output tensor
  771. * @param [in|out] y output data in device memory
  772. * @return ccStatus_t
  773. */
  774. ccStatus_t ccSoftmaxForward(ccHandle_t handle, ccSoftmaxAlgo_t algo, int32_t softmaxAxis, const void *alpha,
  775. const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
  776. uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  777. /**
  778. * @ingroup dnn
  779. * @brief full softmax forward computation
  780. * @param [in] handle cce handle
  781. * @param [in] algo softmax algorithm
  782. * @param [in] softmaxAxis mode of softmax
  783. * @param [in] alpha scaling factors
  784. * @param [in] xDesc descriptor of input tensor
  785. * @param [in] x input data in device memory
  786. * @param [in] workSpace workSpace in device memory
  787. * @param [in] workSpaceSizeInBytes workSpace size in bytes
  788. * @param [in] beta scaling factors
  789. * @param [in] yDesc descriptor of output tensor
  790. * @param [in|out] y output data in device memory
  791. * @param [in] classNum class number
  792. * @param [in] padNum pad Num
  793. * @return ccStatus_t
  794. */
  795. ccStatus_t ccSoftmaxClassForward(ccHandle_t handle, ccSoftmaxAlgo_t algo, int32_t softmaxAxis, const void *alpha,
  796. const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
  797. uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc,
  798. void *y, uint32_t classNum, uint32_t padNum);
  799. /**
  800. * @ingroup dnn
  801. * @brief full scale forward computation
  802. * @param [in] handle cce handle
  803. * @param [in] scaleBiasDesc descriptor of scale and bias tensor
  804. * @param [in] scale scaling factor
  805. * @param [in] bias bias factor
  806. * @param [in] alpha scaling factors
  807. * @param [in] xDesc descriptor of input tensor
  808. * @param [in] x input data in device memory
  809. * @param [in] beta scaling factors
  810. * @param [in] yDesc descriptor of output tensor
  811. * @param [in|out] y output data in device memory
  812. * @return ccStatus_t
  813. */
  814. ccStatus_t ccScaleForward(ccHandle_t handle, const ccTensorDescriptor_t scaleBiasDesc, const void *scale,
  815. const void *bias, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  816. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  817. /**
  818. * @ingroup dnn
  819. * @brief full scale forward computation without bias
  820. * @param [in] handle cce handle
  821. * @param [in] scaleDesc descriptor of scale tensor
  822. * @param [in] scale scaling factor
  823. * @param [in] alpha scaling factors
  824. * @param [in] xDesc descriptor of input tensor
  825. * @param [in] x input data in device memory
  826. * @param [in] beta scaling factors
  827. * @param [in] yDesc descriptor of output tensor
  828. * @param [in|out] y output data in device memory
  829. * @return ccStatus_t
  830. */
  831. ccStatus_t ccScaleNoBiasForward(ccHandle_t handle, const ccTensorDescriptor_t scaleDesc, const void *scale,
  832. const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  833. const ccTensorDescriptor_t yDesc, void *y);
  834. /**
  835. * @ingroup dnn
  836. * @brief get the output dimension info of depth to space
  837. * @param [in] xDesc descriptor of input tensor
  838. * @param [in] blockSize the size of block
  839. * @param [in|out] dimCnt point to the output dimCnt
  840. * @param [in|out] dim arrays to save dims (dimLen: length of dim)
  841. * @return ccStatus_t
  842. */
  843. ccStatus_t ccGetDepthToSpaceOutputDim(const ccTensorDescriptor_t xDesc, const int32_t blockSize, int32_t *dimCnt,
  844. int32_t dim[], int32_t dimLen);
  845. /**
  846. * @ingroup dnn
  847. * @brief depth to space forward computation
  848. * @param [in] handle cce handle
  849. * @param [in] alpha scaling factors
  850. * @param [in] xDesc descriptor of input tensor
  851. * @param [in] x input data in device memory
  852. * @param [in] blockSize the size of block
  853. * @param [in] beta bias factors
  854. * @param [in] outputDesc descriptor of output tensor
  855. * @param [in|out] output output data in device memory
  856. * @return ccStatus_t
  857. */
  858. ccStatus_t ccDepthToSpaceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  859. const int32_t blockSize, const void *beta, const ccTensorDescriptor_t outputDesc,
  860. void *output);
  861. /**
  862. * @ingroup dnn
  863. * @brief get the output dimension info of space to depth
  864. * @param [in] xDesc descriptor of input tensor
  865. * @param [in] blockSize the size of block
  866. * @param [in|out] dimCnt point to the output dimCnt
  867. * @param [in|out] dim arrays to save dims (dimLen: length of dim)
  868. * @return ccStatus_t
  869. */
  870. ccStatus_t ccGetSpaceToDepthOutputDim(const ccTensorDescriptor_t xDesc, const int32_t blockSize, int32_t *dimCnt,
  871. int32_t dim[], int32_t dimLen);
  872. /**
  873. * @ingroup dnn
  874. * @brief space to depth forward computation
  875. * @param [in] handle cce handle
  876. * @param [in] alpha scaling factors
  877. * @param [in] xDesc descriptor of input tensor
  878. * @param [in] x input data in device memory
  879. * @param [in] blockSize the size of block
  880. * @param [in] beta bias factors
  881. * @param [in] outputDesc descriptor of output tensor
  882. * @param [in|out] output output data in device memory
  883. * @return ccStatus_t
  884. */
  885. ccStatus_t ccSpaceToDepthForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  886. const int32_t blockSize, const void *beta, const ccTensorDescriptor_t outputDesc,
  887. void *output);
  888. /**
  889. * @ingroup dnn
  890. * @brief full eltwise forward computation
  891. * @param [in] handle cce handle
  892. * @param [in] eltDesc eltwise descriptor
  893. * @param [in] mode mode of eltwise
  894. * @param [in] alpha scaling factors
  895. * @param [in] broadcast (reserved) support tensor broadcasting or not
  896. * @param [in] xDesc array of descriptors for input tensors
  897. * @param [in] x array of input data in device memory
  898. * @param [in] inputNum the number of input tensors
  899. * @param [in] beta scaling factors
  900. * @param [in] yDesc descriptor of output tensor
  901. * @param [in|out] y output data in device memory
  902. * @return ccStatus_t
  903. */
  904. ccStatus_t ccEltwiseForwardEx(ccHandle_t handle, ccEltwiseDescriptor_t eltDesc, ccEltwiseMode_t mode, int32_t inputNum,
  905. const void *alpha, bool broadcast, const ccTensorDescriptor_t xDesc[], const void *x[],
  906. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  907. /**
  908. * @ingroup dnn
  909. * @brief create descriptor of eltwise operator
  910. * @param [in|out] eltDesc point to descriptor of eltwise operator
  911. * @return ccStatus_t
  912. */
  913. ccStatus_t ccCreateEltwiseDescriptor(ccEltwiseDescriptor_t *eltDesc);
  914. /**
  915. * @ingroup dnn
  916. * @brief destroy eltwise descriptor
  917. * @param [in] eltDesc point to descriptor of eltwise operator
  918. * @return ccStatus_t
  919. */
  920. ccStatus_t ccDestroyEltwiseDescriptor(ccEltwiseDescriptor_t *eltDesc);
  921. /**
  922. * @ingroup dnn
  923. * @brief append operation after eltwise
  924. * @param [in|out] eltDesc descriptor of eltwise operator
  925. * @param [in] opType operation type for append at eltwise operation
  926. * @param [in] opDesc operation descriptor for the opType
  927. * @return ccStatus_t
  928. */
  929. ccStatus_t ccEltwiseAppendOp(ccEltwiseDescriptor_t eltDesc, ccOpType_t opType, const void *opDesc);
  930. /**
  931. * @ingroup dnn
  932. * @brief set eltwise descriptor's quantize parameters
  933. * @param [in] eltDesc eltwise descriptor
  934. * @param [in] QuantizeInfo descriptor of quantize parameters
  935. * @return ccStatus_t
  936. */
  937. ccStatus_t ccSetEltwiseQuantizeInfo(ccEltwiseDescriptor_t eltDesc, const ccQuantizeDescriptor_t QuantizeInfo);
  938. /**
  939. * @ingroup dnn
  940. * @brief get the temp space size of reshape forward computation, maybe no need temp space
  941. * @param [in] handle cce handle
  942. * @param [in] xDesc descriptor of input tensor
  943. * @param [in] yDesc descriptor of output tensor
  944. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  945. * @return ccStatus_t
  946. */
  947. ccStatus_t ccGetReshapeForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
  948. const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
  949. /**
  950. * @ingroup dnn
  951. * @brief reshape the input tensor
  952. * @param [in] handle cce handle
  953. * @param [in] alpha scaling factors
  954. * @param [in] xDesc input tensor
  955. * @param [in] x input data
  956. * @param [in] workSpace temp space, maybe NULL if no need temp space
  957. * @param [in] workSpaceSizeInBytes sizeof workspace
  958. * @param [in] beta scaling factors
  959. * @param [in] yDesc output tensor
  960. * @param [in|out] y output data
  961. * @return ccStatus_t
  962. */
  963. ccStatus_t ccReshapeForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  964. void *workSpace, uint32_t workSpaceSizeInBytes, const void *beta,
  965. const ccTensorDescriptor_t yDesc, void *y);
  966. /**
  967. * @ingroup dnn
  968. * @brief reshape the input tensor for data in ND format (rawFormat: raw tensor format; TODO confirm exact meaning)
  969. * @param [in] handle cce handle
  970. * @param [in] alpha scaling factors
  971. * @param [in] xDesc input tensor
  972. * @param [in] x input data
  973. * @param [in] workSpace temp space, maybe NULL if no need temp space
  974. * @param [in] workSpaceSizeInBytes sizeof workspace
  975. * @param [in] beta scaling factors
  976. * @param [in] yDesc output tensor
  977. * @param [in|out] y output data
  978. * @return ccStatus_t
  979. */
  980. ccStatus_t ccNdReshapeForward(ccHandle_t handle, const void *alpha, ccTensorFormat_t rawFormat,
  981. const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
  982. uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc,
  983. void *y);
  984. /**
  985. * @ingroup dnn
  986. * @brief Four2Five forward computation
  987. * @param [in] handle cce handle
  988. * @param [in] alpha scaling factors
  989. * @param [in] xDesc descriptor of input tensor
  990. * @param [in] x input data in device memory
  991. * @param [in] beta bias factors
  992. * @param [in] yDesc descriptor of output tensor
  993. * @param [in | out] y output data in device memory
  994. * @return ccStatus_t
  995. */
  996. ccStatus_t ccFour2FiveForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  997. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  998. /**
  999. * @ingroup dnn
  1000. * @brief Five2Four forward computation
  1001. * @param [in] handle cce handle
  1002. * @param [in] alpha scaling factors
  1003. * @param [in] xDesc descriptor of input tensor
  1004. * @param [in] x input data in device memory
  1005. * @param [in] beta bias factors
  1006. * @param [in] yDesc descriptor of output tensor
  1007. * @param [in | out] y output data in device memory
  1008. * @return ccStatus_t
  1009. */
  1010. ccStatus_t ccFive2FourForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1011. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  1012. /**
  1013. * @ingroup dnn
  1014. * @brief get the temp space size of add forward computation
  1015. * @param [in] handle cce handle
  1016. * @param [in] xDesc descriptor of the first input tensor
  1017. * @param [in] wDesc descriptor of the second input tensor
  1018. * @param [in] yDesc descriptor of output tensor
  1019. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  1020. * @return ccStatus_t
  1021. */
  1022. ccStatus_t ccGetAddForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
  1023. const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
  1024. uint32_t *sizeInBytes);
  1025. /**
  1026. * @ingroup dnn
  1027. * @brief Add forward computation
  1028. * @param [in] handle cce handle
  1029. * @param [in] alpha scaling factors
  1030. * @param [in] xDesc descriptor of input tensor
  1031. * @param [in] x one input data in device memory
  1032. * @param [in] wDesc descriptor of input tensor
  1033. * @param [in] w the other input data in device memory
  1034. * @param [in] beta bias factors
  1035. * @param [in] workSpace the address apply in HBM
  1036. * @param [in] workSpaceSizeInBytes the size apply in HBM
  1037. * @param [in] yDesc descriptor of output tensor
  1038. * @param [in|out] y output data in device memory
  1039. * @return ccStatus_t
  1040. */
  1041. ccStatus_t ccAddForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1042. const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
  1043. uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
  1044. /**
  1045. * @ingroup dnn
  1046. * @brief Stack forward computation
  1047. * @param [in] handle cce handle
  1048. * @param [in] alpha scaling factors
  1049. * @param [in] xDesc descriptor of input tensor
  1050. * @param [in] x[] x array is host mem array, the element is device address of input data
  1051. * @param [in] num number of input tensor
  1052. * @param [in] axis along which axis to stack the input tensor
  1053. * @param [in] beta bias factors
  1054. * @param [in] yDesc descriptor of output tensor
  1055. * @param [in|out] y output data in device memory
  1056. * @return ccStatus_t
  1057. */
  1058. ccStatus_t ccStackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x[],
  1059. uint32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  1060. /**
  1061. * @ingroup dnn
  1062. * @brief get the output dimension info of stack
  1063. * @param [in] xDesc descriptor of input tensor
  1064. * @param [in] num number of input tensor
  1065. * @param [in] axis along which axis to stack the input tensor
  1066. * @param [in|out] n point to batch size
  1067. * @param [in|out] c point to channels
  1068. * @param [in|out] h point to height
  1069. * @param [in|out] w point to width
  1070. * @param [in|out] realDimCnt point to real dimCnt after stack
  1071. * @return ccStatus_t
  1072. */
  1073. ccStatus_t ccGetStackOutputDim(const ccTensorDescriptor_t xDesc, uint32_t num, int32_t axis, int32_t *n, int32_t *c,
  1074. int32_t *h, int32_t *w, int32_t *realDimCnt);
  1075. /**
  1076. * @ingroup dnn
  1077. * @brief get the output dimension info of stack
  1078. * @param [in] xDesc descriptor of input tensor
  1079. * @param [in] num number of input tensor
  1080. * @param [in] axis along which axis to stack the input tensor
  1081. * @param [in|out] dimCnt point to the output dim count
  1082. * @param [in|out] dim array to save dim values
  1083. * @param [in] dimLen length of dim
  1084. * @return ccStatus_t
  1085. */
  1086. ccStatus_t ccGetStackOutputDim(const ccTensorDescriptor_t xDesc, uint32_t num, int32_t axis, int32_t *dimCnt,
  1087. int32_t dim[], int32_t dimLen);
  1088. /**
  1089. * @ingroup dnn
  1090. * @brief report whether the grid generator is needed or not
  1091. * @param [in] inputH, inputW, outputH, outputW, alignCorner(interp=true,resizeBilinear depends para align corner)
  1092. * @param [out] needGridFlag true means the grid generator is needed, false means it is not needed
  1093. * @return ccStatus_t
  1094. */
  1095. ccStatus_t ccIsGridGenetatorNeed(int32_t inputH, int32_t inputW, int32_t outputH, int32_t outputW, bool alignCorner,
  1096. bool &needGridFlag);
  1097. /**
  1098. * @ingroup dnn
  1099. * @brief get the temp space size of Deconvolution forward computation, maybe no need temp space
  1100. * @param [in] handle cce handle
  1101. * @param [in] deconvDesc descriptor of Deconvolution operator
  1102. * @param [in] xDesc descriptor of input tensor
  1103. * @param [in] wDesc descriptor of filter
  1104. * @param [in] yDesc descriptor of output tensor
  1105. * @param [in] algo algorithm of Deconvolution forward
  1106. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  1107. * @return ccStatus_t
  1108. */
  1109. ccStatus_t ccGetDeconvolutionForwardWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t deconvDesc,
  1110. const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
  1111. const ccTensorDescriptor_t yDesc, ccConvolutionFwdAlgo_t algo,
  1112. uint32_t *sizeInBytes);
  1113. /**
  1114. * @ingroup dnn
  1115. * @brief Deconvolution forward computation
  1116. * @param [in] handle cce handle
  1117. * @param [in] deconvDesc descriptor of deconvolution operator
  1118. * @param [in] alpha scaling factors
  1119. * @param [in] xDesc descriptor of input tensor
  1120. * @param [in] x input data in device memory
  1121. * @param [in] wDesc descriptor of filter
  1122. * @param [in] w filter data in device memory
  1123. * @param [in] biasDesc descriptor of bias
  1124. * @param [in] bias bias data in device memory
  1125. * @param [in] algo algorithm of deconvolution forward
  1126. * @param [in] workSpace temp space, maybe NULL if no need temp space
  1127. * @param [in] workSpaceSizeInBytes sizeof workspace
  1128. * @param [in] beta scaling factors
  1129. * @param [in] yDesc descriptor of output tensor
  1130. * @param [in|out] y output data in device memory
  1131. * @return ccStatus_t
  1132. */
  1133. ccStatus_t ccDeconvolutionForward(ccHandle_t handle, const ccConvolutionDescriptor_t deconvDesc, const void *alpha,
  1134. const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
  1135. const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
  1136. ccConvolutionFwdAlgo_t algo, void *workSpace, uint32_t workSpaceSizeInBytes,
  1137. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  1138. #define MODE_C_N (0)
  1139. #define MODE_N_C (1)
  1140. /**
  1141. * [ccArgMaxForward]
  1142. * @param [in] handle [handle]
  1143. * @param [in] alpha [reserved parameters]
  1144. * @param [in] xDesc [x tensor descriptor]
  1145. * @param [in] x [input tensor]
  1146. * @param [in] outMaxVal [Whether to return the maximum value, true: return max value; false: return max value index
  1147. * ]
  1148. * @param [in] topK [The number of maximum indexes or maximum values to return]
  1149. * @param [in] axis [Describes which axis of the input Tensor to reduce across]
  1150. * @param [in] beta [reserved parameters]
  1151. * @param [in] yDesc [y tensor descriptor]
  1152. * @param [in|out] y [The max value index or max value tensor]
  1153. */
  1154. ccStatus_t ccArgMaxForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1155. bool outMaxVal, uint32_t topK, int32_t axis, const void *beta,
  1156. const ccTensorDescriptor_t yDesc, void *y);
  1157. /**
  1158. * [ccGetArgMaxOutputDim]
  1159. * @param [in] xDesc [x tensor descriptor]
  1160. * @param [in] outMaxVal [Whether to return the maximum value, true: return max value; false: return max value index
  1161. * ]
  1162. * @param [in] topK [The number of maximum indexes or maximum values to return]
  1163. * @param [in] axis [Describes which axis of the input Tensor to reduce across]
  1164. * @param [in|out] dimCnt [point to the output dimCnt]
  1165. * @param [in|out] dim [arrays to save dims]
  1166. * @param [in] dimLen [length of dim]
  1167. */
  1168. ccStatus_t ccGetArgMaxOutputDim(const ccTensorDescriptor_t xDesc, bool outMaxVal, uint32_t topK, int32_t axis,
  1169. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  1170. /**
  1171. * [ccGetArgMaxOutputDim]
  1172. * @param [in] xDesc [x tensor descriptor]
  1173. * @param [in] outMaxVal [Whether to return the maximum value, true: return max value; false: return max value index
  1174. * ]
  1175. * @param [in] topK [The number of maximum indexes or maximum values to return]
  1176. * @param [in] axis [Describes which axis of the input Tensor to reduce across]
  1177. * @param [in|out] n [point to batch number of the output tensor]
  1178. * @param [in|out] c [point to channel of the output tensor]
  1179. * @param [in|out] h [point to height of the output tensor]
  1180. * @param [in|out] w [point to width of the output tensor]
  1181. */
  1182. ccStatus_t ccGetArgMaxOutputDim(const ccTensorDescriptor_t xDesc, bool outMaxVal, uint32_t topK, int32_t axis,
  1183. int32_t *n, int32_t *c, int32_t *h, int32_t *w);
  1184. /**
  1185. * @ingroup dnn
  1186. * @brief Yolo2ReorgForward computation
  1187. * @param [in] handle CCE handle
  1188. * @param [in] stride scale parameter
  1189. * @param [in] reverse reverse parameter
  1190. * @param [in] alpha alpha factor
  1191. * @param [in] beta beta factor
  1192. * @param [in] xDesc x-tensor descriptor
  1193. * @param [in] x x-tensor in device memory
  1194. * @param [in] workSpaceSizeInBytes temporary work space size (passed by value)
  1195. * @param [in] workSpace temporary work space in device memory
  1196. * @param [in] yDesc y-tensor descriptor
  1197. * @param [out] y y-tensor in device memory
  1198. * @return ccStatus_t
  1199. */
  1200. ccStatus_t ccYolo2ReorgForward(ccHandle_t handle, int32_t stride, bool reverse, const void *alpha,
  1201. const ccTensorDescriptor_t xDesc, const void *x, uint32_t workSpaceSizeInBytes,
  1202. void *workSpace, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  1203. /**
  1204. * @param [in] stride scale parameter
  1205. * @param [in] reverse reverse parameter
  1206. * @param [in] xDesc x-tensor descriptor
  1207. * @param [in|out] n point to batch size
  1208. * @param [in|out] c point to channels
  1209. * @param [in|out] h point to height of feature map
  1210. * @param [in|out] w point to width of feature map
  1211. */
  1212. ccStatus_t ccGetReorgOutPutDim(int32_t stride, bool reverse, const ccTensorDescriptor_t xDesc, int32_t *n, int32_t *c,
  1213. int32_t *h, int32_t *w);
  1214. /**
  1215. * @param [in] stride scale parameter
  1216. * @param [in] reverse reverse parameter
  1217. * @param [in] xDesc x-tensor descriptor
  1218. * @param [out] dimCnt output tensor dim cnt
  1219. * @param [out] dim output tensor dim
  1220. * @param [in] dimLen length of dim
  1221. * @return ccStatus_t
  1222. */
  1223. ccStatus_t ccGetReorgOutPutDim(int32_t stride, bool reverse, const ccTensorDescriptor_t xDesc, int32_t *dimCnt,
  1224. int32_t dim[], int32_t dimLen);
  1225. /**
  1226. * @param [in] xDesc x-tensor descriptor
  1227. * @param [out] sizeInBytes temporary work space size
  1228. */
  1229. ccStatus_t ccGetYolo2ReorgForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
  1230. /**
  1231. * @ingroup dnn
  1232. * @brief full shuffle channel forward computation
  1233. * @param [in] handle cce handle
  1234. * @param [in] groupNum number of groups in a channel
  1235. * @param [in] subgroupNum number of sub-groups in a group
  1236. * @param [in] alpha scaling factors
  1237. * @param [in] xDesc descriptor of input tensor
  1238. * @param [in] x input data in device memory
  1239. * @param [in] beta scaling factors
  1240. * @param [in] yDesc descriptor of output tensor
  1241. * @param [in|out] y output data in device memory
  1242. * @return ccStatus_t
  1243. */
  1244. ccStatus_t ccShuffleChannelForward(ccHandle_t handle, int32_t groupNum, int32_t subgroupNum, const void *alpha,
  1245. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  1246. const ccTensorDescriptor_t yDesc, void *y);
  1247. /**
  1248. * @ingroup dnn
  1249. * @brief get the temp space size of permute forward computation, maybe no need temp space
  1250. * @param [in] handle cce handle
  1251. * @param [in] xDesc descriptor of input tensor
  1252. * @param [in] yDesc descriptor of output tensor
  1253. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  1254. * @return ccStatus_t
  1255. */
  1256. ccStatus_t ccGetPermuteForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
  1257. const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
  1258. /**
  1259. * @ingroup dnn
  1260. * @brief get the output dim of permute forward computation
  1261. * @param [in] xDesc descriptor of input tensor
  1262. * @param [in] dimIndex dim Index (dimIndexLen: presumably the length of dimIndex, TODO confirm)
  1263. * @param [in|out] dimCnt dim count
  1264. * @param [in|out] dim dim value
  1265. * @param [in] dimLen length of dim
  1266. * @return ccStatus_t
  1267. */
  1268. ccStatus_t ccGetPermuteOutputDim(const ccTensorDescriptor_t xDesc, const int32_t dimIndex[], const int32_t dimIndexLen,
  1269. int32_t *dimCnt, int32_t *dim, int32_t dimLen);
  1270. /**
  1271. * @ingroup dnn
  1272. * @brief full permute forward computation
  1273. * @param [in] handle cce handle
  1274. * @param [in] dimIndex dim Index,only support [0,1,2,3]
  1275. * @param [in] alpha scaling factors
  1276. * @param [in] xDesc descriptor of input tensor
  1277. * @param [in] x input data in device memory
  1278. * @param [in] workspace temp space, maybe NULL if no need temp space
  1279. * @param [in] workSpaceSizeInBytes sizeof workspace
  1280. * @param [in] beta scaling factors
  1281. * @param [in] yDesc descriptor of output tensor
  1282. * @param [in|out] y output data in device memory
  1283. * @return ccStatus_t
  1284. */
  1285. ccStatus_t ccPermuteForward(ccHandle_t handle, const int32_t dimIndex[], const void *alpha,
  1286. const ccTensorDescriptor_t xDesc, const void *x, void *workspace,
  1287. uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  1288. /**
  1289. * @ingroup dnn
  1290. * @brief full split forward computation
  1291. * @param [in] handle cce handle
  1292. * @param [in] alpha scaling factors
  1293. * @param [in] xDesc descriptor of input tensor
  1294. * @param [in] x input data in device memory
  1295. * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt)
  1296. * @param [in] num the number of outputs
  1297. * @param [in] beta scaling factors
  1298. * @param [in] yDescArr descriptors of output tensors
  1299. * @param [in|out] yArr output data array in device memory
  1300. * @return ccStatus_t
  1301. */
  1302. ccStatus_t ccSplitForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1303. int32_t axis, uint32_t num, const void *beta, const ccTensorDescriptor_t yDescArr[],
  1304. void *yArr[]);
  1305. /**
  1306. * @ingroup dnn
  1307. * @brief get the output dimensions info of split
  1308. * @param [in] xDesc descriptor of input tensor
  1309. * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt)
  1310. * @param [in] num the number of outputs
  1311. * @param [in] sizes Optional, used to specify the sizes of each output tensor along split dim. The tensor x would
  1312. * be split evenly along split dim if sizes is NULL
  1313. * @param [in|out] nArr point to the first element of batch sizes
  1314. * @param [in|out] cArr point to the first element of channels
  1315. * @param [in|out] hArr point to the first element of heights of feature map
  1316. * @param [in|out] wArr point to the first element of widths of feature map
  1317. * @return ccStatus_t
  1318. */
  1319. ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num,
  1320. const uint32_t sizes[], uint32_t nArr[], uint32_t cArr[], uint32_t hArr[],
  1321. uint32_t wArr[]);
  1322. /**
  1323. * @ingroup dnn
  1324. * @brief Get split output shape(s).
  1325. * @param [in] xDesc input tensor, support ND and NC1HWC0
  1326. * @param [in] axis split axis, a negative axis is increased by dimCnt once.
  1327. * @param [in] num number of splits.
  1328. * @param [in] sizes split dim sizes on axis. If NULL is set, the input is divided into num equal parts.
  1329. * @param [output] dimCnt array of split dimCnt. One to one correspondence with the split outputs.
  1330. * @param [output] dim array of split dim arrays. One to one correspondence with the split outputs.
  1331. * @param [in] dimLen length of dim (pass in the length of the entire space pointed to by dim,
  1332. not just the length of the dim array, because dim is a level 2 array;
  1333. dimLen = lengthof dim[][], not just lengthof dim[])
  1334. * @return ccStatus_t
  1335. */
  1336. ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num,
  1337. const uint32_t sizes[], int32_t *dimCnt, int32_t *dim[], int32_t dimLen);
  1338. /**
  1339. * @ingroup dnn
  1340. * @brief create weight compress info
  1341. * @param [in|out] compressInfo point to CompressInfo
  1342. * @return ccStatus_t
  1343. */
  1344. ccStatus_t ccCreateWeightCompressInfo(ccWeightCompressInfo_t **compressInfo);
  1345. /**
  1346. * @ingroup dnn
  1347. * @brief destroy weight compress info
  1348. * @param [in] compressInfo point to CompressInfo
  1349. * @return ccStatus_t
  1350. */
  1351. ccStatus_t ccDestroyWeightCompressInfo(ccWeightCompressInfo_t **compressInfo);
  1352. /**
  1353. * @ingroup dnn
  1354. * @brief create compress table
  1355. * @param [in|out] compressTab point to weight compress table
  1356. * @return ccStatus_t
  1357. */
  1358. ccStatus_t ccCreateWeightCompressTab(ccWeightCompressTab_t **compressTab);
  1359. /**
  1360. * @ingroup dnn
  1361. * @brief destroy compress table
  1362. * @param [in] compressTab point to weight compress table
  1363. * @return ccStatus_t
  1364. */
  1365. ccStatus_t ccDestroyWeightCompressTab(ccWeightCompressTab_t **compressTab);
  1366. /**
  1367. * @ingroup dnn
  1368. * @brief get fc compress info
  1369. * @param [in] xDesc descriptor of input tensor
  1370. * @param [in] wDesc descriptor of weight tensor
  1371. * @param [in] biasDesc descriptor of bias tensor
  1372. * @param [in] dataTypeTransmode mode of data type transform
  1373. * @param [in] weightCompressInfo compress info, compute based on tiling method
  1374. * @param [in|out] outputSize output data size in byte
  1375. * @param [in|out] infoTabSize compress info table
  1376. * @return ccStatus_t
  1377. */
  1378. ccStatus_t ccGetCompressedFcWeightInfo(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
  1379. const ccTensorDescriptor_t biasDesc, ccDataTypeTransMode_t dataTypeTransmode,
  1380. ccWeightCompressInfo_t *weightCompressInfo, uint32_t *outputSize,
  1381. uint32_t *infoTabSize);
  1382. /**
  1383. * @ingroup dnn
  1384. * @brief compress fc
  1385. * @param [in] wDesc descriptor of weight tensor
  1386. * @param [in] w filter data in device memory
  1387. * @param [in] weightCompressInfo compress info, compute based on tiling method
  1388. * @param [in] dataTypeTransmode mode of data type transform
        * @param [in] yDesc filter descriptor, presumably describing the output y (confirm)
  1389. * @param [in|out] y output data in device memory
  1390. * @param [in] ySize transformed data size in byte
  1391. * @param [in|out] yCompressedSize compressed output data size in byte
  1392. * @param [in|out] infoTab compressed info table
  1393. * @param [in] infoTabSize compressed info table size in byte
  1394. * @return ccStatus_t
  1395. */
  1396. ccStatus_t ccCompressWeight(const ccFilterDescriptor_t wDesc, const void *w,
  1397. const ccWeightCompressInfo_t *weightCompressInfo, ccDataTypeTransMode_t dataTypeTransmode,
  1398. ccFilterDescriptor_t yDesc, void *y, uint32_t ySize, uint32_t *yCompressedSize,
  1399. void *infoTab, uint32_t infoTabSize);
  1400. /**
  1401. * @ingroup dnn
  1402. * @brief restore compressed fc data
  1403. * @param [in] x input data in device memory
  1404. * @param [in] xSizeInBytes input compressed weight data size in byte
  1405. * @param [in|out] y output data in device memory
  1406. * @param [in] ySizeInBytes output data size in byte
        * @param [in] kind memcpy kind (rtMemcpyKind_t)
  1407. * @return ccStatus_t
  1408. */
  1409. ccStatus_t ccRestoreCompressedWeight(const void *x, uint32_t xSizeInBytes, void *y, uint32_t ySizeInBytes,
  1410. rtMemcpyKind_t kind);
  1411. /**
  1412. * @ingroup dnn
  1413. * @brief create quantize parameters struct
  1414. * @param [in|out] quantizeInfo descriptor of quantize parameters
  1415. * @return ccStatus_t
  1416. */
  1417. ccStatus_t ccCreateQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo);
  1418. /**
  1419. * @ingroup dnn
  1420. * @brief destroy quantize parameters struct
  1421. * @param [in] quantizeInfo descriptor of quantize parameters
  1422. * @return ccStatus_t
  1423. */
  1424. ccStatus_t ccDestoryQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo);
  1425. /**
  1426. * @ingroup dnn
  1427. * @brief set quantize parameters
  1428. * @param [in] quantizeInfo descriptor of quantize parameters
  1429. * @param [in] scaleValMode enum type for quantize scale value type (normal or sqrt)
  1430. * @param [in] scale quantize scale value
  1431. * @param [in] offset quantize offset (when quantize algorithm is half offset or full offset, this should be
  1432. * configured)
  1433. * @param [in] offsetPad padding value for load3d (only for half offset or full offset)
  1434. * @return ccStatus_t
  1435. */
  1436. ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
  1437. const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad);
  1438. /**
  1439. * @ingroup dnn
  1440. * @brief set Requantize parameters
  1441. * @param [in] quantizeInfo descriptor of quantize parameters
  1442. * @param [in] scaleValMode enum type for requantize scale value type (normal or sqrt)
  1443. * @param [in] scaleRq quantize scale value
  1444. * @param [in] nextLayerOffset quantize offset (when quantize algorithm is half offset or full offset, this should be
  1445. * configured)
  1446. * @param [in] offsetw offset for filter (only config for full offset quantize)
  1447. * @return ccStatus_t
  1448. */
  1449. ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
  1450. const uint16_t *scaleRq, const uint16_t *nextLayerOffset, const int32_t *offsetw);
  1451. /**
  1452. * @ingroup dnn
  1453. * @brief set Dequantize parameters
  1454. * @param [in] quantizeInfo descriptor of quantize parameters
  1455. * @param [in] scaleValMode enum type for dequantize scale value type (normal or sqrt)
  1456. * @param [in] scaleDq quantize scale value
  1457. * @param [in] offsetw offset for filter (only config for full offset quantize)
  1458. * @return ccStatus_t
  1459. */
  1460. ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
  1461. const uint16_t *scaleDq, const int32_t *offsetw);
  1462. /**
  1463. * @ingroup dnn
  1464. * @brief set convolution descriptor's quantize parameters
  1465. * @param [in] convDesc convolution descriptor
  1466. * @param [in] QuantizeInfo descriptor of quantize parameters
  1467. * @return ccStatus_t
  1468. */
  1469. ccStatus_t ccSetConvolutionQuantizeInfo(ccConvolutionDescriptor_t convDesc, const ccQuantizeDescriptor_t QuantizeInfo);
  1470. /**
  1471. * @ingroup dnn
  1472. * @brief set all offset quantize parameters on a quantize descriptor
  1473. * @param [in] quantizeInfo descriptor of quantize parameters
  1474. * @param [in] offsetW quantize parameter; presumably the offset for filter (confirm)
        * @param [in] offsetD quantize parameter; presumably the offset for input data (confirm)
  1475. * @param [in] scaleReq quantize parameter; presumably the requantize scale value (confirm)
  1476. * @param [in] offsetDNext quantize parameter; presumably the data offset for the next layer (confirm)
  1477. * @return ccStatus_t
  1478. */
  1479. ccStatus_t ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, const uint8_t *offsetW,
  1480. const uint8_t *offsetD, const uint16_t *scaleReq, const uint16_t *offsetDNext);
  1481. /**
  1482. * @ingroup dnn
  1483. * @brief set full connection descriptor's quantize parameters
  1484. * @param [in] fcDesc full connection descriptor
  1485. * @param [in] QuantizeInfo descriptor of quantize parameters
  1486. * @return ccStatus_t
  1487. */
  1488. ccStatus_t ccSetFullConnectionQuantizeInfo(ccFullConnectionDescriptor_t fcDesc,
  1489. const ccQuantizeDescriptor_t QuantizeInfo);
  1490. /**
  1491. * @ingroup dnn
  1492. * @brief set pooling descriptor's quantize parameters
  1493. * @param [in] poolingDesc pooling descriptor
  1494. * @param [in] QuantizeInfo descriptor of quantize parameters
  1495. * @return ccStatus_t
  1496. */
  1497. ccStatus_t ccSetPoolingQuantizeInfo(ccPoolingDescriptor_t poolingDesc, const ccQuantizeDescriptor_t QuantizeInfo);
  1498. /**
  1499. * @ingroup dnn
  1500. * @brief set full connection descriptor's info table
  1501. * @param [in] fcDesc full connection descriptor
  1502. * @param [in] infoTabSize table size
  1503. * @param [in] infoTab pointer to info table
        * @param [in] algo full connection forward algorithm, defaults to CC_FULLCONNECT_FWD_ALGO_HALF
  1504. * @return ccStatus_t
  1505. */
  1506. ccStatus_t ccSetFullConnectionDescriptor(ccFullConnectionDescriptor_t fcDesc, uint32_t infoTabSize, const void *infoTab,
  1507. ccFullConnectFwdAlgo_t algo = CC_FULLCONNECT_FWD_ALGO_HALF);
  1508. /**
  1509. * @ingroup dnn
  1510. * @brief set full connection descriptor's relu flag
  1511. * @param [in] fcDesc full connection descriptor
  1512. * @param [in] opType operation type to append to the full connection operation
  1513. * @param [in] opDesc operation descriptor for the opType
  1514. * @return ccStatus_t
  1515. */
  1516. ccStatus_t ccFullConnectionAppendOp(ccFullConnectionDescriptor_t fcDesc, tagCcOpType opType, const void *opDesc);
  1517. /**
  1518. * @ingroup dnn
  1519. * @brief check aipp basic info
  1520. * @param [in] inputFormat format of input image
  1521. * @param [in] loadStartPosH vertical start position in source image
  1522. * @param [in] loadStartPosW horizontal start position in source image
  1523. * @param [in] srcImageSizeH vertical size of source image
  1524. * @param [in] srcImageSizeW horizontal size of source image
  1525. * @param [in] cpaddingValue C direction padding value
  1526. * @param [in] cscSwitch csc enable or not
  1527. * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image
  1528. * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV
  1529. * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is
  1530. * not useful.
  1531. * @return ccStatus_t
  1532. */
  1533. ccStatus_t ccCheckConvolutionAippCommInfo(ccAippInputFormat_t inputFormat, int32_t loadStartPosW, int32_t loadStartPosH,
  1534. int32_t srcImageSizeW, int32_t srcImageSizeH, float cpaddingValue,
  1535. bool cscSwitch, bool rbuvSwapSwitch, bool axSwapSwitch, bool singleLineMode);
  1536. /**
  1537. * @ingroup dnn
  1538. * @brief check aipp dtc info
  1539. * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x
  1540. * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x
  1541. * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x
  1542. * @return ccStatus_t
  1543. */
  1544. ccStatus_t ccCheckConvolutionAippDtcInfo(int32_t dtcPixelMeanChn0, int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2,
  1545. float dtcPixelMinChn0, float dtcPixelMinChn1, float dtcPixelMinChn2,
  1546. float dtcPixelVarReciChn0, float dtcPixelVarReciChn1,
  1547. float dtcPixelVarReciChn2);
  1548. /**
  1549. * @ingroup dnn
  1550. * @brief check aipp pad info
  1551. * @param [in] paddingMode padding mode
  1552. * @param [in] leftPaddingSize left hblank/padding size
  1553. * @param [in] rightPaddingSize right hblank/padding size
  1554. * @param [in] topPaddingSize top padding size
  1555. * @param [in] bottomPaddingSize bottom padding size
  1556. * @return ccStatus_t
  1557. */
  1558. ccStatus_t ccCheckConvolutionAippPadInfo(ccAippPaddingMode_t paddingMode, int32_t leftPaddingSize,
  1559. int32_t rightPaddingSize, int32_t topPaddingSize, int32_t bottomPaddingSize);
  1560. /**
  1561. * @ingroup dnn
  1562. * @brief check aipp csc info
  1563. * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n
  1564. * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m
  1565. * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m
  1566. * @return ccStatus_t
  1567. */
  1568. ccStatus_t ccCheckConvolutionAippCscInfo(int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, int32_t cscMatrixR0C2,
  1569. int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, int32_t cscMatrixR1C2,
  1570. int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, int32_t cscMatrixR2C2,
  1571. int32_t cscOutputBias0, int32_t cscOutputBias1, int32_t cscOutputBias2,
  1572. int32_t cscInputBias0, int32_t cscInputBias1, int32_t cscInputBias2);
  1573. /**
  1574. * @ingroup dnn
  1575. * @brief check aipp scf info
  1576. * @param [in] scfSwitch scaling enable or not
  1577. * @param [in] scfInputW input width of scaling
  1578. * @param [in] scfInputH input height of scaling
  1579. * @param [in] scfOutputW output width of scaling
  1580. * @param [in] scfOutputH output height of scaling
  1581. * @return ccStatus_t
  1582. */
  1583. ccStatus_t ccCheckConvolutionAippScfInfo(bool scfSwitch, int32_t scfInputW, int32_t scfInputH, int32_t scfOutputW,
  1584. int32_t scfOutputH);
  1585. /**
  1586. * @ingroup dnn
  1587. * @brief check aipp param
  1588. * @param [in] convDesc descriptor of conv operator
  1589. * @param [in] xDesc input tensor info
  1590. * @param [in] yDesc output tensor info
  1591. * @return ccStatus_t
  1592. */
  1593. ccStatus_t ccCheckConvFwdAippParam(const ccConvolutionDescriptor_t convDesc, const ccTensorDescriptor_t xDesc,
  1594. const ccTensorDescriptor_t yDesc);
  1595. /**
  1596. * @ingroup dnn
  1597. * @brief init aipp basic info
  1598. * @param [in|out] convDesc descriptor of conv operator
  1599. * @param [in] inputFormat format of input image
  1600. * @param [in] loadStartPosH vertical start position in source image
  1601. * @param [in] loadStartPosW horizontal start position in source image
  1602. * @param [in] srcImageSizeH vertical size of source image
  1603. * @param [in] srcImageSizeW horizontal size of source image
  1604. * @param [in] cpaddingValue C direction padding value
  1605. * @param [in] cscSwitch csc enable or not
  1606. * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image
  1607. * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV
  1608. * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is
  1609. * not useful.
  1610. * @return ccStatus_t
  1611. */
  1612. ccStatus_t ccSetConvolutionAippCommInfo(ccConvolutionDescriptor_t convDesc, ccAippInputFormat_t inputFormat,
  1613. int32_t loadStartPosW, int32_t loadStartPosH, int32_t srcImageSizeW,
  1614. int32_t srcImageSizeH, float cpaddingValue, bool cscSwitch, bool rbuvSwapSwitch,
  1615. bool axSwapSwitch, bool singleLineMode);
  1616. /**
  1617. * @ingroup dnn
  1618. * @brief init aipp dtc info
  1619. * @param [in|out] convDesc descriptor of conv operator
  1620. * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x
  1621. * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x
  1622. * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x
  1623. * @return ccStatus_t
  1624. */
  1625. ccStatus_t ccSetConvolutionAippDtcInfo(ccConvolutionDescriptor_t convDesc, int32_t dtcPixelMeanChn0,
  1626. int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, float dtcPixelMinChn0,
  1627. float dtcPixelMinChn1, float dtcPixelMinChn2, float dtcPixelVarReciChn0,
  1628. float dtcPixelVarReciChn1, float dtcPixelVarReciChn2);
  1629. /**
  1630. * @ingroup dnn
  1631. * @brief init aipp pad info
  1632. * @param [in|out] convDesc descriptor of conv operator
  1633. * @param [in] paddingMode padding mode
  1634. * @param [in] leftPaddingSize left hblank/padding size
  1635. * @param [in] rightPaddingSize right hblank/padding size
  1636. * @param [in] topPaddingSize top padding size
  1637. * @param [in] bottomPaddingSize bottom padding size
  1638. * @return ccStatus_t
  1639. */
  1640. ccStatus_t ccSetConvolutionAippPadInfo(ccConvolutionDescriptor_t convDesc, ccAippPaddingMode_t paddingMode,
  1641. int32_t leftPaddingSize, int32_t rightPaddingSize, int32_t topPaddingSize,
  1642. int32_t bottomPaddingSize);
  1643. /**
  1644. * @ingroup dnn
  1645. * @brief init aipp csc info
  1646. * @param [in|out] convDesc descriptor of conv operator
  1647. * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n
  1648. * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m
  1649. * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m
  1650. * @return ccStatus_t
  1651. */
  1652. ccStatus_t ccSetConvolutionAippCscInfo(ccConvolutionDescriptor_t convDesc, int32_t cscMatrixR0C0, int32_t cscMatrixR0C1,
  1653. int32_t cscMatrixR0C2, int32_t cscMatrixR1C0, int32_t cscMatrixR1C1,
  1654. int32_t cscMatrixR1C2, int32_t cscMatrixR2C0, int32_t cscMatrixR2C1,
  1655. int32_t cscMatrixR2C2, int32_t cscOutputBias0, int32_t cscOutputBias1,
  1656. int32_t cscOutputBias2, int32_t cscInputBias0, int32_t cscInputBias1,
  1657. int32_t cscInputBias2);
  1658. /**
  1659. * @ingroup dnn
  1660. * @brief init aipp scf info
  1661. * @param [in|out] convDesc descriptor of conv operator
  1662. * @param [in] scfSwitch scaling enable or not
  1663. * @param [in] scfInputW input width of scaling
  1664. * @param [in] scfInputH input height of scaling
  1665. * @param [in] scfOutputW output width of scaling
  1666. * @param [in] scfOutputH output height of scaling
  1667. * @return ccStatus_t
  1668. */
  1669. ccStatus_t ccSetConvolutionAippScfInfo(ccConvolutionDescriptor_t convDesc, bool scfSwitch, int32_t scfInputW,
  1670. int32_t scfInputH, int32_t scfOutputW, int32_t scfOutputH);
  1671. /**
  1672. * @ingroup dnn
  1673. * @brief set dynamic aipp parameter address and enflag info
  1674. * @param [in|out] convDesc descriptor of conv operator
  1675. * @param [in] dyncParaAddr aipp parameter address
  1676. * @param [in] dyncAippFlag flag to show whether to use dynamic aipp
        * @param [in] rotationFlag rotation flag, defaults to false (exact effect not visible here; confirm)
  1677. * @return ccStatus_t
  1678. */
  1679. ccStatus_t ccSetConvolutionAippDyncParaAddr(ccConvolutionDescriptor_t convDesc, const void *dyncParaAddr,
  1680. bool dyncAippFlag, bool rotationFlag = false);
  1681. /**
  1682. * @ingroup dnn
  1683. * @brief check dynamic aipp parameter
  1684. * @param [in] dynamicParamAddr aipp parameter address
  1685. * @param [in] dataLength parameter length
  1686. * @param [in] convolutionDimW convDimW
  1687. * @param [in] convolutionDimH convDimH
  1688. * @return ccStatus_t
  1689. */
  1690. ccStatus_t ccCheckDynamicAippParam(const void *dynamicParamAddr, uint32_t dataLength, int64_t convolutionDimW,
  1691. int64_t convolutionDimH);
  1692. /** @ingroup dnn
  1693. * @brief trans mean and var
  1694. * @param [in|out] mean transformed to mean' = bnScale/sqrt(var)
  1695. * @param [in|out] var transformed to var' = -bnScale * mean / sqrt(var) + bnBias
        * @param [in] bnScaleBiasMeanVarDesc tensor descriptor; per its name shared by bnScale, bnBias, mean and var (confirm)
        * @param [in] alpha scaling factor (usage not visible here; confirm)
        * @param [in] beta scaling factor (usage not visible here; confirm)
        * @param [in] bnScale batchnorm scale used in the formulas above (declared non-const; confirm it is not written)
        * @param [in] bnBias batchnorm bias used in the formulas above (declared non-const; confirm it is not written)
        * @param [in] epsilon NOTE(review): not shown in the formulas above; presumably added to var before sqrt - confirm
  1696. * @return ccStatus_t
  1697. */
  1698. ccStatus_t ccTransBatchnormMeanAndVar(void *mean, void *var, const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
  1699. const void *alpha, const void *beta, void *bnScale, void *bnBias, double epsilon);
  1700. /**
  1701. * @ingroup dnn
  1702. * @brief init deconvolution adj or targetShape info.
  1703. * @param [in] convDesc conv descriptor.
  1704. * @param [in] adjSize size of adj (presumably the number of elements; confirm).
  1705. * @param [in] adj adjust values for the output (H and W).
        * @param [in] targetShapeSize size of targetShape (presumably the number of elements; confirm).
  1706. * @param [in] targetShape values of output shape, if this pointer was set, ignore adj.
  1707. * @return ccStatus_t
  1708. */
  1709. ccStatus_t ccSetDeconvolutionOutShapeInfo(ccConvolutionDescriptor_t convDesc, uint32_t adjSize, const uint32_t *adj,
  1710. uint32_t targetShapeSize, const uint32_t *targetShape);
  1711. /**
  1712. * @ingroup dnn
  1713. * @brief gather elements according to the indices.
        * @param [in] handle cce handle
  1714. * @param [in] alpha reserved.
  1715. * @param [in] xDesc description of the tensor from which to gather elements.
  1716. * @param [in] x data point of the tensor from which to gather elements.
  1717. * @param [in] indicesDesc description of the tensor of indices.
  1718. * @param [in] indices data point of the tensor of indices.
  1719. * @param [in] beta reserved.
  1720. * @param [in] outputDesc description of the output tensor.
  1721. * @param [output] output data point of the output tensor.
  1722. * @return ccStatus_t
  1723. */
  1724. ccStatus_t ccGatherNdForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1725. const ccTensorDescriptor_t indicesDesc, const void *indices, const void *beta,
  1726. const ccTensorDescriptor_t outputDesc, void *output);
  1727. /**
  1728. * @ingroup dnn
  1729. * @brief get output shape of gather_nd.
  1730. * @param [in] xDesc description of the tensor from which to gather elements.
  1731. * @param [in] indicesDesc description of the tensor of indices.
  1732. * @param [output] n dim-size of n-dim.
  1733. * @param [output] c dim-size of c-dim.
  1734. * @param [output] h dim-size of h-dim.
  1735. * @param [output] w dim-size of w-dim.
  1736. * @param [output] realDimCnt real dim.
  1737. * @return ccStatus_t
  1738. */
  1739. ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, int32_t *n,
  1740. int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
  1741. /**
  1742. * @ingroup dnn
  1743. * @brief get output shape of gather_nd.
  1744. * @param [in] xDesc description of the tensor from which to gather elements.
  1745. * @param [in] indicesDesc description of the tensor of indices.
  1746. * @param [output] dimCnt dim nums.
  1747. * @param [output] dim dim size.
  1748. * @param [in] dimLen length of dim
  1749. * @return ccStatus_t
  1750. */
  1751. ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc,
  1752. int32_t *dimCnt, int32_t *dim, int32_t dimLen);
  1753. /**
  1754. * @ingroup dnn
  1755. * @brief tile tensor by multiples.
        * @param [in] handle cce handle
  1756. * @param [in] alpha reserved.
  1757. * @param [in] xDesc description of the tensor to be tiled.
  1758. * @param [in] x data point of the tensor to be tiled.
  1759. * @param [in] multiples tile coefficient of each dim.
  1760. * @param [in] beta reserved.
  1761. * @param [in] outputDesc description of the output tensor.
  1762. * @param [output] output data point of the output tensor.
  1763. * @return ccStatus_t
  1764. */
  1765. ccStatus_t ccTileForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1766. const ccIntArray_t *multiples, const void *beta, const ccTensorDescriptor_t outputDesc,
  1767. void *output);
  1768. /**
  1769. * @ingroup dnn
  1770. * @brief get output shape of tile.
  1771. * @param [in] xDesc description of the tensor to be tiled.
  1772. * @param [in] multiples multiples of each dim.
  1773. * @param [in|out] dimCnt [point to the output dimCnt]
  1774. * @param [in|out] dim [arrays to save dims]
  1775. * @param [in] dimLen length of dim
  1776. * @return ccStatus_t
  1777. */
  1778. ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *dimCnt,
  1779. int32_t dim[], int32_t dimLen);
  1780. /**
  1781. * @ingroup dnn
  1782. * @brief get output shape of tile.
  1783. * @param [in] xDesc description of the tensor to be tiled.
  1784. * @param [in] multiples multiples of each dim.
  1785. * @param [output] n dim-size of n-dim.
  1786. * @param [output] c dim-size of c-dim.
  1787. * @param [output] h dim-size of h-dim.
  1788. * @param [output] w dim-size of w-dim.
  1789. * @param [output] realDimCnt real dim.
  1790. * @return ccStatus_t
  1791. */
  1792. ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc,
  1793. // const ccIntArrayDescriptor_t multiples,
  1794. const ccIntArray_t *multiples, int32_t *n, int32_t *c, int32_t *h, int32_t *w,
  1795. int32_t *realDimCnt);
  1796. /**
  1797. * @ingroup dnn
  1798. * @brief get output shape of realdiv.
  1799. * @param [in] xDesc description of the left operator tensor.
  1800. * @param [in] yDesc description of the right operator tensor.
  1801. * @param [output] dimCnt dim nums.
  1802. * @param [output] dim dim size.
  1803. * @param [in] dimLen length of dim
  1804. * @return ccStatus_t
  1805. */
  1806. ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  1807. int32_t *dim, int32_t dimLen);
  1808. /**
  1809. * @ingroup dnn
  1810. * @brief realdiv between two tensors.
        * @param [in] handle cce handle
  1811. * @param [in] alpha reserved.
  1812. * @param [in] xDesc description of the dividend tensor.
  1813. * @param [in] x data point of the dividend tensor.
  1814. * @param [in] yDesc description of the divisor tensor.
  1815. * @param [in] y data point of the divisor tensor.
  1816. * @param [in] beta reserved.
  1817. * @param [in] outputDesc description of the output tensor.
  1818. * @param [output] output data point of the output tensor.
  1819. * @return ccStatus_t
  1820. */
  1821. ccStatus_t ccRealdivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1822. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  1823. const ccTensorDescriptor_t outputDesc, void *output);
  1824. /**
  1825. * @ingroup dnn
  1826. * @brief get output shape of realdiv.
  1827. * @param [in] xDesc description of the dividend tensor.
  1828. * @param [in] yDesc description of the divisor tensor.
  1829. * @param [output] n dim-size of n-dim.
  1830. * @param [output] c dim-size of c-dim.
  1831. * @param [output] h dim-size of h-dim.
  1832. * @param [output] w dim-size of w-dim.
  1833. * @param [output] realDimCnt real dim.
  1834. * @return ccStatus_t
  1835. */
  1836. ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *n,
  1837. int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
  1838. /**
  1839. * @ingroup dnn
  1840. * @brief floordiv between two tensors.
        * @param [in] handle cce handle
  1841. * @param [in] alpha reserved.
  1842. * @param [in] xDesc description of the left operator tensor.
  1843. * @param [in] x data point of the left operator tensor.
  1844. * @param [in] yDesc description of the right operator tensor.
  1845. * @param [in] y data point of the right operator tensor.
  1846. * @param [in] beta reserved.
  1847. * @param [in] outputDesc description of the output tensor.
  1848. * @param [output] output data point of the output tensor.
  1849. * @return ccStatus_t
  1850. */
  1851. ccStatus_t ccFloordivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1852. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  1853. const ccTensorDescriptor_t outputDesc, void *output);
  1854. /**
  1855. * @ingroup dnn
  1856. * @brief get output shape of floordiv.
  1857. * @param [in] xDesc description of the left operator tensor.
  1858. * @param [in] yDesc description of the right operator tensor.
  1859. * @param [output] dimCnt dim nums.
        * @param [output] dim dim size.
  1860. * @param [in] dimLen length of dim
  1861. * @return ccStatus_t
  1862. */
  1863. ccStatus_t ccGetFloordivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  1864. int32_t *dim, int32_t dimLen);
  1865. /**
  1866. * @ingroup dnn
  1867. * @brief greater comparison between two tensors.
        * @param [in] handle cce handle
  1868. * @param [in] alpha reserved.
  1869. * @param [in] xDesc description of the left operator tensor.
  1870. * @param [in] x data point of the left operator tensor.
  1871. * @param [in] yDesc description of the right operator tensor.
  1872. * @param [in] y data point of the right operator tensor.
  1873. * @param [in] beta reserved.
  1874. * @param [in] outputDesc description of the output tensor.
  1875. * @param [output] output data point of the output tensor.
  1876. * @return ccStatus_t
  1877. */
  1878. ccStatus_t ccGreaterForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1879. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  1880. const ccTensorDescriptor_t outputDesc, void *output);
  1881. /**
  1882. * @ingroup dnn
  1883. * @brief get output shape of greater.
  1884. * @param [in] xDesc description of the left operator tensor.
  1885. * @param [in] yDesc description of the right operator tensor.
  1886. * @param [output] dimCnt dim nums.
  1887. * @param [output] dim dim size.
  1888. * @param [in] dimLen length of dim
  1889. * @return ccStatus_t
  1890. */
  1891. ccStatus_t ccGetGreaterOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  1892. int32_t *dim, int32_t dimLen);
  1893. /**
  1894. * @ingroup dnn
  1895. * @brief less comparison between two tensors.
        * @param [in] handle cce handle
  1896. * @param [in] alpha reserved.
  1897. * @param [in] xDesc description of the left operator tensor.
  1898. * @param [in] x data point of the left operator tensor.
  1899. * @param [in] yDesc description of the right operator tensor.
  1900. * @param [in] y data point of the right operator tensor.
  1901. * @param [in] beta reserved.
  1902. * @param [in] outputDesc description of the output tensor.
  1903. * @param [output] output data point of the output tensor.
  1904. * @return ccStatus_t
  1905. */
  1906. ccStatus_t ccLessForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1907. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  1908. const ccTensorDescriptor_t outputDesc, void *output);
  1909. /**
  1910. * @ingroup dnn
  1911. * @brief get output shape of less.
  1912. * @param [in] xDesc description of the left operand tensor.
  1913. * @param [in] yDesc description of the right operand tensor.
  1914. * @param [out] dimCnt dim nums.
  1915. * @param [out] dim dim size.
  1916. * @param [in] dimLen length of dim
  1917. * @return ccStatus_t
  1918. */
  1919. ccStatus_t ccGetLessOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  1920. int32_t *dim, int32_t dimLen);
  1921. /**
  1922. * @ingroup dnn
  1923. * @brief get output shape of LogicalOr.
  1924. * @param [in] xDesc description of the left operand tensor.
  1925. * @param [in] yDesc description of the right operand tensor.
  1926. * @param [out] dimCnt dim nums.
  1927. * @param [out] dim dim size.
  1928. * @param [in] dimLen length of dim
  1929. * @return ccStatus_t
  1930. */
  1931. ccStatus_t ccGetLogicalOrOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  1932. int32_t *dim, int32_t dimLen);
  1933. /**
  1934. * @ingroup dnn
  1935. * @brief get output shape of LogicalXor.
  1936. * @param [in] xDesc description of the left operand tensor.
  1937. * @param [in] yDesc description of the right operand tensor.
  1938. * @param [out] dimCnt dim nums.
  1939. * @param [out] dim dim size.
  1940. * @param [in] dimLen length of dim
  1941. * @return ccStatus_t
  1942. */
  1943. ccStatus_t ccGetLogicalXorOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  1944. int32_t *dim, int32_t dimLen);
  1945. /**
  1946. * @ingroup dnn
  1947. * @brief logical not forward:
  1948. * data type only supports bool
  1949. * data format only supports ND
  1950. * @param [in] handle cce handle
  1951. * @param [in] alpha common scale factor
  1952. * @param [in] xDesc descriptor of input data
  1953. * @param [in] x input data in device memory
  1954. * @param [in] beta common scale factor
  1955. * @param [in] outputDesc descriptor of output data
  1956. * @param [in|out] output output data in device memory
  1957. * @return ccStatus_t
  1958. */
  1959. ccStatus_t ccLogicalNotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1960. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  1961. /**
  1962. * @ingroup dnn
  1963. * @brief equal between two tensors.
  1964. * @param [in] alpha reserved.
  1965. * @param [in] xDesc description of the left operand tensor.
  1966. * @param [in] x data pointer of the left operand tensor.
  1967. * @param [in] yDesc description of the right operand tensor.
  1968. * @param [in] y data pointer of the right operand tensor.
  1969. * @param [in] beta reserved.
  1970. * @param [in] outputDesc description of the output tensor.
  1971. * @param [out] output data pointer of the output tensor.
  1972. * @return ccStatus_t
  1973. */
  1974. ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1975. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  1976. const ccTensorDescriptor_t outputDesc, void *output);
  1977. /**
  1978. * @ingroup dnn
  1979. * @brief dump data during inference, only for eng ver. NOTE(review): buffer, bufLen and taskIndex are not documented here.
  1980. * @param [in] handle cce handle
  1981. * @return ccStatus_t
  1982. */
  1983. ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t bufLen, const uint32_t taskIndex);
  1984. /**
  1985. * @ingroup dnn
  1986. * @brief logical and between two tensors.
  1987. * @param [in] alpha reserved.
  1988. * @param [in] xDesc description of the left operand tensor.
  1989. * @param [in] x data pointer of the left operand tensor.
  1990. * @param [in] yDesc description of the right operand tensor.
  1991. * @param [in] y data pointer of the right operand tensor.
  1992. * @param [in] beta reserved.
  1993. * @param [in] outputDesc description of the output tensor.
  1994. * @param [out] output data pointer of the output tensor.
  1995. * @return ccStatus_t
  1996. */
  1997. ccStatus_t ccLogicalAndForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  1998. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  1999. const ccTensorDescriptor_t outputDesc, void *output);
  2000. /**
  2001. * @ingroup dnn
  2002. * @brief logical or between two tensors.
  2003. * @param [in] alpha reserved.
  2004. * @param [in] xDesc description of the left operand tensor.
  2005. * @param [in] x data pointer of the left operand tensor.
  2006. * @param [in] yDesc description of the right operand tensor.
  2007. * @param [in] y data pointer of the right operand tensor.
  2008. * @param [in] beta reserved.
  2009. * @param [in] outputDesc description of the output tensor.
  2010. * @param [out] output data pointer of the output tensor.
  2011. * @return ccStatus_t
  2012. */
  2013. ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2014. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  2015. const ccTensorDescriptor_t outputDesc, void *output);
  2016. /**
  2017. * @ingroup dnn
  2018. * @brief logical xor between two tensors (x ^ y = (x | y) & ~(x & y)).
  2019. * @param [in] alpha reserved.
  2020. * @param [in] xDesc description of the left operand tensor.
  2021. * @param [in] x data pointer of the left operand tensor.
  2022. * @param [in] yDesc description of the right operand tensor.
  2023. * @param [in] y data pointer of the right operand tensor.
  2024. * @param [in] beta reserved.
  2025. * @param [in] outputDesc description of the output tensor.
  2026. * @param [out] output data pointer of the output tensor.
  2027. * @return ccStatus_t
  2028. */
  2029. ccStatus_t ccLogicalXorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2030. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  2031. const ccTensorDescriptor_t outputDesc, void *output);
  2032. /**
  2033. * @ingroup dnn
  2034. * @brief get output shape of equal.
  2035. * @param [in] xDesc description of the left operand tensor.
  2036. * @param [in] yDesc description of the right operand tensor.
  2037. * @param [out] dimCnt dim nums.
  2038. * @param [out] dim dim size.
  2039. * @param [in] dimLen length of dim
  2040. * @return ccStatus_t
  2041. */
  2042. ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  2043. int32_t *dim, int32_t dimLen);
  2044. /**
  2045. * @ingroup dnn
  2046. * @brief get output shape of logical and.
  2047. * @param [in] xDesc description of the left operand tensor.
  2048. * @param [in] yDesc description of the right operand tensor.
  2049. * @param [out] dimCnt dim nums.
  2050. * @param [out] dim dim size.
  2051. * @param [in] dimLen length of dim
  2052. * @return ccStatus_t
  2053. */
  2054. ccStatus_t ccGetLogicalAndOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  2055. int32_t *dim, int32_t dimLen);
  2056. /**
  2057. * @ingroup dnn
  2058. * @brief floormod between two tensors.
  2059. * @param [in] alpha reserved.
  2060. * @param [in] xDesc description of the left operand tensor.
  2061. * @param [in] x data pointer of the left operand tensor.
  2062. * @param [in] yDesc description of the right operand tensor.
  2063. * @param [in] y data pointer of the right operand tensor.
  2064. * @param [in] beta reserved.
  2065. * @param [in] outputDesc description of the output tensor.
  2066. * @param [out] output data pointer of the output tensor.
  2067. * @return ccStatus_t
  2068. */
  2069. ccStatus_t ccFloormodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2070. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  2071. const ccTensorDescriptor_t outputDesc, void *output);
  2072. /**
  2073. * @ingroup dnn
  2074. * @brief get output shape of floormod.
  2075. * @param [in] xDesc description of the left operand tensor.
  2076. * @param [in] yDesc description of the right operand tensor.
  2077. * @param [out] dimCnt dim nums.
  2078. * @param [out] dim dim size.
  2079. * @param [in] dimLen length of dim
  2080. * @return ccStatus_t
  2081. */
  2082. ccStatus_t ccGetFloormodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  2083. int32_t *dim, int32_t dimLen);
  2084. /**
  2085. * @ingroup dnn
  2086. * @brief compare between two tensors (comparison kind presumably selected by compareType - confirm).
  2087. * @param [in] alpha reserved.
  2088. * @param [in] xDesc description of the left operand tensor.
  2089. * @param [in] x data pointer of the left operand tensor.
  2090. * @param [in] yDesc description of the right operand tensor.
  2091. * @param [in] y data pointer of the right operand tensor.
  2092. * @param [in] beta reserved.
  2093. * @param [in] outputDesc description of the output tensor.
  2094. * @param [out] output data pointer of the output tensor.
  2095. * @return ccStatus_t
  2096. */
  2097. ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compareType, const void *alpha,
  2098. const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
  2099. const void *y, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2100. /**
  2101. * @ingroup dnn
  2102. * @brief get output shape of compare.
  2103. * @param [in] xDesc description of the left operand tensor.
  2104. * @param [in] yDesc description of the right operand tensor.
  2105. * @param [out] dimCnt dim nums.
  2106. * @param [out] dim dim size.
  2107. * @param [in] dimLen length of dim
  2108. * @return ccStatus_t
  2109. */
  2110. ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  2111. int32_t *dim, int32_t dimLen);
  2112. /**
  2113. * @ingroup dnn
  2114. * @brief create descriptor of FillParam
  2115. * @param [in|out] fillParamDesc pointer to descriptor of fill param
  2116. * @return ccStatus_t
  2117. */
  2118. ccStatus_t ccCreateFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc);
  2119. /**
  2120. * @ingroup dnn
  2121. * @brief destroy descriptor of FillParam
  2122. * @param [in] fillParamDesc pointer to descriptor of fill param
  2123. * @return ccStatus_t
  2124. */
  2125. ccStatus_t ccDestroyFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc);
  2126. /**
  2127. * @ingroup dnn
  2128. * @brief get output shape of broadcast operations.
  2129. * @param [in] inputNum input number of the operation tensors.
  2130. * @param [in] xDesc description of the input operation tensors list.
  2131. * @param [out] dimCnt dim-size of output tensor.
  2132. * @param [out] dim dim of output tensor.
  2133. * @param [in] dimLen length of dim
  2134. * @return ccStatus_t
  2135. */
  2136. ccStatus_t ccGetMultiNdBroadcastOpOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
  2137. int32_t *dim, int32_t dimLen);
  2138. /**
  2139. * @ingroup dnn
  2140. * @brief get output shape of maxmultitensor.
  2141. * @param [in] inputNum the num of input operand tensors.
  2142. * @param [in] xDesc description of the input operand tensors list.
  2143. * @param [out] dimCnt dim count of output tensor.
  2144. * @param [out] dim array of output tensor.
  2145. * @param [in] dimLen length of dim
  2146. * @return ccStatus_t
  2147. */
  2148. ccStatus_t ccGetMaxMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
  2149. int32_t *dim, int32_t dimLen);
  2150. /**
  2151. * @ingroup dnn
  2152. * @brief get output shape of minmultitensor.
  2153. * @param [in] inputNum the num of input operand tensors.
  2154. * @param [in] xDesc description of the input operand tensors list.
  2155. * @param [out] dimCnt dim count of output tensor.
  2156. * @param [out] dim array of output tensor.
  2157. * @param [in] dimLen length of dim
  2158. * @return ccStatus_t
  2159. */
  2160. ccStatus_t ccGetMinMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
  2161. int32_t *dim, int32_t dimLen);
  2162. /**
  2163. * @ingroup dnn
  2164. * @brief MaxMultitensor forward:
  2165. * data type only supports float, float16 and int32
  2166. * data format only supports ND
  2167. * @param [in] handle cce handle
  2168. * @param [in] inputNum input tensor number
  2169. * @param [in] alpha common scale factor
  2170. * @param [in] xDesc descriptor of input tensors list
  2171. * @param [in] x input data in device memory list
  2172. * @param [in] beta common scale factor
  2173. * @param [in] outputDesc descriptor of output data
  2174. * @param [in|out] output output data in device memory
  2175. * @return ccStatus_t
  2176. */
  2177. ccStatus_t ccMaxMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha,
  2178. const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta,
  2179. const ccTensorDescriptor_t outputDesc, void *output);
  2180. /**
  2181. * @ingroup dnn
  2182. * @brief MinMultitensor forward:
  2183. * data type only supports float, float16 and int32
  2184. * data format only supports ND
  2185. * @param [in] handle cce handle
  2186. * @param [in] inputNum input tensor number
  2187. * @param [in] alpha common scale factor
  2188. * @param [in] xDesc descriptor of input data list
  2189. * @param [in] x input data in device memory list
  2190. * @param [in] beta common scale factor
  2191. * @param [in] outputDesc descriptor of output data
  2192. * @param [in|out] output output data in device memory
  2193. * @return ccStatus_t
  2194. */
  2195. ccStatus_t ccMinMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha,
  2196. const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta,
  2197. const ccTensorDescriptor_t outputDesc, void *output);
  2198. /**
  2199. * @ingroup dnn
  2200. * @brief create descriptor of StridedSlice
  2201. * @param [in|out] stridedSliceDesc pointer to descriptor of StridedSlice param
  2202. * @return ccStatus_t
  2203. */
  2204. ccStatus_t ccCreateStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc);
  2205. /**
  2206. * @ingroup dnn
  2207. * @brief destroy descriptor of StridedSlice
  2208. * @param [in] stridedSliceDesc pointer to descriptor of StridedSlice param
  2209. * @return ccStatus_t
  2210. */
  2211. ccStatus_t ccDestroyStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc);
  2212. /**
  2213. * @ingroup dnn
  2214. * @brief init stridedSlice descriptor.
  2215. * @param [out] stridedSliceDesc struct of stridedslice param
  2216. * @param [in] dimCnt dimension of the input tensor
  2217. * @param [in] begin slice begin index (inclusive)
  2218. * @param [in] end slice end index (exclusive)
  2219. * @param [in] strides slice stride
  2220. * @return ccStatus_t
  2221. */
  2222. ccStatus_t ccSetStridedSliceDescriptor(ccStridedSliceDescriptor_t stridedSliceDesc, int32_t dimCnt, int32_t begin[],
  2223. int32_t end[], int32_t strides[]);
  2224. /**
  2225. * @ingroup dnn
  2226. * @brief create descriptor of StridedSlice attrs
  2227. * @param [in|out] attrDesc pointer to descriptor of StridedSlice attr
  2228. * @return ccStatus_t
  2229. */
  2230. ccStatus_t ccCreateStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
  2231. /**
  2232. * @ingroup dnn
  2233. * @brief destroy descriptor of StridedSlice attrs
  2234. * @param [in] attrDesc pointer to descriptor of StridedSlice attr
  2235. * @return ccStatus_t
  2236. */
  2237. ccStatus_t ccDestroyStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
  2238. /**
  2239. * @ingroup dnn
  2240. * @brief init stridedSlice mask attrs descriptor.
  2241. * @param [out] attrDesc struct of stridedslice mask attrs
  2242. * @param [in] beginMask begin mask
  2243. * @param [in] endMask end mask
  2244. * @param [in] ellipsisMask ellipsis mask
  2245. * @param [in] newAxisMask new axis mask
  2246. * @param [in] shrinkAxisMask shrink axis mask
  2247. * @return ccStatus_t
  2248. */
  2249. ccStatus_t ccSetStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t attrDesc, int32_t beginMask,
  2250. int32_t endMask, int32_t ellipsisMask, int32_t newAxisMask,
  2251. int32_t shrinkAxisMask);
  2252. /**
  2253. * @ingroup dnn
  2254. * @brief get output shape of a strided slice of a tensor. NOTE(review): realDimCnt is not documented here.
  2255. * @param [in] xDesc descriptor of input data
  2256. * @param [in] stridedSliceDesc specifies the begin, end, strides of slice
  2257. * @param [in] attrDesc reserved for optional attributes.
  2258. * @param [out] n pointer to n size
  2259. * @param [out] c pointer to c size
  2260. * @param [out] h pointer to h size
  2261. * @param [out] w pointer to w size
  2262. * @return ccStatus_t
  2263. */
  2264. ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc,
  2265. const ccStridedSliceDescriptor_t stridedSliceDesc,
  2266. const ccStridedSliceAttrsDescriptor_t attrDesc, int32_t *n, int32_t *c,
  2267. int32_t *h, int32_t *w, int32_t *realDimCnt);
  2268. /**
  2269. * @ingroup dnn
  2270. * @brief Extracts a strided slice of a tensor.
  2271. * @param [in] handle cce handle
  2272. * @param [in] stridedSliceDesc specifies the begin, end, strides of slice
  2273. * @param [in] attrDesc reserved for optional attributes.
  2274. * @param [in] alpha common scale factor
  2275. * @param [in] xDesc descriptor of input data
  2276. * @param [in] x input data in device memory
  2277. * @param [in] beta common scale factor
  2278. * @param [in] yDesc descriptor of output data
  2279. * @param [in|out] y output data in device memory
  2280. * @return ccStatus_t
  2281. */
  2282. ccStatus_t ccStridedSliceForward(ccHandle_t handle, const ccStridedSliceDescriptor_t stridedSliceDesc,
  2283. const ccStridedSliceAttrsDescriptor_t attrDesc, const void *alpha,
  2284. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2285. const ccTensorDescriptor_t yDesc, void *y);
  2286. /**
  2287. * @ingroup dnn
  2288. * @brief get output dimensions of slice tensor.
  2289. * @param [in] xDesc descriptor of input data
  2290. * @param [in] begin begin position of tensor
  2291. * @param [in] size size to slice
  2292. * @param [out] n pointer to n size
  2293. * @param [out] c pointer to c size
  2294. * @param [out] h pointer to h size
  2295. * @param [out] w pointer to w size
  2296. * @param [out] realDimCnt real dim count
  2297. * @return ccStatus_t
  2298. */
  2299. ccStatus_t ccGetSliceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *begin, const ccIntArray_t *size,
  2300. int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
  2301. /**
  2302. * @ingroup dnn
  2303. * @brief Extracts a slice of a tensor.
  2304. * @param [in] handle cce handle
  2305. * @param [in] alpha common scale factor
  2306. * @param [in] xDesc descriptor of input data
  2307. * @param [in] x input data in device memory
  2308. * @param [in] begin begin position of tensor
  2309. * @param [in] size size to slice
  2310. * @param [in] beta common scale factor
  2311. * @param [in] yDesc descriptor of output data
  2312. * @param [in|out] y output data in device memory
  2313. * @return ccStatus_t
  2314. */
  2315. ccStatus_t ccSliceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2316. const ccIntArray_t *begin, const ccIntArray_t *size, const void *beta,
  2317. const ccTensorDescriptor_t yDesc, void *y);
  2318. /**
  2319. * @ingroup dnn
  2320. * @brief gather forward computation
  2321. * @param [in] handle cce handle
  2322. * @param [in] paramsDesc descriptor of params tensor
  2323. * @param [in] params input data in device memory
  2324. * @param [in] indicesDesc descriptor of indices tensor
  2325. * @param [in] indices indices data in device memory
  2326. * @param [in] axis gather axis (plain int32_t value, not a descriptor)
  2327. * @param [in] alpha reserved
  2328. * @param [in] beta reserved
  2329. * @param [in] outputDesc descriptor of output tensor
  2330. * @param [out] output output data in device memory
  2331. * @return ccStatus_t
  2332. */
  2333. ccStatus_t ccGatherForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc,
  2334. const void *params, const ccTensorDescriptor_t indicesDesc, const void *indices,
  2335. const int32_t axis, const void *beta, ccTensorDescriptor_t outputDesc, void *output);
  2336. /**
  2337. * @ingroup dnn
  2338. * @brief gather output dim computation, for NC1HWC0
  2339. * @param [in] paramsDesc descriptor of params tensor
  2340. * @param [in] indicesDesc descriptor of indices tensor
  2341. * @param [in] axis gather axis (plain int32_t value, not a descriptor)
  2342. * @param [out] n dim of n
  2343. * @param [out] c dim of c
  2344. * @param [out] h dim of h
  2345. * @param [out] w dim of w
  2346. * @param [out] realDimCnt real dim count
  2347. * @return ccStatus_t
  2348. */
  2349. ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc,
  2350. int32_t axis, int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
  2351. /**
  2352. * @ingroup dnn
  2353. * @brief gather output dim computation
  2354. * @param [in] paramsDesc descriptor of params tensor
  2355. * @param [in] indicesDesc descriptor of indices tensor
  2356. * @param [in] axis gather axis (plain int32_t value, not a descriptor)
  2357. * @param [out] dimCnt dimcnt of output
  2358. * @param [out] dim dim of output
  2359. * @param [in] dimLen length of dim
  2360. * @return ccStatus_t
  2361. */
  2362. ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc,
  2363. int32_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  2364. /**
  2365. * @ingroup dnn
  2366. * @brief exp forward computation
  2367. * @param [in] handle cce handle
  2368. * @param [in] expDesc descriptor of expParam
  2369. * @param [in] expParam a ternary array (NOTE(review): presumably three elements - confirm)
  2370. * @param [in] alpha reserved parameter
  2371. * @param [in] xDesc descriptor of input tensor
  2372. * @param [in] x input data in device memory
  2373. * @param [in] beta reserved parameter
  2374. * @param [in] yDesc descriptor of output tensor
  2375. * @param [out] y output data in device memory
  2376. * @return ccStatus_t
  2377. */
  2378. ccStatus_t ccExpForward(ccHandle_t handle, const ccExpDescriptor_t expDesc, const void *expParam, const void *alpha,
  2379. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2380. const ccTensorDescriptor_t yDesc, void *y);
  2381. /**
  2382. * @ingroup dnn
  2383. * @brief expm1 forward:
  2384. * data type only supports float, float16 and double
  2385. * data format only supports ND
  2386. * @param [in] handle cce handle
  2387. * @param [in] alpha common scale factor
  2388. * @param [in] xDesc descriptor of input data
  2389. * @param [in] x input data in device memory
  2390. * @param [in] beta common scale factor
  2391. * @param [in] outputDesc descriptor of output data
  2392. * @param [in|out] output output data in device memory
  2393. * @return ccStatus_t
  2394. */
  2395. ccStatus_t ccExpm1Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2396. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2397. /**
  2398. * @ingroup dnn
  2399. * @brief log1p forward:
  2400. * data type only supports float, float16 and double
  2401. * data format only supports ND
  2402. * @param [in] handle cce handle
  2403. * @param [in] alpha common scale factor
  2404. * @param [in] xDesc descriptor of input data
  2405. * @param [in] x input data in device memory
  2406. * @param [in] beta common scale factor
  2407. * @param [in] outputDesc descriptor of output data
  2408. * @param [in|out] output output data in device memory
  2409. * @return ccStatus_t
  2410. */
  2411. ccStatus_t ccLog1pForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2412. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2413. /**
  2414. * @ingroup dnn
  2415. * @brief init descriptor for parameter of exp function
  2416. * @param [in|out] expDesc descriptor of expParam
  2417. * @param [in] dataType data type in device
  2418. * @param [in] paramCnt number of parameters
  2419. * @return ccStatus_t
  2420. */
  2421. ccStatus_t ccSetExpDescriptor(ccExpDescriptor_t expDesc, ccDataType_t dataType, uint32_t paramCnt);
  2422. /**
  2423. * @ingroup dnn
  2424. * @brief log forward computation
  2425. * @param [in] handle cce handle
  2426. * @param [in] logDesc descriptor of logParam
  2427. * @param [in] logParam a ternary array
  2428. * @param [in] alpha reserved parameter
  2429. * @param [in] xDesc descriptor of input tensor
  2430. * @param [in] x input data in device memory
  2431. * @param [in] beta reserved parameter
  2432. * @param [in] yDesc descriptor of output tensor
  2433. * @param [out] y output data in device memory
  2434. * @return ccStatus_t
  2435. */
  2436. ccStatus_t ccLogForward(ccHandle_t handle, const ccLogDescriptor_t logDesc, const void *logParam, const void *alpha,
  2437. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2438. const ccTensorDescriptor_t yDesc, void *y);
  2439. /**
  2440. * @ingroup dnn
  2441. * @brief init descriptor for parameter of log function
  2442. * @param [in|out] logDesc descriptor of logParam
  2443. * @param [in] dataType data type in device
  2444. * @param [in] paramCnt number of parameters
  2445. * @return ccStatus_t
  2446. */
  2447. ccStatus_t ccSetLogDescriptor(ccLogDescriptor_t logDesc, ccDataType_t dataType, uint32_t paramCnt);
  2448. /**
  2449. * @ingroup dnn
  2450. * @brief pow forward computation
  2451. * @param [in] handle cce handle
  2452. * @param [in] powDesc descriptor of powParam
  2453. * @param [in] powParam a ternary array
  2454. * @param [in] alpha reserved parameter
  2455. * @param [in] xDesc descriptor of input tensor
  2456. * @param [in] x input data in device memory
  2457. * @param [in] beta reserved parameter
  2458. * @param [in] yDesc descriptor of input tensor
  2459. * @param [in] y input data in device memory
  2460. * @param [in] zDesc descriptor of output tensor
  2461. * @param [out] z output data in device memory
  2462. * @return ccStatus_t
  2463. */
  2464. ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t powDesc, const void *powParam, const void *alpha,
  2465. const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
  2466. const void *y, const void *beta, const ccTensorDescriptor_t zDesc, void *z);
  2467. /**
  2468. * @brief init descriptor for parameter of pow function
  2469. * @param [in|out] powDesc descriptor of powParam
  2470. * @param [in] dataType data type in device
  2471. * @param [in] paramCnt number of parameters
  2472. * @return ccStatus_t
  2473. */
  2474. ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t powDesc, ccDataType_t dataType, uint32_t paramCnt);
  2475. /**
  2476. * @ingroup dnn
  2477. * @brief non max suppression forward.
  2478. * @param [in] handle cce handle
  2479. * @param [in] nonmaxParaDesc descriptor of para
  2480. * @param [in] nonmaxPara input para in host memory
  2481. * @param [in] maxoutputsize input para in host memory
  2482. * @param [in] alpha common scale factor
  2483. * @param [in] boxesDesc descriptor of input data boxes
  2484. * @param [in] boxes input data boxes in device memory
  2485. * @param [in] scoresDesc descriptor of input data scores
  2486. * @param [in] scores input data scores in device memory
  2487. * @param [in] workSpaceSizeInBytes workspace size
  2488. * @param [in] workSpace input workspace in device memory
  2489. * @param [in] beta common scale factor
  2490. * @param [in] outputDesc descriptor of output data
  2491. * @param [in|out] output output data in device memory
  2492. * @return ccStatus_t
  2493. */
  2494. ccStatus_t ccNonMaxSuppressionForward(ccHandle_t handle, const ccNonMaxSuppressionDescriptor_t nonmaxParaDesc,
  2495. const void *nonmaxPara, const int *maxoutputsize, const void *alpha,
  2496. const ccTensorDescriptor_t boxesDesc, const void *boxes,
  2497. const ccTensorDescriptor_t scoresDesc, const void *scores,
  2498. const uint32_t workSpaceSizeInBytes, void *workSpace, const void *beta,
  2499. const ccTensorDescriptor_t outputDesc, void *output);
  2500. /**
  2501. * @brief init descriptor for parameter of NonMaxSuppression function
  2502. * @param [in|out] nonMaxSuppressionDesc descriptor of para
  2503. * @param [in] dataType data type in device
  2504. * @param [in] paramCnt number of parameters
  2505. * @return ccStatus_t
  2506. */
  2507. ccStatus_t ccSetNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t nonMaxSuppressionDesc,
  2508. ccDataType_t dataType, uint32_t paramCnt);
  2509. /**
  2510. * @ingroup dnn
  2511. * @brief get the output dimension info of resizeBilinear op.
  2512. * @param [in] xDesc descriptor of input data
  2513. * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
  2514. * @param [out] dimCnt dim count of output
  2515. * @param [out] dim dim of output
  2516. * @param [in] dimLen length of dim
  2517. * @return ccStatus_t
  2518. */
  2519. ccStatus_t ccGetResizeBilinearOutputDim(const ccTensorDescriptor_t xDesc,
  2520. const ccResizeBilinearDescriptor_t resizeBilinearDesc, int32_t *dimCnt,
  2521. int32_t dim[], int32_t dimLen);
  2522. /**
  2523. * @ingroup dnn
  2524. * @brief get the output dimension info of interp op.
  2525. * @param [in] xDesc descriptor of input data
  2526. * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
  2527. * @param [out] dimCnt dim count of output
  2528. * @param [out] dim dim of output
  2529. * @param [in] dimLen length of dim
  2530. * @return ccStatus_t
  2531. */
  2532. ccStatus_t ccGetInterpOutputDim(const ccTensorDescriptor_t xDesc, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
  2533. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  2534. /**
  2535. * @ingroup dnn
  2536. * @brief resize bilinear forward for t network.
  2537. * @param [in] handle cce handle
  2538. * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
  2539. * @param [in] alpha common scale factor
  2540. * @param [in] xDesc descriptor of input data
  2541. * @param [in] x input data in device memory
  2542. * @param [in] beta common scale factor
  2543. * @param [in] outputDesc descriptor of output data
  2544. * @param [in|out] output output data in device memory
  2545. * @return ccStatus_t
  2546. */
  2547. ccStatus_t ccResizeBilinearForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
  2548. const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2549. const ccTensorDescriptor_t outputDesc, void *output);
  2550. /**
  2551. * @ingroup dnn
  2552. * @brief resize bilinear forward for c network.
  2553. * @param [in] handle cce handle
  2554. * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
  2555. * @param [in] alpha common scale factor
  2556. * @param [in] xDesc descriptor of input data
  2557. * @param [in] x input data in device memory
  2558. * @param [in] beta common scale factor
  2559. * @param [in] outputDesc descriptor of output data
  2560. * @param [in|out] output output data in device memory
  2561. * @return ccStatus_t
  2562. */
  2563. ccStatus_t ccInterpForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, const void *alpha,
  2564. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2565. const ccTensorDescriptor_t outputDesc, void *output);
  2566. /**
  2567. * @ingroup dnn
  2568. * @brief create descriptor of ResizeBilinear
  2569. * @param [in|out] resizeBilinearDesc pointer to the descriptor of resizeBilinear attr
  2570. * @return ccStatus_t
  2571. */
  2572. ccStatus_t ccCreateResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
  2573. /**
  2574. * @ingroup dnn
  2575. * @brief destroy descriptor of ResizeBilinear
  2576. * @param [in|out] resizeBilinearDesc pointer to the descriptor of resizeBilinear attr
  2577. * @return ccStatus_t
  2578. */
  2579. ccStatus_t ccDestroyResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
  2580. /**
  2581. * @ingroup dnn
  2582. * @brief set the attributes of a resizeBilinear descriptor.
  2583. * @param [in|out] resizeBilinearDesc descriptor of resize_bilinear operator
  2584. * @param [in] resizeOutputDimMode way to decide output dimensions
  2585. * @param [in] alignCorners whether the centers of input and output are aligned
  2586. * @param [in] zoom_factor zoom factor
  2587. * @param [in] shrink_factor shrink factor
  2588. * @param [in] height height of output
  2589. * @param [in] width width of output
  2590. * @param [in] pad_begin padding at the beginning of input
  2591. * @param [in] pad_end padding at the end of input
  2592. * @return ccStatus_t
  2593. */
  2594. ccStatus_t ccSetResizeBilinearDescriptor(ccResizeBilinearDescriptor_t resizeBilinearDesc,
  2595. ccResizeOutputDimMode_t resizeOutputDimMode, bool alignCorners,
  2596. int32_t zoom_factor, int32_t shrink_factor, int32_t height, int32_t width,
  2597. int32_t pad_begin, int32_t pad_end);
  2598. /**
  2599. * @ingroup dnn
  2600. * @brief fill forward computation
  2601. * @param [in] handle cce handle
  2602. * @param [in] fillParamDesc descriptor of fill parameter
  2603. * @param [in] alpha reserved
  2604. * @param [in] givenDesc descriptor of given tensor
  2605. * @param [in] givenData given data in device memory
  2606. * @param [in] workspace workspace for the fill algorithm
  2607. * @param [in] workSpaceSizeInBytes workspace size in bytes
  2608. * @param [in] beta reserved
  2609. * @param [in] outputDesc descriptor of output tensor
  2610. * @param [out] output output data in device memory
  2611. * @return ccStatus_t
  2612. */
  2613. ccStatus_t ccFillForward(ccHandle_t handle, const ccFillParamDescriptor_t fillParamDesc, const void *alpha,
  2614. const ccTensorDescriptor_t givenDesc, const void *givenData, const void *workspace,
  2615. const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
  2616. void *output);
  2617. /**
  2618. * @ingroup dnn
  2619. *[ccGetFillWorkspaceSize]
  2620. *@param fillType [fill type]
  2622. *@param xDesc [input tensor descriptor]
  2623. *@param sizeInBytes [output size]
  2624. *@return ccStatus_t [status]
  2625. */
  2626. ccStatus_t ccGetFillWorkspaceSize(const ccFillOpType_t fillType, const ccTensorDescriptor_t xDesc,
  2627. uint32_t *sizeInBytes);
  2628. /**
  2629. *[ccCast]
  2630. *@param handle [cce handle]
  2631. *@param alpha [alpha]
  2632. *@param xDesc [tensor description of tensor x]
  2633. *@param x [input tensor x]
  2634. *@param beta [beta]
  2635. *@param yDesc [tensor description of tensor y]
  2636. *@param y [output tensor y]
  2637. *@return ccStatus_t [status]
  2638. */
  2639. ccStatus_t ccCast(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2640. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  2641. /**
  2642. * @ingroup dnn
  2643. * @brief round forward:
  2644. * supported data types: float, float16 and int32 only
  2645. * supported data format: ND only
  2646. * @param [in] handle cce handle
  2647. * @param [in] alpha common scale factor
  2648. * @param [in] xDesc descriptor of input data
  2649. * @param [in] x input data in device memory
  2650. * @param [in] beta common scale factor
  2651. * @param [in] outputDesc descriptor of output data
  2652. * @param [in|out] output output data in device memory
  2653. * @return ccStatus_t
  2654. */
  2655. ccStatus_t ccRoundForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2656. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2657. /**
  2658. * @ingroup dnn
  2659. * @brief rint forward:
  2660. * supported data types: float and float16 only
  2661. * supported data format: ND only
  2662. * @param [in] handle cce handle
  2663. * @param [in] alpha common scale factor
  2664. * @param [in] xDesc descriptor of input data
  2665. * @param [in] x input data in device memory
  2666. * @param [in] beta common scale factor
  2667. * @param [in] outputDesc descriptor of output data
  2668. * @param [in|out] output output data in device memory
  2669. * @return ccStatus_t
  2670. */
  2671. ccStatus_t ccRintForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2672. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2673. /**
  2674. * @ingroup dnn
  2675. * @brief sqrt forward:
  2676. * supported data types: float and float16 only
  2677. * supported data format: ND only
  2678. * @param [in] handle cce handle
  2679. * @param [in] alpha common scale factor
  2680. * @param [in] xDesc descriptor of input data
  2681. * @param [in] x input data in device memory
  2682. * @param [in] beta common scale factor
  2683. * @param [in] outputDesc descriptor of output data
  2684. * @param [in|out] output output data in device memory
  2685. * @return ccStatus_t
  2686. */
  2687. ccStatus_t ccSqrtForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2688. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2689. /**
  2690. *[ccTransGroupConvFilterInt8]
  2691. *@param filterSrcInfo [cce filtersrc descriptor]
  2692. *@param filterSrc [filterSrc address]
  2693. *@param filterDstInfo [cce filterdst descriptor]
  2694. *@param filterDst [filterDst address]
  2695. *@param group [group]
  2696. *@param ySizeInBytes [fraczfilter size]
  2697. *@param outputDataType [datatype]
  2698. *@return ccStatus_t [status]
  2699. */
  2700. ccStatus_t ccTransGroupConvFilterInt8(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
  2701. ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group,
  2702. uint32_t ySizeInBytes, ccDataType_t outputDataType);
  2703. /**
  2704. *[ccGetConcatOutputDim]
  2705. *@param xDesc[] [input tensor descriptor]
  2706. *@param axis [concat axis]
  2707. *@param inputNum [input tensor numbers]
        *@param dimCnt [output dim count]
  2708. *@param dim[] [output dim]
  2709. *@param dimLen [length of dim]
  2710. *@return ccStatus_t [status]
  2711. */
  2712. ccStatus_t ccGetConcatOutputDim(const ccTensorDescriptor_t xDesc[], int32_t axis, int32_t inputNum, int32_t *dimCnt,
  2713. int32_t dim[], int32_t dimLen);
  2714. /**
  2715. * @ingroup dnn
  2716. * @brief get the output dimension info of reduce.
  2717. * @param [in] xDesc descriptor of input tensor
  2718. * @param [in] axis pointer to the array of dimensions to reduce
  2719. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2720. * @param [in|out] dimCnt point to the output dimCnt
  2721. * @param [in|out] dim arrays to save dims
  2722. * @param [in] dimLen length of dim
  2723. * @return ccStatus_t
  2724. */
  2725. ccStatus_t ccGetReduceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *axis, bool keepDims,
  2726. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  2727. /**
  2728. * @ingroup dnn
  2729. * @brief reduce sum forward computation
  2730. * @param [in] handle cce handle
  2731. * @param [in] axis pointer to the array of dimensions to reduce
  2732. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2733. * @param [in] alpha scaling factors
  2734. * @param [in] xDesc descriptor of input tensor
  2735. * @param [in] x input data in device memory
  2736. * @param [in] beta bias factors
  2737. * @param [in] outputDesc descriptor of output tensor
  2738. * @param [in|out] output output data in device memory
  2739. * @return ccStatus_t
  2740. */
  2741. ccStatus_t ccReduceSumForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
  2742. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2743. const ccTensorDescriptor_t outputDesc, void *output);
  2744. /**
  2745. * @ingroup dnn
  2746. * @brief reduce max forward computation
  2747. * @param [in] handle cce handle
  2748. * @param [in] axis pointer to the array of dimensions to reduce
  2749. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2750. * @param [in] alpha scaling factors
  2751. * @param [in] xDesc descriptor of input tensor
  2752. * @param [in] x input data in device memory
  2753. * @param [in] beta bias factors
  2754. * @param [in] outputDesc descriptor of output tensor
  2755. * @param [in|out] output output data in device memory
  2756. * @return ccStatus_t
  2757. */
  2758. ccStatus_t ccReduceMaxForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
  2759. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2760. const ccTensorDescriptor_t outputDesc, void *output);
  2761. /**
  2762. * @ingroup dnn
  2763. * @brief reduce min forward computation
  2764. * @param [in] handle cce handle
  2765. * @param [in] axis pointer to the array of dimensions to reduce
  2766. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2767. * @param [in] alpha scaling factors
  2768. * @param [in] xDesc descriptor of input tensor
  2769. * @param [in] x input data in device memory
  2770. * @param [in] beta bias factors
  2771. * @param [in] outputDesc descriptor of output tensor
  2772. * @param [in|out] output output data in device memory
  2773. * @return ccStatus_t
  2774. */
  2775. ccStatus_t ccReduceMinForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
  2776. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2777. const ccTensorDescriptor_t outputDesc, void *output);
  2778. /**
  2779. * @ingroup dnn
  2780. * @brief reduce mean forward computation
  2781. * @param [in] handle cce handle
  2782. * @param [in] axis pointer to the array of dimensions to reduce
  2783. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2784. * @param [in] alpha scaling factors
  2785. * @param [in] xDesc descriptor of input tensor
  2786. * @param [in] x input data in device memory
  2787. * @param [in] beta bias factors
  2788. * @param [in] outputDesc descriptor of output tensor
  2789. * @param [in|out] output output data in device memory
  2790. * @return ccStatus_t
  2791. */
  2792. ccStatus_t ccReduceMeanForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
  2793. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2794. const ccTensorDescriptor_t outputDesc, void *output);
  2795. /**
  2796. * @ingroup dnn
  2797. * @brief reduce prod forward computation
  2798. * @param [in] handle cce handle
  2799. * @param [in] axis pointer to the array of dimensions to reduce
  2800. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2801. * @param [in] alpha scaling factors
  2802. * @param [in] xDesc descriptor of input tensor
  2803. * @param [in] x input data in device memory
  2804. * @param [in] beta bias factors
  2805. * @param [in] outputDesc descriptor of output tensor
  2806. * @param [in|out] output output data in device memory
  2807. * @return ccStatus_t
  2808. */
  2809. ccStatus_t ccReduceProdForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
  2810. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2811. const ccTensorDescriptor_t outputDesc, void *output);
  2812. /**
  2813. * @ingroup dnn
  2814. * @brief reduce all forward computation
  2815. * @param [in] handle cce handle
  2816. * @param [in] axis pointer to the array of dimensions to reduce
  2817. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2818. * @param [in] alpha scaling factors
  2819. * @param [in] xDesc descriptor of input tensor
  2820. * @param [in] x input data in device memory
  2821. * @param [in] beta bias factors
  2822. * @param [in] outputDesc descriptor of output tensor
  2823. * @param [in|out] output output data in device memory
  2824. * @return ccStatus_t
  2825. */
  2826. ccStatus_t ccReduceAllForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
  2827. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2828. const ccTensorDescriptor_t outputDesc, void *output);
  2829. /**
  2830. *@brief print time statistics
  2831. *@return ccStatus_t [status]
  2832. */
  2833. ccStatus_t ccPrintTimeStat();
  2834. /**
  2835. * @ingroup dnn
  2836. * @brief reduce abs sum forward computation
  2837. * @param [in] handle cce handle
  2838. * @param [in] axis pointer to the array of dimensions to reduce
  2839. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2840. * @param [in] alpha scaling factors
  2841. * @param [in] xDesc descriptor of input tensor
  2842. * @param [in] x input data in device memory
  2843. * @param [in] beta bias factors
  2844. * @param [in] outputDesc descriptor of output tensor
  2845. * @param [in|out] output output data in device memory
  2846. * @return ccStatus_t
  2847. */
  2848. ccStatus_t ccReduceAbsSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha,
  2849. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2850. const ccTensorDescriptor_t outputDesc, void *output);
  2851. /**
  2852. * @ingroup dnn
  2853. * @brief reduce square sum forward computation
  2854. * @param [in] handle cce handle
  2855. * @param [in] axis pointer to the array of dimensions to reduce
  2856. * @param [in] keepDims If true, retains reduced dimensions with length 1.
  2857. * @param [in] alpha scaling factors
  2858. * @param [in] xDesc descriptor of input tensor
  2859. * @param [in] x input data in device memory
  2860. * @param [in] beta bias factors
  2861. * @param [in] outputDesc descriptor of output tensor
  2862. * @param [in|out] output output data in device memory
  2863. * @return ccStatus_t
  2864. */
  2865. ccStatus_t ccReduceSquareSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha,
  2866. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2867. const ccTensorDescriptor_t outputDesc, void *output);
  2868. /**
  2869. * @ingroup dnn
  2870. * @brief get the output dimension info of crop and resize
  2871. * @param [in] imageDesc descriptor of images
  2872. * @param [in] boxesDesc descriptor of boxes
  2873. * @param [in] boxidxDesc descriptor of boxidx
  2874. * @param [in] resizeHeight resize height
  2875. * @param [in] resizeWidth resize width
  2876. * @param [out] dimCnt dimcnt of output
  2877. * @param [out] dim dim of output
  2878. * @param [in] dimLen length of dim
  2879. * @return ccStatus_t
  2880. */
  2881. ccStatus_t ccGetCropAndResizeOutputDim(const ccTensorDescriptor_t imageDesc, const ccTensorDescriptor_t boxesDesc,
  2882. const ccTensorDescriptor_t boxidxDesc, const int32_t resizeHeight,
  2883. const int32_t resizeWidth, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  2884. /**
  2885. * @ingroup dnn
  2886. * @brief crop and resize forward.
  2887. * @param [in] handle cce handle
  2888. * @param [in] method enum of resize method
  2889. * @param [in] extrapolationValue Value used for extrapolation, when applicable
  2890. * @param [in] alpha common scale factor
  2891. * @param [in] imageDesc descriptor of images
  2892. * @param [in] image input data in device memory
  2893. * @param [in] boxesDesc descriptor of boxes
  2894. * @param [in] boxes input data in device memory
  2895. * @param [in] boxidxDesc descriptor of boxidx
  2896. * @param [in] boxidx input data in device memory
  2897. * @param [in] beta common scale factor
  2898. * @param [in] outputDesc descriptor of output data
  2899. * @param [out] output output data in device memory
  2900. * @return ccStatus_t
  2901. */
  2902. ccStatus_t ccCropAndResizeForward(ccHandle_t handle, const ccResizeMethod_t method, const float extrapolationValue,
  2903. const void *alpha, const ccTensorDescriptor_t imageDesc, const void *image,
  2904. const ccTensorDescriptor_t boxesDesc, const void *boxes,
  2905. const ccTensorDescriptor_t boxidxDesc, const void *boxidx, const void *beta,
  2906. const ccTensorDescriptor_t outputDesc, void *output);
  2907. /**
  2908. * @ingroup dnn
  2909. * @brief select forward computation
  2910. * @param [in] handle cce handle
  2911. * @param [in] alpha reserved
  2912. * @param [in] condDesc descriptor of cond tensor
  2913. * @param [in] cond cond data in device memory
  2914. * @param [in] xDesc descriptor of x tensor
  2915. * @param [in] x x data in device memory
  2916. * @param [in] yDesc descriptor of y tensor
  2917. * @param [in] y y data in device memory
  2918. * @param [in] beta reserved
  2919. * @param [in] outDesc descriptor of output tensor
  2920. * @param [out] out output data in device memory
  2921. * @return ccStatus_t
  2922. */
  2923. ccStatus_t ccSelect(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t condDesc, const void *cond,
  2924. const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, const void *y,
  2925. const void *beta, const ccTensorDescriptor_t outDesc, void *out);
  2926. /**
  2927. * @ingroup dnn
  2928. * @brief get the output dimension info of where
  2929. * @param [in] xDesc descriptor of input tensor
  2930. * @param [in|out] dimCnt point to the output dimCnt
  2931. * @param [in|out] dim arrays to save dims
        * @param [in] dimLen length of dim
  2932. * @return ccStatus_t
  2933. */
  2934. ccStatus_t ccGetWhereOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
  2935. /**
  2936. * @ingroup dnn
  2937. * @brief where forward computation
  2938. * @param [in] handle cce handle
  2939. * @param [in] alpha reserved
  2940. * @param [in] xDesc descriptor of x tensor
  2941. * @param [in] x x data in device memory
  2942. * @param [in] beta presumably reserved, like alpha (TODO confirm)
  2943. * @param [in] yDesc descriptor of y tensor
  2944. * @param [out] y y data in device memory
  2946. * @return ccStatus_t
  2947. */
  2948. ccStatus_t ccWhere(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2949. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  2950. /**
  2951. * @ingroup dnn
  2952. * @brief reverse forward.
  2953. * @param [in] handle cce handle
  2954. * @param [in] axis pointer to the array of dims that need to be reversed
  2955. * @param [in] alpha common scale factor
  2956. * @param [in] xDesc descriptor of input data
  2957. * @param [in] x input data in device memory
  2958. * @param [in] beta common scale factor
  2959. * @param [in] outputDesc descriptor of output data
  2960. * @param [in|out] output output data in device memory
  2961. * @return ccStatus_t
  2962. */
  2963. ccStatus_t ccReverseForward(ccHandle_t handle, const ccIntArray_t *axis, const void *alpha,
  2964. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  2965. const ccTensorDescriptor_t outputDesc, void *output);
  2966. /**
  2967. * @ingroup dnn
  2968. * @brief floor forward:
  2969. * supported data types: float and float16 only
  2970. * supported data format: ND only
  2971. * @param [in] handle cce handle
  2972. * @param [in] alpha common scale factor
  2973. * @param [in] xDesc descriptor of input data
  2974. * @param [in] x input data in device memory
  2975. * @param [in] beta common scale factor
  2976. * @param [in] outputDesc descriptor of output data
  2977. * @param [in|out] output output data in device memory
  2978. * @return ccStatus_t
  2979. */
  2980. ccStatus_t ccFloorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2981. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2982. /**
  2983. * @ingroup dnn
  2984. * @brief ceil forward:
  2985. * supported data types: float and float16 only
  2986. * supported data format: ND only
  2987. * @param [in] handle cce handle
  2988. * @param [in] alpha common scale factor
  2989. * @param [in] xDesc descriptor of input data
  2990. * @param [in] x input data in device memory
  2991. * @param [in] beta common scale factor
  2992. * @param [in] outputDesc descriptor of output data
  2993. * @param [in|out] output output data in device memory
  2994. * @return ccStatus_t
  2995. */
  2996. ccStatus_t ccCeilForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  2997. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  2998. /**
  2999. * @ingroup dnn
  3000. * @brief get the output dimension info of truncate mod
  3001. * @param [in] xDesc descriptor of input tensor x
  3002. * @param [in] yDesc descriptor of input tensor y
  3003. * @param [out] dimCnt [dim count of the output tensor]
  3004. * @param [out] dim[] [shape of the output tensor]
  3005. * @param [in] dimLen length of dim
  3006. * @return ccStatus_t
  3007. */
  3008. ccStatus_t ccGetTruncatemodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc,
  3009. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3010. /**
  3011. * @ingroup dnn
  3012. * @brief truncate mod forward computation
  3013. * @param [in] handle cce handle
  3014. * @param [in] alpha scaling factors
  3015. * @param [in] xDesc descriptor of input tensor x
  3016. * @param [in] x input data x in device memory
  3017. * @param [in] yDesc descriptor of input tensor y
  3018. * @param [in] y input data y in device memory
  3019. * @param [in] beta bias factors
  3020. * @param [in] outputDesc descriptor of output tensor
  3021. * @param [out] output output data in device memory
  3022. * @return ccStatus_t
  3023. */
  3024. ccStatus_t ccTruncatemodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3025. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  3026. const ccTensorDescriptor_t outputDesc, void *output);
  3027. /**
  3028. * @ingroup dnn
  3029. * @brief Spatial Pyramid Pooling forward computation
  3030. * @param [in] handle cce handle
  3031. * @param [in] alpha reserved
  3032. * @param [in] xDesc descriptor of input tensor
  3033. * @param [in] x input data in device memory
  3034. * @param [in] workspace temp workspace
  3035. * @param [in] workspaceSizeInBytes temp workspace size in bytes
  3036. * @param [in] pyramidHeight pyramid height
  3037. * @param [in] poolingMode pooling mode
  3038. * @param [in] beta reserved
  3039. * @param [in] outputDesc descriptor of output tensor
  3040. * @param [out] output output data in device memory
  3041. * @return ccStatus_t
  3042. */
  3043. ccStatus_t ccSPPForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3044. void *workspace, const uint32_t workspaceSizeInBytes, const uint32_t pyramidHeight,
  3045. const ccPoolingMode_t poolingMode, const void *beta, const ccTensorDescriptor_t outputDesc,
  3046. void *output);
  3047. /**
  3048. * @ingroup dnn
  3049. * @brief Get Spatial Pyramid Pooling output dim
  3050. * @param [in] xDesc descriptor of input tensor
  3051. * @param [in] pyramidHeight pyramid height
  3052. * @param [out] dimCnt output tensor dim cnt
  3053. * @param [out] dim output tensor dim
  3054. * @param [in] dimLen length of dim
  3056. * @return ccStatus_t
  3057. */
  3058. ccStatus_t ccGetSPPOutputDim(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, int32_t *dimCnt,
  3059. int32_t dim[], const int32_t dimLen);
  3060. /**
  3061. * @ingroup dnn
  3062. * @brief Get Spatial Pyramid Pooling workspace size
  3063. * @param [in] xDesc descriptor of input tensor
  3064. * @param [in] pyramidHeight pyramid height
  3065. * @param [out] workspaceSizeInBytes workspace size in bytes
  3066. * @return ccStatus_t
  3067. */
  3068. ccStatus_t ccGetSPPWorkspaceSize(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight,
  3069. uint32_t *workspaceSizeInBytes);
  3070. /**
  3071. * @ingroup dnn
  3072. * @brief BNLL forward computation
  3073. * @param [in] handle cce handle
  3074. * @param [in] alpha scaling factors
  3075. * @param [in] xDesc descriptor of input tensor
  3076. * @param [in] x input data in device memory
  3077. * @param [in] beta bias factors
  3078. * @param [in] outputDesc descriptor of output tensor
  3079. * @param [in|out] output output data in device memory
  3080. * @return ccStatus_t
  3081. */
  3082. ccStatus_t ccBNLLForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3083. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3084. /**
  3085. * @ingroup dnn
  3086. * @brief bias forward.
  3087. * @param [in] handle cce handle
  3088. * @param [in] axis axis (NOTE(review): meaning not stated in this header - confirm)
  3089. * @param [in] alpha common scale factor
  3090. * @param [in] xDesc descriptor of input data x
  3091. * @param [in] x input data x in device memory
  3092. * @param [in] biasDesc descriptor of input data bias
  3093. * @param [in] bias input data bias in device memory
  3094. * @param [in] beta common scale factor
  3095. * @param [in] outputDesc descriptor of output data
  3096. * @param [in|out] output output data in device memory
  3097. * @return ccStatus_t
  3098. */
  3099. ccStatus_t ccBiasForward(ccHandle_t handle, const int axis, const void *alpha, const ccTensorDescriptor_t xDesc,
  3100. const void *x, const ccTensorDescriptor_t biasDesc, const void *bias, const void *beta,
  3101. const ccTensorDescriptor_t outputDesc, void *output);
  3102. /**
  3103. * @ingroup dnn
  3104. * @brief threshold forward computation
  3105. * @param [in] handle cce handle
  3106. * @param [in] threshold pointer to the threshold value
  3107. * @param [in] alpha scaling factors
  3108. * @param [in] xDesc descriptor of input tensor
  3109. * @param [in] x input data in device memory
  3110. * @param [in] beta bias factors
  3111. * @param [in] outputDesc descriptor of output tensor
  3112. * @param [in|out] output output data in device memory
  3113. * @return ccStatus_t
  3114. */
  3115. ccStatus_t ccThresholdForward(ccHandle_t handle, const void *threshold, const void *alpha,
  3116. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  3117. const ccTensorDescriptor_t outputDesc, void *output);
  3118. /**
  3119. * @ingroup dnn
  3120. * @brief shuffle channel forward.
  3121. * @param [in] handle cce handle
  3122. * @param [in] alpha common scale factor
  3123. * @param [in] group number of groups
  3124. * @param [in] xDesc descriptor of input data
  3125. * @param [in] x input data in device memory
  3126. * @param [in] beta common scale factor
  3127. * @param [in] outputDesc descriptor of output data
  3128. * @param [in|out] output output data in device memory
  3129. * @return ccStatus_t
  3130. */
  3131. // TODO AICPU: please add shufflechannel custom params and comment
  3132. ccStatus_t ccShuffleChannelForward(ccHandle_t handle, const void *alpha, uint32_t group,
  3133. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  3134. const ccTensorDescriptor_t outputDesc, void *output);
  3135. /**
  3136. * @ingroup dnn
  3137. * @brief mvn forward.
  3138. * @param [in] handle cce handle
  3139. * @param [in] acrossChannel across channel. true: across, false: not
  3140. * @param [in] normalizeVariance normalizeVariance. true: normalizeVariance, false: not
  3141. * @param [in] alpha common scale factor
  3142. * @param [in] xDesc descriptor of input data
  3143. * @param [in] x input data in device memory
        * @param [in] workSpace workSpace addr
        * @param [in] workSpaceSizeInBytes workSpace size in bytes
  3144. * @param [in] beta common scale factor
  3145. * @param [in] outputDesc descriptor of output data
  3146. * @param [in|out] output output data in device memory
  3147. * @return ccStatus_t
  3148. */
  3149. ccStatus_t ccMVNForward(ccHandle_t handle, bool acrossChannel, bool normalizeVariance, const void *alpha,
  3150. const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, uint32_t workSpaceSizeInBytes,
  3151. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3152. /**
  3153. * @ingroup dnn
  3154. * @brief get the workspace size of mvn
  3155. * @param [in] xDesc descriptor of input data
  3156. * @param [in] acrossChannel across channel. true: across, false: not
  3157. * @param [in|out] sizeInBytes workspace size needed for the whole computation
        * @return ccStatus_t
  3158. */
  3159. ccStatus_t ccGetMVNWorkspaceSize(const ccTensorDescriptor_t xDesc, bool acrossChannel, uint32_t *sizeInBytes);
  3160. /**
  3161. * @ingroup dnn
  3162. * @brief heatmap2coord forward; output is the hotspot value and the corresponding coordinates
  3163. * @param [in] handle cce handle
  3164. * @param [in] alpha common scale factor
  3165. * @param [in] xDesc descriptor of input data
  3166. * @param [in] x input data in device memory
  3167. * @param [in] coordh calibration height
  3168. * @param [in] coordw calibration width
  3169. * @param [in] beta common scale factor
  3170. * @param [in] outputDesc descriptor of output data
  3171. * @param [in|out] output output data in device memory
  3172. * @return ccStatus_t
  3173. */
  3174. ccStatus_t ccHeatmap2coordForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3175. int32_t coordh, int32_t coordw, const void *beta,
  3176. const ccTensorDescriptor_t outputDesc, void *output);
  3177. /**
  3178. * @ingroup dnn
  3179. * @brief get the output dimension info of heatmap2coord
  3180. * @param [in] xDesc descriptor of input tensor
  3181. * @param [in|out] dimCnt point to the output dimCnt
  3182. * @param [in|out] dim arrays to save dims
  3183. * @param [in] dimLen length of dim
  3184. * @return ccStatus_t
  3185. */
  3186. ccStatus_t ccGetHeatmap2coordOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
  3187. /**
  3188. * @ingroup dnn
  3189. * @brief swish forward.
  3190. * @param [in] handle cce handle
  3191. * @param [in] scale param of swish function, y = x / (1 + sigmoid(scale * x))
        *        NOTE(review): swish is usually defined as y = x * sigmoid(scale * x); confirm the formula
        *        above against the implementation.
  3192. * @param [in] alpha common scale factor
  3193. * @param [in] xDesc descriptor of input data
  3194. * @param [in] x input data in device memory
  3195. * @param [in] beta common scale factor
  3196. * @param [in] outputDesc descriptor of output data
  3197. * @param [in|out] output output data in device memory
  3198. * @return ccStatus_t
  3199. */
  3200. ccStatus_t ccSwishForward(ccHandle_t handle, const float scale, const void *alpha, const ccTensorDescriptor_t xDesc,
  3201. const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  /**
  * @ingroup dnn
  * @brief te forward. NOTE(review): this declaration had no documentation; the descriptions below are
  *        derived from the parameter names and types only - confirm against the implementation.
  * @param [in] handle cce handle
  * @param [in] stubFunc stub function (presumably identifies the kernel to launch; TODO confirm)
  * @param [in] coreDim core dimension (presumably the number of cores to use; TODO confirm)
  * @param [in] args argument buffer passed to the stub function
  * @param [in] argsSize size of args (presumably in bytes; TODO confirm)
  * @param [in] l2ctrl pointer to the L2 control structure
  * @param [in] inputNum number of inputs (presumably the length of xDesc[] and x[]; TODO confirm)
  * @param [in] xDesc[] descriptor array of input data
  * @param [in] x[] input data array
  * @param [in] outputNum number of outputs (presumably the length of yDesc[] and y[]; TODO confirm)
  * @param [in] yDesc[] descriptor array of output data
  * @param [in|out] y[] output data array
  * @param [in] isAiCore flag (presumably true when the kernel runs on AI Core; TODO confirm)
  * @return ccStatus_t
  */
  3202. ccStatus_t ccTeForward(ccHandle_t handle, const void *stubFunc, uint32_t coreDim, const void *args, uint32_t argsSize,
  3203. const rtL2Ctrl_t *l2ctrl, int32_t inputNum, const ccTensorDescriptor_t xDesc[], const void *x[],
  3204. int32_t outputNum, const ccTensorDescriptor_t yDesc[], void *y[], bool isAiCore);
  /**
  * @ingroup dnn
  * @brief aicpu customize forward; only declared when DAVINCI_LITE is not defined.
  *        NOTE(review): this declaration had no documentation; the descriptions below are derived from
  *        the parameter names and types only - confirm against the implementation.
  * @param [in] handle cce handle
  * @param [in] stubFunc aicpu run function
  * @param [in] xOpDesc[] op tensor descriptor array of input data
  * @param [in] x[] input data array
  * @param [in] inputNum number of inputs (presumably the length of the input arrays; TODO confirm)
  * @param [in] yOpDesc[] op tensor descriptor array of output data
  * @param [in|out] y[] output data array
  * @param [in] op_attr_handle handle of the op attributes
  * @param [in] outputNum number of outputs (presumably the length of the output arrays; TODO confirm)
  * @param [in] xDesc[] descriptor array of input data
  * @param [in] yDesc[] descriptor array of output data
  * @param [in] op_attr_str op attribute buffer
  * @param [in] op_attr_size size of op_attr_str (presumably in bytes; TODO confirm)
  * @return ccStatus_t
  */
  3205. #ifndef DAVINCI_LITE
  3206. ccStatus_t ccAiCpuCustomizeForward(ccHandle_t handle, aicpu_run_func stubFunc, opTensor_t *xOpDesc[], void *x[],
  3207. int32_t inputNum, opTensor_t *yOpDesc[], void *y[], void *op_attr_handle,
  3208. int32_t outputNum, const ccTensorDescriptor_t xDesc[],
  3209. const ccTensorDescriptor_t yDesc[], const void *op_attr_str, uint32_t op_attr_size);
  3210. #endif
  3211. /**
  3212. * @ingroup dnn
  3213. * @brief embedding lookup forward.
  3214. * @param [in] handle cce handle
  3215. * @param [in] alpha common scale factor
  3216. * @param [in] xDesc descriptor of input data x
  3217. * @param [in] x input data x in device memory
  3218. * @param [in] idxDesc descriptor of input data idx
  3219. * @param [in] idx input data idx in device memory
  3220. * @param [in] beta common scale factor
  3221. * @param [in] outputDesc descriptor of output data
  3222. * @param [in|out] output output data in device memory
  3223. * @return ccStatus_t
  3224. */
  3225. ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
  3226. const void *x, const ccTensorDescriptor_t idxDesc, const void *idx,
  3227. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3228. /**
  3229. * @ingroup dnn
  3230. * @brief embedding lookup forward over an array of input tensors.
  3231. * @param [in] handle cce handle
  3232. * @param [in] alpha common scale factor
  3233. * @param [in] inputNum number of entries in xDesc[] and x[]
  3234. * @param [in] xDesc[] descriptor array of input data x
  3235. * @param [in] x[] input data x array in device memory
  3236. * @param [in] workSpace workSpace addr
  3237. * @param [in] workSpaceSizeInBytes workSpace size in bytes
  3238. * @param [in] idxDesc descriptor of input data idx
  3239. * @param [in] idx input data idx in device memory
  3240. * @param [in] partitionStrategy partition strategy (ccPartitionStrategy_t)
  3241. * @param [in] maxNorm addr of maxNorm
  3242. * @param [in] beta common scale factor
  3243. * @param [in] outputDesc descriptor of output data
  3244. * @param [in|out] output output data in device memory
  3245. * @return ccStatus_t
  3246. */
  3247. ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const int32_t inputNum,
  3248. const ccTensorDescriptor_t xDesc[], const void *x[], void *workSpace,
  3249. const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t idxDesc,
  3250. const void *idx, ccPartitionStrategy_t partitionStrategy, const void *maxNorm,
  3251. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3252. /**
  3253. * @ingroup dnn
  3254. * @brief get the output dimension info of embedding lookup
  3255. * @param [in] inputNum number of input tensors
  3256. * @param [in] xDesc[] descriptor array of input tensors
  3257. * @param [in] idxDesc descriptor of idx tensor
  3258. * @param [in|out] dimCnt point to the output dim count
  3259. * @param [in|out] dim[] array to save output dims
  3260. * @param [in] dimLen length of dim
  3261. * @return ccStatus_t
  3262. */
  3263. ccStatus_t ccGetEmbeddingLookupOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[],
  3264. const ccTensorDescriptor_t idxDesc, int32_t *dimCnt, int32_t dim[],
  3265. int32_t dimLen);
  3266. /**
  3267. * @ingroup dnn
  3268. * @brief get the workspace size of embedding lookup
  3269. * @param [in] inputNum number of input tensors
  3270. * @param [in] idxDesc descriptor of idx tensor
  3271. * @param [in] isMaxNormExist whether maxNorm exists
  3272. * @param [in|out] sizeInBytes point to workspace size
  3273. * @return ccStatus_t
  3274. */
  3275. ccStatus_t ccGetEmbeddingLookupWorkspaceSize(const int32_t inputNum, const ccTensorDescriptor_t idxDesc,
  3276. const bool isMaxNormExist, uint32_t *sizeInBytes);
  3277. /**
  3278. * @ingroup dnn
  3279. * @brief check if it is the first layer of resnet50 and semecefc
  3280. * @param [in] tensorDesc descriptor of input tensor.
  3281. * @param [in] convDesc conv descriptor.
  3282. * @param [in] filterDesc descriptor of weight tensor.
  3283. * @return ccStatus_t
  3284. */
  3285. ccStatus_t c04DescParamCheck(const ccTensorDescriptor_t tensorDesc, const ccConvolutionDescriptor_t convDesc,
  3286. const ccFilterDescriptor_t filterDesc);
  3287. #ifndef DAVINCI_LITE
  3288. /**
  3289. * @ingroup dnn
  3290. * @brief convolution backward filter computation
  3291. * @param [in] handle cce handle
  3292. * @param [in] convDesc descriptor of convolution operator
  3293. * @param [in] alpha scaling factors
  3294. * @param [in] beta scaling factors
  3295. * @param [in] xDesc descriptor of input tensor x
  3296. * @param [in] x x data in device memory
  3297. * @param [in] dyDesc descriptor of dy
  3298. * @param [in] dy dy data in device memory
  3299. * @param [in] dwDesc descriptor of dw
  3300. * @param [out] dw dw data in device memory
  3301. * @param [in] algo algorithm of convolution backward
  3302. * @param [in] workSpace temp space, maybe NULL if no need temp space
  3303. * @param [in] workSpaceSizeInBytes sizeof workspace
  3304. * @return ccStatus_t
  3305. */
  3306. ccStatus_t ccConvolutionBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, void *alpha,
  3307. void *beta, const ccTensorDescriptor_t xDesc, const void *x,
  3308. const ccTensorDescriptor_t dyDesc, const void *dy,
  3309. const ccFilterDescriptor_t dwDesc, void *dw, ccConvolutionBwdAlgo_t algo,
  3310. void *workSpace, uint32_t workSpaceSizeInBytes);
  3311. #endif
  3312. /**
  3313. * @ingroup dnn
  3314. * @brief get the temp space size of convolution backward filter computation, maybe no need temp space
  3315. * @param [in] handle cce handle
  3316. * @param [in] dyDesc descriptor of input tensor dy
  3317. * @param [in] convDesc descriptor of convolution operator
  3318. * @param [in] xDesc descriptor of input tensor
  3319. * @param [in] dwDesc descriptor of filter
  3320. * @param [in] algo algorithm of convolution backward
  3321. * @param [in|out] sizeInBytes temp space size need for specified algorithm
  3322. * @return ccStatus_t
  3323. */
  3324. ccStatus_t ccGetConvolutionBackwardFilterWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t dyDesc,
  3325. const ccConvolutionDescriptor_t convDesc,
  3326. const ccTensorDescriptor_t xDesc,
  3327. const ccFilterDescriptor_t dwDesc, ccConvolutionBwdAlgo_t algo,
  3328. uint32_t *sizeInBytes);
  3329. #ifndef DAVINCI_LITE
  3330. ccStatus_t ccBatchNormalizationBackward(ccHandle_t handle, ccBatchNormMode_t mode, const void *alphaDataDiff,
  3331. const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff,
  3332. const ccTensorDescriptor_t xDesc, const void *x,
  3333. const ccTensorDescriptor_t dyDesc, const void *dy,
  3334. const ccTensorDescriptor_t dxDesc, void *dx,
  3335. const ccTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale,
  3336. void *resultBnScaleDiff, void *resultBnBiasDiff, const void *workSpace,
  3337. const uint32_t workSpaceSizeInBytes, double epsilon, const void *SaveMean,
  3338. const void *SaveInvVariance);
  3339. #endif
  3340. ccStatus_t ccGetBatchNormalizationBackwardWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
  3341. ccTensorDescriptor_t xDesc, ccTensorDescriptor_t dyDesc,
  3342. ccTensorDescriptor_t dxDesc,
  3343. ccTensorDescriptor_t bnScaleBiasDesc, uint32_t *sizeInBytes);
  3344. #ifndef DAVINCI_LITE
  3345. ccStatus_t ccBatchNormalizationForwardTraining(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha,
  3346. const void *beta, const ccTensorDescriptor_t xDesc, const void *x,
  3347. const ccTensorDescriptor_t yDesc, void *y,
  3348. const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
  3349. const void *bnBias, double exponentialAverageFactor,
  3350. void *resultRunningMean, void *resultRunningVariance, void *workSpace,
  3351. uint32_t workSpaceSizeInBytes, double epsilon, void *resultSaveMean,
  3352. void *resultSaveInvVariance, const bool isTraining);
  3353. #endif
  3354. ccStatus_t ccGetBatchNormalizationForwardTrainingWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
  3355. ccTensorDescriptor_t xDesc, ccTensorDescriptor_t yDesc,
  3356. const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
  3357. uint32_t *sizeInBytes);
  3358. /**
  3359. * @ingroup dnn
  3360. * @brief generate a random normal tensor from the given mean and scale.
  3361. * @param [in] handle Stream handle.
  3362. * @param [in] alpha reserved.
  3363. * @param [in] meanDesc Descriptor of the mean tensor.
  3364. * @param [in] mean Data pointer of mean.
  3365. * @param [in] scaleDesc Descriptor of the scale tensor.
  3366. * @param [in] scale Data pointer of scale.
  3367. * @param [in] seed1 random seed used to generate random number
  3368. * @param [in] seed2 random seed used to generate random number
  3369. * @param [in] beta reserved.
  3370. * @param [in] outputDesc Descriptor of the generated random tensor.
  3371. * @param [out] output Data pointer of output.
  3372. * @return ccStatus_t
  3373. */
  3374. ccStatus_t ccRandomNormalForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t meanDesc,
  3375. const void *mean, const ccTensorDescriptor_t scaleDesc, const void *scale,
  3376. const int64_t seed1, const int64_t seed2, const void *beta,
  3377. const ccTensorDescriptor_t outputDesc, void *output);
  3378. /**
  3379. * @ingroup dnn
  3380. * @brief generate random uniform tensor.
  3381. * @param [in] handle Stream handle.
  3382. * @param [in] alpha reserved.
  3383. * @param [in] minvalDesc Descriptor of the minval tensor.
  3384. * @param [in] minval Data pointer of minval.
  3385. * @param [in] maxvalDesc Descriptor of the maxval tensor.
  3386. * @param [in] maxval Data pointer of maxval.
  3387. * @param [in] seed1 random seed used to generate random number
  3388. * @param [in] seed2 random seed used to generate random number
  3389. * @param [in] beta reserved.
  3390. * @param [in] outputDesc Descriptor of the generated random tensor.
  3391. * @param [out] output Data pointer of output.
  3392. * @return ccStatus_t
  3393. */
  3394. ccStatus_t ccRandomUniformForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t minvalDesc,
  3395. const void *minval, const ccTensorDescriptor_t maxvalDesc, const void *maxval,
  3396. const int64_t seed1, const int64_t seed2, const void *beta,
  3397. const ccTensorDescriptor_t outputDesc, void *output);
  3398. /**
  3399. * @ingroup dnn
  3400. * @brief batch matmul forward computation.
  3401. * @param [in] handle Stream handle.
  3402. * @param [in] alpha reserved.
  3403. * @param [in] xDesc tensorA Desc.
  3404. * @param [in] x Data pointer of tensorA.
  3405. * @param [in] yDesc tensorB Desc.
  3406. * @param [in] y Data pointer of tensorB.
  3407. * @param [in] beta reserved.
  3408. * @param [in] adj_x tensorA transpose flag
  3409. * @param [in] adj_y tensorB transpose flag
  3410. * @param [in] outDesc Description of the output tensor.
  3411. * @param [out] out Data pointer of output.
  3412. * @return ccStatus_t
  3413. */
  3414. ccStatus_t ccBatchMatMulForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3415. const ccTensorDescriptor_t yDesc, const void *y, const void *beta, const bool adj_x,
  3416. const bool adj_y, const ccTensorDescriptor_t outDesc, void *out);
  3417. ccStatus_t ccGetBatchMatMulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, bool adj_x,
  3418. bool adj_y, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3419. /**
  3420. * @ingroup dnn
  3421. * @brief generate conv int8 all offset factors
  3422. * @param [in] para the struct for scale and offset of input, filter and output
  3423. * @param [in|out] offsetW offset of filter
  3424. * @param [in|out] offsetPad offset of input
  3425. * @param [in|out] scaledQrq scale computing result of input, filter and output
  3426. * @param [in|out] nextoffsetq offset of output
  3427. * @return ccStatus_t
  3428. */
  3429. ccStatus_t ccGenQuantAllOffsetFactor(const ccQuantAllOffsetPara_t *para, uint8_t &offsetW, uint8_t &offsetPad,
  3430. uint16_t &scaledQrq, uint16_t &nextoffsetq);
  3431. /**
  3432. * @ingroup dnn
  3433. * @brief get conv int8 all offset fracZ size (describes ccGetInt8AllOffsetFilterFracZSizeInBytes, not ccSetGroupConvScene)
  3434. * @param [in] filterSrcDesc,filterDesc descriptors of filter tensor (presumably before / after fracZ transform - confirm)
  3435. * @param [in|out] size conv int8 all offset fracZ size
  3436. * @param [in] groupNum group conv num
  3437. * @return ccStatus_t
  3438. */
  3439. ccStatus_t ccSetGroupConvScene(const ccFilterDescriptor_t tensorDesc, ccConvolutionDescriptor_t convDesc);
  3440. ccStatus_t ccGetInt8AllOffsetFilterFracZSizeInBytes(const ccFilterDescriptor_t filterSrcDesc,
  3441. const ccFilterDescriptor_t filterDesc, uint32_t &size,
  3442. uint32_t groupNum);
  3443. /**
  3444. * @ingroup dnn
  3445. * @brief transform filter in conv int8 all offset scene
  3446. * @param [in] filterSrcInfo descriptor of filter tensor before fracZ transform
  3447. * @param [in] filterSrc filter addr before fracZ transform
  3448. * @param [in] filterDstInfo descriptor of filter tensor after fracZ transform
  3449. * @param [out] filterDst filter addr after fracZ transform
  3450. * @param [in] quantPara the struct for scale and offset of input, filter and output
  3451. * @param [in] ySizeInBytes filter size after fracZ transform
  3452. * @param [in] outputDataType output data type
  3453. * @param [in] groupNum group conv num
  3454. * @return ccStatus_t
  3455. */
  3456. ccStatus_t ccTransFilterInt8AllOffset(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
  3457. ccFilterDescriptor_t filterDstInfo, void *filterDst,
  3458. const ccQuantAllOffsetPara_t *quantPara, uint32_t ySizeInBytes,
  3459. ccDataType_t outputDataType, uint32_t groupNum);
  3460. /**
  3461. * @ingroup dnn
  3462. * @brief transform bias in conv int8 all offset scene
  3463. * @param [in] filterDesc descriptor of filter tensor
  3464. * @param [in] biasDesc descriptor of bias tensor
  3465. * @param [in] quantPara the struct for scale and offset of input, filter and output
  3466. * @param [in] w filter addr
  3467. * @param [in] bias bias addr
  3468. * @return ccStatus_t
  3469. */
  3470. ccStatus_t ccTransInt8AllOffsetBias(const ccFilterDescriptor_t filterDesc, const ccTensorDescriptor_t biasDesc,
  3471. const ccQuantAllOffsetPara_t *quantPara, const void *w, const void *bias);
  3472. /**
  3473. * @ingroup dnn
  3474. * @brief dequantize forward computation
  3475. * @param [in] handle handle id
  3476. * @param [in] alpha alpha addr
  3477. * @param [in] xDesc descriptor of the input
  3478. * @param [in] x x data addr
  3479. * @param [in] beta beta data addr
  3480. * @param [in] yDesc descriptor of the output
  3481. * @param [in|out] y y (output) data addr
  3482. * @return ccStatus_t
  3483. */
  3484. ccStatus_t ccDequantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
  3485. const void *x, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  3486. /**
  3487. * @ingroup dnn
  3488. * @brief quantize forward computation
  3489. * @param [in] handle handle id
  3490. * @param [in] alpha alpha addr
  3491. * @param [in] xDesc descriptor of the input
  3492. * @param [in] x x data addr
  3493. * @param [in] beta beta data addr
  3494. * @param [in] yDesc descriptor of the output
  3495. * @param [in|out] y y (output) data addr
  3496. * @return ccStatus_t
  3497. */
  3498. ccStatus_t ccQuantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3499. const void *beta, const ccTensorDescriptor_t yDesc, void *y);
  3500. #ifndef DAVINCI_LITE
  3501. ccStatus_t ccActivationBackward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
  3502. const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t xDesc,
  3503. const void *x, const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
  3504. #endif
  3505. ccStatus_t ccL2LossForward(ccHandle_t handle, const ccL2LossDescriptor_t l2lossDesc, const void *alpha,
  3506. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  3507. const ccTensorDescriptor_t yDesc, void *y);
  3508. /**
  3509. * @ingroup dnn
  3510. * @brief get the output dimension info of top k v2 for the given k and axis
  3511. * @param [in] xDesc descriptor of input tensor x
  3512. * @param [in] kDesc,k descriptor and data pointer of input tensor k
  3513. * @param [in|out] dimCnt point to the output dimCnt
  3514. * @param [in|out] dim arrays to save dims
  3515. * @param [in] dimLen length of dim
  3516. * @return ccStatus_t
  3517. */
  3518. ccStatus_t ccGetTopKV2OutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, const void *k,
  3519. const int64_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3520. /**
  3521. * @ingroup dnn
  3522. * @brief top k v2 forward computation
  3523. * @param [in] handle cce handle
  3524. * @param [in] alpha scaling factors
  3525. * @param [in] xDesc,x descriptor and device data of input tensor x
  3526. * @param [in] kDesc,k descriptor and data pointer of input tensor k
  3527. * @param [in] beta bias factors
  3528. * @param [in] sorted,axis sorted flag and axis parameters of top k
  3529. * @param [in] workSpace,workSpaceSizeInBytes workspace address and its size in bytes
  3530. * @param [in] outputValuesDesc,outputIndicesDesc descriptors of the output values and output indices tensors
  3531. * @param [in|out] outputValues,outputIndices output values and output indices data
  3532. * @return ccStatus_t
  3533. */
  3534. ccStatus_t ccTopKV2Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3535. const ccTensorDescriptor_t kDesc, const void *k, const void *beta, const bool sorted,
  3536. const int64_t axis, void *workSpace, const uint32_t workSpaceSizeInBytes,
  3537. const ccTensorDescriptor_t outputValuesDesc, void *outputValues,
  3538. const ccTensorDescriptor_t outputIndicesDesc, void *outputIndices);
  3539. /**
  3540. * @ingroup dnn
  3541. * @brief get the workspace size of top k v2 for the given k and axis
  3542. * @param [in] xDesc descriptor of input tensor x
  3543. * @param [in] kDesc,k descriptor and data pointer of input tensor k
  3544. * @param [in] indiceDesc descriptor of the indices tensor
  3545. * @param [in|out] sizeInBytes point to workspace size
  3546. * @return ccStatus_t
  3547. */
  3548. ccStatus_t ccGetTopKV2ForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc,
  3549. const ccTensorDescriptor_t indiceDesc, const void *k, const int64_t axis,
  3550. uint32_t *sizeInBytes);
  3551. /**
  3552. * @ingroup dnn
  3553. * @brief Get unsorted segment reduction output dim
  3554. * @param [in] xDesc descriptor of input tensor
  3555. * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
  3556. * @param [in] segmentsNum output slice num
  3557. * @param [out] dimCnt output tensor dim cnt
  3558. * @param [out] dim output tensor dim
  3559. * @param [in] dimLen length of dim
  3560. * @return ccStatus_t
  3561. */
  3562. ccStatus_t ccGetUnsortedSegmentReductionOutputDim(const ccTensorDescriptor_t xDesc,
  3563. const ccTensorDescriptor_t segmentIdsDesc, int32_t segmentsNum,
  3564. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3565. /**
  3566. * @ingroup dnn
  3567. * @brief unsorted segment sum forward computation
  3568. * @param [in] handle cce handle
  3569. * @param [in] alpha scaling factors
  3570. * @param [in] xDesc descriptor of input tensor
  3571. * @param [in] x input data in device memory
  3572. * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
  3573. * @param [in] segmentIds input segmentIds data in device memory
  3574. * @param [in] segmentsNum output slice num
  3575. * @param [in] beta bias factors
  3576. * @param [in] outputDesc descriptor of output tensor
  3577. * @param [in|out] output output data in device memory
  3578. * @return ccStatus_t
  3579. */
  3580. ccStatus_t ccUnsortedSegmentSumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
  3581. const void *x, const ccTensorDescriptor_t segmentIdsDesc, const void *segmentIds,
  3582. const int32_t segmentsNum, const void *beta,
  3583. const ccTensorDescriptor_t outputDesc, void *output);
  3584. /**
  3585. * @ingroup dnn
  3586. * @brief reverse sequence forward computation
  3587. * @param [in] handle cce handle
  3588. * @param [in] alpha scaling factors
  3589. * @param [in] inputDesc,input descriptor and device data of the input tensor
  3590. * @param [in] seqLengthsDesc,seqLengths descriptor and device data of the seqLengths tensor
  3591. * @param [in] seqAxis sequence axis
  3592. * @param [in] batchAxis batch axis
  3593. * @param [in] beta bias factors
  3594. * @param [in] outputDesc descriptor of output tensor
  3595. * @param [in|out] output output data in device memory
  3596. * @return ccStatus_t
  3597. */
  3598. ccStatus_t ccReverseSequenceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t inputDesc,
  3599. const void *input, const ccTensorDescriptor_t seqLengthsDesc,
  3600. const void *seqLengths, int64_t seqAxis, int64_t batchAxis, const void *beta,
  3601. const ccTensorDescriptor_t outputDesc, void *output);
  3602. /**
  3603. * @ingroup dnn
  3604. * @brief equal comparison between two tensors.
  3605. * @param [in] handle cce handle
  3606. * @param [in] alpha,beta reserved.
  3607. * @param [in] xDesc description of the left operand tensor.
  3608. * @param [in] x data pointer of the left operand tensor.
  3609. * @param [in] yDesc description of the right operand tensor.
  3610. * @param [in] y data pointer of the right operand tensor.
  3611. * @param [in] outputDesc description of the output tensor.
  3612. * @param [out] output data pointer of the output tensor.
  3613. * @return ccStatus_t
  3614. */
  3615. ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3616. const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
  3617. const ccTensorDescriptor_t outputDesc, void *output);
  3618. /**
  3619. * @ingroup dnn
  3620. * @brief get output shape of equal.
  3621. * @param [in] xDesc description of the left operand tensor.
  3622. * @param [in] yDesc description of the right operand tensor.
  3623. * @param [out] dimCnt output tensor dim cnt
  3624. * @param [out] dim output tensor dim
  3625. * @param [in] dimLen length of dim
  3626. * @return ccStatus_t
  3627. */
  3628. ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
  3629. int32_t *dim, int32_t dimLen);
  3630. /**
  3631. * @ingroup dnn
  3632. * @brief invert permutation forward computation
  3633. * @param [in] handle cce handle
  3634. * @param [in] alpha scaling factors
  3635. * @param [in] xDesc descriptor of input tensor
  3636. * @param [in] x input data in device memory
  3637. * @param [in] beta bias factors
  3638. * @param [in] outputDesc descriptor of output tensor
  3639. * @param [in|out] output output data in device memory
  3640. * @return ccStatus_t
  3641. */
  3642. ccStatus_t ccInvertPermutationForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
  3643. const void *x, const void *beta, const ccTensorDescriptor_t outputDesc,
  3644. void *output);
  3645. /**
  3646. * @ingroup dnn
  3647. * @brief get the workspace size of non max suppression
  3648. * @param [in] handle descriptor of handle
  3649. * @param [in] scoresDesc descriptor of input tensor scoresDesc
  3650. * @param [in] boxesDesc descriptor of input tensor boxesDesc
  3651. * @param [in|out] sizeInBytes point to workspace size
  3652. * @return ccStatus_t
  3653. */
  3654. ccStatus_t ccGetNonMaxSuppressionWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t scoresDesc,
  3655. const ccTensorDescriptor_t boxesDesc, uint32_t *sizeInBytes);
  3656. /**
  3657. * @ingroup dnn
  3658. * @brief get the output dim of non max suppression
  3659. * @param [in] scoresDesc descriptor of input tensor scoresDesc
  3660. * @param [in] maxOutPutSize the max size of output
  3661. * @param [in|out] dimCnt point to the count of dim
  3662. * @param [in|out] dim[] the array of output dim
  3663. * @param [in] dimLen length of dim
  3664. * @return ccStatus_t
  3665. */
  3666. ccStatus_t ccGetNonMaxSuppressionOutputDim(const ccTensorDescriptor_t scoresDesc, const int32_t maxOutPutSize,
  3667. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3668. /**
  3669. * @ingroup dnn
  3670. * @brief multinomial forward.
  3671. * @param [in] handle cce handle
  3672. * @param [in] alpha common scale factor
  3673. * @param [in] xDesc descriptor of input data
  3674. * @param [in] x input data in device memory
  3675. * @param [in] numSamples number of independent samples to draw for each row slice
  3676. * @param [in] seed1 seed used to create a random seed for the distribution
  3677. * @param [in] seed2 seed used to create a random seed for the distribution
  3678. * @param [in] workSpace work space for internal access
  3679. * @param [in] workSpaceSizeInBytes work space size
  3680. * @param [in] beta common scale factor
  3681. * @param [in] outputDesc descriptor of output data
  3682. * @param [in|out] output output data in device memory
  3683. * @return ccStatus_t
  3684. */
  3685. ccStatus_t ccMultinomialForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3686. int32_t numSamples, int64_t seed1, int64_t seed2, void *workSpace,
  3687. uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
  3688. void *output);
  3689. /**
  3690. * @ingroup dnn
  3691. * @brief get output dim of generated one-hot tensor.
  3692. * @param [in] indicesDesc Indices description of one-hot position.
  3693. * @param [in] depth one-hot depth (int32 value).
  3694. * @param [in] axis one-hot axis (int32 value).
  3695. * @param [out] dimCnt point to the output dim count.
  3696. * @param [out] dim array to save the output dims.
  3697. * @param [in] dimLen length of dim
  3698. * @return ccStatus_t
  3699. */
  3700. ccStatus_t ccGetOneHotOutputDim(const ccTensorDescriptor_t indicesDesc, int32_t depth, int32_t axis, int32_t *dimCnt,
  3701. int32_t *dim, int32_t dimLen);
  3702. /**
  3703. * @ingroup dnn
  3704. * @brief generate a one-hot tensor using the given on/off values.
  3705. * @param [in] handle Stream handle.
  3706. * @param [in] alpha reserved.
  3707. * @param [in] indicesDesc Indices description of one-hot position.
  3708. * @param [in] indices Data pointer of indices.
  3709. * @param [in] onDesc On value description.
  3710. * @param [in] on Data pointer of on value.
  3711. * @param [in] offDesc Off value description.
  3712. * @param [in] off Data pointer of off value.
  3713. * @param [in] depth one-hot depth (int32 value).
  3714. * @param [in] axis one-hot axis (int32 value).
  3715. * @param [in] beta reserved.
  3716. * @param [in] outputDesc Description of the generated one-hot tensor.
  3717. * @param [out] output Data pointer of output.
  3718. * @return ccStatus_t
  3719. */
  3720. ccStatus_t ccOneHotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t indicesDesc,
  3721. const void *indices, const ccTensorDescriptor_t onDesc, const void *on,
  3722. const ccTensorDescriptor_t offDesc, const void *off, const int32_t depth, const int32_t axis,
  3723. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3724. /**
  3725. * @ingroup dnn
  3726. * @brief get the workspaceSize of multinomial
  3727. * @param [in] xDesc descriptor of input tensor
  3729. * @param [out] sizeInBytes workspace size in bytes
  3730. * @return ccStatus_t
  3731. */
  3732. ccStatus_t ccGetMultinomialWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
  3733. /**
  3734. * @ingroup dnn
  3735. * @brief get the output dimension info of multinomial
  3736. * @param [in] xDesc descriptor of input tensor
  3737. * @param [in] numSample number of independent samples to draw for each row slice
  3738. * @param [in|out] dimCnt point to the output dimCnt
  3739. * @param [in|out] dim arrays to save dims
  3740. * @param [in] dimLen length of dim
  3741. * @return ccStatus_t
  3742. */
  3743. ccStatus_t ccGetMultinomialOutputDim(const ccTensorDescriptor_t xDesc, int32_t numSample, int32_t *dimCnt,
  3744. int32_t dim[], int32_t dimLen);
  3745. /**
  3746. * @ingroup dnn
  3747. * @brief get the output dimension info of BiasAddBackward
  3748. * @param [in] dyDesc descriptor of input tensor
  3749. * @param [in|out] n outputTensor [N]CHW
  3750. * @param [in|out] c outputTensor N[C]HW
  3751. * @param [in|out] h outputTensor NC[H]W
  3752. * @param [in|out] w outputTensor NCH[W]
  3753. * @return ccStatus_t
  3754. */
  3755. ccStatus_t ccGetBiasAddBackwardOutputDim(const ccTensorDescriptor_t dyDesc, int32_t *n, int32_t *c, int32_t *h,
  3756. int32_t *w);
  3757. /**
  3758. * @ingroup dnn
  3759. * @brief biasadd backward.
  3760. * @param [in] handle cce handle
  3761. * @param [in] alpha common scale factor
  3762. * @param [in] dyDesc descriptor of input data
  3763. * @param [in] dy input data in device memory
  3764. * @param [in] beta common scale factor
  3765. * @param [in] dbDesc descriptor of output data
  3766. * @param [in|out] db output data in device memory
  3767. * @return ccStatus_t
  3768. */
  3769. #ifndef DAVINCI_LITE
  3770. ccStatus_t ccBiasAddBackward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t dyDesc, const void *dy,
  3771. const void *beta, const ccTensorDescriptor_t dbDesc, void *db);
  3772. ccStatus_t ccMaxPoolWithArgmaxForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
  3773. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  3774. const ccTensorDescriptor_t yDesc, void *y, const ccTensorDescriptor_t argMaskDesc,
  3775. void *argMask);
  3776. #endif
  3777. ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
  3778. ccStatus_t ccDestroyPoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
  3779. ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t poolingMaskDesc, ccTensorFormat_t format,
  3780. ccDataType_t dataType, int32_t n, int32_t c, int32_t h, int32_t w,
  3781. int32_t windowH, int32_t windowW);
  3782. ccStatus_t ccGetPoolingMaskTensorSizeInBytes(ccTensorDescriptor_t poolingMaskDesc, uint32_t *size);
  3783. /**
  3784. * @ingroup dnn
  3785. * @brief get the mask output dimension info of maxpooling training forward
  3786. * @param [in] poolingDesc descriptor of pooling operator
  3787. * @param [in] xDesc descriptor of input tensor
  3788. * @param [in|out] n point to batch size
  3789. * @param [in|out] c point to channels
  3790. * @param [in|out] h point to height of feature map
  3791. * @param [in|out] w point to width of feature map
  3792. * @param [in|out] windowH point to height of window
  3793. * @param [in|out] windowW point to width of window
  3794. * @return ccStatus_t
  3795. */
  3796. ccStatus_t ccGetPoolingMaskDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc, int32_t *n,
  3797. int32_t *c, int32_t *h, int32_t *w, int32_t *windowH, int32_t *windowW);
  3798. #ifndef DAVINCI_LITE
  3799. ccStatus_t ccSoftmaxCrossEntropyLoss(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
  3800. ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
  3801. const ccTensorDescriptor_t logitsDesc, const void *logits,
  3802. const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
  3803. const void *beta, const ccTensorDescriptor_t lossDesc, void *loss);
  3804. ccStatus_t ccSoftmaxCrossEntropyDx(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
  3805. ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
  3806. const ccTensorDescriptor_t logitsDesc, const void *logits,
  3807. const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
  3808. const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
  3809. ccStatus_t ccAvgPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
  3810. const ccTensorDescriptor_t dyDesc, const void *dy, const void *beta,
  3811. const ccTensorDescriptor_t dxDesc, const void *dx);
  3812. ccStatus_t ccTrainingAssignOp(ccHandle_t handle, const ccAssignOpMode_t assignOpDesc, const void *alpha,
  3813. const void *beta, const ccTensorDescriptor_t aDesc, void *a,
  3814. const ccTensorDescriptor_t bDesc, const void *b);
  3815. /**
  3816. * @ingroup dnn
  3817. * @brief momentum optimizer for variable update
  3818. * @param [in] handle cce handle
  3819. * @param [in] inputDesc descriptor of input tensor: gradient,accumulation,variable
  3820. * @param [in] gradient gradient input
  3821. * @param [in|out] accumulation accumulation input and updated output
  3822. * @param [in|out] variable variable input and updated output
  3823. * @param [in] algo indicate whether need FP16 output
  3824. * @param [in] momentum scalar to control accumulation
  3825. * @param [in] learningRate scalar
  3826. * @param [in] lossScaleReciprocal scalar
  3827. * @param [in] workSpace additional memory address
  3828. * @param [in] workSpaceSizeInBytes additional memory size
  3829. * @param [out] variableUpdatedFP16Desc descriptor of FP16 output tensor: variableUpdatedFP16
  3830. * @param [out] variableUpdatedFP16 variableUpdatedFP16
  3831. * @return ccStatus_t
  3832. */
  3833. ccStatus_t ccApplyMomentum(ccHandle_t handle, const ccTensorDescriptor_t inputDesc, const void *gradient,
  3834. void *accumulation, void *variable, const ccMomentumAlgo_t algo, const void *momentum,
  3835. const void *learningRate, const void *lossScaleReciprocal, void *workSpace,
  3836. const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t variableUpdatedFP16Desc,
  3837. void *variableUpdatedFP16);
  3838. ccStatus_t ccSsdClassifyLossTrain(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t labelDesc,
  3839. const void *label, const ccTensorDescriptor_t greaterConstDesc,
  3840. const void *greaterConst, const ccTensorDescriptor_t subConstDesc,
  3841. const void *subConst, const ccTensorDescriptor_t sparseDesc, const void *sparse,
  3842. const void *beta, const ccTensorDescriptor_t castoutDesc, const void *castout,
  3843. const ccTensorDescriptor_t muloutDesc, const void *mulout);
  3844. #endif
  3845. /**
  3846. * @ingroup dnn
  3847. * @brief get the workspace size of applymomentum
  3848. * @param [in] inputDesc descriptor of input tensor
  3849. * @return ccStatus_t
  3850. */
  3851. ccStatus_t ccGetApplyMomentumWorkspaceSize(const ccTensorDescriptor_t inputDesc, uint32_t *sizeInBytes);
  3852. #ifndef DAVINCI_LITE
  3853. ccStatus_t ccHwck2FracZ(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
  3854. const ccFilterDescriptor_t yDesc, void *y);
  3855. ccStatus_t ccFracZ2Hwck(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
  3856. const ccFilterDescriptor_t yDesc, void *y);
  3857. ccStatus_t ccAddNForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
  3858. const void *x[], const void *beta, void *workSpace, uint32_t workSpaceSizeInBytes,
  3859. const ccTensorDescriptor_t yDesc, void *y);
  3860. #endif
  3861. ccStatus_t ccGetAddNForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
  3862. const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
  3863. ccStatus_t ccGetAddNForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
  3864. ccStatus_t ccAddTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3865. const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
  3866. uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
  3867. ccStatus_t ccGetAddTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
  3868. const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
  3869. uint32_t *sizeInBytes);
  3870. ccStatus_t ccGetAddTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
  3871. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3872. ccStatus_t ccMulTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3873. const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
  3874. uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
  3875. ccStatus_t ccGetMulTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
  3876. const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
  3877. uint32_t *sizeInBytes);
  3878. ccStatus_t ccGetMulTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
  3879. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  3880. /**
  3881. * @ingroup dnn
  3882. * @brief get workspace size
  3883. * @param [in] xDesc descriptor of input tensor
  3884. * @param [in|out] sizeInBytes workspace size
  3885. * @return ccStatus_t
  3886. */
  3887. ccStatus_t ccGetRandomShuffleWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
  3888. /**
  3889. * @ingroup dnn
  3890. * @brief random shuffle forward computation
  3891. * @param [in] handle cce handle
  3892. * @param [in] alpha common scale factor
  3893. * @param [in] xDesc descriptor of input data
  3894. * @param [in] x input data in device memory
  3895. * @param [in] workspace temporary space
  3896. * @param [in] workspaceSizeInBytes temporary space size
  3897. * @param [in] seed1 random seed used to generate random number
  3898. * @param [in] seed2 random seed used to generate random number
  3899. * @param [in] beta common scale factor
  3900. * @param [in] outputDesc descriptor of output data
  3901. * @param [in|out] output output data in device memory
  3902. * @return ccStatus_t
  3903. */
  3904. ccStatus_t ccRandomShuffleForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3905. void *workspace, const uint32_t workspaceSizeInBytes, const int64_t seed1,
  3906. const int64_t seed2, const void *beta, const ccTensorDescriptor_t outputDesc,
  3907. void *output);
  3908. /**
  3909. * @ingroup dnn
  3910. * @brief sin forward:
  3911. * data type only support float float16 double
  3912. * data format only support ND
  3913. * @param [in] handle cce handle
  3914. * @param [in] alpha common scale factor
  3915. * @param [in] xDesc descriptor of input data
  3916. * @param [in] input input data in device memory
  3917. * @param [in] beta common scale factor
  3918. * @param [in] outputDesc descriptor of output data
  3919. * @param [in|out] output output data in device memory
  3920. * @return ccStatus_t
  3921. */
  3922. ccStatus_t ccSinForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
  3923. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3924. /**
  3925. * @ingroup dnn
  3926. * @brief cos forward:
  3927. * data type only support float float16 double
  3928. * data format only support ND
  3929. * @param [in] handle cce handle
  3930. * @param [in] alpha common scale factor
  3931. * @param [in] xDesc descriptor of input data
  3932. * @param [in] input input data in device memory
  3933. * @param [in] beta common scale factor
  3934. * @param [in] outputDesc descriptor of output data
  3935. * @param [in|out] output output data in device memory
  3936. * @return ccStatus_t
  3937. */
  3938. ccStatus_t ccCosForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
  3939. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3940. /**
  3941. * @ingroup dnn
  3942. * @brief tan forward:
  3943. * data type only support float float16 double
  3944. * data format only support ND
  3945. * @param [in] handle cce handle
  3946. * @param [in] alpha common scale factor
  3947. * @param [in] xDesc descriptor of input data
  3948. * @param [in] input input data in device memory
  3949. * @param [in] beta common scale factor
  3950. * @param [in] outputDesc descriptor of output data
  3951. * @param [in|out] output output data in device memory
  3952. * @return ccStatus_t
  3953. */
  3954. ccStatus_t ccTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
  3955. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3956. /**
  3957. * @ingroup dnn
  3958. * @brief get the output dimension info of unstack
  3959. * @param [in] xDesc descriptor of input tensor
  3960. * @param [in] axis the axis to unstack along
  3961. * @param [in|out] dimCnt point to the output dimCnt
  3962. * @param [in|out] dim arrays to save dims
  3963. * @param [in] dimLen length of dim
  3964. * @return ccStatus_t
  3965. */
  3966. ccStatus_t ccGetUnstackOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, int32_t *dimCnt, int32_t dim[],
  3967. int32_t dimLen);
  3968. /**
  3969. * @ingroup dnn
  3970. * @brief unstack forward.
  3971. * @param [in] handle cce handle
  3972. * @param [in] alpha common scale factor
  3973. * @param [in] xDesc descriptor of input data
  3974. * @param [in] x input data in device memory
  3975. * @param [in] num the length of the dimension axis
  3976. * @param [in] axis the axis to unstack along
  3977. * @param [in] beta common scale factor
  3978. * @param [in] outputDesc descriptor of output data
  3979. * @param [in|out] output output data in device memory
  3980. * @return ccStatus_t
  3981. */
  3982. ccStatus_t ccUnstackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3983. int32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t outputDesc,
  3984. void *output[]);
  3985. ccStatus_t ccResizeNearestNeighborCpuForward(ccHandle_t handle, const ccResizeNearestNeighborDescriptor_t resizeDesc,
  3986. const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  3987. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  3988. /**
  3989. * @ingroup dnn
  3990. * @brief get the output dimension info of resize nearest neighbor
  3991. * @param [in] resizeDesc descriptor of resize
  3992. * @param [in] xDesc descriptor of input tensor
  3993. * @param [in|out] dimCnt point to the output dimCnt
  3994. * @param [in|out] dim arrays to save dims
  3995. * @param [in] dimLen length of dim
  3996. * @return ccStatus_t
  3997. */
  3998. ccStatus_t ccGetResizeNearestNeighborOutputDim(const ccResizeNearestNeighborDescriptor_t resizeDesc,
  3999. const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t dim[],
  4000. int32_t dimLen);
  4001. /**
  4002. * @ingroup dnn
  4003. * @brief create descriptor of ResizeNearestNeighbor
  4004. * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
  4005. * @return ccStatus_t
  4006. */
  4007. ccStatus_t ccCreateResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
  4008. /**
  4009. * @ingroup dnn
  4010. * @brief destroy descriptor of ResizeNearestNeighbor
  4011. * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
  4012. * @return ccStatus_t
  4013. */
  4014. ccStatus_t ccDestroyResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
  4015. /**
  4016. * @ingroup dnn
  4017. * @brief set descriptor of ResizeNearestNeighbor.
  4018. * @param [in|out] resizeDesc descriptor of resize nearest neighbor operator
  4019. * @param [in] alignCorners whether the centers of input and output are aligned
  4020. * @param [in] height height of output
  4021. * @param [in] width width of output
  4022. * @return ccStatus_t
  4023. */
  4024. ccStatus_t ccSetResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t resizeDesc, bool alignCorners,
  4025. int32_t height, int32_t width);
  4026. /**
  4027. * @ingroup dnn
  4028. * [ccGetPadV2OutputDim]
  4029. * @brief get the output dimension info of pad
  4030. * @param [in] xDesc descriptor of input tensor x
  4031. * @param [in] padDesc descriptor of input paddings
  4032. * @param [in|out] dimCnt point to the output dimCnt
  4033. * @param [in|out] dim arrays to save dims
  4034. * @param [in] dimLen length of dim
  4035. * @return ccStatus_t
  4036. */
  4037. ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t xDesc, const ccPadV2Descriptor_t padDesc, int32_t *dimCnt,
  4038. int32_t dim[], int32_t dimLen);
  4039. ccStatus_t ccPadV2CpuForward(ccHandle_t handle, const ccPadV2Descriptor_t padDesc, const void *alpha,
  4040. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  4041. const ccTensorDescriptor_t outputDesc, void *output);
  4042. /**
  4043. * @ingroup dnn
  4044. * @brief create descriptor of parameters for padv2 function
  4045. * @param [in] padDesc point to descriptor of parameters for padv2 function
  4046. * @return ccStatus_t
  4047. */
  4048. ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *padDesc);
  4049. /**
  4050. * @ingroup dnn
  4051. * @brief destroy descriptor of parameters for padv2 function
  4052. * @param [in] padDesc point to descriptor of parameters for padv2 function
  4053. * @return ccStatus_t
  4054. */
  4055. ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *padDesc);
  4056. /**
  4057. * @brief init descriptor for parameter of padv2 function
  4058. * @param [in|out] padDesc descriptor of pad
  4059. * @param [in] padShapeCnt padshape count
  4060. * @param [in] padShapeLow padshape low
  4061. * @param [in] padShapeHigh padshape high
  4062. * @param [in] padMode pad mode
  4063. * @param [in] padValue pad value ptr
  4064. * @param [in] padValueType pad value data type
  4065. * @return ccStatus_t
  4066. */
  4067. ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t padDesc, const int32_t padShapeCnt, const int32_t padShapeLow[],
  4068. const int32_t padShapeHigh[], const ccPadMode_t padMode, const void *padValue,
  4069. const ccDataType_t padValueType);
  4070. /**
  4071. * @ingroup dnn
  4072. * @brief create descriptor of batchToSpace
  4073. * @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace
  4074. * @return ccStatus_t
  4075. */
  4076. ccStatus_t ccCreateBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
  4077. /**
  4078. * @ingroup dnn
  4079. * @brief set batchToSpaceDesc
  4080. * @param [in|out] paramsDesc descriptor of batchToSpace
  4081. * @param [in] blockShape blockShape of batchToSpace
  4082. * @param [in] crops crops of batchToSpace
  4083. * @param [in] blockShapeLength blockShapeLength of batchToSpace
  4084. * @return ccStatus_t
  4085. */
  4086. ccStatus_t ccSetBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t paramsDesc, const int32_t *blockShape,
  4087. const int32_t *crops, const int32_t blockShapeLength);
  4088. /**
  4089. * @ingroup dnn
  4090. * @brief get batchToSpaceDesc
  4091. * @param [in] paramsDesc descriptor of batchToSpace
  4092. * @param [in|out] blockShape blockShape of batchToSpace
  4093. * @param [in|out] crops crops of batchToSpace
  4094. * @param [in|out] blockShapeLength blockShapeLength of batchToSpace
  4095. * @return ccStatus_t
  4096. */
  4097. ccStatus_t ccGetBatchToSpaceDescriptor(const ccBatchToSpaceDescriptor_t paramsDesc, int32_t *blockShape, int32_t *crops,
  4098. int32_t *blockShapeLength);
  4099. /**
  4100. * @ingroup dnn
  4101. * @brief destroy descriptor of batchToSpace
  4102. * @param [in] batchToSpaceDesc point to descriptor of batchToSpace
  4103. * @return ccStatus_t
  4104. */
  4105. ccStatus_t ccDestroyBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
  4106. /**
  4107. * @ingroup dnn
  4108. * @brief get the output dimension info of batch to space
  4109. * @param [in] xDesc descriptor of input tensor
  4110. * @param [in|out] dimCnt point to the output dimCnt
  4111. * @param [in|out] dim arrays to save dims
  4112. * @param [in] dimLen length of dim
  4113. * @return ccStatus_t
  4114. */
  4115. ccStatus_t ccGetBatchToSpaceOutputDim(const ccTensorDescriptor_t xDesc,
  4116. const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *dimCnt, int32_t dim[],
  4117. int32_t dimLen);
  4118. /**
  4119. * @ingroup dnn
  4120. * @brief batch to space forward computation
  4121. * @param [in] handle cce handle
  4122. * @param [in] paramsDesc descriptor of input params
  4123. * @param [in] alpha scaling factors
  4124. * @param [in] xDesc descriptor of input tensor
  4125. * @param [in] x input data in device memory
  4126. * @param [in] beta bias factors
  4127. * @param [in] outputDesc descriptor of output tensor
  4128. * @param [in|out] output output data in device memory
  4129. * @return ccStatus_t
  4130. */
  4131. ccStatus_t ccBatchToSpaceForward(ccHandle_t handle, const ccBatchToSpaceDescriptor_t paramsDesc, const void *alpha,
  4132. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  4133. const ccTensorDescriptor_t outputDesc, void *output);
  4134. /**
  4135. * @ingroup dnn
  4136. * @brief create descriptor of spaceToBatch
  4137. * @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch
  4138. * @return ccStatus_t
  4139. */
  4140. ccStatus_t ccCreateSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
  4141. /**
  4142. * @ingroup dnn
  4143. * @brief set spaceToBatchDesc
  4144. * @param [in|out] paramsDesc descriptor of spaceToBatch
  4145. * @param [in] blockShape blockShape of spaceToBatch
  4146. * @param [in] paddings paddings of spaceToBatch
  4147. * @param [in] blockShapeLength blockShapeLength of spaceToBatch
  4148. * @return ccStatus_t
  4149. */
  4150. ccStatus_t ccSetSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t paramsDesc, const int32_t *blockShape,
  4151. const int32_t *paddings, const int32_t blockShapeLength);
  4152. /**
  4153. * @ingroup dnn
  4154. * @brief get spaceToBatchDesc
  4155. * @param [in] paramsDesc descriptor of spaceToBatch
  4156. * @param [in|out] blockShape blockShape of spaceToBatch
  4157. * @param [in|out] paddings paddings of spaceToBatch
  4158. * @param [in|out] blockShapeLength blockShapeLength of spaceToBatch
  4159. * @return ccStatus_t
  4160. */
  4161. ccStatus_t ccGetSpaceToBatchDescriptor(const ccSpaceToBatchDescriptor_t paramsDesc, int32_t *blockShape,
  4162. int32_t *paddings, int32_t *blockShapeLength);
  4163. /**
  4164. * @ingroup dnn
  4165. * @brief destroy descriptor of spaceToBatch
  4166. * @param [in] spaceToBatchDesc point to descriptor of spaceToBatch
  4167. * @return ccStatus_t
  4168. */
  4169. ccStatus_t ccDestroySpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
  4170. /**
  4171. * @ingroup dnn
  4172. * @brief get the output dimension info of space to batch
  4173. * @param [in] xDesc descriptor of input tensor
  4174. * @param [in|out] dimCnt point to the output dimCnt
  4175. * @param [in|out] dim arrays to save dims
  4176. * @param [in] dimLen length of dim
  4177. * @return ccStatus_t
  4178. */
  4179. ccStatus_t ccGetSpaceToBatchOutputDim(const ccTensorDescriptor_t xDesc,
  4180. const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *dimCnt, int32_t dim[],
  4181. int32_t dimLen);
  4182. /**
  4183. * @ingroup dnn
  4184. * @brief space to batch forward computation
  4185. * @param [in] handle cce handle
  4186. * @param [in] paramsDesc descriptor of input params
  4187. * @param [in] alpha scaling factors
  4188. * @param [in] xDesc descriptor of input tensor
  4189. * @param [in] x input data in device memory
  4190. * @param [in] beta bias factors
  4191. * @param [in] outputDesc descriptor of output tensor
  4192. * @param [in|out] output output data in device memory
  4193. * @return ccStatus_t
  4194. */
  4195. ccStatus_t ccSpaceToBatchForward(ccHandle_t handle, const ccSpaceToBatchDescriptor_t paramsDesc, const void *alpha,
  4196. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  4197. const ccTensorDescriptor_t outputDesc, void *output);
  4198. ccStatus_t ccTransFilterDesc2TensorDesc(ccFilterDescriptor_t wDesc, ccTensorDescriptor_t tensorDesc);
  4199. /*
  4200. * @brief get the output dimension info of extractImagePatches
  4201. * @param [in] xDesc descriptor of input tensor x
  4202. * @param [in] ksizes ksizes array
  4203. * @param [in] strides strides array
  4204. * @param [in] rates rates array
  4205. * @param [in] padding padding type
  4206. * @param [in|out] dimCnt point to the output dimCnt
  4207. * @param [in|out] dim arrays to save dims
  4208. * @return ccStatus_t
  4209. */
  4210. ccStatus_t ccGetExtractImagePatchesOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *ksizes,
  4211. const ccIntArray_t *strides, const ccIntArray_t *rates,
  4212. const ccExtractImagePatchesPadType_t padding, int32_t *dimCnt,
  4213. int32_t dim[], const int32_t dimLen);
  4214. /**
  4215. * @ingroup dnn
  4216. * @brief cum forward.
  4217. * @param [in] handle cce handle
  4218. * @param [in] alpha common scale factor
  4219. * @param [in] xDesc descriptor of input data, dimCnt:1~8
  4220. * @param [in] x input data in device memory
  4221. * @param [in] axisDesc descriptor of axis, dimCnt:0
  4222. * @param [in] axis which axis to cum calc, device memory
  4223. * @param [in] beta common scale factor
  4224. * @param [in] opType calc type, eg. sum, prod....
  4225. * @param [in] exclusive cum flag, true or false
  4226. * @param [in] reverse cum flag, true or false
  4227. * @param [in] outputDesc descriptor of output data
  4228. * @param [in|out] output output data in device memory
  4229. * @return ccStatus_t
  4230. */
  4231. ccStatus_t ccCumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  4232. const ccTensorDescriptor_t axisDesc, const void *axis, const void *beta, const CumOpType opType,
  4233. const bool exclusive, const bool reverse, const ccTensorDescriptor_t outputDesc, void *output);
  4234. /**
  4235. * @ingroup dnn
  4236. * @brief ExtractImagePatches forward.
  4237. * @param [in] handle cce handle
  4238. * @param [in] ksizes ksizes array
  4239. * @param [in] strides strides array
  4240. * @param [in] rates rates array
  4241. * @param [in] padding padding type
  4242. * @param [in] alpha common scale factor
  4243. * @param [in] xDesc descriptor of input data x
  4244. * @param [in] x input data x in device memory
  4245. * @param [in] beta common scale factor
  4246. * @param [in] outputDesc descriptor of output data
  4247. * @param [in|out] output output data in device memory
  4248. * @return ccStatus_t
  4249. */
  4250. ccStatus_t ccExtractImagePatchesForward(ccHandle_t handle, const ccIntArray_t *ksizes, const ccIntArray_t *strides,
  4251. const ccIntArray_t *rates, const ccExtractImagePatchesPadType_t padding,
  4252. const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  4253. const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
  4254. /**
  4255. * @brief get argmax output dim info
  4256. * @param [in] argDesc argmaxmin descriptor
  4257. * @param [in] xDesc descriptor of input tensor
  4258. * @param [in|out] dimCnt output dim count
  4259. * @param [in|out] dim output dim
  4260. * @param [in] dimLen length of dim
  4261. * @return ccStatus_t
  4262. */
  4263. ccStatus_t ccGetArgMaxOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
  4264. int32_t *dimCnt, int32_t dim[], int32_t dimLen);
  4265. /**
  4266. * @ingroup dnn
  4267. * @brief argmax forward computation
  4268. * @param [in] handle cce handle
  4269. * @param [in] argDesc argmaxmin descriptor
  4270. * @param [in] alpha scaling factors
  4271. * @param [in] xDesc descriptor of input tensor
  4272. * @param [in] x input data in device memory
  4273. * @param [in] workSpace workspace pointer
  4274. * @param [in] workSpaceSizeInBytes workspace size in bytes
  4275. * @param [in] beta bias factors
  4276. * @param [in] outputDesc descriptor of output tensor
  4277. * @param [in|out] output output data in device memory
  4278. * @return ccStatus_t
  4279. */
  4280. ccStatus_t ccArgMaxForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
  4281. const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
  4282. const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
  4283. void *output);
  4284. /**
  4285. * @ingroup dnn
  4286. * @brief get the workspace size of argmax
  4287. * @param [in] argDesc descriptor of tagCcArgmaxmin
  4288. * @param [in] xDesc descriptor of input tensor
  4289. * @param [in|out] sizeInBytes workspace size
  4290. * @return ccStatus_t
  4291. */
  4292. ccStatus_t ccGetArgMaxWorkspaceSize(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
  4293. uint32_t *sizeInBytes);
  4294. /**
  4295. * @ingroup dnn
  4296. * @brief create descriptor of Argmaxmin
  4297. * @param [in|out] argDesc point to descriptor of Argmaxmin attr
  4298. * @return ccStatus_t
  4299. */
  4300. ccStatus_t ccCreateArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
  4301. /**
  4302. * @ingroup dnn
  4303. * @brief destroy descriptor of Argmaxmin
  4304. * @param [in|out] argDesc point to descriptor of Argmaxmin attr
  4305. * @return ccStatus_t
  4306. */
  4307. ccStatus_t ccDestroyArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
  4308. /**
  4309. * @ingroup dnn
  4310. * @brief set descriptor of Argmaxmin
  4311. * @param [in|out] argDesc descriptor of tagCcArgmaxmin
  4312. * @param [in] axisType
  4313. * @param [in] outMaxVal whether to return the maximum value
  4314. * @param [in] topK number that returns the maximum index or maximum value
  4315. * @param [in] axis Describes which axis of the input Tensor to reduce across
  4316. * @param [in] keepDims whether to keep reduced dim
  4317. * @param [in] reduceSize the num of elements to be reduce to get topK elements, reduceSize=-1 means the total num
  4318. * of elements in axis dimension
  4319. * @param [in] reduceDStride the stride for reduce operation, reduceDStride=1 means the layout of target data is
  4320. * continuous
  4321. * @return ccStatus_t
  4322. */
  4323. ccStatus_t ccSetArgmaxminDescriptor(ccArgmaxminDescriptor_t argDesc, int32_t axisType, bool outMaxVal, int64_t topK,
  4324. int64_t axis, bool keepDims, int64_t reduceSize = -1, int64_t reduceDStride = 1);
  4325. ccStatus_t ccArgMinForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
  4326. const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
  4327. const ccTensorDescriptor_t outputDesc, void *output);
  4328. ccStatus_t ccGetArgMinOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
  4329. int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
  4330. /**
  4331. * @ingroup dnn
  4332. * @brief lsh projection forward computation
  4333. * @param [in] handle cce handle
  4334. * @param [in] alpha scaling factors
  4335. * @param [in] hashDesc descriptor of input tensor hashDesc
  4336. * @param [in] hash input data hash in device memory
  4337. * @param [in] weightDesc descriptor of input tensor weightDesc
  4338. * @param [in] weight input data weight in device memory
  4339. * @param [in] inputDesc descriptor of input tensor inputDesc
  4340. * @param [in] input input data (lookup) in device memory
  4341. * @param [in] type 1:SPARSE 2:DENSE
  4342. * @param [in] beta bias factors
  4343. * @param [in] workSpace workSpace data in device memory
  4344. * @param [in] workSpaceSizeInBytes workSpace length
  4345. * @param [in] outputDesc descriptor of output tensor
  4346. * @param [in|out] output output data in device memory
  4347. * @return ccStatus_t
  4348. */
  4349. ccStatus_t ccLshProjectionForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t hashDesc,
  4350. const void *hash, const ccTensorDescriptor_t weightDesc, const void *weight,
  4351. const ccTensorDescriptor_t inputDesc, const void *input, const LSHProjectionType type,
  4352. const void *beta, void *workSpace, const uint32_t workSpaceSizeInBytes,
  4353. const ccTensorDescriptor_t outputDesc, void *output);
  4354. /**
  4355. * @ingroup dnn
  4356. * @brief get the workspace size of lsh projection
  4357. * @param [in] inputDesc descriptor of input tensor input
  4358. * @param [in] hashDataType data type of hash
  4359. * @param [in|out] sizeInBytes workspace size
  4360. * @return ccStatus_t
  4361. */
  4362. ccStatus_t ccGetLshProjectionForwardWorkspaceSize(const ccTensorDescriptor_t inputDesc, const ccDataType_t hashDataType,
  4363. uint32_t *sizeInBytes);
  4364. /**
  4365. * @ingroup dnn
  4366. * @brief get the output dimension info of LshProjection,
  4367. * @param [in] hashDesc descriptor of hash
  4368. * @param [in] type type of mode
  4369. * @param [in|out] dimCnt point to the output dimCnt
  4370. * @param [in|out] dim arrays to save dims
  4371. * @param [in] dimLen dim length
  4372. * @return ccStatus_t
  4373. */
  4374. ccStatus_t ccGetLshProjectionOutputDim(const ccTensorDescriptor_t hashDesc, const LSHProjectionType type,
  4375. int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
  4376. /**
  4377. * @ingroup dnn
  4378. * @brief get the weight dimension info of LshProjection,
  4379. * @param [in] inputDesc descriptor of input
  4380. * @param [in|out] dimCnt point to the weight dimCnt
  4381. * @param [in|out] dim arrays to save dims
  4382. * @param [in] dimLen dim length
  4383. * @return ccStatus_t
  4384. */
  4385. ccStatus_t ccGetLshProjectionWeightDim(const ccTensorDescriptor_t inputDesc, int32_t *dimCnt, int32_t dim[],
  4386. const int32_t dimLen);
  4387. /**
  4388. * @ingroup dnn
  4389. * @brief upsample forward computation
  4390. * @param [in] handle cce handle
  4391. * @param [in] upsamplePara input para in host memory
  4392. * @param [in] alpha common scale factor
  4393. * @param [in] bottomDesc descriptor of input data bottomDesc
  4394. * @param [in] bottom input data bottom in device memory
  4395. * @param [in] bottomMaskDesc descriptor of input data bottomMaskDesc
  4396. * @param [in] bottomMask input data bottomMask in device memory
  4397. * @param [in] beta common scale factor
  4398. * @param [in] outputDesc descriptor of output data
  4399. * @param [in|out] output output data in device memory
  4400. * @return ccStatus_t
  4401. */
  4402. ccStatus_t ccUpsampleForward(ccHandle_t handle, const ccUpsampleParaDescriptor_t upsamplePara, const void *alpha,
  4403. const ccTensorDescriptor_t bottomDesc, const void *bottom,
  4404. const ccTensorDescriptor_t bottomMaskDesc, const void *bottomMask, const void *beta,
  4405. const ccTensorDescriptor_t outputDesc, void *output);
  4406. /**
  4407. * @brief create descriptor for parameter of upsample function
  4408. * @param [in|out] upsampleDesc descriptor of upsamplepara
  4409. * @return ccStatus_t
  4410. */
  4411. ccStatus_t ccCreateUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
  4412. /**
  4413. * @brief destroy descriptor for parameter of upsample function
  4414. * @param [in|out] upsampleDesc descriptor of upsamplepara
  4415. * @return ccStatus_t
  4416. */
  4417. ccStatus_t ccDestroyUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
  4418. /**
  4419. * @brief set descriptor for parameter of upsample function
  4420. * @param [in|out] upsampleDesc descriptor of upsample parameters (the descriptor handle itself is passed by value)
  4421. * @param [in] scale the scale of height and width
  4422. * @param [in] scaleHeight the scale of height
  4423. * @param [in] scaleWidth the scale of width
  4424. * @param [in] upsampleHeight the height of output
  4425. * @param [in] upsampleWidth the width of output
  4426. * @param [in] padOutHeight bool flag related to padding of the output height (declared bool, not a pad amount; exact meaning to be confirmed)
  4427. * @param [in] padOutWidth bool flag related to padding of the output width (declared bool, not a pad amount; exact meaning to be confirmed)
  4428. * @return ccStatus_t
  4429. */
  4430. ccStatus_t ccSetUpsampleDescriptor(ccUpsampleParaDescriptor_t upsampleDesc, const int32_t scale,
  4431. const int32_t scaleHeight, const int32_t scaleWidth, const int32_t upsampleHeight,
  4432. const int32_t upsampleWidth, const bool padOutHeight, const bool padOutWidth);
  4433. /**
  4434. * @ingroup dnn
  4435. * @brief get the output dimension info of upsample
  4436. * @param [in] upsamplePara descriptor of upsample parameters
  4437. * @param [in] bottomDesc descriptor of input bottom tensor
  4438. * @param [in|out] dimCnt pointer to the output dimension count
  4439. * @param [in|out] dim array to save the output dims
  4440. * @param [in] dimLen the length of the dim array
  4441. * @return ccStatus_t
  4442. */
  4443. ccStatus_t ccGetUpsampleOutputDim(const ccUpsampleParaDescriptor_t upsamplePara, const ccTensorDescriptor_t bottomDesc,
  4444. int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
  4445. #ifndef DAVINCI_LITE
        /**
        * @brief matmul function; declared only when DAVINCI_LITE is not defined
        * @param [in] handle cce handle
        * @param [in] alpha scaling factor (presumably, as for the other operators in this header -- TODO confirm)
        * @param [in] xDesc descriptor of tensor x
        * @param [in] x data of x
        * @param [in] wDesc descriptor of tensor w
        * @param [in] w data of w
        * @param [in] biasDesc descriptor of bias tensor
        * @param [in] bias data of bias
        * @param [in] algo algorithm selector; note that it uses the ccFullConnectFwdAlgo_t type
        * @param [in] workSpace workspace buffer
        * @param [in] workSpaceSizeInBytes size of workSpace in bytes
        * @param [in] beta scaling factor (presumably -- TODO confirm)
        * @param [in] yDesc descriptor of tensor y
        * @param [in|out] y data of y (non-const pointer; presumably receives the result -- TODO confirm)
        * @param [in] transposeA transpose flag (presumably applies to x -- TODO confirm)
        * @param [in] transposeB transpose flag (presumably applies to w -- TODO confirm)
        * @return ccStatus_t
        */
  4446. ccStatus_t ccMatmul(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  4447. const ccTensorDescriptor_t wDesc, const void *w, const ccTensorDescriptor_t biasDesc,
  4448. const void *bias, const ccFullConnectFwdAlgo_t algo, void *workSpace,
  4449. const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y,
  4450. const bool transposeA, const bool transposeB);
        /**
        * @brief get the output dimension info of matmul
        * @param [in] xDesc descriptor of tensor x
        * @param [in] wDesc descriptor of tensor w
        * @param [in|out] n pointer to an int32_t (presumably receives the output N dimension -- TODO confirm)
        * @param [in|out] c pointer to an int32_t (presumably receives the output C dimension -- TODO confirm)
        * @param [in|out] h pointer to an int32_t (presumably receives the output H dimension -- TODO confirm)
        * @param [in|out] w pointer to an int32_t (presumably receives the output W dimension -- TODO confirm)
        * @param [in] transposeA transpose flag (presumably applies to x -- TODO confirm)
        * @param [in] transposeB transpose flag (presumably applies to w -- TODO confirm)
        * @return ccStatus_t
        */
  4451. ccStatus_t ccGetMatmulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, int32_t *n,
  4452. int32_t *c, int32_t *h, int32_t *w, bool transposeA, bool transposeB);
        /**
        * @brief get the workspace size of matmul
        * @param [in] handle cce handle
        * @param [in] algo algorithm selector; note that it uses the ccFullConnectFwdAlgo_t type
        * @param [in] xDesc descriptor of tensor x
        * @param [in] wDesc descriptor of tensor w
        * @param [in] yDesc descriptor of tensor y
        * @param [in|out] sizeInBytes pointer to a uint32_t (presumably receives the workspace size in bytes -- TODO confirm)
        * @param [in] transposeA transpose flag (presumably applies to x -- TODO confirm)
        * @param [in] transposeB transpose flag (presumably applies to w -- TODO confirm)
        * @return ccStatus_t
        */
  4453. ccStatus_t ccGetMatmulWorkspaceSize(ccHandle_t handle, const ccFullConnectFwdAlgo_t algo,
  4454. const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
  4455. const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes, bool transposeA,
  4456. bool transposeB);
  4457. #endif
  4458. /**
  4459. * @ingroup dnn
  4460. * @brief gather_v2 function
  4461. * @param [in] handle cce handle
  4462. * @param [in] alpha common scale factor
  4463. * @param [in] paramsDesc descriptor of params tensor
  4464. * @param [in] params params data in device memory
  4465. * @param [in] indicesDesc descriptor of indices tensor
  4466. * @param [in] indices indices data in device memory
  4467. * @param [in] axisDesc descriptor of axis tensor
  4468. * @param [in] axis axis data in device memory
  4469. * @param [in] beta common scale factor
  4470. * @param [in] outputDesc descriptor of output tensor
  4471. * @param [in|out] output output data in device memory (NOTE(review): declared const void * although documented as [in|out] -- confirm the const is intended)
  4472. * @return ccStatus_t
  4473. */
  4474. ccStatus_t ccGatherV2(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, const void *params,
  4475. const ccTensorDescriptor_t indicesDesc, const void *indices, const ccTensorDescriptor_t axisDesc,
  4476. const void *axis, const void *beta, const ccTensorDescriptor_t outputDesc, const void *output);
  4477. /**
  4478. * @ingroup dnn
  4479. * @brief memory_clear function
  4480. * @param [in] handle cce handle
  4481. * @param [in] addrSpaceSizeInBytes size of the addr space in bytes
  4482. * @param [in|out] addr device memory (NOTE(review): declared const void * although documented as [in|out] -- confirm the const is intended)
  4483. * @return ccStatus_t
  4484. */
  4485. ccStatus_t ccMemoryClear(ccHandle_t handle, const uint64_t addrSpaceSizeInBytes, const void *addr);
  4486. /**
  4487. * @ingroup dnn
  4488. * @brief check whether the input has overflowed (is-finite check)
  4489. * @param [in] handle cce handle
  4490. * @param [in] alpha scaling factor
  4491. * @param [in] xDesc descriptor of input tensor
  4492. * @param [in] x input data in device memory
  4493. * @param [in] yDesc descriptor of output tensor
  4494. * @param [in|out] y output data in device memory (NOTE(review): declared const void * although documented as [in|out] -- confirm the const is intended)
  4495. * @param [in] beta scaling factor
  4496. * @return ccStatus_t
  4497. */
  4498. ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
  4499. const ccTensorDescriptor_t yDesc, const void *y, const void *beta);
  4500. }; // namespace cce
  4501. #endif // DNN_OP_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。