From f85064db077260757db51d71cc96d2e65baa8ac3 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 2 Dec 2021 21:00:03 +0800 Subject: [PATCH] add cce back --- third_party/fwkacllib/inc/cce/aicpu_engine.h | 63 + .../fwkacllib/inc/cce/aicpu_engine_struct.h | 56 + third_party/fwkacllib/inc/cce/blas_struct.h | 31 + third_party/fwkacllib/inc/cce/cce.h | 101 + third_party/fwkacllib/inc/cce/cce_def.hpp | 152 + third_party/fwkacllib/inc/cce/common/attr_list.hpp | 82 + third_party/fwkacllib/inc/cce/common/catch.hpp | 95 + third_party/fwkacllib/inc/cce/compiler_stub.h | 36 + third_party/fwkacllib/inc/cce/customize.h | 60 + third_party/fwkacllib/inc/cce/dnn.h | 23 + third_party/fwkacllib/inc/cce/dnn_base.h | 676 +++ third_party/fwkacllib/inc/cce/dnn_base_def.hpp | 994 ++++ third_party/fwkacllib/inc/cce/dnn_op.h | 4838 ++++++++++++++++++++ third_party/fwkacllib/inc/cce/dnn_struct.hpp | 23 + third_party/fwkacllib/inc/cce/dnn_struct_base.hpp | 894 ++++ third_party/fwkacllib/inc/cce/fwk_adpt_struct.h | 155 + third_party/fwkacllib/inc/cce/l2fusion_struct.hpp | 56 + .../fwkacllib/inc/cce/optimizer/fusion_engine.h | 65 + third_party/fwkacllib/inc/cce/taskdown_api.h | 54 + third_party/fwkacllib/inc/cce/taskdown_common.hpp | 108 + 20 files changed, 8562 insertions(+) create mode 100644 third_party/fwkacllib/inc/cce/aicpu_engine.h create mode 100644 third_party/fwkacllib/inc/cce/aicpu_engine_struct.h create mode 100644 third_party/fwkacllib/inc/cce/blas_struct.h create mode 100644 third_party/fwkacllib/inc/cce/cce.h create mode 100644 third_party/fwkacllib/inc/cce/cce_def.hpp create mode 100644 third_party/fwkacllib/inc/cce/common/attr_list.hpp create mode 100644 third_party/fwkacllib/inc/cce/common/catch.hpp create mode 100644 third_party/fwkacllib/inc/cce/compiler_stub.h create mode 100644 third_party/fwkacllib/inc/cce/customize.h create mode 100644 third_party/fwkacllib/inc/cce/dnn.h create mode 100644 third_party/fwkacllib/inc/cce/dnn_base.h create mode 100644 third_party/fwkacllib/inc/cce/dnn_base_def.hpp create mode 100644 third_party/fwkacllib/inc/cce/dnn_op.h create mode 100644 third_party/fwkacllib/inc/cce/dnn_struct.hpp create mode 100644 third_party/fwkacllib/inc/cce/dnn_struct_base.hpp create mode 100644 third_party/fwkacllib/inc/cce/fwk_adpt_struct.h create mode 100644 third_party/fwkacllib/inc/cce/l2fusion_struct.hpp create mode 100644 third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h create mode 100644 third_party/fwkacllib/inc/cce/taskdown_api.h create mode 100644 third_party/fwkacllib/inc/cce/taskdown_common.hpp diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h new file mode 100644 index 00000000..bc2e415f --- /dev/null +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -0,0 +1,63 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AICPU_ENGINE_H__ +#define AICPU_ENGINE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + AE_STATUS_SUCCESS = 0, + AE_STATUS_BAD_PARAM = 1, + AE_STATUS_OPEN_SO_FAILED = 2, + AE_STATUS_GET_KERNEL_NAME_FAILED = 3, + AE_STATUS_INNER_ERROR = 4, + AE_STATUS_KERNEL_API_INNER_ERROR = 5, + AE_STATUS_END_OF_SEQUENCE = 6, + AE_STATUS_DUMP_FAILED = 7, + AE_STATUS_TASK_WAIT = 101, + AE_STATUS_RESERVED +} aeStatus_t; + +/** + * @ingroup aicpu engine + * @brief aeCallInterface: + * a interface to call a function in a op kernfel lib + * @param [in] addr void *, should be STR_KERNEL * format + * @return aeStatus_t + */ +aeStatus_t aeCallInterface(void *addr); + +/** + * @ingroup aicpu engine + * @brief aeBatchLoadKernelSo: + * a interface to load kernel so + * @param [in] loadSoNum load so number + * @param [in] soPaths load so paths + * @param [in] soNames load so names + * @return aeStatus_t + */ +aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); + +#ifdef __cplusplus +} +#endif + +#endif // AICPU_ENGINE_H__ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h new file mode 100644 index 00000000..8c0c1847 --- /dev/null +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -0,0 +1,56 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_ENGINE_STRUCT_H__ +#define AICPU_ENGINE_STRUCT_H__ + +#include "fwk_adpt_struct.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + The different framwork we adapted for. +*/ +typedef enum { + FMK_KERNEL_TYPE_TF = 0, + FMK_KERNEL_TYPE_CF = 10, + FMK_KERNEL_TYPE_PT = 20, + FMK_KERNEL_TYPE_RESERVED +} FwkkernelType_t; + +#pragma pack(push, 1) +typedef struct { + uint32_t fwkKernelType; // FwkkernelType_t + union { + ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; + } fwkKernelBase; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) + +#pragma pack(push, 1) +struct SessionInfo { + uint64_t sessionId; + uint64_t kernelId; + bool sessFlag; +}; +#pragma pack(pop) + +#ifdef __cplusplus +} +#endif +#endif // AICPU_ENGINE_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/cce/blas_struct.h b/third_party/fwkacllib/inc/cce/blas_struct.h new file mode 100644 index 00000000..e0bcee4c --- /dev/null +++ b/third_party/fwkacllib/inc/cce/blas_struct.h @@ -0,0 +1,31 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CC_BLAS_STRUCT_API__ +#define CC_BLAS_STRUCT_API__ + +#include + +typedef enum { CCBLAS_FILL_MODE_LOWER = 0, CCBLAS_FILL_MODE_UPPER = 1 } ccblasFillMode_t; + +typedef enum { + CCBLAS_OP_N = 0, + CCBLAS_OP_T = 1, +} ccblasOperation_t; + +typedef enum { CCBLAS_DIAG_NON_UNIT = 0, CCBLAS_DIAG_UNIT = 1 } ccblasDiagType_t; + +#endif // CC_BLAS_STRUCT_API__ diff --git a/third_party/fwkacllib/inc/cce/cce.h b/third_party/fwkacllib/inc/cce/cce.h new file mode 100644 index 00000000..0cd9613a --- /dev/null +++ b/third_party/fwkacllib/inc/cce/cce.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CCE_H__ +#define CCE_H__ + +#include +#include "cce_def.hpp" + +namespace cce { + +/** + * @ingroup cce + * @brief create cc handler + * @param [in|out] handle point of cc handler + * @return ccStatus_t + */ +ccStatus_t ccCreate(ccHandle_t *handle); + +/** + * @ingroup cce + * @brief destroy cc handler + * @param [in] *handle cc handler + * @return ccStatus_t + */ +ccStatus_t ccDestroy(ccHandle_t *handle); + +/** + * @ingroup cce + * @brief bind stream with specified cc handler + * @param [in] handle cc handler + * @param [in] streamId stream + * @return ccStatus_t + */ +ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t streamId); + +/** + * @ingroup cce + * @brief get the stream from cc handler + * @param [in] handle cc handler + * @param [in|out] streamId point of stream + * @return ccStatus_t + */ +ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *streamId); + +/** + * @ingroup cce + * @brief get the stream from cc handler + * @param [in] dataTypeTransMode mode of data type transform + * @param [in] inputData input data point + * @param [in] inputDataSize input data size + * @param [in|out] outputData output data point + * @param [in] outputDataSize output data size + * @return ccStatus_t + */ +ccStatus_t ccTransDataType(ccDataTypeTransMode_t dataTypeTransMode, const void *inputData, uint32_t inputDataSize, + void *outputData, const uint32_t outputDataSize); +/** + * @ingroup cce + * @brief cce sys init func + */ +void cceSysInit(); + +/** + * @ingroup cce + * @brief cce Log Start up func + */ +void cceLogStartup(); + +/** + * @ingroup cce + * @brief cce Log Shut down func + */ +void cceLogShutdown(); + +/** + * @ingroup cce + * @brief set the profiling on or off + * @param [in] const unsigned char* target: The engine gets it from ENV. Don't need care about it. + * @param const char* job_ctx: identifies profiling job + * @param [in] uint32_t flag: value: 0, on ; 1, off. + * @return ccStatus_t value: 0, success; 1, fail. 
+ */ +ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag); + +}; // namespace cce + +#endif // CCE_H__ diff --git a/third_party/fwkacllib/inc/cce/cce_def.hpp b/third_party/fwkacllib/inc/cce/cce_def.hpp new file mode 100644 index 00000000..7b1a1b8a --- /dev/null +++ b/third_party/fwkacllib/inc/cce/cce_def.hpp @@ -0,0 +1,152 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CCE_DEF_H__ +#define CCE_DEF_H__ + +#include "runtime/rt.h" + +namespace cce { + +/** + * @ingroup cce + * @brief memory configure for fusion + */ +typedef struct TagCceFusionMemCfg { + uint64_t memAddr; /**< memAddr */ + uint32_t memSize; /**< memSize */ + uint32_t addrChangeFlag; /**< op data addr change flag. value:0,valid;1,not valid */ + uint32_t poolFlag; /**< mempool flag : value:0,is valid; value: 1, not valid */ + TagCceFusionMemCfg() { + memAddr = 0; + memSize = 0; + addrChangeFlag = 0; + poolFlag = 0; + } +} CceFusionMemCfg_t; +/** + * @ingroup cce + * @brief return value + */ +typedef enum tagCcStatus { + CC_STATUS_SUCCESS = 0, /**< succ */ + CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ + CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ + CC_STATUS_BAD_PARAM = 3, /**< para check failed */ + CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */ + CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */ + CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ + CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ + CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ + CC_STATUS_RESERVED /**< just for check */ +} ccStatus_t; + +/** + * @ingroup cce + * @brief original data type + */ +typedef enum tagCcDataType { + CC_DATA_FLOAT = 0, /**< float type */ + CC_DATA_HALF, /**< fp16 type */ + CC_DATA_INT8, /**< int8 type */ + CC_DATA_INT32, /**< int32 type */ + CC_DATA_UINT8, /**< uint8 type */ + CC_DATA_HALF_UINT16_PROPOSAL, /** +#include + +#define ERROR_CODE() __catch_error_code +#define ERROR_LINE_NO() __catch_error_line_no +#define ERROR_PROC() __catch_error_line_no = __LINE__; + +#define PROC \ + uint32_t __catch_error_code = 0x7FFFFFCC; \ + uint32_t __catch_error_line_no = 0xFFFFFFFF; \ + { +#define END_PROC \ + } \ + __tabErrorCode: +#define THROW(errcode) \ + { \ + __catch_error_code = (errcode); \ + ERROR_PROC(); \ + goto __tabErrorCode; \ + } +#define EXEC(func) \ + { \ + if (0 != (__catch_error_code = (func))) THROW(__catch_error_code) \ + } +#define EXEC_EX1(func, error_code) \ + { \ + if (0 != (func)) THROW(error_code) \ + } +#define EXEC_EX(func, succRet, error_code) \ + { \ + if (succRet != (__catch_error_code = (func))) THROW(error_code) \ + } +#define ASSERT_EXEC(func, succRet) \ + { \ + if (succRet != (__catch_error_code = (func))) /*GO_ASSERT_FALSE();*/ \ + THROW(__catch_error_code) \ + } \ + } +#define NEW_ERROR_EXEC(errcode, func, succRet) \ + { \ + if (succRet != (func)) { \ + THROW(errcode) \ + } \ + } +#define JUDGE(errcode, expr) \ + { \ + if (!(expr)) { \ + 
THROW(errcode) \ + } \ + } +#define ASSERT_JUDGE(errcode, expr) \ + { \ + if (!(expr)) { /*GO_ASSERT_FALSE();*/ \ + THROW(errcode) \ + } \ + } +#define JUDGE_FALSE(errcode, expr) \ + { \ + if (expr) { \ + THROW(errcode) \ + } \ + } +#define JUDGE_CONTINUE(expr) \ + { \ + if (expr) { \ + continue; \ + } \ + } +#define CATCH_ERROR(errcode) if (__catch_error_code == (errcode)) { // ERROR_LOG(); +#define CATCH_ALL_ERROR { +#define END_CATCH_ERROR } +#define FINAL \ + __tabFinal: +#define END_FINAL /*GO_ASSERT_FALSE()*/ ; +#define GOTO_FINAL() goto __tabFinal; +#endif // CATCH_HPP_ diff --git a/third_party/fwkacllib/inc/cce/compiler_stub.h b/third_party/fwkacllib/inc/cce/compiler_stub.h new file mode 100644 index 00000000..00ea467e --- /dev/null +++ b/third_party/fwkacllib/inc/cce/compiler_stub.h @@ -0,0 +1,36 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef COMPILER_STUB_H__ +#define COMPILER_STUB_H__ + +namespace cce { + +/** + * @ingroup cce + * @brief compiler stub init func + */ +bool compilerStubInit(); + +/** + * @ingroup cce + * @brief compiler stub free func + */ +bool compilerStubFree(); + +}; // namespace cce + +#endif // COMPILER_STUB_H__ diff --git a/third_party/fwkacllib/inc/cce/customize.h b/third_party/fwkacllib/inc/cce/customize.h new file mode 100644 index 00000000..7dd97af1 --- /dev/null +++ b/third_party/fwkacllib/inc/cce/customize.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CC_CUSTOMIZE_API__ +#define CC_CUSTOMIZE_API__ + +#include + +#define CC_DEVICE_DIM_MAX 8 +typedef enum tagOpTensorFormat +{ + OP_TENSOR_FORMAT_NC1HWC0 = 0, + OP_TENSOR_FORMAT_ND, + OP_TENSOR_FORMAT_RESERVED, + +} opTensorFormat_t; + + +typedef enum tagOpDataType +{ + OP_DATA_FLOAT = 0, /**< float type */ + OP_DATA_HALF, /**< fp16 type */ + OP_DATA_INT8, /**< int8 type */ + OP_DATA_INT32, /**< int32 type */ + OP_DATA_UINT8, /**< uint8 type */ + OP_DATA_HALF_UINT16_PROPOSAL, /**dimCnt, xDesc->dimCnt) + * @param [in] num the number of outputs + * @param [in] beta scaling factors + * @param [in] yDescArr descriptors of output tensors + * @param [in|out] yArr output data array in device memory + * @return ccStatus_t + */ +ccStatus_t ccSplitForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + int32_t axis, uint32_t num, const void *beta, const ccTensorDescriptor_t yDescArr[], + void *yArr[]); + +/** + * @ingroup dnn + * @brief get the output dimensions info of split + * @param [in] xDesc descriptor of input tensor + * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt) + * @param [in] num the number of outputs + * @param [in] sizes Optional, used to specify the sizes of each output tensor along split dim. The tensor x would + * be split evenly along split dim if sizes is NULL + * @param [in|out] nArr point to the first element of batch sizes + * @param [in|out] cArr point to the first element of channels + * @param [in|out] hArr point to the first element of heights of feature map + * @param [in|out] wArr point to the first element of widths of feature map + * @return ccStatus_t + */ +ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num, + const uint32_t sizes[], uint32_t nArr[], uint32_t cArr[], uint32_t hArr[], + uint32_t wArr[]); + +/** + * @ingroup dnn + * @brief Get split output shape(s). + * @param [in] xDesc input tensor, support ND and NC1HWC0 + * @param [in] axis split axis, negtive axis will increased by dimCnt once time. + * @param [in] num splited nums. + * @param [in] sizes splited dim size on axis. if NULL was set, The input will be divided into num equally. + * @param [output] dimCnt splited dimCnt array. One to one correspondence with the splited output. + * @param [output] dim array of splited dim array. One to one correspondence with the splited output. 
+ * @param [in| dimlen length of dim(Pass in the length of the entire space pointed to by dim, + not just the length of the dim array, because dim is a level 2 array + dimlen = lengthof dim[][], not just lengthof dim[]) + * @return ccStatus_t + */ +ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num, + const uint32_t sizes[], int32_t *dimCnt, int32_t *dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief create weight compress info + * @param [in|out] compressInfo point to CompressInfo + * @return ccStatus_t + */ +ccStatus_t ccCreateWeightCompressInfo(ccWeightCompressInfo_t **compressInfo); + +/** + * @ingroup dnn + * @brief destory weight compress info + * @param [in] *compressInfo point to CompressInfo + * @return ccStatus_t + */ +ccStatus_t ccDestroyWeightCompressInfo(ccWeightCompressInfo_t **compressInfo); + +/** + * @ingroup dnn + * @brief create compress table + * @param [in|out] compressTab point to weight compress table + * @return ccStatus_t + */ +ccStatus_t ccCreateWeightCompressTab(ccWeightCompressTab_t **compressTab); + +/** + * @ingroup dnn + * @brief destory compress table + * @param [in] compressTab point to weight compress table + * @return ccStatus_t + */ +ccStatus_t ccDestroyWeightCompressTab(ccWeightCompressTab_t **compressTab); + +/** + * @ingroup dnn + * @brief get fc compress info + * @param [in] xDesc descriptor of input tensor + * @param [in] wDesc descriptor of weight tensor + * @param [in] biasDesc descriptor of bias tensor + * @param [in] dataTypeTransmode mode of data type transform + * @param [in] weightCompressInfo compress info, compute based on tiling method + * @param [in|out] outputSize output data size in byte + * @param [in|out] infoTabSize compress info table + * @return ccStatus_t + */ +ccStatus_t ccGetCompressedFcWeightInfo(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc, + const ccTensorDescriptor_t biasDesc, ccDataTypeTransMode_t dataTypeTransmode, + ccWeightCompressInfo_t *weightCompressInfo, uint32_t *outputSize, + uint32_t *infoTabSize); +/** + * @ingroup dnn + * @brief compress fc + * @param [in] wDesc descriptor of weight tensor + * @param [in] w filter data in device memory + * @param [in] weightCompressInfo compress info, compute based on tiling method + * @param [in] dataTypeTransmode mode of data type transform + * @param [in|out] y output data in device memory + * @param [in] ySize transformed data size in byte + * @param [in|out] yCompressedSize compressed output data size in byte + * @param [in|out] infoTab compressed info table + * @param [in] infoTabSize compressed info table size in byte + * @return ccStatus_t + */ +ccStatus_t ccCompressWeight(const ccFilterDescriptor_t wDesc, const void *w, + const ccWeightCompressInfo_t *weightCompressInfo, ccDataTypeTransMode_t dataTypeTransmode, + ccFilterDescriptor_t yDesc, void *y, uint32_t ySize, uint32_t *yCompressedSize, + void *infoTab, uint32_t infoTabSize); + +/** + * @ingroup dnn + * @brief restore compressed fc data + * @param [in] x input data in device memory + * @param [in] xSizeInBytes input compressed weight data size in byte + * @param [in|out] y output data in device memory + * @param [in] ySizeInBytes output data size in byte + * @return ccStatus_t + */ +ccStatus_t ccRestoreCompressedWeight(const void *x, uint32_t xSizeInBytes, void *y, uint32_t ySizeInBytes, + rtMemcpyKind_t kind); + +/** + * @ingroup dnn + * @brief create quantize parameters struct + * @param [in|out] quantizeInfo descriptor of quantize 
parameters + * @return ccStatus_t + */ +ccStatus_t ccCreateQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo); + +/** + * @ingroup dnn + * @brief destroy quantize parameters struct + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccDestoryQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo); + +/** + * @ingroup dnn + * @brief set quantize parameters + * @param [in] quantizeInfo descriptor of quantize parameters + * @param [in] scaleValMode enmu type for quantize scale value type (normal or sqrt) + * @param [in] scale quantize scale value + * @param [in] offset quantize offset(when quantize algorithm is half offset or full offset,this should be + * configed) + * @param [in] offsetPad padding value for load3d (only for half offset or full offset) + * @return ccStatus_t + */ +ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, + const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad); + +/** + * @ingroup dnn + * @brief set Requantize parameters + * @param [in] quantizeInfo descriptor of quantize parameters + * @param [in] scaleValMode enmu type for requantize scale value type (normal or sqrt) + * @param [in] scale quantize scale value + * @param [in] offset quantize offset(when quantize algorithm is half offset or full offset,this should be + * configed) + * @param [in] offsetw offset for filter (only config for full offset quantize) + * @return ccStatus_t + */ +ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, + const uint16_t *scaleRq, const uint16_t *nextLayerOffset, const int32_t *offsetw); + +/** + * @ingroup dnn + * @brief set Dequantize parameters + * @param [in] quantizeInfo descriptor of quantize parameters + * @param [in] scaleValMode enmu type for dequantize scale value type (normal or sqrt) + * @param [in] scaleDq quantize scale value + * @param [in] offsetw offset for filter (only config for full offset quantize) + * @return ccStatus_t + */ +ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, + const uint16_t *scaleDq, const int32_t *offsetw); + +/** + * @ingroup dnn + * @brief set convolution desciptor's quantize parameters + * @param [in] convDesc convolution descriptor + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionQuantizeInfo(ccConvolutionDescriptor_t convDesc, const ccQuantizeDescriptor_t QuantizeInfo); + +/** + * @ingroup dnn + * @brief set convolution desciptor's all offset quantize parameters + * @param [in] convDesc convolution descriptor + * @param [in] offsetw descriptor of quantize parameters + * @param [in] scaleReq descriptor of quantize parameters + * @param [in] offset_d_next descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, const uint8_t *offsetW, + const uint8_t *offsetD, const uint16_t *scaleReq, const uint16_t *offsetDNext); + +/** + * @ingroup dnn + * @brief set full connection desciptor's quantize parameters + * @param [in] fcDesc full connection descriptor + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetFullConnectionQuantizeInfo(ccFullConnectionDescriptor_t fcDesc, + const ccQuantizeDescriptor_t QuantizeInfo); + +/** + * @ingroup dnn + * @brief set pooling desciptor's quantize parameters + * @param [in] 
poolingDesc pooling descriptor + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetPoolingQuantizeInfo(ccPoolingDescriptor_t poolingDesc, const ccQuantizeDescriptor_t QuantizeInfo); + +/** + * @ingroup dnn + * @brief set full connection desciptor's info table + * @param [in] fcDesc full connection descriptor + * @param [in] infoTabSize table size + * @param [in] infoTab pointer to info table + * @return ccStatus_t + */ +ccStatus_t ccSetFullConnectionDescriptor(ccFullConnectionDescriptor_t fcDesc, uint32_t infoTabSize, const void *infoTab, + ccFullConnectFwdAlgo_t algo = CC_FULLCONNECT_FWD_ALGO_HALF); + +/** + * @ingroup dnn + * @brief set full connection desciptor's relu flag + * @param [in] fcDesc full connection descriptor + * @param [in] opType operation type for append at convolution operation + * @param [in] opDesc operation descritpor for the opType + * @return ccStatus_t + */ +ccStatus_t ccFullConnectionAppendOp(ccFullConnectionDescriptor_t fcDesc, tagCcOpType opType, const void *opDesc); + +/** + * @ingroup dnn + * @brief check aipp basic info + * @param [in] inputFormat format of input image + * @param [in] loadStartPosH vertical start position in source image + * @param [in] loadStartPosW horizontal start position in source image + * @param [in] srcImageSizeH vertical size of source image + * @param [in] srcImageSizeW horizontal size of source image + * @param [in] cpaddingValue C direction padding value + * @param [in] cscSwitch csc enable or not + * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image + * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV + * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is + * not useful. 
+ * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippCommInfo(ccAippInputFormat_t inputFormat, int32_t loadStartPosW, int32_t loadStartPosH, + int32_t srcImageSizeW, int32_t srcImageSizeH, float cpaddingValue, + bool cscSwitch, bool rbuvSwapSwitch, bool axSwapSwitch, bool singleLineMode); + +/** + * @ingroup dnn + * @brief check aipp dtc info + * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x + * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x + * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippDtcInfo(int32_t dtcPixelMeanChn0, int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, + float dtcPixelMinChn0, float dtcPixelMinChn1, float dtcPixelMinChn2, + float dtcPixelVarReciChn0, float dtcPixelVarReciChn1, + float dtcPixelVarReciChn2); + +/** + * @ingroup dnn + * @brief check aipp pad info + * @param [in] paddingMode padding mode + * @param [in] leftPaddingSize left hblank/padding size + * @param [in] rightPaddingSize right hblank/padding size + * @param [in] topPaddingSize top padding size + * @param [in] bottomPaddingSize bottom padding size + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippPadInfo(ccAippPaddingMode_t paddingMode, int32_t leftPaddingSize, + int32_t rightPaddingSize, int32_t topPaddingSize, int32_t bottomPaddingSize); + +/** + * @ingroup dnn + * @brief check aipp csc info + * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n + * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m + * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippCscInfo(int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, int32_t cscMatrixR0C2, + int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, int32_t cscMatrixR1C2, + int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, int32_t cscMatrixR2C2, + int32_t cscOutputBias0, int32_t cscOutputBias1, int32_t cscOutputBias2, + int32_t cscInputBias0, int32_t cscInputBias1, int32_t cscInputBias2); + +/** + * @ingroup dnn + * @brief check aipp scf info + * @param [in] scfSwitch scaling enable or not + * @param [in] scfInputW input width of scaling + * @param [in] scfInputH input height of scaling + * @param [in] scfOutputW output width of scaling + * @param [in] scfOutputH output height of scaling + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippScfInfo(bool scfSwitch, int32_t scfInputW, int32_t scfInputH, int32_t scfOutputW, + int32_t scfOutputH); + +/** + * @ingroup dnn + * @brief check aipp param + * @param [in] convDesc descriptor of conv operator + * @param [in] xDesc input tensor info + * @param [in] yDesc output tensor info + * @return ccStatus_t + */ +ccStatus_t ccCheckConvFwdAippParam(const ccConvolutionDescriptor_t convDesc, const ccTensorDescriptor_t xDesc, + const ccTensorDescriptor_t yDesc); + +/** + * @ingroup dnn + * @brief init aipp basic info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] inputFormat format of input image + * @param [in] loadStartPosH vertical start position in source image + * @param [in] loadStartPosW horizontal start position in source image + * @param [in] srcImageSizeH vertical size of source image + * @param [in] srcImageSizeW horizontal size of source image + * @param [in] cpaddingValue C direction padding value + * @param [in] cscSwitch csc enable or not + * @param [in] 
rbuvSwapSwitch swap R/U and B/V position of the image + * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV + * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is + * not useful. + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippCommInfo(ccConvolutionDescriptor_t convDesc, ccAippInputFormat_t inputFormat, + int32_t loadStartPosW, int32_t loadStartPosH, int32_t srcImageSizeW, + int32_t srcImageSizeH, float cpaddingValue, bool cscSwitch, bool rbuvSwapSwitch, + bool axSwapSwitch, bool singleLineMode); +/** + * @ingroup dnn + * @brief init aipp dtc info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x + * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x + * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippDtcInfo(ccConvolutionDescriptor_t convDesc, int32_t dtcPixelMeanChn0, + int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, float dtcPixelMinChn0, + float dtcPixelMinChn1, float dtcPixelMinChn2, float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, float dtcPixelVarReciChn2); +/** + * @ingroup dnn + * @brief init aipp pad info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] paddingMode padding mode + * @param [in] leftPaddingSize left hblank/padding size + * @param [in] rightPaddingSize right hblank/padding size + * @param [in] topPaddingSize top padding size + * @param [in] bottomPaddingSize bottom padding size + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippPadInfo(ccConvolutionDescriptor_t convDesc, ccAippPaddingMode_t paddingMode, + int32_t leftPaddingSize, int32_t rightPaddingSize, int32_t topPaddingSize, + int32_t bottomPaddingSize); + +/** + * @ingroup dnn + * @brief init aipp csc info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n + * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m + * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippCscInfo(ccConvolutionDescriptor_t convDesc, int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, + int32_t cscMatrixR0C2, int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, + int32_t cscMatrixR1C2, int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, + int32_t cscMatrixR2C2, int32_t cscOutputBias0, int32_t cscOutputBias1, + int32_t cscOutputBias2, int32_t cscInputBias0, int32_t cscInputBias1, + int32_t cscInputBias2); + +/** + * @ingroup dnn + * @brief init aipp scf info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] scfSwitch scaling enable or not + * @param [in] scfInputW input width of scaling + * @param [in] scfInputH input height of scaling + * @param [in] scfOutputW output width of scaling + * @param [in] scfOutputH output height of scaling + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippScfInfo(ccConvolutionDescriptor_t convDesc, bool scfSwitch, int32_t scfInputW, + int32_t scfInputH, int32_t scfOutputW, int32_t scfOutputH); + +/** + * @ingroup dnn + * @brief set dynamic aipp parameter address and enflag info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] dyncParaAddr aipp parameter address + * @param [in] dyncAippFlag flag to show whether to use dynamic aipp + * 
@return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippDyncParaAddr(ccConvolutionDescriptor_t convDesc, const void *dyncParaAddr, + bool dyncAippFlag, bool rotationFlag = false); + +/** + * @ingroup dnn + * @brief check dynamic aipp parameter + * @param [in] dyncParaAddr aipp parameter address + * @param [in] dataLength parameter lenght + * @param [in] convolutionDimW convDimW + * @param [in] convolutionDimH convDimH + * @return ccStatus_t + */ +ccStatus_t ccCheckDynamicAippParam(const void *dynamicParamAddr, uint32_t dataLength, int64_t convolutionDimW, + int64_t convolutionDimH); + +/*** @ingroup dnn + * @brief trans mean and var + * @param [in|out] mean' = bnScale/sqrt(var) + * @param [in|out] var' = -bnScale * mean / sqrt(var) + bnBias + * @return ccStatus_t + */ + +ccStatus_t ccTransBatchnormMeanAndVar(void *mean, void *var, const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, + const void *alpha, const void *beta, void *bnScale, void *bnBias, double epsilon); + +/** + * @ingroup dnn + * @brief init deconvolution adj or targetShape info. + * @param [in] convDesc conv descriptor. + * @param [in] adjH, adjust H output. + * @param [in] adjW, adjust W output. + * @param [in] targetShape, values of output shape, if this pointer was set, ignore adj. + * @return ccStatus_t + */ +ccStatus_t ccSetDeconvolutionOutShapeInfo(ccConvolutionDescriptor_t convDesc, uint32_t adjSize, const uint32_t *adj, + uint32_t targetShapeSize, const uint32_t *targetShape); + +/** + * @ingroup dnn + * @brief gather elements according to the indices. + * @param [in] alpha reserved. + * @param [in] xDesc description of the tensor from which to gather elements. + * @param [in] x data point of the tensor from which to gather elements. + * @param [in] indicesDesc description of the tensor of indices. + * @param [in] indices data point of the tensor of indices. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccGatherNdForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t indicesDesc, const void *indices, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of gather_nd. + * @param [in] xDesc description of the tensor from which to gather elements. + * @param [in] indicesDesc description of the tensor of indices. + * @param [output] n dim-size of n-dim. + * @param [output] c dim-size of c-dim. + * @param [output] h dim-size of h-dim. + * @param [output] w dim-size of w-dim. + * @param [output] realDimCnt real dim. + * @return ccStatus_t + */ +ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, int32_t *n, + int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, + int32_t *dimCnt, int32_t *dim, int32_t dimLen); +/** + * @ingroup dnn + * @brief tile tensor by multiples. + * @param [in] alpha reserved. 
+ * @param [in] xDesc description of the tensor which to be tiled. + * @param [in] x data point of the tensor which to be tiled. + * @param [in] multiples tile coefficient of each dim. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccTileForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccIntArray_t *multiples, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); + +/** + * @ingroup dnn + * @brief get output shape of tile. + * @param [in] xDesc description of the dividend tensor. + * @param [in] multiples multiples of each dim. + * @param [in|out] dimCnt [point to the output dimCnt] + * @param [in|out] dim [arrays to save dims] + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of tile. + * @param [in] xDesc description of the dividend tensor. + * @param [in] multiples multiples of each dim. + * @param [output] n dim-size of n-dim. + * @param [output] c dim-size of c-dim. + * @param [output] h dim-size of h-dim. + * @param [output] w dim-size of w-dim. + * @param [output] realDimCnt real dim. + * @return ccStatus_t + */ +ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, + // const ccIntArrayDescriptor_t multiples, + const ccIntArray_t *multiples, int32_t *n, int32_t *c, int32_t *h, int32_t *w, + int32_t *realDimCnt); +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the dividend tensor. + * @param [in] x data point of the dividend tensor. + * @param [in] yDesc description of the divisor tensor. + * @param [in] y data point of the divisor tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccRealdivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the dividend tensor. + * @param [in] yDesc description of the divisor tensor. + * @param [output] n dim-size of n-dim. + * @param [output] c dim-size of c-dim. + * @param [output] h dim-size of h-dim. + * @param [output] w dim-size of w-dim. + * @param [output] realDimCnt real dim. + * @return ccStatus_t + */ +ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *n, + int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. 
+ * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccFloordivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] realDimCnt real dim. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetFloordivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccGreaterForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetGreaterOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccLessForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. 
+ * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLessOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of LogicalOr. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLogicalOrOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of LogicalXor. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in] dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLogicalXorOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief sqrt forward: + * data type only support bool + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLogicalNotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief equal between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ + +ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief dump data during inference, only for eng ver. + * @param [in] handle cce handle + * @return ccStatus_t + */ +ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t bufLen, const uint32_t taskIndex); + +/** + * @ingroup dnn + * @brief logicaland between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. 
+ * @return ccStatus_t + */ +ccStatus_t ccLogicalAndForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief logical or between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief logical Xor between two tensors(x ^ y = (x | y) & ~(x & y). + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccLogicalXorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of equal. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); +/** + * @ingroup dnn + * @brief get output shape of logicaland. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLogicalAndOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. 
+ * @return ccStatus_t + */ +ccStatus_t ccFloormodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetFloormodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief compare between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compareType, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, + const void *y, const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief create descriptor of FillParam + * @param [in|out] fillParamDesc point to descriptor of fill param + * @return ccStatus_t + */ +ccStatus_t ccCreateFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of FillParam + * @param [in] *fillParamDesc point to descriptor of fill param + * @return ccStatus_t + */ +ccStatus_t ccDestroyFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc); + +/** + * @ingroup dnn + * @brief get output shape of broadcat operations. + * @param [in] inputNum input number of the operation tensors. + * @param [in] xDesc[] description of the input operation tensors list. + * @param [output] dimCnt dim-size of output tensor. + * @param [output] dim dim of output tensor. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetMultiNdBroadcastOpOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of maximultitensor. + * @param [in] inputNum the num of input operator tensors. + * @param [in] xDesc[] description of the input operator tensors list. + * @param [output] dimCnt dim count of output tensor. + * @param [output] dim array of output tensor. 
+ * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetMaxMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of minmultitensor. + * @param [in] inputNum the num of input operator tensors. + * @param [in] xDesc[] description of the input operator tensors list. + * @param [output] dimCnt dim count of output tensor. + * @param [output] dim array of output tensor. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetMinMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief MaxMultitensor forward: + * data type only support float float16 and int32 + * data format only support ND + * @param [in] handle cce handle + * @param [in] inputNum input tensor number + * @param [in] alpha common scale factor + * @param [in] xDesc[] descriptor of input tensors list + * @param [in] x[] input data in device memory list + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMaxMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha, + const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief MinMultitensor forward: + * data type only support float float16 and int32 + * data format only support ND + * @param [in] handle cce handle + * @param [in] inputNum input tensor number + * @param [in] alpha common scale factor + * @param [in] xDesc[] descriptor of input data list + * @param [in] x[] input data in device memory list + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMinMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha, + const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief create descriptor of StridedSlice + * @param [in|out] stridedSliceDesc point to descriptor of StridedSlice param + * @return ccStatus_t + */ +ccStatus_t ccCreateStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of StridedSlice + * @param [in] *stridedSliceDesc point to descriptor of StridedSlice param + * @return ccStatus_t + */ +ccStatus_t ccDestroyStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc); + +/** + * @ingroup dnn + * @brief init stridedSlice descriptor_t. 
+ * @param [out] stridedSliceDesc struct of stridedslice param + * @param [in] dimCnt dimension of the input tensor + * @param [in] begin slice begin(include) + * @param [in] end slice end index(not include) + * @param [in] strides slice stride + * @return ccStatus_t + */ +ccStatus_t ccSetStridedSliceDescriptor(ccStridedSliceDescriptor_t stridedSliceDesc, int32_t dimCnt, int32_t begin[], + int32_t end[], int32_t strides[]); + +/** + * @ingroup dnn + * @brief create descriptor of StridedSlice + * @param [in|out] stridedSliceDesc point to descriptor of StridedSlice attr + * @return ccStatus_t + */ +ccStatus_t ccCreateStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of StridedSlice + * @param [in] *stridedSliceDesc point to descriptor of StridedSlice attr + * @return ccStatus_t + */ +ccStatus_t ccDestroyStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc); + +/** + * @ingroup dnn + * @brief init stridedSlice mask attrs desescriptor. + * @param [out] attrDesc struct of stridedslice mask attrs + * @param [in] beginMask begin mask + * @param [in] endMask end mask + * @param [in] ellipsisMask ellipsis mask + * @param [in] newAxisMask new axis mask + * @param [in] shrinkAxisMask shrink axis mask + * @return ccStatus_t + */ +ccStatus_t ccSetStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t attrDesc, int32_t beginMask, + int32_t endMask, int32_t ellipsisMask, int32_t newAxisMask, + int32_t shrinkAxisMask); + +/** + * @ingroup dnn + * @brief Extracts a strided slice of a tensor. + * @param [in] xDesc descriptor of input data + * @param [in] stridedSliceDesc specifies the begin, end, strides of slice + * @param [in] attrDesc reserve for optional attributes. + * @param [out] n point to n size + * @param [out] c point to c size + * @param [out] h point to h size + * @param [out] w point to w size + * @return ccStatus_t + */ +ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc, + const ccStridedSliceDescriptor_t stridedSliceDesc, + const ccStridedSliceAttrsDescriptor_t attrDesc, int32_t *n, int32_t *c, + int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief Extracts a strided slice of a tensor. + * @param [in] handle cce handle + * @param [in] stridedSliceDesc specifies the begin, end, strides of slice + * @param [in] attrDesc reserve for optional attributes. + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccStridedSliceForward(ccHandle_t handle, const ccStridedSliceDescriptor_t stridedSliceDesc, + const ccStridedSliceAttrsDescriptor_t attrDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ + * @brief get out put descrition of slice tensor. 
+ * @param [in] xDesc descriptor of input data + * @param [in] begin begin position of tensor + * @param [in] size size to slice + * @param [out] n point to n size + * @param [out] c point to c size + * @param [out] h point to h size + * @param [out] w point to w size + * @param [out] realDimCnt realdim count + * @return ccStatus_t + */ +ccStatus_t ccGetSliceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *begin, const ccIntArray_t *size, + int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief slice of a tensor. + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] begin begin position of tensor + * @param [in] size size to slice + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSliceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccIntArray_t *begin, const ccIntArray_t *size, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief gather forward computation + * @param [in] handle cce handle + * @param [in] paramsDesc descriptor of params tensor + * @param [in] params input data in device memory + * @param [in] indicesDesc descriptor of indices tensor + * @param [in] indices indices data in device memory + * @param [in] axis descriptor of roi tensor + * @param [in] alpha reserved + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccGatherForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, + const void *params, const ccTensorDescriptor_t indicesDesc, const void *indices, + const int32_t axis, const void *beta, ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief gather output dim computation, for NC1HWC0 + * @param [in] paramsDesc descriptor of params tensor + * @param [in] indicesDesc descriptor of indices tensor + * @param [in] axis descriptor of roi tensor + * @param [out] n dim of n + * @param [out] c dim of c + * @param [out] h dim of h + * @param [out] w dim of w + * @param [out] realDimCnt real dim count + * @return ccStatus_t + */ +ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc, + int32_t axis, int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief gather output dim computation + * @param [in] paramsDesc descriptor of params tensor + * @param [in] indicesDesc descriptor of indices tensor + * @param [in] axis descriptor of roi tensor + * @param [out] dimCnt dimcnt of output + * @param [out] dim dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc, + int32_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief exp forward computation + * @param [in] handle cce handle + * @param [in] expDesc descriptor of expParam + * @param [in] expParam a ternary array + * @param [in] alpha reserved parameter + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param 
[in] beta reserved parameter + * @param [in] yDesc descriptor of output tensor + * @param [out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccExpForward(ccHandle_t handle, const ccExpDescriptor_t expDesc, const void *expParam, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief expm1 forward: + * data type only support float float16 and double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccExpm1Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief log1p forward: + * data type only support float float16 and double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLog1pForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief init descriptor for parameter of exp function + * @param [in|out] powDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetExpDescriptor(ccExpDescriptor_t expDesc, ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief exp forward computation + * @param [in] handle cce handle + * @param [in] logDesc descriptor of logParam + * @param [in] logParam a ternary array + * @param [in] alpha reserved parameter + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta reserved parameter + * @param [in] yDesc descriptor of output tensor + * @param [in] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLogForward(ccHandle_t handle, const ccLogDescriptor_t logDesc, const void *logParam, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief init descriptor for parameter of log function + * @param [in|out] logDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetLogDescriptor(ccLogDescriptor_t logDesc, ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief pow forward computation + * @param [in] handle cce handle + * @param [in] powDesc descriptor of logParam + * @param [in] powParam a ternary array + * @param [in] alpha reserved parameter + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta reserved parameter + * @param [in] yDesc descriptor of input tensor + * 
@param [in] y input data in device memory + * @param [in] zDesc descriptor of output tensor + * @param [out] z output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t powDesc, const void *powParam, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, + const void *y, const void *beta, const ccTensorDescriptor_t zDesc, void *z); + +/** + * @brief init descriptor for parameter of pow function + * @param [in|out] powDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t powDesc, ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief non max suppression forward. + * @param [in] handle cce handle + * @param [in] nonmaxParaDesc descriptor of para + * @param [in] nonmaxPara input para in host memory + * @param [in] maxoutputsizex input para in host memory + * @param [in] alpha common scale factor + * @param [in] boxesDesc descriptor of input data boxesDesc + * @param [in] boxes input data boxes in device memory + * @param [in] scoresDesc descriptor of input data boxesDesc + * @param [in] scores input data scores in device memory + * @param [in] workSpaceSizeInBytes workspace size + * @param [in] workSpace input workspace in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccNonMaxSuppressionForward(ccHandle_t handle, const ccNonMaxSuppressionDescriptor_t nonmaxParaDesc, + const void *nonmaxPara, const int *maxoutputsize, const void *alpha, + const ccTensorDescriptor_t boxesDesc, const void *boxes, + const ccTensorDescriptor_t scoresDesc, const void *scores, + const uint32_t workSpaceSizeInBytes, void *workSpace, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @brief init descriptor for parameter of NonMaxSuppression function + * @param [in|out] powDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t nonMaxSuppressionDesc, + ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief get the output dimension info of resizeBilinear op. + * @param [in] xDesc descriptor of input data + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [out] dimCnt + * @param [out] dim[] dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetResizeBilinearOutputDim(const ccTensorDescriptor_t xDesc, + const ccResizeBilinearDescriptor_t resizeBilinearDesc, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get the output dimension info of interp op. + * @param [in] xDesc descriptor of input data + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [out] dimCnt + * @param [out] dim[] dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetInterpOutputDim(const ccTensorDescriptor_t xDesc, const ccResizeBilinearDescriptor_t resizeBilinearDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); +/** + * @ingroup dnn + * @brief resize bilinear forward for t network. 
+ * @param [in] handle cce handle + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccResizeBilinearForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, + const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief resize bilinear forward for c network. + * @param [in] handle cce handle + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccInterpForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief create descriptor of ResizeBilinear + * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr + * @return ccStatus_t + */ +ccStatus_t ccCreateResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of Interp + * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr + * @return ccStatus_t + */ +ccStatus_t ccDestroyResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc); + +/** + * @ingroup dnn + * @brief set descriptor of resizeBilinear. 
+ * @param [in|out] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [in] resizeOutputDimMode way to decide output dimensions + * @param [in] alignCorners whether the centers of input and output are aligned + * @param [in] zoom_factor zoom factor + * @param [in] shrink_factor shrink factor + * @param [in] height height of output + * @param [in] width width of output + * @param [in] pad_begin padding at begin of input + * @param [in] pad_end padding at end of input + * @return ccStatus_t + */ +ccStatus_t ccSetResizeBilinearDescriptor(ccResizeBilinearDescriptor_t resizeBilinearDesc, + ccResizeOutputDimMode_t resizeOutputDimMode, bool alignCorners, + int32_t zoom_factor, int32_t shrink_factor, int32_t height, int32_t width, + int32_t pad_begin, int32_t pad_end); + +/** + * @ingroup dnn + * @brief fill forward computation + * @param [in] handle cce handle + * @param [in] fillParamDesc descriptor of fill parameter + * @param [in] alpha reserved + * @param [in] givenDesc descriptor of given tensor + * @param [in] givenData given data in device memory + * @param [in] workspace space for fill algorithm + * @param [in] workSpaceSizeInBytes space size in byte + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccFillForward(ccHandle_t handle, const ccFillParamDescriptor_t fillParamDesc, const void *alpha, + const ccTensorDescriptor_t givenDesc, const void *givenData, const void *workspace, + const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); + +/** + * @ingroup dnn + *[ccGetFillWorkspaceSize] + *@param fillType [fill type] + *@param givenDesc [given tensor descriptor] + *@param xDesc [input tensor descriptor] + *@param sizeInBytes [output size] + *@return ccStatus_t [status] + */ +ccStatus_t ccGetFillWorkspaceSize(const ccFillOpType_t fillType, const ccTensorDescriptor_t xDesc, + uint32_t *sizeInBytes); + +/** + *[ccCast] + *@param handle [cce handler] + *@param alpha [alpha] + *@param xDesc [tensor Description of tensor x] + *@param x [input tensor x] + *@param beta [beta + *@param yDesc [tensor Description of tensor y] + *@param y [output tensor y] + *@return ccStatus_t [status] + */ +ccStatus_t ccCast(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief round forward: + * data type only support float float16 and int32 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccRoundForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief rint forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param 
[in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccRintForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief sqrt forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSqrtForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + *[ccCast] + *@param filterSrcInfo [cce filtersrc descriptor] + *@param filterSrc [filterSrc address] + *@param filterDstInfo [cce filterdst descriptor] + *@param filterDst [filterdst address] + *@param group [group] + *@param ySizeInBytes [fraczfilter size] + *@param outputDataType [datatype] + *@return ccStatus_t [status] + */ +ccStatus_t ccTransGroupConvFilterInt8(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc, + ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group, + uint32_t ySizeInBytes, ccDataType_t outputDataType); + +/** + *[ccGetConcatOutputDim] + *@param xDesc[] [input tensor descriptor] + *@param axis [concat axis] + *@param inputNum [input tensor numbers] + *@param dim[] [output dim] + *@param [in| dimlen length of dim + *@return ccStatus_t [status] + */ +ccStatus_t ccGetConcatOutputDim(const ccTensorDescriptor_t xDesc[], int32_t axis, int32_t inputNum, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get the output dimension info of reduce. + * @param [in] xDesc descriptor of input tensor + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetReduceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *axis, bool keepDims, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief reduce sum forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceSumForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce max forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. 
+ * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceMaxForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce min forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceMinForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce mean forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceMeanForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce prod forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceProdForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce all forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. 
+ * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceAllForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + *@brief print times stats + *@return ccStatus_t [status] + */ +ccStatus_t ccPrintTimeStat(); + +/** + * @ingroup dnn + * @brief reduce abs sum forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceAbsSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce square sum forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceSquareSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the output dimension info of crop and resize + * @param [in] imageDesc descriptor of images + * @param [in] boxesDesc descriptor of boxes + * @param [in] boxidxDesc descriptor of boxidx + * @param [in] resizeHeight resize height + * @param [in] resizeWidth resize width + * @param [out] dimCnt dimcnt of output + * @param [out] dim dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetCropAndResizeOutputDim(const ccTensorDescriptor_t imageDesc, const ccTensorDescriptor_t boxesDesc, + const ccTensorDescriptor_t boxidxDesc, const int32_t resizeHeight, + const int32_t resizeWidth, int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief crop and resize forward. 
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] imageDesc descriptor of images + * @param [in] image input data in device memory + * @param [in] boxesDesc descriptor of boxes + * @param [in] boxes input data in device memory + * @param [in] boxidxDesc descriptor of boxidx + * @param [in] boxidx input data in device memory + * @param [in] method enum of resize method + * @param [in] extrapolationValue Value used for extrapolation, when applicable + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccCropAndResizeForward(ccHandle_t handle, const ccResizeMethod_t method, const float extrapolationValue, + const void *alpha, const ccTensorDescriptor_t imageDesc, const void *image, + const ccTensorDescriptor_t boxesDesc, const void *boxes, + const ccTensorDescriptor_t boxidxDesc, const void *boxidx, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief select forward computation + * @param [in] handle cce handle + * @param [in] alpha reserved + * @param [in] condDesc descriptor of cond tensor + * @param [in] cond cond data in device memory + * @param [in] xDesc descriptor of x tensor + * @param [in] x x data in device memory + * @param [in] yDesc descriptor of y tensor + * @param [in] y y data in device memory + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSelect(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t condDesc, const void *cond, + const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, const void *y, + const void *beta, const ccTensorDescriptor_t outDesc, void *out); + +/** + * @ingroup dnn + * @brief get the output dimension info of where + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @return ccStatus_t + */ +ccStatus_t ccGetWhereOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief where forward computation + * @param [in] handle cce handle + * @param [in] alpha reserved + * @param [in] condDesc descriptor of cond tensor + * @param [in] cond cond data in device memory + * @param [in] xDesc descriptor of x tensor + * @param [in] x x data in device memory + * @param [in] yDesc descriptor of y tensor + * @param [out] y y data in device memory + * @return ccStatus_t + */ +ccStatus_t ccWhere(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief reverse forward. 
+ * @param [in] handle cce handle + * @param [in] axis dim that need reverse + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReverseForward(ccHandle_t handle, const ccIntArray_t *axis, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief floor forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccFloorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief ceil forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccCeilForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the output dimension info of truncate mod + * @param [in] xDesc descriptor of input tensor + * @param [in] yDesc descriptor of input tensor + * @param [out] dimCnt [dim count of the output tensor] + * @param [out] dim[] [shape of the output tensor] + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetTruncatemodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief truncate mod forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] yDesc descriptor of input tensor + * @param [in] y input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccTruncatemodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief Spatial Pyramid Pooling + * @param [in] handle cce handle + * @param [in] alpha reserved + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] workspace temp workspace + * @param [in] workspaceSizeInBytes temp workspace size + * @param [in] pyramidHeight pyramid height + * @param [in] 
poolingMode pooling mode + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSPPForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + void *workspace, const uint32_t workspaceSizeInBytes, const uint32_t pyramidHeight, + const ccPoolingMode_t poolingMode, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); +/** + * @ingroup dnn + * @brief Get Spatial Pyramid Pooling output dim + * @param [in] xDesc descriptor of input tensor + * @param [in] pyramidHeight pyramid height + * @param [in] dimLen length of dim + * @param [out] dimCnt output tensor dim cnt + * @param [out] dim output tensor dim + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetSPPOutputDim(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, int32_t *dimCnt, + int32_t dim[], const int32_t dimLen); +/** + * @ingroup dnn + * @brief Get Spatial Pyramid Pooling workspace size + * @param [in] xDesc descriptor of input tensor + * @param [in] pyramidHeight pyramid height + * @param [out] workspaceSizeInBytes workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetSPPWorkspaceSize(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, + uint32_t *workspaceSizeInBytes); + +/** + * @ingroup dnn + * @brief BNLL forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccBNLLForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief bias forward. + * @param [in] handle cce handle + * @param [in] axis axis + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data x + * @param [in] x input data x in device memory + * @param [in] biasDesc descriptor of input data bias + * @param [in] bias input data bias in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccBiasForward(ccHandle_t handle, const int axis, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const ccTensorDescriptor_t biasDesc, const void *bias, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief threshold forward computation + * @param [in] handle cce handle + * @param [in] threshold threshold + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccThresholdForward(ccHandle_t handle, const void *threshold, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief shufflechannel forward. 
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] group number of groups + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +// TODO AICPU: please add shufflechannel custom params and comment +ccStatus_t ccShuffleChannelForward(ccHandle_t handle, const void *alpha, uint32_t group, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief mvn forward. + * @param [in] handle cce handle + * @param [in] acrossChannel across channel. true: across, false: not + * @param [in] normalizeVariance normalizeVariance. true: normalizeVariance, false: not + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMVNForward(ccHandle_t handle, bool acrossChannel, bool normalizeVariance, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, uint32_t workSpaceSizeInBytes, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the workspace size of mvn + * @param [in] xDesc descriptor of input data + * @param [in] acrossChannel across channel. true: across, false: not + * @param [in|out] sizeInBytes Workspace size need for whole computation + */ +ccStatus_t ccGetMVNWorkspaceSize(const ccTensorDescriptor_t xDesc, bool acrossChannel, uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief heatmap2coord forward output is hotspot value and corresponding coordinates + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] coordh calibration high + * @param [in] coordw calibration wide + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccHeatmap2coordForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + int32_t coordh, int32_t coordw, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief get the output dimension info of heatmap2coord + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetHeatmap2coordOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief swish forward. 
+ * @param [in] handle cce handle + * @param [in] scale param of swish function, y = x / (1 + sigmoid(scale * x)) + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ + +ccStatus_t ccSwishForward(ccHandle_t handle, const float scale, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +ccStatus_t ccTeForward(ccHandle_t handle, const void *stubFunc, uint32_t coreDim, const void *args, uint32_t argsSize, + const rtL2Ctrl_t *l2ctrl, int32_t inputNum, const ccTensorDescriptor_t xDesc[], const void *x[], + int32_t outputNum, const ccTensorDescriptor_t yDesc[], void *y[], bool isAiCore); + +#ifndef DAVINCI_LITE +ccStatus_t ccAiCpuCustomizeForward(ccHandle_t handle, aicpu_run_func stubFunc, opTensor_t *xOpDesc[], void *x[], + int32_t inputNum, opTensor_t *yOpDesc[], void *y[], void *op_attr_handle, + int32_t outputNum, const ccTensorDescriptor_t xDesc[], + const ccTensorDescriptor_t yDesc[], const void *op_attr_str, uint32_t op_attr_size); +#endif +/** + * @ingroup dnn + * @brief embedding lookup forward. + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data x + * @param [in] x input data x in device memory + * @param [in] idxDesc descriptor of input data idx + * @param [in] idx input data idx in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const ccTensorDescriptor_t idxDesc, const void *idx, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup + * @brief embedding lookup forward. 
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] inputNum inputNum + * @param [in] xDesc[] descriptor array of input data x + * @param [in] x[] input data x array in device memory + * @param [in] workSpace workSpace addr + * @param [in] workSpaceSizeInBytes workSpace size + * @param [in] idxDesc descriptor of input data idx + * @param [in] idx input data idx in device memory + * @param [in] partitionStrategy partitionStrategy + * @param [in] maxNorm addr of maxNorm + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const int32_t inputNum, + const ccTensorDescriptor_t xDesc[], const void *x[], void *workSpace, + const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t idxDesc, + const void *idx, ccPartitionStrategy_t partitionStrategy, const void *maxNorm, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + *[ccGetEmbeddingLookupOutputDim] + *@param inputNum [input tensor numbers] + *@param xDesc[] [input tensor descriptor] + *@param idxDesc [idx tensor descriptor] + *@param dimCnt [output dim count] + *@param dim[] [output dim] + *@param [in| dimlen length of dim + *@return ccStatus_t [status] + */ +ccStatus_t ccGetEmbeddingLookupOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], + const ccTensorDescriptor_t idxDesc, int32_t *dimCnt, int32_t dim[], + int32_t dimLen); + +/** + * @ingroup dnn + *[ccGetEmbeddingLookupWorkspaceSize] + *@param inputNum [input tensor numbers] + *@param idxDesc [input tensor descriptor] + *@param isMaxNormExist [isMaxNormExist] + *@param sizeInBytes [output size] + *@return ccStatus_t [status] + */ +ccStatus_t ccGetEmbeddingLookupWorkspaceSize(const int32_t inputNum, const ccTensorDescriptor_t idxDesc, + const bool isMaxNormExist, uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief check if it is the first layer of resnet50 and semecefc + * @param [in] tensorDesc descriptor of input tensor. + * @param [in] convDesc conv descriptor. + * @param [in] filterDesc descriptor of weight tensor. 
+ * @return ccStatus_t + */ +ccStatus_t c04DescParamCheck(const ccTensorDescriptor_t tensorDesc, const ccConvolutionDescriptor_t convDesc, + const ccFilterDescriptor_t filterDesc); + +#ifndef DAVINCI_LITE +/** + * @ingroup dnn + * @brief convolution forward computation + * @param [in] handle cce handle + * @param [in] convDesc descriptor of convolution operator + * @param [in] alpha scaling factors + * @param [in] beta scaling factors + * @param [in] xDesc x descriptor of input tensor + * @param [in] x x data in device memory + * @param [in] dyDesc descriptor of dy + * @param [in] dy dy data in device memory + * @param [in] dwDesc descriptor of dwDesc + * @param [out] dw dw data in device memory + * @param [in] algo algorithm of convolution forward + * @param [in] workSpace temp space, maybe NULL if no need temp space + * @param [in] workSpaceSizeInBytes sizeof workspace + * @return ccStatus_t + */ +ccStatus_t ccConvolutionBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, void *alpha, + void *beta, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t dyDesc, const void *dy, + const ccFilterDescriptor_t dwDesc, void *dw, ccConvolutionBwdAlgo_t algo, + void *workSpace, uint32_t workSpaceSizeInBytes); +#endif + +/** + * @ingroup dnn + * @brief get the temp space size of convolution forward computation, maybe no need temp space + * @param [in] handle cce handle + * @param [in] dyDesc descriptor of input tensor dy + * @param [in] convDesc descriptor of convolution operator + * @param [in] xDesc descriptor of input tensor + * @param [in] dwDesc descriptor of filter + * @param [in] algo algorithm of convolution forward + * @param [in|out] sizeInBytes temp space size need for specified algorithm + * @return ccStatus_t + */ +ccStatus_t ccGetConvolutionBackwardFilterWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t dyDesc, + const ccConvolutionDescriptor_t convDesc, + const ccTensorDescriptor_t xDesc, + const ccFilterDescriptor_t dwDesc, ccConvolutionBwdAlgo_t algo, + uint32_t *sizeInBytes); + +#ifndef DAVINCI_LITE +ccStatus_t ccBatchNormalizationBackward(ccHandle_t handle, ccBatchNormMode_t mode, const void *alphaDataDiff, + const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, + const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t dyDesc, const void *dy, + const ccTensorDescriptor_t dxDesc, void *dx, + const ccTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale, + void *resultBnScaleDiff, void *resultBnBiasDiff, const void *workSpace, + const uint32_t workSpaceSizeInBytes, double epsilon, const void *SaveMean, + const void *SaveInvVariance); +#endif + +ccStatus_t ccGetBatchNormalizationBackwardWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode, + ccTensorDescriptor_t xDesc, ccTensorDescriptor_t dyDesc, + ccTensorDescriptor_t dxDesc, + ccTensorDescriptor_t bnScaleBiasDesc, uint32_t *sizeInBytes); + +#ifndef DAVINCI_LITE +ccStatus_t ccBatchNormalizationForwardTraining(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha, + const void *beta, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, void *y, + const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, + const void *bnBias, double exponentialAverageFactor, + void *resultRunningMean, void *resultRunningVariance, void *workSpace, + uint32_t workSpaceSizeInBytes, double epsilon, void *resultSaveMean, + void *resultSaveInvVariance, const bool isTraining); +#endif + 
+ccStatus_t ccGetBatchNormalizationForwardTrainingWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode, + ccTensorDescriptor_t xDesc, ccTensorDescriptor_t yDesc, + const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, + uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief generate an random normal Tensor use given on/off scale. + * @param [in] handle Stream handle. + * @param [in] alpha reserved. + * @param [in] meanDesc Mean description of one-hot position. + * @param [in] mean Data pointer of mean. + * @param [in] scaleDesc On/off scale description. + * @param [in] scale Data pointer of on/off scale. + * @param [in] seed random seed used to generate random number + * @param [in] seed2 random seed used to generate random number + * @param [in] beta reserved. + * @param [in] outputDesc Description of the generated one-hot tensor. + * @param [output] output Data pointer of output. + * @return ccStatus_t + */ +ccStatus_t ccRandomNormalForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t meanDesc, + const void *mean, const ccTensorDescriptor_t scaleDesc, const void *scale, + const int64_t seed1, const int64_t seed2, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief generate random uniform tensor. + * @param [in] handle Stream handle. + * @param [in] alpha reserved. + * @param [in] minvalDesc Mean description of one-hot position. + * @param [in] minval Data pointer of mean. + * @param [in] maxvalDesc On/off scale description. + * @param [in] maxval Data pointer of on/off scale. + * @param [in] seed random seed used to generate random number + * @param [in] seed2 random seed used to generate random number + * @param [in] beta reserved. + * @param [in] outputDesc Description of the generated one-hot tensor. + * @param [output] output Data pointer of output. 
+ * @return ccStatus_t + */ +ccStatus_t ccRandomUniformForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t minvalDesc, + const void *minval, const ccTensorDescriptor_t maxvalDesc, const void *maxval, + const int64_t seed1, const int64_t seed2, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/**^M + * @ingroup dnn^M\r 10932 + * @brief generate BatchMatMul tensor.^M\r 10933 + * @param [in] handle Stream handle.^M\r 10934 + * @param [in] alpha reserved.^M\r 10935 + * @param [in] xDesc tensorA Desc.^M\r 10936 + * @param [in] x Data pointer of tensorA.^M\r 10937 + * @param [in] yDesc tensorB Desc.^M\r 10938 + * @param [in] y Data pointer of tensorB.^M\r 10939 + * @param [in] beta reserved.^M\r 10940 + * @param [in] adj_x tensorA transpose flag^M\r 10941 + * @param [in] adj_y tensorB transpose flag^M\r 10942 + * @param [in] outpDesc Description of the tensor output .^M\r 10943 + * @param [output] out Data pointer of output.^M\r 10944 + * @return ccStatus_t^M + */ +ccStatus_t ccBatchMatMulForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, const bool adj_x, + const bool adj_y, const ccTensorDescriptor_t outDesc, void *out); + +ccStatus_t ccGetBatchMatMulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, bool adj_x, + bool adj_y, int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief generator conv int8 all offset factor + * @param [in] para the struct for scale and offset of input, filter and output + * @param [in|out] offsetW offset of filter + * @param [in|out] offsetPad offset of input + * @param [in|out] scaledQrq scale computing result of input , filter and output + * @param [in|out] nextoffsetq offset of output + * @return ccStatus_t + */ +ccStatus_t ccGenQuantAllOffsetFactor(const ccQuantAllOffsetPara_t *para, uint8_t &offsetW, uint8_t &offsetPad, + uint16_t &scaledQrq, uint16_t &nextoffsetq); + +/** + * @ingroup dnn + * @brief get conv int8 all offset fracZ size + * @param [in] filterDesc descriptor of filter tensor + * @param [in|out] conv int8 all offset fracZ size + * @param [in] groupNum group conv num + * @return ccStatus_t + */ +ccStatus_t ccSetGroupConvScene(const ccFilterDescriptor_t tensorDesc, ccConvolutionDescriptor_t convDesc); + +ccStatus_t ccGetInt8AllOffsetFilterFracZSizeInBytes(const ccFilterDescriptor_t filterSrcDesc, + const ccFilterDescriptor_t filterDesc, uint32_t &size, + uint32_t groupNum); + +/** + * @ingroup dnn + * @brief transform filter in conv int8 all offset scene + * @param [in] filterSrcInfo descriptor of filter tensor before fracZ transform + * @param [in] filterSrc filter addr before fracZ transform + * @param [in] filterDstInfo descriptor of filter tensor after fracZ transform + * @param [in] filterDst filter addr after fracZ transform + * @param [in] quantPara the struct for scale and offset of input, filter and output + * @param [in] ySizeInBytes filter size after fracZ transform + * @param [in|out] outputDataType output data type + * @param [in] groupNum group conv num + * @return ccStatus_t + */ +ccStatus_t ccTransFilterInt8AllOffset(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc, + ccFilterDescriptor_t filterDstInfo, void *filterDst, + const ccQuantAllOffsetPara_t *quantPara, uint32_t ySizeInBytes, + ccDataType_t outputDataType, uint32_t groupNum); + +/** + * @ingroup dnn + * @brief transform bias in conv int8 all offset 
scene + * @param [in] filterDesc descriptor of filter tensor + * @param [in] biasDesc descriptor of bias tensor + * @param [in] quantPara the struct for scale and offset of input, filter and output + * @param [in] w filter addr + * @param [in] bias bias addr + * @return ccStatus_t + */ +ccStatus_t ccTransInt8AllOffsetBias(const ccFilterDescriptor_t filterDesc, const ccTensorDescriptor_t biasDesc, + const ccQuantAllOffsetPara_t *quantPara, const void *w, const void *bias); + +/** + * @ingroup dnn + * @get dequantize + * @param [in] handle handle id + * @param [in] alpha alpha addr + * @param [in] xDesc the input Desc descriptor + * @param [in] x x data addr + * @param [in] beta beta data addr + * @param [in] yDesc the output Desc descriptor + * @param [in] y y data addr + * @return ccStatus_t + */ +ccStatus_t ccDequantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const void *beta, const ccTensorDescriptor_t yDesc, void *y); +/** + * @ingroup dnn + * @get quantize + * @param [in] handle handle id + * @param [in] alpha alpha addr + * @param [in] xDesc the input Desc descriptor + * @param [in] x x data addr + * @param [in] beta beta data addr + * @param [in] yDesc the output Desc descriptor + * @param [in] y y data addr + * @return ccStatus_t + */ +ccStatus_t ccQuantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t yDesc, void *y); + +#ifndef DAVINCI_LITE +ccStatus_t ccActivationBackward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha, + const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t xDesc, + const void *x, const void *beta, const ccTensorDescriptor_t dxDesc, void *dx); +#endif + +ccStatus_t ccL2LossForward(ccHandle_t handle, const ccL2LossDescriptor_t l2lossDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief get the output dimension info of top k v2 + * @param [in] xDesc descriptor of input tensor x + * @param [in] yDesc descriptor of input tensor y + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetTopKV2OutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, const void *k, + const int64_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief top k v2 forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor x + * @param [in] x input data x in device memory + * @param [in] yDesc descriptor of input tensor y + * @param [in] y input data y in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccTopKV2Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t kDesc, const void *k, const void *beta, const bool sorted, + const int64_t axis, void *workSpace, const uint32_t workSpaceSizeInBytes, + const ccTensorDescriptor_t outputValuesDesc, void *outputValues, + const ccTensorDescriptor_t outputIndicesDesc, void *outputIndices); + +/** + * @ingroup dnn + * @brief get the workspace 
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of top k v2
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] kDesc descriptor of input tensor k
+ * @param [in] indiceDesc descriptor of output indices tensor
+ * @param [in] k data pointer of k
+ * @param [in] axis the axis to sort along
+ * @param [in|out] sizeInBytes point to workspace size
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetTopKV2ForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc,
+                                           const ccTensorDescriptor_t indiceDesc, const void *k, const int64_t axis,
+                                           uint32_t *sizeInBytes);
+
+/**
+ * @ingroup dnn
+ * @brief Get unsorted segment reduction output dim
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
+ * @param [in] segmentsNum output slice num
+ * @param [out] dimCnt output tensor dim cnt
+ * @param [out] dim output tensor dim
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetUnsortedSegmentReductionOutputDim(const ccTensorDescriptor_t xDesc,
+                                                  const ccTensorDescriptor_t segmentIdsDesc, int32_t segmentsNum,
+                                                  int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief unsorted segment sum forward computation
+ * @param [in] handle cce handle
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] x input data in device memory
+ * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
+ * @param [in] segmentIds input segmentIds data in device memory
+ * @param [in] segmentsNum output slice num
+ * @param [in] beta bias factors
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccUnsortedSegmentSumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
+                                       const void *x, const ccTensorDescriptor_t segmentIdsDesc, const void *segmentIds,
+                                       const int32_t segmentsNum, const void *beta,
+                                       const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief reverse sequence forward computation
+ * @param [in] handle cce handle
+ * @param [in] alpha scaling factors
+ * @param [in] inputDesc descriptor of input tensor
+ * @param [in] input input data in device memory
+ * @param [in] seqLengthsDesc descriptor of input seqLengths tensor
+ * @param [in] seqLengths input seqLengths data in device memory
+ * @param [in] seqAxis the axis along which elements are reversed
+ * @param [in] batchAxis the axis along which the reversal is batched
+ * @param [in] beta bias factors
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccReverseSequenceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t inputDesc,
+                                    const void *input, const ccTensorDescriptor_t seqLengthsDesc,
+                                    const void *seqLengths, int64_t seqAxis, int64_t batchAxis, const void *beta,
+                                    const ccTensorDescriptor_t outputDesc, void *output);
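+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): derive the output shape first, build outputDesc from it, then
+ * run the reduction. Descriptors and device pointers are placeholders;
+ * CC_DIM_MAX comes from dnn_struct_base.hpp in this patch.
+ *
+ *   int32_t dimCnt = 0;
+ *   int32_t dim[CC_DIM_MAX] = {0};
+ *   (void)ccGetUnsortedSegmentReductionOutputDim(xDesc, segmentIdsDesc, segmentsNum,
+ *                                                &dimCnt, dim, CC_DIM_MAX);
+ *   // ... create outputDesc from (dimCnt, dim) and allocate output ...
+ *   (void)ccUnsortedSegmentSumForward(handle, alpha, xDesc, x, segmentIdsDesc, segmentIds,
+ *                                     segmentsNum, beta, outputDesc, output);
+ */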
+/**
+ * @ingroup dnn
+ * @brief compute element-wise equality of two tensors.
+ * @param [in] alpha reserved.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] x data point of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [in] y data point of the right operator tensor.
+ * @param [in] beta reserved.
+ * @param [in] outputDesc description of the output tensor.
+ * @param [output] output data point of the output tensor.
+ * @return ccStatus_t
+ */
+ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                          const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
+                          const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief get output shape of equal.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [out] dimCnt output tensor dim cnt
+ * @param [out] dim output tensor dim
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
+                               int32_t *dim, int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief invert permutation forward computation
+ * @param [in] handle cce handle
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] x input data in device memory
+ * @param [in] beta bias factors
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccInvertPermutationForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
+                                      const void *x, const void *beta, const ccTensorDescriptor_t outputDesc,
+                                      void *output);
+
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of non max suppression
+ * @param [in] handle descriptor of handle
+ * @param [in] scoresDesc descriptor of input tensor scoresDesc
+ * @param [in] boxesDesc descriptor of input tensor boxesDesc
+ * @param [in|out] sizeInBytes point to workspace size
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetNonMaxSuppressionWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t scoresDesc,
+                                               const ccTensorDescriptor_t boxesDesc, uint32_t *sizeInBytes);
+
+/**
+ * @ingroup dnn
+ * @brief get the output dim of non max suppression
+ * @param [in] scoresDesc descriptor of input tensor scoresDesc
+ * @param [in] maxOutPutSize the max size of output
+ * @param [in|out] dimCnt point to the count of dim
+ * @param [in|out] dim[] the array of output dim
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetNonMaxSuppressionOutputDim(const ccTensorDescriptor_t scoresDesc, const int32_t maxOutPutSize,
+                                           int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief multinomial forward.
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] x input data in device memory
+ * @param [in] numSamples number of independent samples to draw for each row slice
+ * @param [in] seed1 used to create a random seed for the distribution
+ * @param [in] seed2 used to create a random seed for the distribution
+ * @param [in] workSpace work space for inter access
+ * @param [in] workSpaceSizeInBytes work space size
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccMultinomialForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                                int32_t numSamples, int64_t seed1, int64_t seed2, void *workSpace,
+                                uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
+                                void *output);
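+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): size the workspace with ccGetMultinomialWorkspaceSize,
+ * declared below, then draw the samples. All names are placeholders.
+ *
+ *   uint32_t wsSize = 0;
+ *   (void)ccGetMultinomialWorkspaceSize(xDesc, &wsSize);
+ *   void *workSpace = devMalloc(wsSize);
+ *   (void)ccMultinomialForward(handle, alpha, xDesc, x, numSamples, seed1, seed2,
+ *                              workSpace, wsSize, beta, outputDesc, output);
+ */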
+/**
+ * @ingroup dnn
+ * @brief get output dim of generated one-hot tensor.
+ * @param [in] indicesDesc Indices description of one-hot position.
+ * @param [in] depth depth of the one-hot dimension.
+ * @param [in] axis the axis to fill with on/off values.
+ * @param [output] dimCnt point to the output dimCnt.
+ * @param [output] dim arrays to save dims.
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetOneHotOutputDim(const ccTensorDescriptor_t indicesDesc, int32_t depth, int32_t axis, int32_t *dimCnt,
+                                int32_t *dim, int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief generate a one-hot tensor using the given on/off values.
+ * @param [in] handle Stream handle.
+ * @param [in] alpha reserved.
+ * @param [in] indicesDesc Indices description of one-hot position.
+ * @param [in] indices Data pointer of indices.
+ * @param [in] onDesc On value description.
+ * @param [in] on Data pointer of on value.
+ * @param [in] offDesc Off value description.
+ * @param [in] off Data pointer of off value.
+ * @param [in] depth depth of the one-hot dimension.
+ * @param [in] axis the axis to fill with on/off values.
+ * @param [in] beta reserved.
+ * @param [in] outputDesc Description of the generated one-hot tensor.
+ * @param [output] output Data pointer of output.
+ * @return ccStatus_t
+ */
+ccStatus_t ccOneHotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t indicesDesc,
+                           const void *indices, const ccTensorDescriptor_t onDesc, const void *on,
+                           const ccTensorDescriptor_t offDesc, const void *off, const int32_t depth, const int32_t axis,
+                           const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of multinomial
+ * @param [in] xDesc descriptor of input tensor
+ * @param [out] sizeInBytes workspace size in bytes
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetMultinomialWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of multinomial
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] numSample number of independent samples to draw for each row slice
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetMultinomialOutputDim(const ccTensorDescriptor_t xDesc, int32_t numSample, int32_t *dimCnt,
+                                     int32_t dim[], int32_t dimLen);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of BiasAddBackward
+ * @param [in] dyDesc descriptor of input tensor
+ * @param [in|out] n outputTensor [N]CHW
+ * @param [in|out] c outputTensor N[C]HW
+ * @param [in|out] h outputTensor NC[H]W
+ * @param [in|out] w outputTensor NCH[W]
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetBiasAddBackwardOutputDim(const ccTensorDescriptor_t dyDesc, int32_t *n, int32_t *c, int32_t *h,
+                                         int32_t *w);
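+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): compute the output shape, build outputDesc from it, then
+ * generate the one-hot tensor. Descriptors and buffers are placeholders.
+ *
+ *   int32_t dimCnt = 0;
+ *   int32_t dim[CC_DIM_MAX] = {0};
+ *   (void)ccGetOneHotOutputDim(indicesDesc, depth, axis, &dimCnt, dim, CC_DIM_MAX);
+ *   // ... create outputDesc from (dimCnt, dim) ...
+ *   (void)ccOneHotForward(handle, alpha, indicesDesc, indices, onDesc, on,
+ *                         offDesc, off, depth, axis, beta, outputDesc, output);
+ */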
+/**
+ * @ingroup dnn
+ * @brief biasadd backward.
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] dyDesc descriptor of input data
+ * @param [in] dy input data in device memory
+ * @param [in] beta common scale factor
+ * @param [in] dbDesc descriptor of output data
+ * @param [in|out] db output data in device memory
+ * @return ccStatus_t
+ */
+#ifndef DAVINCI_LITE
+ccStatus_t ccBiasAddBackward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t dyDesc, const void *dy,
+                             const void *beta, const ccTensorDescriptor_t dbDesc, void *db);
+
+ccStatus_t ccMaxPoolWithArgmaxForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
+                                      const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                                      const ccTensorDescriptor_t yDesc, void *y, const ccTensorDescriptor_t argMaskDesc,
+                                      void *argMask);
+#endif
+
+ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
+
+ccStatus_t ccDestroyPoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
+
+ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t poolingMaskDesc, ccTensorFormat_t format,
+                                            ccDataType_t dataType, int32_t n, int32_t c, int32_t h, int32_t w,
+                                            int32_t windowH, int32_t windowW);
+
+ccStatus_t ccGetPoolingMaskTensorSizeInBytes(ccTensorDescriptor_t poolingMaskDesc, uint32_t *size);
+
+/**
+ * @ingroup dnn
+ * @brief get the mask output dimension info of maxpooling training forward
+ * @param [in] poolingDesc descriptor of pooling operator
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] n point to batch size
+ * @param [in|out] c point to channels
+ * @param [in|out] h point to height of feature map
+ * @param [in|out] w point to width of feature map
+ * @param [in|out] windowH point to height of window
+ * @param [in|out] windowW point to width of window
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetPoolingMaskDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc, int32_t *n,
+                               int32_t *c, int32_t *h, int32_t *w, int32_t *windowH, int32_t *windowW);
+
+#ifndef DAVINCI_LITE
+ccStatus_t ccSoftmaxCrossEntropyLoss(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
+                                     ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
+                                     const ccTensorDescriptor_t logitsDesc, const void *logits,
+                                     const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
+                                     const void *beta, const ccTensorDescriptor_t lossDesc, void *loss);
+
+ccStatus_t ccSoftmaxCrossEntropyDx(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
+                                   ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
+                                   const ccTensorDescriptor_t logitsDesc, const void *logits,
+                                   const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
+                                   const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
+
+ccStatus_t ccAvgPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
+                                const ccTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+                                const ccTensorDescriptor_t dxDesc, const void *dx);
+
+ccStatus_t ccTrainingAssignOp(ccHandle_t handle, const ccAssignOpMode_t assignOpDesc, const void *alpha,
+                              const void *beta, const ccTensorDescriptor_t aDesc, void *a,
+                              const ccTensorDescriptor_t bDesc, const void *b);
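+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): the pooling-mask descriptor follows the usual create / set /
+ * query-size / destroy lifecycle. CC_TENSOR_NC1HWC0 and CC_DATA_HALF are
+ * assumed enumerators of ccTensorFormat_t and ccDataType_t; the dims are
+ * placeholders.
+ *
+ *   ccTensorDescriptor_t maskDesc = nullptr;
+ *   (void)ccCreatePoolingMaskDescriptor(&maskDesc);
+ *   (void)ccSetPoolingMaskTensorDescriptor(maskDesc, CC_TENSOR_NC1HWC0, CC_DATA_HALF,
+ *                                          n, c, h, w, windowH, windowW);
+ *   uint32_t maskBytes = 0;
+ *   (void)ccGetPoolingMaskTensorSizeInBytes(maskDesc, &maskBytes);
+ *   (void)ccDestroyPoolingMaskDescriptor(&maskDesc);
+ */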
+/**
+ * @ingroup dnn
+ * @brief momentum optimizer for variable update
+ * @param [in] handle cce handle
+ * @param [in] inputDesc descriptor of input tensors: gradient, accumulation, variable
+ * @param [in] gradient gradient input
+ * @param [in|out] accumulation accumulation input and updated output
+ * @param [in|out] variable variable input and updated output
+ * @param [in] algo indicates whether FP16 output is needed
+ * @param [in] momentum scalar to control accumulation
+ * @param [in] learningRate scalar
+ * @param [in] lossScaleReciprocal scalar
+ * @param [in] workSpace additional memory address
+ * @param [in] workSpaceSizeInBytes additional memory size
+ * @param [out] variableUpdatedFP16Desc descriptor of FP16 output tensor: variableUpdatedFP16
+ * @param [out] variableUpdatedFP16 variableUpdatedFP16
+ * @return ccStatus_t
+ */
+ccStatus_t ccApplyMomentum(ccHandle_t handle, const ccTensorDescriptor_t inputDesc, const void *gradient,
+                           void *accumulation, void *variable, const ccMomentumAlgo_t algo, const void *momentum,
+                           const void *learningRate, const void *lossScaleReciprocal, void *workSpace,
+                           const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t variableUpdatedFP16Desc,
+                           void *variableUpdatedFP16);
+
+ccStatus_t ccSsdClassifyLossTrain(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t labelDesc,
+                                  const void *label, const ccTensorDescriptor_t greaterConstDesc,
+                                  const void *greaterConst, const ccTensorDescriptor_t subConstDesc,
+                                  const void *subConst, const ccTensorDescriptor_t sparseDesc, const void *sparse,
+                                  const void *beta, const ccTensorDescriptor_t castoutDesc, const void *castout,
+                                  const ccTensorDescriptor_t muloutDesc, const void *mulout);
+
+#endif
+
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of applymomentum
+ * @param [in] inputDesc descriptor of input tensor
+ * @param [in|out] sizeInBytes point to workspace size
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetApplyMomentumWorkspaceSize(const ccTensorDescriptor_t inputDesc, uint32_t *sizeInBytes);
+#ifndef DAVINCI_LITE
+ccStatus_t ccHwck2FracZ(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
+                        const ccFilterDescriptor_t yDesc, void *y);
+
+ccStatus_t ccFracZ2Hwck(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
+                        const ccFilterDescriptor_t yDesc, void *y);
+ccStatus_t ccAddNForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
+                         const void *x[], const void *beta, void *workSpace, uint32_t workSpaceSizeInBytes,
+                         const ccTensorDescriptor_t yDesc, void *y);
+#endif
+ccStatus_t ccGetAddNForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
+                                         const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
+ccStatus_t ccGetAddNForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
+ccStatus_t ccAddTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                             const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
+                             uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
+ccStatus_t ccGetAddTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
+                                             const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
+                                             uint32_t *sizeInBytes);
+ccStatus_t ccGetAddTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
+                                         int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+ccStatus_t ccMulTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                             const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
+                             uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
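+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): size the workspace via ccGetApplyMomentumWorkspaceSize,
+ * declared above, then update the variable in place. The scalar types are
+ * assumed to be float; all other names are placeholders.
+ *
+ *   float momentumVal = 0.9f, lr = 0.01f, lossScaleRecip = 1.0f;
+ *   uint32_t wsSize = 0;
+ *   (void)ccGetApplyMomentumWorkspaceSize(inputDesc, &wsSize);
+ *   void *workSpace = devMalloc(wsSize);
+ *   (void)ccApplyMomentum(handle, inputDesc, gradient, accumulation, variable, algo,
+ *                         &momentumVal, &lr, &lossScaleRecip, workSpace, wsSize,
+ *                         fp16Desc, fp16Out);
+ */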
+ccStatus_t ccGetMulTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
+                                             const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
+                                             uint32_t *sizeInBytes);
+ccStatus_t ccGetMulTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
+                                         int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief get workspace size
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] sizeInBytes workspace size
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetRandomShuffleWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
+
+/**
+ * @ingroup dnn
+ * @brief random shuffle forward computation
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] x input data in device memory
+ * @param [in] workspace temporary space
+ * @param [in] workspaceSizeInBytes temporary space size
+ * @param [in] seed1 random seed used to generate random number
+ * @param [in] seed2 random seed used to generate random number
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccRandomShuffleForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                                  void *workspace, const uint32_t workspaceSizeInBytes, const int64_t seed1,
+                                  const int64_t seed2, const void *beta, const ccTensorDescriptor_t outputDesc,
+                                  void *output);
+/**
+ * @ingroup dnn
+ * @brief sin forward:
+ *        data types supported: float, float16, double
+ *        data format supported: ND
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] input input data in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccSinForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
+                        const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief cos forward:
+ *        data types supported: float, float16, double
+ *        data format supported: ND
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] input input data in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccCosForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
+                        const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief tan forward:
+ *        data types supported: float, float16, double
+ *        data format supported: ND
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] input input data in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
+                        const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
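+/*
+ * Usage sketch (editorial addition; an assumption, not stated in this
+ * header): the trigonometric forwards appear to share the cuDNN-style
+ * scaling convention output = alpha * op(input) + beta * output, so a unit
+ * alpha and zero beta would leave the raw result untouched. The handle and
+ * descriptors are placeholders.
+ *
+ *   float alphaF = 1.0f, betaF = 0.0f;
+ *   (void)ccSinForward(handle, &alphaF, xDesc, x, &betaF, yDesc, y);
+ *   (void)ccCosForward(handle, &alphaF, xDesc, x, &betaF, yDesc, y);
+ */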
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of unstack
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] axis the axis to unstack along
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetUnstackOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, int32_t *dimCnt, int32_t dim[],
+                                 int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief unstack forward.
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] x input data in device memory
+ * @param [in] num the length of the dimension axis
+ * @param [in] axis the axis to unstack along
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccUnstackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                            int32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t outputDesc,
+                            void *output[]);
+
+ccStatus_t ccResizeNearestNeighborCpuForward(ccHandle_t handle, const ccResizeNearestNeighborDescriptor_t resizeDesc,
+                                             const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                                             const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of resize nearest neighbor
+ * @param [in] resizeDesc descriptor of resize
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetResizeNearestNeighborOutputDim(const ccResizeNearestNeighborDescriptor_t resizeDesc,
+                                               const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t dim[],
+                                               int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief create descriptor of ResizeNearestNeighbor
+ * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreateResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
+
+/**
+ * @ingroup dnn
+ * @brief destroy descriptor of ResizeNearestNeighbor
+ * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroyResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
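+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): create the resize descriptor, set it with the setter declared
+ * below, derive the output shape, run the CPU forward, then destroy the
+ * descriptor. outH, outW and the tensor names are placeholders.
+ *
+ *   ccResizeNearestNeighborDescriptor_t resizeDesc = nullptr;
+ *   (void)ccCreateResizeNearestNeighborDescriptor(&resizeDesc);
+ *   (void)ccSetResizeNearestNeighborDescriptor(resizeDesc, false, outH, outW);
+ *   int32_t dimCnt = 0, dim[CC_DIM_MAX] = {0};
+ *   (void)ccGetResizeNearestNeighborOutputDim(resizeDesc, xDesc, &dimCnt, dim, CC_DIM_MAX);
+ *   (void)ccResizeNearestNeighborCpuForward(handle, resizeDesc, alpha, xDesc, x,
+ *                                           beta, outputDesc, output);
+ *   (void)ccDestroyResizeNearestNeighborDescriptor(&resizeDesc);
+ */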
+/**
+ * @ingroup dnn
+ * @brief set descriptor of ResizeNearestNeighbor.
+ * @param [in|out] resizeDesc descriptor of resize nearest neighbor operator
+ * @param [in] alignCorners whether the centers of input and output are aligned
+ * @param [in] height height of output
+ * @param [in] width width of output
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t resizeDesc, bool alignCorners,
+                                                int32_t height, int32_t width);
+
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of pad
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] padDesc descriptor of input paddings
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t xDesc, const ccPadV2Descriptor_t padDesc, int32_t *dimCnt,
+                               int32_t dim[], int32_t dimLen);
+
+ccStatus_t ccPadV2CpuForward(ccHandle_t handle, const ccPadV2Descriptor_t padDesc, const void *alpha,
+                             const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                             const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief create descriptor of parameters for padv2 function
+ * @param [in|out] padDesc point to descriptor of parameters for padv2 function
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *padDesc);
+
+/**
+ * @ingroup dnn
+ * @brief destroy descriptor of parameters for padv2 function
+ * @param [in|out] padDesc point to descriptor of parameters for padv2 function
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *padDesc);
+
+/**
+ * @brief init descriptor for parameter of padv2 function
+ * @param [in|out] padDesc descriptor of pad
+ * @param [in] padShapeCnt padshape count
+ * @param [in] padShapeLow padshape low
+ * @param [in] padShapeHigh padshape high
+ * @param [in] padMode pad mode
+ * @param [in] padValue pad value ptr
+ * @param [in] padValueType pad value data type
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t padDesc, const int32_t padShapeCnt, const int32_t padShapeLow[],
+                                const int32_t padShapeHigh[], const ccPadMode_t padMode, const void *padValue,
+                                const ccDataType_t padValueType);
+/**
+ * @ingroup dnn
+ * @brief create descriptor of batchToSpace
+ * @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreateBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
+
+/**
+ * @ingroup dnn
+ * @brief set batchToSpace descriptor
+ * @param [in|out] paramsDesc descriptor of batchToSpace
+ * @param [in] blockShape blockShape of batchToSpace
+ * @param [in] crops crops of batchToSpace
+ * @param [in] blockShapeLength blockShapeLength of batchToSpace
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t paramsDesc, const int32_t *blockShape,
+                                       const int32_t *crops, const int32_t blockShapeLength);
+
+/**
+ * @ingroup dnn
+ * @brief get batchToSpace descriptor
+ * @param [in] paramsDesc descriptor of batchToSpace
+ * @param [out] blockShape blockShape of batchToSpace
+ * @param [out] crops crops of batchToSpace
+ * @param [out] blockShapeLength blockShapeLength of batchToSpace
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetBatchToSpaceDescriptor(const ccBatchToSpaceDescriptor_t paramsDesc, int32_t *blockShape, int32_t *crops,
+                                       int32_t *blockShapeLength);
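+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): the padv2 descriptor lifecycle mirrors the other descriptors
+ * in this header. padValue points at a host scalar matching padValueType;
+ * CC_DATA_FLOAT is an assumed ccDataType_t enumerator, and the shape arrays
+ * and padMode are placeholders.
+ *
+ *   ccPadV2Descriptor_t padDesc = nullptr;
+ *   (void)ccCreatePadV2Descriptor(&padDesc);
+ *   float padValue = 0.0f;
+ *   (void)ccSetPadV2Descriptor(padDesc, padShapeCnt, padShapeLow, padShapeHigh,
+ *                              padMode, &padValue, CC_DATA_FLOAT);
+ *   int32_t dimCnt = 0, dim[CC_DIM_MAX] = {0};
+ *   (void)ccGetPadV2OutputDim(xDesc, padDesc, &dimCnt, dim, CC_DIM_MAX);
+ *   (void)ccPadV2CpuForward(handle, padDesc, alpha, xDesc, x, beta, outputDesc, output);
+ *   (void)ccDestroyPadV2Descriptor(&padDesc);
+ */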
+/**
+ * @ingroup dnn
+ * @brief destroy descriptor of batchToSpace
+ * @param [in] *batchToSpaceDesc descriptor of batchToSpace
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroyBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
+
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of batch to space
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetBatchToSpaceOutputDim(const ccTensorDescriptor_t xDesc,
+                                      const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *dimCnt, int32_t dim[],
+                                      int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief batch to space forward computation
+ * @param [in] handle cce handle
+ * @param [in] paramsDesc descriptor of input params
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] x input data in device memory
+ * @param [in] beta bias factors
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccBatchToSpaceForward(ccHandle_t handle, const ccBatchToSpaceDescriptor_t paramsDesc, const void *alpha,
+                                 const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                                 const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief create descriptor of spaceToBatch
+ * @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreateSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
+
+/**
+ * @ingroup dnn
+ * @brief set spaceToBatch descriptor
+ * @param [in|out] paramsDesc descriptor of spaceToBatch
+ * @param [in] blockShape blockShape of spaceToBatch
+ * @param [in] paddings paddings of spaceToBatch
+ * @param [in] blockShapeLength blockShapeLength of spaceToBatch
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t paramsDesc, const int32_t *blockShape,
+                                       const int32_t *paddings, const int32_t blockShapeLength);
+
+/**
+ * @ingroup dnn
+ * @brief get spaceToBatch descriptor
+ * @param [in] paramsDesc descriptor of spaceToBatch
+ * @param [out] blockShape blockShape of spaceToBatch
+ * @param [out] paddings paddings of spaceToBatch
+ * @param [out] blockShapeLength blockShapeLength of spaceToBatch
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetSpaceToBatchDescriptor(const ccSpaceToBatchDescriptor_t paramsDesc, int32_t *blockShape,
+                                       int32_t *paddings, int32_t *blockShapeLength);
+
+/**
+ * @ingroup dnn
+ * @brief destroy descriptor of spaceToBatch
+ * @param [in] *spaceToBatchDesc descriptor of spaceToBatch
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroySpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
+
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of space to batch
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetSpaceToBatchOutputDim(const ccTensorDescriptor_t xDesc,
+                                      const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *dimCnt, int32_t dim[],
+                                      int32_t dimLen);
+/**
+ * @ingroup dnn
+ * @brief space to batch forward computation
+ * @param [in] handle cce handle
+ * @param [in] paramsDesc descriptor of input params
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] x input data in device memory
+ * @param [in] beta bias factors
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccSpaceToBatchForward(ccHandle_t handle, const ccSpaceToBatchDescriptor_t paramsDesc, const void *alpha,
+                                 const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                                 const ccTensorDescriptor_t outputDesc, void *output);
+
+ccStatus_t ccTransFilterDesc2TensorDesc(ccFilterDescriptor_t wDesc, ccTensorDescriptor_t tensorDesc);
+
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of extractImagePatches
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] ksizes ksizes array
+ * @param [in] strides strides array
+ * @param [in] rates rates array
+ * @param [in] padding padding type
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetExtractImagePatchesOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *ksizes,
+                                             const ccIntArray_t *strides, const ccIntArray_t *rates,
+                                             const ccExtractImagePatchesPadType_t padding, int32_t *dimCnt,
+                                             int32_t dim[], const int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief cum forward.
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data, dimCnt:1~8
+ * @param [in] x input data in device memory
+ * @param [in] axisDesc descriptor of the axis scalar, dimCnt:0
+ * @param [in] axis which axis to cum calc, device memory
+ * @param [in] beta common scale factor
+ * @param [in] opType calc type, e.g. sum, prod
+ * @param [in] exclusive cum flag, true or false
+ * @param [in] reverse cum flag, true or false
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccCumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                        const ccTensorDescriptor_t axisDesc, const void *axis, const void *beta, const CumOpType opType,
+                        const bool exclusive, const bool reverse, const ccTensorDescriptor_t outputDesc, void *output);
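+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): an inclusive, non-reversed cumulative sum along the axis held
+ * in device memory. CC_CUM_OP_SUM is an assumed CumOpType enumerator; the
+ * handle, descriptors and buffers are placeholders.
+ *
+ *   (void)ccCumForward(handle, alpha, xDesc, x, axisDesc, axis, beta,
+ *                      CC_CUM_OP_SUM, false, false, outputDesc, output);
+ */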
+/**
+ * @ingroup dnn
+ * @brief ExtractImagePatches forward.
+ * @param [in] handle cce handle
+ * @param [in] ksizes ksizes array
+ * @param [in] strides strides array
+ * @param [in] rates rates array
+ * @param [in] padding padding type
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data x
+ * @param [in] x input data x in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccExtractImagePatchesForward(ccHandle_t handle, const ccIntArray_t *ksizes, const ccIntArray_t *strides,
+                                        const ccIntArray_t *rates, const ccExtractImagePatchesPadType_t padding,
+                                        const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                                        const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @brief get argmax output dim info
+ * @param [in] argDesc argmaxmin descriptor
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] dimCnt output dim count
+ * @param [in|out] dim output dim
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetArgMaxOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
+                                int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief argmax forward computation
+ * @param [in] handle cce handle
+ * @param [in] argDesc argmaxmin descriptor
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] x input data in device memory
+ * @param [in] workSpace workspace pointer
+ * @param [in] workSpaceSizeInBytes workspace size in bytes
+ * @param [in] beta bias factors
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccArgMaxForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
+                           const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
+                           const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
+                           void *output);
+
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of argmax
+ * @param [in] argDesc descriptor of tagCcArgmaxmin
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in|out] sizeInBytes workspace size
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetArgMaxWorkspaceSize(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
+                                    uint32_t *sizeInBytes);
+
+/**
+ * @ingroup dnn
+ * @brief create descriptor of Argmaxmin
+ * @param [in|out] argDesc point to descriptor of Argmaxmin attr
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreateArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
+
+/**
+ * @ingroup dnn
+ * @brief destroy descriptor of Argmaxmin
+ * @param [in|out] argDesc point to descriptor of Argmaxmin attr
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroyArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
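+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): create and fill the argmax/argmin descriptor with the setter
+ * declared below (its trailing parameters are defaulted), size the workspace,
+ * run the forward, then clean up. axisType, axis and the buffers are
+ * placeholders.
+ *
+ *   ccArgmaxminDescriptor_t argDesc = nullptr;
+ *   (void)ccCreateArgmaxminDescriptor(&argDesc);
+ *   (void)ccSetArgmaxminDescriptor(argDesc, axisType, false, 1, axis, false);
+ *   uint32_t wsSize = 0;
+ *   (void)ccGetArgMaxWorkspaceSize(argDesc, xDesc, &wsSize);
+ *   void *workSpace = devMalloc(wsSize);
+ *   (void)ccArgMaxForward(handle, argDesc, alpha, xDesc, x, workSpace, wsSize,
+ *                         beta, outputDesc, output);
+ *   (void)ccDestroyArgmaxminDescriptor(&argDesc);
+ */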
+/**
+ * @ingroup dnn
+ * @brief set descriptor of Argmaxmin
+ * @param [in|out] argDesc descriptor of tagCcArgmaxmin
+ * @param [in] axisType axis type
+ * @param [in] outMaxVal whether to return the maximum value
+ * @param [in] topK number that returns the maximum index or maximum value
+ * @param [in] axis Describes which axis of the input Tensor to reduce across
+ * @param [in] keepDims whether to keep reduced dim
+ * @param [in] reduceSize the num of elements to be reduced to get topK elements, reduceSize=-1 means the total num
+ *             of elements in axis dimension
+ * @param [in] reduceDStride the stride for reduce operation, reduceDStride=1 means the layout of target data is
+ *             continuous
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetArgmaxminDescriptor(ccArgmaxminDescriptor_t argDesc, int32_t axisType, bool outMaxVal, int64_t topK,
+                                    int64_t axis, bool keepDims, int64_t reduceSize = -1, int64_t reduceDStride = 1);
+
+ccStatus_t ccArgMinForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
+                           const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                           const ccTensorDescriptor_t outputDesc, void *output);
+
+ccStatus_t ccGetArgMinOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
+                                int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
+/**
+ * @ingroup dnn
+ * @brief lsh projection forward computation
+ * @param [in] handle cce handle
+ * @param [in] alpha scaling factors
+ * @param [in] hashDesc descriptor of input tensor hashDesc
+ * @param [in] hash input data hash in device memory
+ * @param [in] weightDesc descriptor of input tensor weightDesc
+ * @param [in] weight input data weight in device memory
+ * @param [in] inputDesc descriptor of input tensor inputDesc
+ * @param [in] input input data in device memory
+ * @param [in] type 1: SPARSE, 2: DENSE
+ * @param [in] beta bias factors
+ * @param [in] workSpace workSpace data in device memory
+ * @param [in] workSpaceSizeInBytes workSpace length
+ * @param [in] outputDesc descriptor of output tensor
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccLshProjectionForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t hashDesc,
+                                  const void *hash, const ccTensorDescriptor_t weightDesc, const void *weight,
+                                  const ccTensorDescriptor_t inputDesc, const void *input, const LSHProjectionType type,
+                                  const void *beta, void *workSpace, const uint32_t workSpaceSizeInBytes,
+                                  const ccTensorDescriptor_t outputDesc, void *output);
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of lsh projection
+ * @param [in] inputDesc descriptor of input tensor input
+ * @param [in] hashDataType data type of hash
+ * @param [in|out] sizeInBytes workspace size
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetLshProjectionForwardWorkspaceSize(const ccTensorDescriptor_t inputDesc, const ccDataType_t hashDataType,
+                                                  uint32_t *sizeInBytes);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of LshProjection
+ * @param [in] hashDesc descriptor of hash
+ * @param [in] type type of mode
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen dim length
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetLshProjectionOutputDim(const ccTensorDescriptor_t hashDesc, const LSHProjectionType type,
+                                       int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
+/**
+ * @ingroup dnn
+ * @brief get the weight dimension info of LshProjection
+ * @param [in] inputDesc descriptor of input
+ * @param [in|out] dimCnt point to the weight dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen dim length
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetLshProjectionWeightDim(const ccTensorDescriptor_t inputDesc, int32_t *dimCnt, int32_t dim[],
+                                       const int32_t dimLen);
+/**
+ * @ingroup dnn
+ * @brief upsample forward computation
+ * @param [in] handle cce handle
+ * @param [in] upsamplePara input para in host memory
+ * @param [in] alpha common scale factor
+ * @param [in] bottomDesc descriptor of input data bottom
+ * @param [in] bottom input data bottom in device memory
+ * @param [in] bottomMaskDesc descriptor of input data bottomMask
+ * @param [in] bottomMask input data bottomMask in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccUpsampleForward(ccHandle_t handle, const ccUpsampleParaDescriptor_t upsamplePara, const void *alpha,
+                             const ccTensorDescriptor_t bottomDesc, const void *bottom,
+                             const ccTensorDescriptor_t bottomMaskDesc, const void *bottomMask, const void *beta,
+                             const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @brief create descriptor for parameter of upsample function
+ * @param [in|out] upsampleDesc descriptor of upsamplepara
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreateUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
+
+/**
+ * @brief destroy descriptor for parameter of upsample function
+ * @param [in|out] upsampleDesc descriptor of upsamplepara
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroyUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
+
+/**
+ * @brief set descriptor for parameter of upsample function
+ * @param [in|out] upsampleDesc descriptor of upsamplepara
+ * @param [in] scale the scale of height and width
+ * @param [in] scaleHeight the scale of height
+ * @param [in] scaleWidth the scale of width
+ * @param [in] upsampleHeight the height of output
+ * @param [in] upsampleWidth the width of output
+ * @param [in] padOutHeight whether to pad the output height
+ * @param [in] padOutWidth whether to pad the output width
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetUpsampleDescriptor(ccUpsampleParaDescriptor_t upsampleDesc, const int32_t scale,
+                                   const int32_t scaleHeight, const int32_t scaleWidth, const int32_t upsampleHeight,
+                                   const int32_t upsampleWidth, const bool padOutHeight, const bool padOutWidth);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of upsample
+ * @param [in] upsamplePara para of upsample
+ * @param [in] bottomDesc descriptor of input bottom tensor
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen the len of dim array
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetUpsampleOutputDim(const ccUpsampleParaDescriptor_t upsamplePara, const ccTensorDescriptor_t bottomDesc,
+                                  int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
+
+#ifndef DAVINCI_LITE
+ccStatus_t ccMatmul(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                    const ccTensorDescriptor_t wDesc, const void *w, const ccTensorDescriptor_t biasDesc,
+                    const void *bias, const ccFullConnectFwdAlgo_t algo, void *workSpace,
+                    const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y,
+                    const bool transposeA, const bool transposeB);
+ccStatus_t ccGetMatmulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, int32_t *n,
+                                int32_t *c, int32_t *h, int32_t *w, bool transposeA, bool transposeB);
+ccStatus_t ccGetMatmulWorkspaceSize(ccHandle_t handle, const ccFullConnectFwdAlgo_t algo,
+                                    const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
+                                    const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes, bool transposeA,
+                                    bool transposeB);
+#endif
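+/*
+ * Usage sketch (editorial addition, assumed calling pattern; not taken from
+ * this header): derive the output NCHW shape, size the workspace for the
+ * chosen algorithm, then run the matmul. algo, the descriptors and the
+ * buffers (wData for the weights) are placeholders.
+ *
+ *   int32_t n = 0, c = 0, h = 0, w = 0;
+ *   (void)ccGetMatmulOutputDim(xDesc, wDesc, &n, &c, &h, &w, false, true);
+ *   uint32_t wsSize = 0;
+ *   (void)ccGetMatmulWorkspaceSize(handle, algo, xDesc, wDesc, yDesc, &wsSize, false, true);
+ *   void *workSpace = devMalloc(wsSize);
+ *   (void)ccMatmul(handle, alpha, xDesc, x, wDesc, wData, biasDesc, bias, algo,
+ *                  workSpace, wsSize, beta, yDesc, y, false, true);
+ */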
+/**
+ * @ingroup dnn
+ * @brief gather_v2 function
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] paramsDesc descriptor
+ * @param [in] params device memory
+ * @param [in] indicesDesc descriptor
+ * @param [in] indices device memory
+ * @param [in] axisDesc descriptor
+ * @param [in] axis device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor
+ * @param [in|out] output device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccGatherV2(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, const void *params,
+                      const ccTensorDescriptor_t indicesDesc, const void *indices, const ccTensorDescriptor_t axisDesc,
+                      const void *axis, const void *beta, const ccTensorDescriptor_t outputDesc, const void *output);
+
+/**
+ * @ingroup dnn
+ * @brief memory_clear function
+ * @param [in] handle cce handle
+ * @param [in] addrSpaceSizeInBytes addr space size
+ * @param [in|out] addr device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccMemoryClear(ccHandle_t handle, const uint64_t addrSpaceSizeInBytes, const void *addr);
+
+/**
+ * @ingroup dnn
+ * @brief check whether the input contains non-finite values (overflow check)
+ * @param [in] handle cce handle
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] x input data in device memory
+ * @param [in] yDesc descriptor of output tensor
+ * @param [in|out] y output data in device memory
+ * @param [in] beta scaling factors
+ * @return ccStatus_t
+ */
+ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                      const ccTensorDescriptor_t yDesc, const void *y, const void *beta);
+}  // namespace cce
+
+#endif  // DNN_OP_H__
diff --git a/third_party/fwkacllib/inc/cce/dnn_struct.hpp b/third_party/fwkacllib/inc/cce/dnn_struct.hpp
new file mode 100644
index 00000000..96566074
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/dnn_struct.hpp
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DNN_STRUCT_HPP__
+#define DNN_STRUCT_HPP__
+
+#include "dnn.h"
+#include "dnn_struct_base.hpp"
+
+#endif  // DNN_STRUCT_HPP__
diff --git a/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp b/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp
new file mode 100644
index 00000000..dd75e9ea
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp
@@ -0,0 +1,894 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DNN_STRUCT_BASE_HPP__
+#define DNN_STRUCT_BASE_HPP__
+
+#include "cce/cce_def.hpp"
+
+namespace cce {
+
+/**
+ * @ingroup dnn
+ * @brief max number of dimensions
+ */
+#define CC_DIM_MAX (8)
+
+/**
+ * @ingroup dnn
+ * @brief max number of dimensions when use NC1HWC0 format
+ */
+#define CC_REALDIM_MAX (4)
+
+/**
+ * @ingroup dnn
+ * @brief max input count of MscnnBoxOutput
+ */
+#define CC_MAX_INPUT_CNT (10)
+
+/**
+ * @ingroup dnn
+ * @brief image dimensions of aipp input
+ */
+#define CC_AIPP_IMG_DIM (2)
+
+/**
+ * @ingroup dnn
+ * @brief image channel number of aipp input
+ */
+#define CC_AIPP_IMG_CHN_NUM (4)
+
+/**
+ * @ingroup dnn
+ * @brief element number of aipp color space conversion matrix
+ */
+#define CC_AIPP_CSC_MATRIX_DIM (9)
+
+/**
+ * @ingroup dnn
+ * @brief element number of aipp color space conversion bias
+ */
+#define CC_AIPP_CSC_BIAS_DIM (3)
+
+/**
+ * @ingroup dnn
+ * @brief parameter number of op exp/log/pow
+ */
+#define PARAM_CNT_THREE (3)
+
+/**
+ * @ingroup dnn
+ * @brief parameter number of op nonmaxsuppression
+ */
+#define PARAM_CNT_TWO (2)
+#define DIMCNT_NUMBER_ONE (1)
+#define DIMCNT_NUMBER_TWO (2)
+#define DIMCNT_NUMBER_FOUR (4)
+
+#define COMMON_FORMAT_NCHW_N_INDEX (0)
+#define COMMON_FORMAT_NCHW_C_INDEX (1)
+#define COMMON_FORMAT_NCHW_H_INDEX (2)
+#define COMMON_FORMAT_NCHW_W_INDEX (3)
+
+/**
+ * @ingroup dnn
+ * @brief parameter number of op upsample
+ */
+#define UPSAMPLE_SCAL_DEFAULT_TWO (2)
+#define UPSAMPLE_ILLEGAL_VALUE_1 (1)
+
+/**
+ * @ingroup dnn
+ * @brief struct define of StridedSlice required params.
+ */
+typedef struct tagCcStridedSlice {
+  uint32_t dimCnt;
+  int32_t begin[CC_DIM_MAX];
+  int32_t end[CC_DIM_MAX];
+  int32_t strides[CC_DIM_MAX];
+} ccStridedSlice_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of Strided_slice attrs
+ */
+typedef struct tagCcStridedSliceAttrs {
+  uint32_t beginMask;
+  uint32_t endMask;
+  uint32_t ellipsisMask;
+  uint32_t newAxisMask;
+  uint32_t shrinkAxisMask;
+} ccStridedSliceAttrs_t;
+
+/**
+ * @ingroup dnn
+ * @brief params of batchToSpace
+ */
+typedef struct tagCcBatchToSpace {
+  int32_t blockShapeLength;
+  int32_t blockShape[CC_DIM_MAX];
+  int32_t crops[2 * CC_DIM_MAX];
+} ccBatchToSpace_t;
+
+/**
+ * @ingroup dnn
+ * @brief params of spaceToBatch
+ */
+typedef struct tagCcSpaceToBatch {
+  int32_t blockShapeLength;
+  int32_t blockShape[CC_DIM_MAX];
+  int32_t paddings[2 * CC_DIM_MAX];
+} ccSpaceToBatch_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of tensor
+ */
+typedef struct tagCcTensor {
+  ccTensorFormat_t format;
+  ccDataType_t dataType;
+  int32_t dimCnt;
+  int32_t realDimCnt;
+  uint32_t dataSize;
+  int32_t dim[CC_DIM_MAX];
+  int32_t stride[CC_DIM_MAX];
+  ccVecQuantizePara_t vecQuantizePara;
+} ccTensor_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of filter tensor
+ */
+typedef struct tagCcFilter {
+  ccTensorFormat_t format;
+  ccDataType_t dataType;
+  int32_t dimCnt;
+  uint32_t dataSize;
+  int32_t dim[CC_DIM_MAX];
+} ccFilter_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of convolution operator
+ */
+typedef struct tagCcConvolution {
+  ccConvolutionMode_t mode;
+  ccPaddingMode_t padMode;
+  int32_t dimCnt;
+  int32_t padding[2 * (CC_DIM_MAX - 2)];
+  int32_t filterStride[CC_DIM_MAX - 2];
+  int32_t dilation[CC_DIM_MAX - 2];
+  int32_t group;
+  ccQuantizeDescriptor_t quantInfo;
+  ccConvolutionAipp_t aippInfo;
+  int32_t adj[CC_DIM_MAX - 2];
+  int32_t targetShape[CC_DIM_MAX - 2];
+  int32_t beforePadding[2 * (CC_DIM_MAX - 2)];  // pad before conv
+  uint32_t reluFlag;
+  int64_t concatBatchSize;
+} ccConvolution_t;
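+/*
+ * Usage sketch (editorial addition; an assumption, not taken from this
+ * header): these POD structs are filled field-by-field by the caller. For
+ * example, a StridedSlice over a 2-D tensor, assuming the TensorFlow-style
+ * begin/end/strides semantics the field names suggest:
+ *
+ *   ccStridedSlice_t slice = {};
+ *   slice.dimCnt = 2;
+ *   slice.begin[0] = 0;  slice.end[0] = 4;  slice.strides[0] = 1;
+ *   slice.begin[1] = 0;  slice.end[1] = 8;  slice.strides[1] = 1;
+ */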
+
+#define ccCorrelation_t ccConvolution_t
+typedef struct tagCcFullConnection_t {
+  ccQuantizeDescriptor_t quantInfo;
+  uint32_t infoTabSize;
+  const void *infoTab;
+  bool reluFlag;
+  ccFullConnectFwdAlgo_t algo;
+} ccFullConnection_t;
+
+typedef struct tagCcConcatFour2Five_t {
+  uint32_t branchNum;  // how many branches for box or class
+  uint32_t classNum;   // box branch's classNum is four, class branch's classNum is class number
+} ccConcatFour2Five_t;
+
+typedef struct tagCcTransdata_t {
+  uint64_t scaleQAddr;
+  uint8_t scaleQValueMode;
+  uint64_t offsetQAddr;
+  uint8_t quantAlgo;
+  uint8_t quantize8bitFlag;
+} ccTransdata_t;
+/**
+ * @ingroup dnn
+ * @brief struct define of pooling operator
+ */
+typedef struct tagCcPooling {
+  ccPoolingMode_t mode;
+  ccPaddingMode_t padMode;
+  ccNanPropagation_t maxpoolingNanOpt;
+  int32_t dimCnt;
+  int32_t windowDim[CC_DIM_MAX - 2];
+  int32_t padding[CC_DIM_MAX - 2];
+  int32_t stride[CC_DIM_MAX - 2];
+  int32_t dataMode;
+  int32_t ceilMode;
+  ccQuantizeDescriptor_t quantInfo;
+  ccPooingFwdAlgo_t algo;
+} ccPooling_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of activation operator
+ */
+typedef struct tagCcActivation {
+  ccActivationMode_t mode;
+  ccNanPropagation_t reluNanOpt;
+  double coef; /* ceiling for clipped RELU, alpha for ELU */
+  ccActivationPara_u activationPara;
+} ccActivation_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of svdf operator
+ */
+typedef struct tagCcSvdf {
+  ccTensorFormat_t format;
+  ccDataType_t dataType;
+  uint32_t batches;
+  uint32_t features;
+  uint32_t rank;
+  uint32_t inputSize;
+  uint32_t memorySize;
+} ccSvdf_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of hashtable lookup operator
+ */
+typedef struct tagCcHashTableLookup {
+  ccTensorFormat_t format;
+  ccDataType_t lookupType;
+  ccDataType_t keyType;
+  ccDataType_t valueType;
+  ccDataType_t outputType;
+  ccDataType_t hitsType;
+  uint32_t lookups;
+  uint32_t keys;
+  uint32_t rows;
+  uint32_t features;
+  uint16_t valueScale;
+  uint16_t outputScale;
+  uint16_t valueOffset;
+  uint16_t outputOffset;
+} ccHashTableLookup_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of prelu operator
+ */
+typedef struct tagCcPRelu {
+  ccNanPropagation_t reluNanOpt;
+  int32_t slopeCount;
+  bool channelShared;
+} ccPRelu_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of crop operator
+ */
+typedef struct tagCcCrop {
+  int32_t startAxis;
+  int32_t offset[CC_DIM_MAX];
+  int32_t offsetCnt;
+} ccCrop_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of SpatialTransformer operator
+ */
+typedef struct tagCcSpatialTransformer {
+  ccSamplerType_t samplerType;
+  ccDataType_t dataType;
+  int32_t dimCnt;
+  uint64_t dim[CC_DIM_MAX];
+  uint64_t alignCorner;
+} ccSpatialTransformer_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of ShiftTransformer operator
+ */
+typedef struct tagCcShiftTransformer {
+  ccSamplerType_t samplerType;
+  double xPreDefined;
+  double yPreDefined;
+  bool xShift;
+  bool yShift;
+  int32_t gridH;
+  int32_t gridW;
+} ccShiftTransformer_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of FasterRcnnProposal operator
+ */
+typedef struct tagCcFasterRcnnProposal {
+  int32_t preNMStopK;
+  int32_t postNMStopK;
+  float nmsTresh;
+  float minSize;
+  float featStride;
+  float baseSize;
+  int32_t ratioCnt;
+  int32_t scaleCnt;
+  float *ratio;
+  float *scale;
+  int32_t imgH;
+  int32_t imgW;
+} ccFasterRcnnProposal_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of LRN operator
+ */
+typedef struct tagCcLRN {
+  ccLRNMode_t lrnMode;
+  int32_t lrnN;
+  double lrnAlpha;
+  double lrnBeta;
+  double lrnK;
+} ccLRN_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of instanceNorm
+ */
+typedef struct tagCcInstancenorm {
+  ccInstanceNormMode_t mode;
+  double epsilon;
+} ccInstancenorm_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of assignOp operator
+ */
+typedef struct tagCcAssignOp {
+  ccAssignOpMode_t assignOpMode;
+} ccAssignOp_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of arcSinCos operator
+ */
+typedef struct tagCcArcSinCos {
+  ccArcSinCosMode_t arcSinCosMode;
+} ccArcSinCos_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of Detectpostprocess operator
+ */
+typedef struct tagCcDetectpostprocess {
+  int32_t numClasses;
+  float confThreshold;
+  float nmsThreshold;
+  int32_t outTopK;
+  float bboxRegWeightsDx;
+  float bboxRegWeightsDy;
+  float bboxRegWeightsDw;
+  float bboxRegWeightsDh;
+} ccDetectpostprocess_t;
+/**
+ * @ingroup dnn
+ * @brief struct define of FasterRcnnDetectionOutput operator
+ */
+typedef struct tagCcFasterRcnnDetectionOutput {
+  int32_t numClasses;
+  float nmsThreshold;
+  float postConfThreshold;
+  int32_t imgH;
+  int32_t imgW;
+  int32_t batchSize;
+} ccFasterRcnnDetectionOutput_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of SsdDetectionOutput operator
+ */
+typedef struct tagCcSsdDetectionOutput {
+  int32_t numClasses;
+  int32_t backgroundLabelId;
+  double preConfThreshold;
+  int32_t preTopK;
+  double nmsThreshold;
+  double nmsEta;
+  ccBoxCodeType_t codeType;
+  int32_t outTopK;
+  bool shareLocation;
+  bool varianceEncodedInTarget;
+  uint32_t boxTypeNum;
+  float var[4];
+  uint32_t variance_num;
+} ccSsdDetectionOutput_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of RefinedetDetectionOutput operator
+ */
+typedef struct tagCcRefinedetDetectionOutput {
+  int32_t numClasses;
+  int32_t backgroundLabelId;
+  double preConfThreshold;
+  int32_t preTopK;
+  double nmsThreshold;
+  double nmsEta;
+  ccBoxCodeType_t codeType;
+  int32_t outTopK;
+  bool shareLocation;
+  bool varianceEncodedInTarget;
+  uint32_t boxTypeNum;
+  float var[4];
+  uint32_t variance_num;
+  double objectness_score;
+} ccRefinedetDetectionOutput_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of MsrGenerateRpnProposals operator
+ */
+typedef struct tagCcMsrGenerateRpnProposals {
+  int32_t preNmsTopK;
+  int32_t postNmsTopK;
+  float nmsThreshold;
+  float rpnMiniSize;
+  int32_t imgH;
+  int32_t imgW;
+  uint32_t boxTypeNum;
+  float scoreThreshold;
+} ccMsrGenerateRpnProposals_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of RetinaPostprocessor operator
+ */
+typedef struct tagCcRetinaPostprocessor {
+  int32_t numClasses;
+  int32_t maxDetections;
+  float nmsThreshold;
+  float scoreThreshold;
+  int32_t imgH;
+  int32_t imgW;
+  uint32_t boxTypeNum;
+  float mean[4];
+  int32_t meanNum;
+  float std[4];
+  int32_t stdNum;
+  int32_t outputNum;
+  bool ocrFlag;
+} ccRetinaPostprocessor_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of GenerateSsdAnchors operator
+ */
+typedef struct tagCcGenerateSsdAnchors {
+  int32_t featureMapShapeList[20];
+  uint32_t featureMapShapeListSize;
+  int32_t boxSpecsNum[10];
+  uint32_t boxSpecsNumSize;
+  float scales[10];
+  uint32_t scalesNum;
+  float aspectRatios[10];
+  uint32_t aspectRatiosNum;
+  int32_t baseAnchorSize[2];
+  uint32_t baseAnchorSizeNum;
+  int32_t anchorStride[2];
+  uint32_t anchorStrideNum;
+  int32_t anchorOffset[2];
+  uint32_t anchorOffsetNum;
+  bool reduceBoxesInLowestLayer;
+  float minScale;
+  float maxScale;
+  int32_t imgH;
+/**
+ * @ingroup dnn
+ * @brief struct define of GenerateSsdAnchors operator
+ */
+typedef struct tagCcGenerateSsdAnchors {
+  int32_t featureMapShapeList[20];
+  uint32_t featureMapShapeListSize;
+  int32_t boxSpecsNum[10];
+  uint32_t boxSpecsNumSize;
+  float scales[10];
+  uint32_t scalesNum;
+  float aspectRatios[10];
+  uint32_t aspectRatiosNum;
+  int32_t baseAnchorSize[2];
+  uint32_t baseAnchorSizeNum;
+  int32_t anchorStride[2];
+  uint32_t anchorStrideNum;
+  int32_t anchorOffset[2];
+  uint32_t anchorOffsetNum;
+  bool reduceBoxesInLowestLayer;
+  float minScale;
+  float maxScale;
+  int32_t imgH;
+  int32_t imgW;
+} ccGenerateSsdAnchors_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of MscnnBoxOutput operator
+ */
+typedef struct tagCcMscnnBoxOutput {
+  double fgThreshold;
+  double nmsThreshold;
+  ccNmsType_t nmsType;
+  int32_t fieldH[CC_MAX_INPUT_CNT];
+  int32_t fieldW[CC_MAX_INPUT_CNT];
+  int32_t downsampleRate[CC_MAX_INPUT_CNT];
+  int32_t defaultBoxCnt;
+  double fieldWhr;
+  double fieldXyr;
+  int32_t maxNmsNum;
+  int32_t maxPostNmsNum;
+  double minSize;
+} ccMscnnBoxOutput_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of NMS operator
+ */
+typedef struct tagCcNms {
+  int32_t numClasses;
+  int32_t backgroundLabelId;
+  double preConfThreshold;
+  int32_t preTopK;
+  double nmsThreshold;
+  double nmsEta;
+  int32_t postTopK;
+  int32_t outTopK;
+  double postConfThreshold;
+  bool shareLocation;
+} ccNms_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of NMS/MultiClassNMS operator
+ */
+typedef struct tagCcMultiClassNms {
+  uint64_t numClasses;
+  float objThreshold;
+  float nmsThreshold;
+  float clsThreshold;
+  bool normal;
+  uint64_t coorType;
+} ccCcMultiClassNms_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of YoloDetectionOutput operator
+ */
+typedef struct tagCcYoloDetectionOutput {
+  ccYoloVersion_t yoloVersion;
+  uint32_t netH;
+  uint32_t netW;
+  uint32_t postTopK;
+  uint32_t classes;
+  float nmsThreshold;
+  float iouThreDecay;
+  float coorScaleFactor;
+  bool relative;
+  float objThreshold;
+  float clsThreshold;
+  uint32_t biasNum;
+  float *bias;
+} ccYoloDetectionOutput_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of GetRegionBox operator
+ */
+#ifndef CC_MAX_YOLO_BIAS_NUM
+#define CC_MAX_YOLO_BIAS_NUM (16)
+#endif
+
+typedef struct tagCcGetRegionBox {
+  uint32_t biasNum;
+  uint32_t H;
+  uint32_t W;
+  float bias[CC_MAX_YOLO_BIAS_NUM];
+} ccGetRegionBox_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of CorrectBoxes operator
+ */
+typedef struct tagCorrectBoxes {
+  uint32_t netW;
+  uint32_t netH;
+  bool relative;
+} ccCorrectBoxes_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of ClsProb operator
+ */
+typedef struct tagClsProb {
+  float objThreshold;
+} ccClsProb_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of SsdPriorBox operator
+ */
+typedef struct tagCcSsdPriorBox {
+  ccBoxCodeType_t codeType;
+  double *minSize;
+  int32_t minSizeNum;
+  double *maxSize;
+  int32_t maxSizeNum;
+  double *aspectRatio;
+  int32_t aspectRatioNum;
+  double *variance;
+  int32_t varianceNum;
+  int32_t imgH;
+  int32_t imgW;
+  double stepH;
+  double stepW;
+  double offset;
+  bool flip;
+  bool clip;
+} ccSsdPriorBox_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of Yolo2Region operator
+ */
+typedef struct tagCcYolo2Region {
+  ccSoftmaxTree_t softmaxTree;
+  bool softmax;
+  bool background;
+  bool treeSoftmax;
+} ccYolo2Region_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of YoloRegion operator
+ */
+typedef struct tagCcYoloRegion {
+  ccSoftmaxTree_t softmaxTree;
+  bool softmax;
+  bool background;
+  bool treeSoftmax;
+  int32_t classes;
+  int32_t coords;
+  int32_t boxes;
+  ccYoloVersion_t yoloV;
+} ccYoloRegion_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of power operator
+ */
+typedef struct tagCcPower {
+  float scale;
+  float shift;
+  float power;
+} ccPower_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of exp operator
+ */
+typedef struct tagCcExp {
+  ccDataType_t dataType;
+  uint32_t paramCnt;
+} ccExp_t;
+
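The power descriptor parameterizes the Caffe-style elementwise transform y = (shift + scale * x)^power; leaving all three fields at their neutral values yields the identity. A minimal sketch:

    // Hedged sketch: y = (shift + scale * x) ^ power; these values give y = x.
    ccPower_t pw = {};
    pw.scale = 1.0f;
    pw.shift = 0.0f;
    pw.power = 1.0f;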
+/**
+ * @ingroup dnn
+ * @brief struct define of log operator
+ */
+typedef struct tagCcLog {
+  ccDataType_t dataType;
+  uint32_t paramCnt;
+} ccLog_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of pow operator
+ */
+typedef struct tagCcPow {
+  ccDataType_t dataType;
+  uint32_t paramCnt;
+} ccPow_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of padv2 operator
+ */
+typedef struct tagCcPadV2 {
+  ccPadMode_t padMode;
+  void *padValue;
+  ccDataType_t padValueType;
+  int32_t padDimCnt;
+  int32_t padShapeLow[CC_DIM_MAX];
+  int32_t padShapeHigh[CC_DIM_MAX];
+} ccPadV2_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of psROIPooling operator
+ */
+typedef struct tagCcPsRoiPooling {
+  ccPoolingMode_t poolingMode;
+  int32_t pooledH;
+  int32_t pooledW;
+  float spatialScale;
+  float padRatio;
+  int32_t groupSize;
+  int32_t outputDim;
+} ccPsRoiPooling_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of RoIAlign operator
+ */
+typedef struct tagCcRoiAlign {
+  int32_t pooledH;
+  int32_t pooledW;
+  float spatialScale;
+  int32_t samplingRatio;
+} ccRoiAlign_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of RoiInterpPooling operator
+ */
+typedef struct tagCcRoiInterpPooling {
+  int32_t pooledH;
+  int32_t pooledW;
+  int32_t poolKernelH;
+  int32_t poolKernelW;
+  int32_t pooledTailH;
+  int32_t pooledTailW;
+  float spatialScaleH;
+  float spatialScaleW;
+} ccRoiInterpPooling_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of DetectionFull3DOutput operator
+ */
+typedef struct tagCcDetectionFull3DOutput {
+  int32_t imageWidth;
+  int32_t imageHeight;
+  int32_t numAngleBins;
+  float trcMarginRatioX;
+  float trcMarginRatioY;
+  int32_t pitchRangeD;
+  int32_t pitchPresetD;
+  float mountHeight;
+  int32_t visiblenessBins;
+  float meanVisibleness;
+  bool discreteVisibleness;
+} ccDetectionFull3DOutput_t;
+
+/**
+ * @ingroup dnn
+ * @brief struct define of MsrFastRcnnPredictions operator
+ */
+typedef struct tagMsrFastRcnnPredictions {
+  int32_t numClasses;    // num of classes
+  float scoreThreshold;  // the threshold of the score
+  double nmsThreshold;   // the threshold of nms
+  int32_t postTopK;
+  int32_t outTopK;
+  int32_t imgH;  // the height of image
+  int32_t imgW;  // the width of image
+} ccMsrFastRcnnPredictions_t;
+
+typedef struct tagCcResizeBilinear {
+  ccResizeOutputDimMode_t resizeOutputDimMode;
+  bool alignCorners;
+  int32_t zoom_factor;
+  int32_t shrink_factor;
+  int32_t height;
+  int32_t width;
+  int32_t pad_begin;
+  int32_t pad_end;
+} ccResizeBilinear_t;
+
+typedef struct tagCcResizeNearestNeighbor {
+  bool alignCorners;
+  int32_t height;
+  int32_t width;
+} ccResizeNearestNeighbor_t;
+
+typedef struct tagCcEltwise {
+  ccQuantize_t *quantInfo;
+  bool reluFlag;
+} ccEltwise_t;
+
+typedef struct tagCcBatchNorm {
+  bool reluFlag;
+} ccBatchNorm_t;
+
+typedef struct tagCcPad {
+  ccPadMode_t padMode;
+  float padValue;
+  int32_t htoppad;     // padLow[0]
+  int32_t hbottompad;  // padHigh[0]
+  int32_t wleftpad;    // padLow[1]
+  int32_t wrightpad;   // padHigh[1]
+} ccPad_t;
+
+typedef struct tagCcSubCondition {
+  uint32_t BaseCondValue[4];
+  ccCMPType_t condType[4];
+  ccResultType_t resultType;
+} ccSubCondition;
+
+typedef struct tagCcShapeClassifyCond {
+  uint32_t subConditionNum;
+  ccResultType_t resultType;
+  uint32_t true_value;
+  ccSubCondition subCond[2];
+} ccShapeClassifyCond;
+
+#ifndef CC_SHAPE_CLASSIFY_CONDITION_NUM
+#define CC_SHAPE_CLASSIFY_CONDITION_NUM (8)
+#endif
+
+typedef struct tagCcShapeClassify {
+  uint32_t shapeClassifyConditionNum;
+  uint32_t defaultValue;
+  ccShapeClassifyCond shapeClassifyCond[CC_SHAPE_CLASSIFY_CONDITION_NUM];
+} ccShapeClassify_t;
+
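The pad descriptor stores the four edge paddings individually; the inline comments tie each field back to the padLow/padHigh convention used by padv2. A minimal sketch for a constant-value one-pixel border (the mode enumerator name is assumed):

    // Hedged sketch: constant-value padding of one pixel on every edge.
    // CC_PAD_CONSTANT is an assumed enumerator name.
    ccPad_t pad = {};
    pad.padMode = CC_PAD_CONSTANT;
    pad.padValue = 0.0f;
    pad.htoppad = 1;     // padLow[0]
    pad.hbottompad = 1;  // padHigh[0]
    pad.wleftpad = 1;    // padLow[1]
    pad.wrightpad = 1;   // padHigh[1]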
+/**
+ * @ingroup dnn
+ * @brief struct define of square operator
+ */
+typedef struct tagCcSquare {
+  ccSquareMode_t mode;
+} ccSquare_t;
+
+/*
+ * @ingroup dnn
+ * @brief operation of segment reduction
+ */
+typedef enum {
+  CC_SEGMENT_REDUCTION_OP_SUM = 0, /**< sum */
+  CC_SEGMENT_REDUCTION_OP_INVALID
+} ccSegmentReductionOpType_t;
+
+typedef struct tagCcFillParam {
+  // The filler type.
+  ccFillOpType_t fillType;
+  ccDataType_t valueDatatype;
+  const void *value;  // the value in constant fill
+  const void *min;    // the min value in uniform fill
+  const void *max;    // the max value in uniform fill
+  const void *mean;   // the mean value in Gaussian fill
+  const void *std;    // the std value in Gaussian fill
+  // the seeds used to generate data in Gaussian and uniform fill
+  int64_t seed1;
+  int64_t seed2;
+} ccFillParam_t;
+
+typedef struct tagNonMaxSuppression {
+  ccDataType_t dataType;
+  uint32_t paraCount;
+} ccNonMaxSuppression_t;
+
+typedef struct tagCcArgmaxmin {
+  int32_t axisType;
+  bool outMaxVal;
+  int64_t topK;
+  int64_t reduceSize;
+  int64_t reduceStride;
+  int64_t axis;
+  bool keepDims;
+} ccArgmaxmin_t;
+
+typedef struct tagUpsamplePara {
+  int32_t scale;
+  int32_t scaleHeight;
+  int32_t scaleWidth;
+  int32_t upsampleHeight;
+  int32_t upsampleWidth;
+  bool padOutHeight;
+  bool padOutWidth;
+} ccUpsamplePara_t;
+
+typedef struct tagCcConcatFive2Four_t {
+  ccTransForLossMode_t mode;
+  uint32_t classNum;
+} ccConcatFive2Four_t;
+
+};  // namespace cce
+#endif  // DNN_STRUCT_BASE_HPP__
diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
new file mode 100644
index 00000000..5733d68f
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
@@ -0,0 +1,155 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FWK_ADPT_STRUCT_H__
+#define FWK_ADPT_STRUCT_H__
+
+#include <cstdint>
+
+namespace aicpu {
+namespace FWKAdapter {
+
+// API RETURN CODE
+enum FWKAdptAPIRetCode {
+  FWK_ADPT_SUCCESS = 0,                  // success
+  FWK_ADPT_NOT_INIT = 1,                 // not init
+  FWK_ADPT_ALLOC_FAILED = 2,             // allocate memory failed
+  FWK_ADPT_PARAM_INVALID = 3,            // invalid input param
+  FWK_ADPT_PARAM_PARSE_FAILED = 4,       // parse input param failed
+  FWK_ADPT_NATIVE_ERROR = 5,             // native error code
+  FWK_ADPT_NOT_SUPPORT_OPTYPE = 6,       // unsupported operate type
+  FWK_ADPT_INTERNAL_ERROR = 7,           // adapter internal error
+  FWK_ADPT_NOT_SUPPORT_DATATYPE = 8,     // unsupported input/output data type
+  FWK_ADPT_KERNEL_ALREADY_RUNING = 9,    // kernel already running, parallel run not supported
+  FWK_ADPT_SESSION_NOT_EXIST = 10,       // session id does not exist
+  FWK_ADPT_SESSION_ALREADY_EXIST = 11,   // session id already exists for create session
+  FWK_ADPT_NATIVE_END_OF_SEQUENCE = 12,  // end of sequence
+  FWK_ADPT_EXTEND_TYPE_NOT_EXIST = 13,   // extend info type does not exist
+  FWK_ADPT_UNKNOWN_ERROR = 99            // unknown error code
+};
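Callers typically collapse these adapter codes to a boolean at the API boundary; a minimal sketch of such a helper (not part of the header itself):

    // Hedged sketch: treat anything but FWK_ADPT_SUCCESS as a hard failure.
    inline bool FwkAdptOk(int32_t retCode) {
      return retCode == aicpu::FWKAdapter::FWK_ADPT_SUCCESS;
    }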
+
+// FWKAdapter operate type
+// Notice: new operate types must be checked with OMM, and must be appended at the end of this list.
+enum FWKOperateType {
+  FWK_ADPT_SESSION_CREATE = 0,
+  FWK_ADPT_KERNEL_RUN,
+  FWK_ADPT_KERNEL_DESTROY,
+  FWK_ADPT_SESSION_DESTROY,
+  FWK_ADPT_SINGLE_OP_RUN,
+  FWK_ADPT_KERNEL_RUN_NO_SESS,
+};
+
+// Extend Info type for task
+enum FWKTaskExtInfoType {
+  FWK_ADPT_EXT_SHAPE_TYPE = 0,
+  FWK_ADPT_EXT_INPUT_SHAPE,
+  FWK_ADPT_EXT_OUTPUT_SHAPE,
+  FWK_ADPT_EXT_UPDATE_ADDR,
+  FWK_ADPT_EXT_OP_NAME,
+  FWK_ADPT_EXT_SESSION_INFO,
+  FWK_ADPT_EXT_BITMAP,
+  FWK_ADPT_EXT_TOPIC_TYPE,
+  FWK_ADPT_EXT_ASYNCWAIT,
+  FWK_ADPT_EXT_INVALID
+};
+
+enum FWKExtTopicType {
+  FWK_ADPT_TOPIC_DEVICE_ONLY = 0,
+  FWK_ADPT_TOPIC_DEVICE_FIRST,
+  FWK_ADPT_TOPIC_HOST_ONLY,
+  FWK_ADPT_TOPIC_HOST_FIRST,
+  FWK_ADPT_TOPIC_INVALID
+};
+
+enum FWKExtUpdateAddrType {
+  FWK_ADPT_UPDATE_NULL = 0,
+  FWK_ADPT_UPDATE_INPUT,
+  FWK_ADPT_UPDATE_OUTPUT,
+  FWK_ADPT_UPDATE_INPUT_OUTPUT
+};
+
+enum FWKExtWaitType {
+  FWK_ADPT_WAIT_TYPE_NULL = 0,
+  FWK_ADPT_WAIT_TYPE_EVENT,
+  FWK_ADPT_WAIT_TYPE_INVALID
+};
+
+#pragma pack(push, 1)
+// API Parameter Structure
+struct StrFWKKernel {
+  FWKOperateType opType;
+  uint64_t sessionID;  // unique
+
+  uint64_t stepIDAddr;    // step id addr
+  uint64_t kernelID;      // run kernel id, unique in session
+  uint64_t nodeDefLen;    // nodeDef protobuf len
+  uint64_t nodeDefBuf;    // NodeDef protobuf offset addr, need convert to void*
+  uint64_t funDefLibLen;  // FunctionDefLibrary protobuf len
+  uint64_t funDefLibBuf;  // FunctionDefLibrary protobuf addr which is used in NodeDef, need convert to void*
+
+  uint64_t inputOutputLen;     // InputOutput shape protobuf len
+  uint64_t inputOutputBuf;     // InputOutput shape protobuf addr, need convert to void*
+  uint64_t workspaceBaseAddr;  // Workspace base addr, need convert to void*
+  uint64_t inputOutputAddr;    // InputOutput addr, need convert to void*
+
+  uint64_t extInfoLen;   // extend info total length
+  uint64_t extInfoAddr;  // extend info addr, ExtInfo structure
+};
+#pragma pack(pop)
+
+typedef StrFWKKernel FWKOperateParam;
+
+// Extend info ShapeAndType
+const uint32_t kMaxShapeDims = 8;
+#pragma pack(push, 1)
+struct ShapeAndType {
+  int32_t type;
+  int64_t dims[kMaxShapeDims];
+};
+#pragma pack(pop)
+
+// Extend info structure for extInfoAddr
+const uint32_t kExtInfoHeadSize = 8;
+
+#pragma pack(push, 1)
+struct ExtInfo {
+  int32_t infoType;  // extend type
+  uint32_t infoLen;  // length of infoMsg
+  char infoMsg[0];   // extend value
+};
+#pragma pack(pop)
+
+#pragma pack(push, 1)
+struct ResultSummary {
+  uint64_t shape_data_ptr;   // shape data addr, need convert to void*
+  uint64_t shape_data_size;  // num of dims
+  uint64_t raw_data_ptr;     // raw data addr, need convert to void*
+  uint64_t raw_data_size;    // size of raw data
+};
+#pragma pack(pop)
+
+#pragma pack(push, 1)
+struct AsyncWait {
+  uint8_t waitType;  // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
+  uint32_t waitId;   // wait id, GE refresh
+  uint32_t timeOut;  // reserved
+  uint64_t reserved;
+};
+#pragma pack(pop)
+}  // end namespace FWKAdapter
+}  // namespace aicpu
+
+#endif  // FWK_ADPT_STRUCT_H__
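The extend-info buffer referenced by extInfoAddr is a packed type-length-value stream: each record is an 8-byte header (kExtInfoHeadSize, matching the packed infoType plus infoLen) followed by infoLen bytes of payload. A hedged host-side sketch of walking it; the assumption that an FWK_ADPT_EXT_INPUT_SHAPE payload is an array of ShapeAndType records follows from the comments above but is not stated explicitly:

    // Hedged sketch: iterate the packed ExtInfo records in an extend-info buffer.
    #include <cstdint>

    void WalkExtInfo(const char *buf, uint64_t len) {
      uint64_t offset = 0;
      while (offset + aicpu::FWKAdapter::kExtInfoHeadSize <= len) {
        const auto *info = reinterpret_cast<const aicpu::FWKAdapter::ExtInfo *>(buf + offset);
        if (info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE) {
          // payload presumably holds infoLen / sizeof(ShapeAndType) shape records
        }
        offset += aicpu::FWKAdapter::kExtInfoHeadSize + info->infoLen;  // advance to next record
      }
    }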
diff --git a/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp b/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp
new file mode 100644
index 00000000..fa5a95c9
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef L2FUSION_STRUCT_HPP_
+#define L2FUSION_STRUCT_HPP_
+
+#include <map>
+#include <string>
+#include "runtime/kernel.h"
+
+#define L2_DYNAMIC_SPLIT_NUM
+
+using namespace std;
+
+namespace fusion {
+
+typedef struct tagL2Data {
+  uint32_t l2Index;
+  uint64_t l2Addr;
+  uint64_t l2PageNum;
+} L2Data_t;
+
+typedef std::map<uint64_t, L2Data_t> L2DataMap_t;    // the key is ddr addr
+typedef std::pair<uint64_t, L2Data_t> L2DataPair_t;  // the key is ddr addr
+
+typedef struct TagTaskL2Info {
+  string nodeName;
+  rtL2Ctrl_t l2ctrl;
+
+  L2DataMap_t input;
+  L2DataMap_t output;
+  uint32_t isUsed;
+} TaskL2Info_t;
+
+typedef std::map<uint32_t, TaskL2Info_t> TaskL2InfoMap_t;    // the key is nodeId
+typedef std::pair<uint32_t, TaskL2Info_t> TaskL2InfoPair_t;  // the key is nodeId
+
+typedef std::map<std::string, TaskL2Info_t> TaskL2InfoFEMap_t;    // the key is nodeName
+typedef std::pair<std::string, TaskL2Info_t> TaskL2InfoFEPair_t;  // the key is nodeName
+
+}  // namespace fusion
+
+#endif  // L2FUSION_STRUCT_HPP_
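Each L2DataMap_t entry keys an L2 buffer descriptor by the DDR address it shadows. A hedged usage sketch, assuming the key/value reconstruction above (the template arguments were inferred from the "key is ddr addr" comments; all addresses and sizes are illustrative):

    // Hedged sketch: record that a DDR buffer is mirrored by 16 L2 pages.
    fusion::L2Data_t data = {};
    data.l2Index = 3;          // illustrative L2 index
    data.l2Addr = 0x200000;    // illustrative L2-side address
    data.l2PageNum = 16;       // illustrative page count
    fusion::L2DataMap_t inputs;
    inputs.insert(fusion::L2DataPair_t(0x10000000ULL, data));  // keyed by ddr addr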
diff --git a/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h b/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h
new file mode 100644
index 00000000..299998e3
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FUSION_ENGINE_HPP_
+#define FUSION_ENGINE_HPP_
+
+#include "cce/cce.h"
+#include "graph/compute_graph.h"
+#include "proto/task.pb.h"
+
+#include <map>
+#include <vector>
+
+using namespace domi;
+using namespace std;
+
+namespace fusion {
+enum {
+  FUSION_STATUS_SUCCESS = 0,
+  FUSION_STATUS_FAIL = 1,
+};
+
+typedef struct {
+  uint64_t weightSize;
+  uint64_t memorySize;
+  uint8_t *dataMemBase;
+  uint8_t *weightMemBase;
+  uint32_t l2Enable;      // 1 - enable l2 buffer allocation, 0 - disable l2 buffer allocation
+  uint32_t fusionEnable;  // 1 - enable buffer fusion, 0 - disable buffer fusion
+} ModelRes;
+
+static const std::string SCOPE_ID_ATTR = "fusion_scope";
+static const std::string L2FUSION_DYNAMIC_CONVERGE_OP = "l2fusion_dynamic_converge_op";
+static const std::string L2FUSION_DYNAMIC_SPLIT_NUM = "l2fusion_dynamic_split_num";
+static const std::string FUSION_VIRTUAL_OP = "fusion_virtual_op";
+static const std::string FUSION_MULTI_BATCH_STRIDE = "fusion_multi_bathc_stride";
+
+#define TVM_TYPE 1
+
+typedef std::map<int64_t, std::vector<ge::NodePtr>> kScopeNodeMap_t;
+typedef std::pair<int64_t, std::vector<ge::NodePtr>> kScopeNodePair_t;
+
+uint32_t BufferFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph, bool enable_l2dynamic = true);
+uint32_t BufferFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
+uint32_t GraphFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
+uint32_t FusionTaskBuild(cce::ccHandle_t ccHandle, ge::ComputeGraphPtr fusionGraph, ge::Buffer &buffer,
+                         ModelRes &modelRes, std::vector<TaskDef> &task_def_list_);
+void FusionTaskBuildComplete(std::vector<cce::ccHandle_t> cchandleList);
+uint32_t GraphFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
+}  // namespace fusion
+
+#endif  // FUSION_ENGINE_HPP_
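Taken together, the entry points above suggest the inference-side flow: fuse the graph, then lower the fused graph to task definitions against a cce handle. A heavily hedged sketch of that call order (the actual sequencing inside GE may differ; graph, handle, and buffer setup are elided, and the vector element types rely on the reconstructions above):

    // Hedged sketch of the call order implied by the declarations above.
    uint32_t RunFusion(cce::ccHandle_t handle, ge::ComputeGraphPtr orig, ge::ComputeGraphPtr fused,
                       ge::Buffer &buffer, fusion::ModelRes &res, std::vector<domi::TaskDef> &tasks) {
      if (fusion::GraphFusion(orig, fused) != fusion::FUSION_STATUS_SUCCESS) {
        return fusion::FUSION_STATUS_FAIL;  // graph-level fusion failed
      }
      if (fusion::BufferFusion(orig, fused) != fusion::FUSION_STATUS_SUCCESS) {
        return fusion::FUSION_STATUS_FAIL;  // buffer/L2 fusion failed
      }
      return fusion::FusionTaskBuild(handle, fused, buffer, res, tasks);
    }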
diff --git a/third_party/fwkacllib/inc/cce/taskdown_api.h b/third_party/fwkacllib/inc/cce/taskdown_api.h
new file mode 100644
index 00000000..2323aaa7
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/taskdown_api.h
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TASKDOWN_API_H_
+#define TASKDOWN_API_H_
+
+#include <map>
+#include <vector>
+#include "cce/cce.h"
+#include "l2fusion_struct.hpp"
+#include "taskdown_common.hpp"
+
+namespace cce {
+
+#define CC_FUSION_OP_MAX 32
+
+typedef struct tagOpAddrsInfo {
+  void *addrPos;
+  uintptr_t addrData;
+} ccOpAddrsInfo;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ccStatus_t ccUpdateKernelArgs(ccOpContext &opContext, uint64_t dataBaseAddr, uint64_t weightBaseAddr,
+                              uint64_t variableBaseAddr, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr);
+
+#ifdef __cplusplus
+}
+#endif
+
+ccStatus_t ccGetKernelArgsAddrs(ccOpContext &opContext, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr,
+                                std::vector<ccOpAddrsInfo> &opAddrsInfo);
+
+ccStatus_t ccSetKernelArgs(std::vector<ccOpAddrsInfo> &dateInfo);
+
+ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType);
+
+}  // namespace cce
+#endif  // TASKDOWN_API_H_
diff --git a/third_party/fwkacllib/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/cce/taskdown_common.hpp
new file mode 100644
index 00000000..7954162e
--- /dev/null
+++ b/third_party/fwkacllib/inc/cce/taskdown_common.hpp
@@ -0,0 +1,108 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TASKDOWN_COMMON_H_
+#define TASKDOWN_COMMON_H_
+
+#include <map>
+#include "cce/cce_def.hpp"
+#include "common/attr_list.hpp"
+#include "l2fusion_struct.hpp"
+
+namespace cce {
+
+#define CC_FUSION_OP_MAX 32
+
+typedef enum tagccKernelType {
+  CCE_AI_CORE = 0, /* cce aicore */
+  CCE_AI_CPU = 1,  /* cce aicpu */
+  TE = 2,          /* te operator */
+  CUSTOMIZED = 3,  /* customized operator */
+  TE_AI_CORE = 4,  /* te aicore operator */
+  TE_AI_CPU = 5,   /* te aicpu operator */
+  AI_CPU = 6,      /* aicpu */
+  CUST_AI_CPU = 7, /* custom aicpu */
+  HOST_CPU = 8,    /* host cpu */
+  INVALID = 10000  /* unknown kernel type */
+} ccKernelType;
+
+typedef struct tagOpContext {
+  ccKernelType kernelType;
+  uint32_t opId;
+  uint32_t kernelFuncId;
+  uint32_t opIndex;
+  uint32_t opCount;
+  uint32_t opIndex2[CC_FUSION_OP_MAX];
+  bool isFlowtable;
+  uint16_t *argsOffset;
+  uint32_t argsCount;
+  uint64_t genDataBaseAddr;
+  uint64_t genDataBaseSize;
+  uint64_t genWeightBaseAddr;
+  uint64_t genWeightBaseSize;
+  uint64_t genVariableBaseAddr;
+  uint64_t genVariableBaseSize;
+  uint64_t l2ctrlSize;
+} ccOpContext;
+
+typedef struct tagOpReadCount {
+  bool isEnable;
+  std::map<uint64_t, uint32_t> tensorRc;
+} ccOpReadCount;
+
+typedef enum tagTaskDownKernelIdMode {
+  CC_TASKDOWN_RESERVED = 0,
+  CC_TASKDOWN_ROIPOOLING,
+  CC_TASKDOWN_ROIPOOLING_PERF,
+  CC_TASKDOWN_ROIALIGN,
+  CC_TASKDOWN_ROIALIGN_PERF,
+  CC_TASKDOWN_FC,
+  CC_TASKDOWN_FC_COMPRESS,
+  CC_TASKDOWN_SOFTMAX_LOWEST,
+  CC_TASKDOWN_ROIALIGN_FP16,
+  CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR,
+  CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR_COMMON,
+} ccTaskDownKernelIdMode_t;
+
+ccStatus_t GetStream(ccHandle_t handle, rtStream_t *streamId);
+
+ccStatus_t ccClearOpMap(ccHandle_t handle);
+
+ccStatus_t ccSetKernelOpMap(ccHandle_t handle);
+
+ccStatus_t ccSetKernelContext(ccHandle_t handle, uint32_t opId, AttrList &attrList, bool isFlowtable,
+                              ccKernelType kernelType, void *pgraph);
+
+ccStatus_t ccGetKernelContext(rtStream_t streamId, ccOpContext &opContext);
+
+ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType);
+
+ccStatus_t ccSetStreamL2Map(ccHandle_t handle, fusion::TaskL2InfoMap_t &l2AllocRes);
+
+ccStatus_t ccGetStreamL2Map(rtStream_t streamId, uint32_t opIndex, fusion::TaskL2Info_t *&l2Data);
+
+ccStatus_t ccSetOpIndex(ccHandle_t handle, uint32_t opIndex);
+
+ccStatus_t ccGetOpIndex(ccHandle_t handle, uint32_t &opIndex);
+
+ccStatus_t ccGetOpIndexByStream(rtStream_t streamId, uint32_t &opIndex);
+
+ccStatus_t ccClearStreamL2Map(ccHandle_t handle);
+
+ccStatus_t ccGetKernelReadCount(rtStream_t streamId, ccOpReadCount &rc);
+
+}  // namespace cce
+#endif  // TASKDOWN_COMMON_H_
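End to end, the taskdown API added above is what refreshes a fused kernel's argument buffer after model memory moves: gather the per-op argument slots, then commit them. A hedged sketch of that host-side sequence; CC_STATUS_SUCCESS is assumed to be the success code defined in cce_def.hpp, and real callers obtain the ccOpContext via ccGetKernelContext on the task's stream:

    // Hedged sketch: gather an op's argument slots, then commit them in one call.
    #include <vector>

    cce::ccStatus_t PatchArgs(cce::ccOpContext &ctx, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr) {
      std::vector<cce::ccOpAddrsInfo> addrs;
      cce::ccStatus_t ret = cce::ccGetKernelArgsAddrs(ctx, argsAddr, argsSize, l2ctrlAddr, addrs);
      if (ret != cce::CC_STATUS_SUCCESS) {
        return ret;
      }
      return cce::ccSetKernelArgs(addrs);  // presumably writes each addrData into its addrPos slot
    }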