From a8420bd06496cd5e728dc5e560e97279064273d5 Mon Sep 17 00:00:00 2001 From: dingpeifei Date: Sat, 26 Jun 2021 16:28:14 +0800 Subject: [PATCH] code_sync_0626_inc --- inc/external/acl/acl.h | 10 +- inc/external/acl/acl_base.h | 91 +-- inc/external/acl/acl_mdl.h | 361 ++++++------ inc/external/acl/acl_op.h | 119 ++-- inc/external/acl/acl_op_compiler.h | 54 +- inc/external/acl/acl_prof.h | 83 ++- inc/external/acl/acl_rt.h | 142 +++-- inc/external/acl/acl_tdt.h | 31 +- inc/external/acl/error_codes/rt_error_codes.h | 150 ++--- inc/external/acl/ops/acl_cblas.h | 179 ++++-- inc/external/acl/ops/acl_dvpp.h | 425 ++++++++------ inc/external/acl/ops/acl_fv.h | 14 +- inc/external/hccl/hccl.h | 39 +- inc/external/hccl/hccl_types.h | 84 +-- inc/external/runtime/rt_error_codes.h | 150 ++--- .../fwkacllib/inc/ops/elewise_calculation_ops.h | 22 +- third_party/fwkacllib/inc/ops/image_ops.h | 10 +- third_party/fwkacllib/inc/ops/math_ops.h | 78 +-- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 20 +- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 646 ++++++++++----------- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 47 +- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 73 +-- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 21 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 10 +- third_party/fwkacllib/inc/ops/random_ops.h | 10 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 101 +++- third_party/fwkacllib/inc/ops/rnn.h | 10 +- third_party/fwkacllib/inc/ops/selection_ops.h | 21 +- .../fwkacllib/inc/ops/split_combination_ops.h | 9 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 4 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 1 - third_party/fwkacllib/inc/runtime/rt_model.h | 1 - 32 files changed, 1669 insertions(+), 1347 deletions(-) diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h index 8d261201..a53d029d 100644 --- a/inc/external/acl/acl.h +++ b/inc/external/acl/acl.h @@ -26,9 +26,9 @@ extern "C" { #endif // Current version is 1.0.0 -#define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 -#define ACL_PATCH_VERSION 0 +#define ACL_MAJOR_VERSION 1 +#define ACL_MINOR_VERSION 0 +#define ACL_PATCH_VERSION 0 /** * @ingroup AscendCL @@ -72,11 +72,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min * * @retval null for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg(); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_H_ +#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index 64d4bd81..417a80c8 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -136,49 +136,50 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005; #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE typedef enum { - ACL_DT_UNDEFINED = -1, - ACL_FLOAT = 0, - ACL_FLOAT16 = 1, - ACL_INT8 = 2, - ACL_INT32 = 3, - ACL_UINT8 = 4, - ACL_INT16 = 6, - ACL_UINT16 = 7, - ACL_UINT32 = 8, - ACL_INT64 = 9, - ACL_UINT64 = 10, - ACL_DOUBLE = 11, - ACL_BOOL = 12, - ACL_STRING = 13, + ACL_DT_UNDEFINED = -1, + ACL_FLOAT = 0, + ACL_FLOAT16 = 1, + ACL_INT8 = 2, + ACL_INT32 = 3, + ACL_UINT8 = 4, + ACL_INT16 = 6, + ACL_UINT16 = 7, + ACL_UINT32 = 8, + ACL_INT64 = 9, + ACL_UINT64 = 10, + ACL_DOUBLE = 11, + ACL_BOOL = 12, + ACL_STRING = 13, } aclDataType; typedef enum { - ACL_FORMAT_UNDEFINED = -1, - ACL_FORMAT_NCHW = 0, - ACL_FORMAT_NHWC = 1, - ACL_FORMAT_ND = 2, - ACL_FORMAT_NC1HWC0 = 3, - ACL_FORMAT_FRACTAL_Z = 4, - ACL_FORMAT_NC1HWC0_C04 = 12, - ACL_FORMAT_NDHWC = 
27, - ACL_FORMAT_FRACTAL_NZ = 29, - ACL_FORMAT_NCDHW = 30, - ACL_FORMAT_NDC1HWC0 = 32, - ACL_FRACTAL_Z_3D = 33 + ACL_FORMAT_UNDEFINED = -1, + ACL_FORMAT_NCHW = 0, + ACL_FORMAT_NHWC = 1, + ACL_FORMAT_ND = 2, + ACL_FORMAT_NC1HWC0 = 3, + ACL_FORMAT_FRACTAL_Z = 4, + ACL_FORMAT_NC1HWC0_C04 = 12, + ACL_FORMAT_NDHWC = 27, + ACL_FORMAT_FRACTAL_NZ = 29, + ACL_FORMAT_NCDHW = 30, + ACL_FORMAT_NDC1HWC0 = 32, + ACL_FRACTAL_Z_3D = 33 } aclFormat; typedef enum { - ACL_DEBUG = 0, - ACL_INFO = 1, - ACL_WARNING = 2, - ACL_ERROR = 3, + ACL_DEBUG = 0, + ACL_INFO = 1, + ACL_WARNING = 2, + ACL_ERROR = 3, } aclLogLevel; typedef enum { - ACL_MEMTYPE_DEVICE = 0, - ACL_MEMTYPE_HOST = 1, + ACL_MEMTYPE_DEVICE = 0, + ACL_MEMTYPE_HOST = 1, } aclMemType; + /** * @ingroup AscendCL * @brief Converts data of type aclFloat16 to data of type float @@ -311,7 +312,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); * @retval aclTensorDesc pointer. * @retval nullptr if param is invalid or run out of memory */ -ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, +ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, + int numDims, + const int64_t *dims, aclFormat format); /** @@ -333,7 +336,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, +ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc, + size_t dimsCount, int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); /** @@ -430,7 +434,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, +ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, + size_t index, + size_t dimRangeNum, int64_t *dimRange); /** @@ -467,7 +473,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, - aclTensorDesc **dstDesc); + aclTensorDesc **dstDesc); /** * @ingroup AscendCL @@ -555,7 +561,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu * * @retval null for failed. * @retval OtherValues success. - */ +*/ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); /** @@ -566,7 +572,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, * * @retval null for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); /** @@ -618,7 +624,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy * @param ... 
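Taken together, aclCreateTensorDesc and aclSetTensorShapeRange above are how a dynamic-shape input is described: create the descriptor with a placeholder dim, then bound each dim with a [min, max] pair. A minimal C sketch under the signatures in this hunk; the use of -1 as the unknown-dim placeholder and ACL_TENSOR_SHAPE_RANGE_NUM == 2 are assumptions, not something this hunk states:

#include "acl/acl_base.h"

/* Describe a 2-D float input whose first dim is dynamic in [1, 16]. */
void DescribeDynamicInput(void) {
  int64_t dims[2] = {-1, 224};  /* -1: unknown dim (assumed convention) */
  aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 2, dims, ACL_FORMAT_ND);
  /* One [min, max] pair per dim, assuming ACL_TENSOR_SHAPE_RANGE_NUM == 2. */
  int64_t ranges[2][ACL_TENSOR_SHAPE_RANGE_NUM] = {{1, 16}, {224, 224}};
  aclError ret = aclSetTensorShapeRange(desc, 2, ranges);
  (void)ret;  /* check against ACL_SUCCESS in real code */
  aclDestroyTensorDesc(desc);
}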
[IN] the value of current log */ ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, - const char *fmt, ...); + const char *fmt, ...); /** * @ingroup AscendCL @@ -626,13 +632,14 @@ ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const * * @retval null for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY const char *aclrtGetSocName(); -#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) +#define ACL_APP_LOG(level, fmt, ...) \ + aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ +#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 2bf85e29..1721929e 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -27,19 +27,19 @@ extern "C" { #endif -#define ACL_MAX_DIM_CNT 128 -#define ACL_MAX_TENSOR_NAME_LEN 128 -#define ACL_MAX_BATCH_NUM 128 -#define ACL_MAX_HW_NUM 128 -#define ACL_MAX_SHAPE_COUNT 128 -#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF - -#define ACL_MDL_LOAD_FROM_FILE 1 -#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 -#define ACL_MDL_LOAD_FROM_MEM 3 -#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 -#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 -#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 +#define ACL_MAX_DIM_CNT 128 +#define ACL_MAX_TENSOR_NAME_LEN 128 +#define ACL_MAX_BATCH_NUM 128 +#define ACL_MAX_HW_NUM 128 +#define ACL_MAX_SHAPE_COUNT 128 +#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF + +#define ACL_MDL_LOAD_FROM_FILE 1 +#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 +#define ACL_MDL_LOAD_FROM_MEM 3 +#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 +#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 +#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" @@ -52,123 +52,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo; typedef struct aclmdlConfigHandle aclmdlConfigHandle; typedef enum { - ACL_YUV420SP_U8 = 1, - ACL_XRGB8888_U8, - ACL_RGB888_U8, - ACL_YUV400_U8, - ACL_NC1HWC0DI_FP16, - ACL_NC1HWC0DI_S8, - ACL_ARGB8888_U8, - ACL_YUYV_U8, - ACL_YUV422SP_U8, - ACL_AYUV444_U8, - ACL_RAW10, - ACL_RAW12, - ACL_RAW16, - ACL_RAW24, - ACL_AIPP_RESERVED = 0xffff, + ACL_YUV420SP_U8 = 1, + ACL_XRGB8888_U8, + ACL_RGB888_U8, + ACL_YUV400_U8, + ACL_NC1HWC0DI_FP16, + ACL_NC1HWC0DI_S8, + ACL_ARGB8888_U8, + ACL_YUYV_U8, + ACL_YUV422SP_U8, + ACL_AYUV444_U8, + ACL_RAW10, + ACL_RAW12, + ACL_RAW16, + ACL_RAW24, + ACL_AIPP_RESERVED = 0xffff, } aclAippInputFormat; typedef enum { - ACL_MDL_PRIORITY_INT32 = 0, - ACL_MDL_LOAD_TYPE_SIZET, - ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ - ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ - ACL_MDL_MEM_SIZET, - ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ - ACL_MDL_WEIGHT_SIZET, - ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ - ACL_MDL_WORKSPACE_SIZET, - ACL_MDL_INPUTQ_NUM_SIZET, - ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ - ACL_MDL_OUTPUTQ_NUM_SIZET, - ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ + ACL_MDL_PRIORITY_INT32 = 0, + ACL_MDL_LOAD_TYPE_SIZET, + ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ + ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ + 
ACL_MDL_MEM_SIZET, + ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ + ACL_MDL_WEIGHT_SIZET, + ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */ + ACL_MDL_WORKSPACE_SIZET, + ACL_MDL_INPUTQ_NUM_SIZET, + ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ + ACL_MDL_OUTPUTQ_NUM_SIZET, + ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ } aclmdlConfigAttr; typedef enum { - ACL_DATA_WITHOUT_AIPP = 0, - ACL_DATA_WITH_STATIC_AIPP, - ACL_DATA_WITH_DYNAMIC_AIPP, - ACL_DYNAMIC_AIPP_NODE + ACL_DATA_WITHOUT_AIPP = 0, + ACL_DATA_WITH_STATIC_AIPP, + ACL_DATA_WITH_DYNAMIC_AIPP, + ACL_DYNAMIC_AIPP_NODE } aclmdlInputAippType; typedef struct aclmdlIODims { - char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ - size_t dimCount; /**< dim array count */ - int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ + char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ + size_t dimCount; /**< dim array count */ + int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ } aclmdlIODims; typedef struct aclAippDims { - aclmdlIODims srcDims; /**< input dims before model transform */ - size_t srcSize; /**< input size before model transform */ - aclmdlIODims aippOutdims; /**< aipp output dims */ - size_t aippOutSize; /**< aipp output size */ + aclmdlIODims srcDims; /**< input dims before model transform */ + size_t srcSize; /**< input size before model transform */ + aclmdlIODims aippOutdims; /**< aipp output dims */ + size_t aippOutSize; /**< aipp output size */ } aclAippDims; typedef struct aclmdlBatch { - size_t batchCount; /**< batch array count */ - uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ + size_t batchCount; /**< batch array count */ + uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ } aclmdlBatch; typedef struct aclmdlHW { - size_t hwCount; /**< height&width array count */ - uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ + size_t hwCount; /**< height&width array count */ + uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ } aclmdlHW; typedef struct aclAippInfo { - aclAippInputFormat inputFormat; - int32_t srcImageSizeW; - int32_t srcImageSizeH; - int8_t cropSwitch; - int32_t loadStartPosW; - int32_t loadStartPosH; - int32_t cropSizeW; - int32_t cropSizeH; - int8_t resizeSwitch; - int32_t resizeOutputW; - int32_t resizeOutputH; - int8_t paddingSwitch; - int32_t leftPaddingSize; - int32_t rightPaddingSize; - int32_t topPaddingSize; - int32_t bottomPaddingSize; - int8_t cscSwitch; - int8_t rbuvSwapSwitch; - int8_t axSwapSwitch; - int8_t singleLineMode; - int32_t matrixR0C0; - int32_t matrixR0C1; - int32_t matrixR0C2; - int32_t matrixR1C0; - int32_t matrixR1C1; - int32_t matrixR1C2; - int32_t matrixR2C0; - int32_t matrixR2C1; - int32_t matrixR2C2; - int32_t outputBias0; - int32_t outputBias1; - int32_t outputBias2; - int32_t inputBias0; - int32_t inputBias1; - int32_t inputBias2; - int32_t meanChn0; - int32_t meanChn1; - int32_t meanChn2; - int32_t meanChn3; - float minChn0; - float minChn1; - float minChn2; - float minChn3; - float varReciChn0; - float varReciChn1; - float varReciChn2; - float varReciChn3; - aclFormat srcFormat; - aclDataType srcDatatype; - size_t srcDimNum; - size_t shapeCount; - aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; - aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ + aclAippInputFormat inputFormat; + int32_t srcImageSizeW; + int32_t srcImageSizeH; + int8_t cropSwitch; + 
int32_t loadStartPosW; + int32_t loadStartPosH; + int32_t cropSizeW; + int32_t cropSizeH; + int8_t resizeSwitch; + int32_t resizeOutputW; + int32_t resizeOutputH; + int8_t paddingSwitch; + int32_t leftPaddingSize; + int32_t rightPaddingSize; + int32_t topPaddingSize; + int32_t bottomPaddingSize; + int8_t cscSwitch; + int8_t rbuvSwapSwitch; + int8_t axSwapSwitch; + int8_t singleLineMode; + int32_t matrixR0C0; + int32_t matrixR0C1; + int32_t matrixR0C2; + int32_t matrixR1C0; + int32_t matrixR1C1; + int32_t matrixR1C2; + int32_t matrixR2C0; + int32_t matrixR2C1; + int32_t matrixR2C2; + int32_t outputBias0; + int32_t outputBias1; + int32_t outputBias2; + int32_t inputBias0; + int32_t inputBias1; + int32_t inputBias2; + int32_t meanChn0; + int32_t meanChn1; + int32_t meanChn2; + int32_t meanChn3; + float minChn0; + float minChn1; + float minChn2; + float minChn3; + float varReciChn0; + float varReciChn1; + float varReciChn2; + float varReciChn3; + aclFormat srcFormat; + aclDataType srcDatatype; + size_t srcDimNum; + size_t shapeCount; + aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; + aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ } aclAippInfo; /** @@ -292,7 +292,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc, +ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, + aclTensorDesc *tensorDesc, size_t index); /** @@ -354,7 +355,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, + uint32_t *modelId); /** * @ingroup AscendCL @@ -376,8 +378,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, - size_t workSize, void *weightPtr, size_t weightSize); +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, + uint32_t *modelId, void *workPtr, size_t workSize, + void *weightPtr, size_t weightSize); /** * @ingroup AscendCL @@ -400,9 +403,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui * @retval ACL_SUCCESS The function is successfully executed. 
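A typical call sequence for the *WithMem loaders reflowed above: query how much working and weight memory the model needs, allocate both on the device, then load. aclmdlQuerySize and aclrtFree are assumed from the same headers (they sit outside this hunk), and error handling is abbreviated:

#include "acl/acl.h"
#include "acl/acl_mdl.h"

aclError LoadOmWithOwnMem(const char *omPath, uint32_t *modelId) {
  size_t workSize = 0, weightSize = 0;
  aclError ret = aclmdlQuerySize(omPath, &workSize, &weightSize);  /* assumed: declared in acl_mdl.h */
  if (ret != ACL_SUCCESS) { return ret; }
  void *workPtr = NULL, *weightPtr = NULL;
  ret = aclrtMalloc(&workPtr, workSize, ACL_MEM_MALLOC_HUGE_FIRST);
  if (ret != ACL_SUCCESS) { return ret; }
  ret = aclrtMalloc(&weightPtr, weightSize, ACL_MEM_MALLOC_HUGE_FIRST);
  if (ret != ACL_SUCCESS) { aclrtFree(workPtr); return ret; }
  /* The caller keeps ownership of both buffers until the model is unloaded. */
  return aclmdlLoadFromFileWithMem(omPath, modelId, workPtr, workSize, weightPtr, weightSize);
}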
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, - void *workPtr, size_t workSize, void *weightPtr, - size_t weightSize); +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, + uint32_t *modelId, void *workPtr, size_t workSize, + void *weightPtr, size_t weightSize); /** * @ingroup AscendCL @@ -437,8 +440,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, - const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, - size_t outputQNum); + const uint32_t *inputQ, size_t inputQNum, + const uint32_t *outputQ, size_t outputQNum); /** * @ingroup AscendCL @@ -468,8 +471,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem */ -ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, + aclmdlDataset *output, aclrtStream stream); /** * @ingroup AscendCL @@ -644,7 +647,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, * @param modelDesc [IN] model description * @param opName [IN] op name * @param attr [IN] attr name - * + * * @retval the attr value */ ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr); @@ -856,11 +859,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, - int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, - int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, - int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, + int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, + int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, + int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); @@ -876,7 +879,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); /** @@ -890,7 +893,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); /** @@ -905,7 +908,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, int32_t srcImageSizeH); @@ -925,10 +928,14 @@ 
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, - int32_t scfInputSizeH, int32_t scfOutputSizeW, - int32_t scfOutputSizeH, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, + int8_t scfSwitch, + int32_t scfInputSizeW, + int32_t scfInputSizeH, + int32_t scfOutputSizeW, + int32_t scfOutputSizeH, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -946,9 +953,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, - int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, + int8_t cropSwitch, + int32_t cropStartPosW, + int32_t cropStartPosH, + int32_t cropSizeW, + int32_t cropSizeH, uint64_t batchIndex); /** @@ -967,7 +978,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, int32_t paddingSizeTop, int32_t paddingSizeBottom, int32_t paddingSizeLeft, int32_t paddingSizeRight, @@ -988,10 +999,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, - int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, - int16_t dtcPixelMeanChn3, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, + int16_t dtcPixelMeanChn0, + int16_t dtcPixelMeanChn1, + int16_t dtcPixelMeanChn2, + int16_t dtcPixelMeanChn3, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -1008,10 +1022,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, - float dtcPixelMinChn1, float dtcPixelMinChn2, - float dtcPixelMinChn3, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, + float dtcPixelMinChn0, + float dtcPixelMinChn1, + float dtcPixelMinChn2, + float dtcPixelMinChn3, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -1028,10 +1045,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, - float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, - float dtcPixelVarReciChn3, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, + float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, + float dtcPixelVarReciChn2, + float dtcPixelVarReciChn3, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -1047,8 +1067,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, * * @see aclmdlLoadFromFile | 
aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, + aclmdlDataset *dataset, + size_t index, const aclmdlAIPP *aippParmsSet); /** @@ -1065,8 +1087,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, + aclmdlDataset *dataset, + size_t index, const aclmdlAIPP *aippParmsSet); /** @@ -1084,8 +1108,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, + size_t index, + aclmdlInputAippType *type, size_t *dynamicAttachedDataIndex); /** @@ -1102,7 +1128,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); /** @@ -1121,11 +1147,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind * * @retval ACL_SUCCESS The function is successfully executed * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, - char *opName, size_t opNameLen, aclTensorDesc **inputDesc, - size_t *numInputs, aclTensorDesc **outputDesc, - size_t *numOutputs); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, + uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs, + aclTensorDesc **outputDesc, size_t *numOutputs); /** * @ingroup AscendCL @@ -1133,7 +1158,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_ * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); /** @@ -1144,7 +1169,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); /** @@ -1153,7 +1178,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); /** @@ -1165,7 +1190,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); * * @retval ACL_SUCCESS The function is successfully executed. 
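The AIPP setters above are designed to be chained on one aclmdlAIPP parameter set, which is then attached to the model's dynamic-AIPP data input with aclmdlSetInputAIPP. A sketch of that flow; aclmdlCreateAIPP(batchSize) and aclmdlDestroyAIPP are assumed from elsewhere in this header, and the concrete image size/format are illustrative:

#include "acl/acl_mdl.h"

aclError BindAipp(uint32_t modelId, aclmdlDataset *input, size_t aippIndex) {
  aclmdlAIPP *aipp = aclmdlCreateAIPP(1);  /* assumed signature: batch count; NULL on failure */
  aclError ret = aclmdlSetAIPPInputFormat(aipp, ACL_YUV420SP_U8);
  if (ret == ACL_SUCCESS) { ret = aclmdlSetAIPPSrcImageSize(aipp, 416, 416); }
  if (ret == ACL_SUCCESS) { ret = aclmdlSetAIPPRbuvSwapSwitch(aipp, 0); }
  /* aippIndex is the index of the "ascend_dynamic_aipp_data" input (ACL_DYNAMIC_AIPP_NAME). */
  if (ret == ACL_SUCCESS) { ret = aclmdlSetInputAIPP(modelId, input, aippIndex, aipp); }
  aclmdlDestroyAIPP(aipp);  /* assumed counterpart of aclmdlCreateAIPP */
  return ret;
}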
* @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); /** @@ -1175,7 +1200,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand * @retval the aclmdlConfigHandle pointer * * @see aclmdlDestroyConfigHandle - */ +*/ ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); /** @@ -1204,7 +1229,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, - const void *attrValue, size_t valueSize); + const void *attrValue, size_t valueSize); /** * @ingroup AscendCL @@ -1222,4 +1247,4 @@ ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelD } #endif -#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ +#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h index d2e59bfb..b1be0d6e 100644 --- a/inc/external/acl/acl_op.h +++ b/inc/external/acl/acl_op.h @@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length); static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; typedef enum aclEngineType { - ACL_ENGINE_SYS, - ACL_ENGINE_AICORE, - ACL_ENGINE_VECTOR, + ACL_ENGINE_SYS, + ACL_ENGINE_AICORE, + ACL_ENGINE_VECTOR, } aclopEngineType; /** @@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, - const uint8_t *values); + const uint8_t *values); /** * @ingroup AscendCL @@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, - const int64_t *values); + const int64_t *values); /** * @ingroup AscendCL @@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, - const float *values); + const float *values); /** * @ingroup AscendCL @@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char * * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, - const char **values); + const char **values); /** * @ingroup AscendCL @@ -208,8 +208,11 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char * @retval ACL_SUCCESS The function is successfully executed. 
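The aclopSetAttr* setters above populate an aclopAttr before it is handed to the execute/compile entry points further down. A short sketch; aclopCreateAttr and aclopDestroyAttr are assumed from earlier in acl_op.h (outside this hunk), and the op attribute name "perm" is illustrative:

#include "acl/acl_op.h"

aclopAttr *BuildTransposeAttr(void) {
  aclopAttr *attr = aclopCreateAttr();  /* assumed constructor, NULL on failure */
  if (attr == NULL) { return NULL; }
  int64_t perm[4] = {0, 3, 1, 2};       /* NHWC -> NCHW permutation */
  if (aclopSetAttrListInt(attr, "perm", 4, perm) != ACL_SUCCESS) {
    aclopDestroyAttr(attr);             /* assumed destructor */
    return NULL;
  }
  return attr;
}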
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, - const int *numValues, const int64_t *const values[]); +ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, + const char *attrName, + int numLists, + const int *numValues, + const int64_t *const values[]); /** * @ingroup AscendCL @@ -239,10 +242,15 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char * @retval OtherValues Failure */ ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") -ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], - const aclDataBuffer *const inputs[], int numOutputs, - const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], - const aclopAttr *attr, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + const aclDataBuffer *const inputs[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + aclDataBuffer *const outputs[], + const aclopAttr *attr, + aclrtStream stream); /** * @ingroup AscendCL @@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], - aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], - aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, + int numInputs, + aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], + int numOutputs, + aclTensorDesc *outputDesc[], + aclDataBuffer *outputs[], + aclopAttr *attr, + aclrtStream stream); /** * @ingroup AscendCL @@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, - const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, +ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *opAttr, aclopHandle **handle); /** @@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); * * @see aclopCreateHandle | aclCreateDataBuffer */ -ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, - const aclDataBuffer *const inputs[], int numOutputs, - aclDataBuffer *const outputs[], aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, + int numInputs, + const aclDataBuffer *const inputs[], + int numOutputs, + aclDataBuffer *const outputs[], + aclrtStream stream); /** * @ingroup AscendCL @@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp * @retval ACL_SUCCESS The function is successfully executed. 
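The comment block reflowed here documents aclopCast, which needs a descriptor/buffer pair on each side. A sketch casting n floats to fp16 on device memory, assuming aclCreateDataBuffer(void *data, size_t size) from acl_base.h (it is only @see-referenced in this hunk):

#include "acl/acl.h"
#include "acl/acl_op.h"

aclError CastFp32ToFp16(void *devSrc, void *devDst, int64_t n, aclrtStream stream) {
  int64_t dims[1] = {n};
  aclTensorDesc *srcDesc = aclCreateTensorDesc(ACL_FLOAT, 1, dims, ACL_FORMAT_ND);
  aclTensorDesc *dstDesc = aclCreateTensorDesc(ACL_FLOAT16, 1, dims, ACL_FORMAT_ND);
  aclDataBuffer *srcBuf = aclCreateDataBuffer(devSrc, (size_t)n * sizeof(float));  /* assumed signature */
  aclDataBuffer *dstBuf = aclCreateDataBuffer(devDst, (size_t)n * 2);              /* fp16 is 2 bytes */
  aclError ret = aclopCast(srcDesc, srcBuf, dstDesc, dstBuf, 0 /* no truncate */, stream);
  /* Destroy the descriptors and buffers only after the stream has been synchronized. */
  return ret;
}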
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, - const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, +ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, + const aclDataBuffer *srcBuffer, + const aclTensorDesc *dstDesc, + aclDataBuffer *dstBuffer, + uint8_t truncate, aclrtStream stream); /** @@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, +ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, + aclTensorDesc *dstDesc, + uint8_t truncate, aclopHandle **handle); + /** * @ingroup AscendCL * @brief create kernel @@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac * * @see aclopCompile */ -ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, - void *binData, int binSize, aclopEngineType enginetype, +ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, + const char *kernelId, + const char *kernelName, + void *binData, + int binSize, + aclopEngineType enginetype, aclDataDeallocator deallocator); + /** * @ingroup AscendCL * @brief create kernel @@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, +typedef aclError (*aclopCompileFunc)(int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *opAttr, aclopKernelDesc *aclopKernelDesc); /** @@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, - const void *args, uint32_t argSize); +ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, + const char *kernelId, + uint32_t blockDim, + const void *args, + uint32_t argSize); /** * @ingroup AscendCL @@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, - const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *attr); +ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *attr); /** * @ingroup AscendCL @@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs * @retval ACL_SUCCESS The function is successfully executed. 
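When the same cast runs many times, the handle variant above amortizes operator selection: create the handle once with aclopCreateHandleForCast, then reuse it through aclopExecWithHandle (declared earlier in this header). A sketch reusing the descriptors and buffers from the previous example:

aclError RepeatCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc,
                    const aclDataBuffer *srcBuf, aclDataBuffer *dstBuf,
                    int iterations, aclrtStream stream) {
  aclopHandle *handle = NULL;
  aclError ret = aclopCreateHandleForCast(srcDesc, dstDesc, 0, &handle);
  for (int i = 0; ret == ACL_SUCCESS && i < iterations; ++i) {
    const aclDataBuffer *inputs[1] = {srcBuf};
    aclDataBuffer *outputs[1] = {dstBuf};
    ret = aclopExecWithHandle(handle, 1, inputs, 1, outputs, stream);
  }
  if (handle != NULL) { aclopDestroyHandle(handle); }
  return ret;
}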
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], - aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], +ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, + int numInputs, + aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], + int numOutputs, + aclTensorDesc *outputDesc[], aclopAttr *attr); + #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_OP_H_ +#endif // INC_EXTERNAL_ACL_ACL_OP_H_ diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h index d9d1b3da..353d2a1a 100644 --- a/inc/external/acl/acl_op_compiler.h +++ b/inc/external/acl/acl_op_compiler.h @@ -24,22 +24,28 @@ extern "C" { #endif -typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; +typedef enum aclCompileType { + ACL_COMPILE_SYS, + ACL_COMPILE_UNREGISTERED +} aclopCompileType; typedef enum { - ACL_PRECISION_MODE, - ACL_AICORE_NUM, - ACL_AUTO_TUNE_MODE, - ACL_OP_SELECT_IMPL_MODE, - ACL_OPTYPELIST_FOR_IMPLMODE, - ACL_OP_DEBUG_LEVEL, - ACL_DEBUG_DIR, - ACL_OP_COMPILER_CACHE_MODE, - ACL_OP_COMPILER_CACHE_DIR, - ACL_OP_PERFORMANCE_MODE + ACL_PRECISION_MODE, + ACL_AICORE_NUM, + ACL_AUTO_TUNE_MODE, + ACL_OP_SELECT_IMPL_MODE, + ACL_OPTYPELIST_FOR_IMPLMODE, + ACL_OP_DEBUG_LEVEL, + ACL_DEBUG_DIR, + ACL_OP_COMPILER_CACHE_MODE, + ACL_OP_COMPILER_CACHE_DIR, + ACL_OP_PERFORMANCE_MODE } aclCompileOpt; -typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; +typedef enum aclCompileFlag { + ACL_OP_COMPILE_DEFAULT, + ACL_OP_COMPILE_FUZZ +} aclOpCompileFlag; /** * @ingroup AscendCL @@ -59,10 +65,15 @@ typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclO * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], - int numOutputs, const aclTensorDesc *const outputDesc[], - const aclopAttr *attr, aclopEngineType engineType, - aclopCompileType compileFlag, const char *opPath); +ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *attr, + aclopEngineType engineType, + aclopCompileType compileFlag, + const char *opPath); /** * @ingroup AscendCL @@ -85,10 +96,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con * @retval ACL_SUCCESS The function is successfully executed. 
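aclopCompile above also works as a warm-up: build the kernel once (for example at startup) so the first execution does not pay compilation cost. A sketch for a one-input/one-output Cast using the built-in compiler; passing NULL for attr and opPath under ACL_COMPILE_SYS is an assumption this hunk does not spell out:

aclError PrecompileCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc) {
  const aclTensorDesc *inputs[1] = {srcDesc};
  const aclTensorDesc *outputs[1] = {dstDesc};
  return aclopCompile("Cast", 1, inputs, 1, outputs,
                      NULL /* attr: none needed here (assumed) */,
                      ACL_ENGINE_SYS, ACL_COMPILE_SYS,
                      NULL /* opPath: unused for the built-in compiler (assumed) */);
}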
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( - const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], - int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, - aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, + int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], + int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], + const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, + const char *opPath, aclrtStream stream); /** * @ingroup AscendCL @@ -118,4 +130,4 @@ ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); } #endif -#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ +#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h index 3784d8c6..f4e81065 100644 --- a/inc/external/acl/acl_prof.h +++ b/inc/external/acl/acl_prof.h @@ -23,30 +23,37 @@ extern "C" { #endif -#define ACL_PROF_ACL_API 0x0001 -#define ACL_PROF_TASK_TIME 0x0002 -#define ACL_PROF_AICORE_METRICS 0x0004 -#define ACL_PROF_AICPU 0x0008 +#define ACL_PROF_ACL_API 0x0001 +#define ACL_PROF_TASK_TIME 0x0002 +#define ACL_PROF_AICORE_METRICS 0x0004 +#define ACL_PROF_AICPU 0x0008 /** * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead */ -#define ACL_PROF_MAX_OP_NAME_LEN 257 -#define ACL_PROF_MAX_OP_TYPE_LEN 65 +#define ACL_PROF_MAX_OP_NAME_LEN 257 +#define ACL_PROF_MAX_OP_TYPE_LEN 65 typedef enum { - ACL_AICORE_ARITHMETIC_UTILIZATION = 0, - ACL_AICORE_PIPE_UTILIZATION = 1, - ACL_AICORE_MEMORY_BANDWIDTH = 2, - ACL_AICORE_L0B_AND_WIDTH = 3, - ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, - ACL_AICORE_NONE = 0xFF + ACL_AICORE_ARITHMETIC_UTILIZATION = 0, + ACL_AICORE_PIPE_UTILIZATION = 1, + ACL_AICORE_MEMORY_BANDWIDTH = 2, + ACL_AICORE_L0B_AND_WIDTH = 3, + ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, + ACL_AICORE_NONE = 0xFF } aclprofAicoreMetrics; +typedef enum { + ACL_STEP_START = 0, // step start + ACL_STEP_END = 1 // step end +} aclprofStepTag; + + typedef struct aclprofConfig aclprofConfig; typedef struct aclprofStopConfig aclprofStopConfig; typedef struct aclprofAicoreEvents aclprofAicoreEvents; typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; +typedef struct aclprofStepInfo aclprofStepInfo; /** * @ingroup AscendCL @@ -101,8 +108,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); * @see aclprofDestroyConfig */ ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, - aclprofAicoreMetrics aicoreMetrics, - aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); + aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); /** * @ingroup AscendCL @@ -142,7 +148,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); * * @see aclprofModelUnSubscribe */ -ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); +ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, + const aclprofSubscribeConfig *profSubscribeConfig); /** * @ingroup AscendCL @@ -170,7 +177,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); * @see 
aclprofDestroySubscribeConfig */ ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, - aclprofAicoreMetrics aicoreMetrics, void *fd); + aclprofAicoreMetrics aicoreMetrics, void *fd); /** * @ingroup AscendCL @@ -222,7 +229,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, - size_t *opTypeLen); + size_t *opTypeLen); /** * @ingroup AscendCL @@ -237,8 +244,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opIn * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, - size_t opTypeLen); +ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, + char *opType, size_t opTypeLen); /** * @ingroup AscendCL @@ -253,7 +260,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, - size_t *opNameLen); + size_t *opNameLen); /** * @ingroup AscendCL @@ -268,8 +275,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opIn * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, - size_t opNameLen); +ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, + char *opName, size_t opNameLen); /** * @ingroup AscendCL @@ -322,8 +329,38 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI */ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); +/** + * @ingroup AscendCL + * @brief get the timestamp of a step start or end point + * + * @param stepInfo [IN] pointer to stepInfo data + * @param tag [IN] step start or end flag + * @param stream [IN] stream info + * + * @retval ACL_SUCCESS for success, OtherValues for failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo* stepInfo, aclprofStepTag tag, aclrtStream stream); + + /** + * @ingroup AscendCL + * @brief create pointer to aclprofStepInfo data + * + * + * @retval aclprofStepInfo pointer + */ +ACL_FUNC_VISIBILITY aclprofStepInfo* aclprofCreateStepInfo(); + + /** + * @ingroup AscendCL + * @brief destroy aclprofStepInfo pointer + * + * + * @retval void + */ +ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo* stepinfo); + #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_PROF_H_ +#endif // INC_EXTERNAL_ACL_PROF_H_ diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 5ee70724..3c777ecc 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -28,63 +28,63 @@ extern "C" { #define ACL_EVENT_TIME_LINE 0x00000008u typedef enum aclrtRunMode { - ACL_DEVICE, - ACL_HOST, + ACL_DEVICE, + ACL_HOST, } aclrtRunMode; typedef enum aclrtTsId { - ACL_TS_ID_AICORE = 0, - ACL_TS_ID_AIVECTOR = 1, - ACL_TS_ID_RESERVED = 2, + ACL_TS_ID_AICORE = 0, + ACL_TS_ID_AIVECTOR = 1, + ACL_TS_ID_RESERVED = 2, } aclrtTsId; typedef enum aclrtEventStatus { - ACL_EVENT_STATUS_COMPLETE = 0, - 
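The step-info entry points added in the acl_prof.h hunk above (aclprofCreateStepInfo, aclprofGetStepTimestamp with ACL_STEP_START/ACL_STEP_END, aclprofDestroyStepInfo) are meant to bracket one step of work on a stream. A sketch of the call order these declarations suggest:

#include "acl/acl_prof.h"

aclError ProfileOneStep(aclrtStream stream) {
  aclprofStepInfo *step = aclprofCreateStepInfo();  /* NULL on failure */
  aclError ret = aclprofGetStepTimestamp(step, ACL_STEP_START, stream);
  /* ... enqueue the model execution / kernels for this step on `stream` ... */
  if (ret == ACL_SUCCESS) { ret = aclprofGetStepTimestamp(step, ACL_STEP_END, stream); }
  aclprofDestroyStepInfo(step);
  return ret;
}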
ACL_EVENT_STATUS_NOT_READY = 1, + ACL_EVENT_STATUS_RESERVED = 2, } aclrtEventStatus; typedef enum aclrtCallbackBlockType { - ACL_CALLBACK_NO_BLOCK, - ACL_CALLBACK_BLOCK, + ACL_CALLBACK_NO_BLOCK, + ACL_CALLBACK_BLOCK, } aclrtCallbackBlockType; typedef enum aclrtMemcpyKind { - ACL_MEMCPY_HOST_TO_HOST, - ACL_MEMCPY_HOST_TO_DEVICE, - ACL_MEMCPY_DEVICE_TO_HOST, - ACL_MEMCPY_DEVICE_TO_DEVICE, + ACL_MEMCPY_HOST_TO_HOST, + ACL_MEMCPY_HOST_TO_DEVICE, + ACL_MEMCPY_DEVICE_TO_HOST, + ACL_MEMCPY_DEVICE_TO_DEVICE, } aclrtMemcpyKind; typedef enum aclrtMemMallocPolicy { - ACL_MEM_MALLOC_HUGE_FIRST, - ACL_MEM_MALLOC_HUGE_ONLY, - ACL_MEM_MALLOC_NORMAL_ONLY, - ACL_MEM_MALLOC_HUGE_FIRST_P2P, - ACL_MEM_MALLOC_HUGE_ONLY_P2P, - ACL_MEM_MALLOC_NORMAL_ONLY_P2P, + ACL_MEM_MALLOC_HUGE_FIRST, + ACL_MEM_MALLOC_HUGE_ONLY, + ACL_MEM_MALLOC_NORMAL_ONLY, + ACL_MEM_MALLOC_HUGE_FIRST_P2P, + ACL_MEM_MALLOC_HUGE_ONLY_P2P, + ACL_MEM_MALLOC_NORMAL_ONLY_P2P, } aclrtMemMallocPolicy; typedef enum aclrtMemAttr { - ACL_DDR_MEM, - ACL_HBM_MEM, - ACL_DDR_MEM_HUGE, - ACL_DDR_MEM_NORMAL, - ACL_HBM_MEM_HUGE, - ACL_HBM_MEM_NORMAL, - ACL_DDR_MEM_P2P_HUGE, - ACL_DDR_MEM_P2P_NORMAL, - ACL_HBM_MEM_P2P_HUGE, - ACL_HBM_MEM_P2P_NORMAL, + ACL_DDR_MEM, + ACL_HBM_MEM, + ACL_DDR_MEM_HUGE, + ACL_DDR_MEM_NORMAL, + ACL_HBM_MEM_HUGE, + ACL_HBM_MEM_NORMAL, + ACL_DDR_MEM_P2P_HUGE, + ACL_DDR_MEM_P2P_NORMAL, + ACL_HBM_MEM_P2P_HUGE, + ACL_HBM_MEM_P2P_NORMAL, } aclrtMemAttr; typedef enum aclrtGroupAttr { - ACL_GROUP_AICORE_INT, - ACL_GROUP_AIV_INT, - ACL_GROUP_AIC_INT, - ACL_GROUP_SDMANUM_INT, - ACL_GROUP_ASQNUM_INT, - ACL_GROUP_GROUPID_INT + ACL_GROUP_AICORE_INT, + ACL_GROUP_AIV_INT, + ACL_GROUP_AIC_INT, + ACL_GROUP_SDMANUM_INT, + ACL_GROUP_ASQNUM_INT, + ACL_GROUP_GROUPID_INT } aclrtGroupAttr; typedef struct tagRtGroupInfo aclrtGroupInfo; @@ -487,7 +487,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre */ ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); -/** + /** * @ingroup AscendCL * @brief Queries an event's status * @@ -549,7 +549,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, * * @see aclrtFree | acldvppMalloc | aclrtMallocCached */ -ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); +ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, + size_t size, + aclrtMemMallocPolicy policy); /** * @ingroup AscendCL @@ -572,7 +574,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal * * @see aclrtFree | aclrtMalloc */ -ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); +ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, + size_t size, + aclrtMemMallocPolicy policy); /** * @ingroup AscendCL @@ -663,7 +667,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); * @retval ACL_SUCCESS The function is successfully executed. 
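aclrtMemcpy above is the synchronous copy; destMax guards the destination buffer independently of count. With aclrtMalloc it gives the usual host-to-device round trip (aclrtFree is the @see-referenced counterpart):

#include "acl/acl_rt.h"

aclError RoundTrip(const float *hostIn, float *hostOut, size_t n) {
  size_t bytes = n * sizeof(float);
  void *dev = NULL;
  aclError ret = aclrtMalloc(&dev, bytes, ACL_MEM_MALLOC_HUGE_FIRST);
  if (ret != ACL_SUCCESS) { return ret; }
  ret = aclrtMemcpy(dev, bytes, hostIn, bytes, ACL_MEMCPY_HOST_TO_DEVICE);
  /* ... launch work that reads/writes `dev` and wait for it to finish ... */
  if (ret == ACL_SUCCESS) { ret = aclrtMemcpy(hostOut, bytes, dev, bytes, ACL_MEMCPY_DEVICE_TO_HOST); }
  aclrtFree(dev);
  return ret;
}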
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, +ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, + size_t destMax, + const void *src, + size_t count, aclrtMemcpyKind kind); /** @@ -710,31 +717,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t * * @see aclrtSynchronizeStream */ -ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, - aclrtMemcpyKind kind, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, + size_t destMax, + const void *src, + size_t count, + aclrtMemcpyKind kind, + aclrtStream stream); /** - * @ingroup AscendCL - * @brief Asynchronous initialize memory - * and set contents of memory to specified value async - * - * @par Function +* @ingroup AscendCL +* @brief Asynchronous initialize memory +* and set contents of memory to specified value async +* +* @par Function * The memory to be initialized is on the Host or device side, * and the system determines whether * it is host or device according to the address * - * @param devPtr [IN] destination address pointer - * @param maxCount [IN] Max length of destination address memory - * @param value [IN] set value - * @param count [IN] the number of byte to set - * @param stream [IN] asynchronized task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtSynchronizeStream - */ -ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, +* @param devPtr [IN] destination address pointer +* @param maxCount [IN] Max length of destination address memory +* @param value [IN] set value +* @param count [IN] the number of bytes to set +* @param stream [IN] asynchronous task stream +* +* @retval ACL_SUCCESS The function is successfully executed. 
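The async variants above only enqueue work on the stream; nothing is observable until aclrtSynchronizeStream returns. A sketch clearing then filling a device buffer; aclrtCreateStream and aclrtDestroyStream are assumed from elsewhere in acl_rt.h (outside this hunk):

#include "acl/acl_rt.h"

aclError ClearThenFill(void *devPtr, size_t bytes, const void *hostSrc) {
  aclrtStream stream = NULL;
  aclError ret = aclrtCreateStream(&stream);  /* assumed, declared elsewhere in acl_rt.h */
  if (ret != ACL_SUCCESS) { return ret; }
  ret = aclrtMemsetAsync(devPtr, bytes, 0, bytes, stream);
  if (ret == ACL_SUCCESS) {
    ret = aclrtMemcpyAsync(devPtr, bytes, hostSrc, bytes, ACL_MEMCPY_HOST_TO_DEVICE, stream);
  }
  if (ret == ACL_SUCCESS) { ret = aclrtSynchronizeStream(stream); }  /* completion point */
  aclrtDestroyStream(stream);  /* assumed counterpart of aclrtCreateStream */
  return ret;
}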
+* @retval OtherValues Failure +* +* @see aclrtSynchronizeStream +*/ +ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, + size_t maxCount, + int32_t value, + size_t count, aclrtStream stream); /** @@ -880,8 +894,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); * * @see aclrtGetGroupCount | aclrtGetAllGroupInfo */ -ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex, - aclrtGroupAttr attr, void *attrValue, size_t valueLen, +ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, + int32_t groupIndex, + aclrtGroupAttr attr, + void *attrValue, + size_t valueLen, size_t *paramRetSize); /** @@ -955,4 +972,5 @@ ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); } #endif -#endif // INC_EXTERNAL_ACL_ACL_RT_H_ +#endif // INC_EXTERNAL_ACL_ACL_RT_H_ + diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h index c357518d..61995121 100644 --- a/inc/external/acl/acl_tdt.h +++ b/inc/external/acl/acl_tdt.h @@ -24,10 +24,10 @@ extern "C" { #endif enum acltdtTensorType { - ACL_TENSOR_DATA_UNDEFINED = -1, - ACL_TENSOR_DATA_TENSOR, - ACL_TENSOR_DATA_END_OF_SEQUENCE, - ACL_TENSOR_DATA_ABNORMAL + ACL_TENSOR_DATA_UNDEFINED = -1, + ACL_TENSOR_DATA_TENSOR, + ACL_TENSOR_DATA_END_OF_SEQUENCE, + ACL_TENSOR_DATA_ABNORMAL }; typedef struct acltdtDataItem acltdtDataItem; @@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem * * * @retval null for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); /** @@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt * * @retval 0 for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); /** @@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI * * @retval 0 for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); /** @@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte * * @see acltdtDestroyDataItem */ -ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, - aclDataType dataType, void *data, size_t size); +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, + const int64_t *dims, + size_t dimNum, + aclDataType dataType, + void *data, + size_t size); /** * @ingroup AscendCL @@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); * * @see acltdtReceiveTensor */ -ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, +ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, + const acltdtDataset *dataset, int32_t timeout); /** @@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, * * @see acltdtSendTensor */ -ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, +ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, + acltdtDataset *dataset, int32_t timeout); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ +#endif //INC_EXTERNAL_ACL_ACL_TDT_H_ + diff 
--git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index a1392cc6..c5423d36 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -23,87 +23,87 @@ extern "C" { #endif -static const int32_t ACL_RT_SUCCESS = 0; // success +static const int32_t ACL_RT_SUCCESS = 0; // success -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type -static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static 
const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
+static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
 
-static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
-static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
-static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
-static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
-static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
-static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
-static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
-static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
-static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
-static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
-static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
-static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
+static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
 
-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
-static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
-static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
-static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
-static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 
507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error -static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error -static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect #ifdef __cplusplus } #endif -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h index 3d81eb2b..a2bd8c61 100644 --- a/inc/external/acl/ops/acl_cblas.h +++ b/inc/external/acl/ops/acl_cblas.h @@ -23,9 +23,17 @@ extern "C" { #endif -typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; +typedef enum aclTransType { + ACL_TRANS_N, + ACL_TRANS_T, + ACL_TRANS_NZ, + ACL_TRANS_NZ_T +} aclTransType; -typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; +typedef enum aclComputeType { + ACL_COMPUTE_HIGH_PRECISION, + ACL_COMPUTE_LOW_PRECISION +} aclComputeType; /** * @ingroup AscendCL @@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS * * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, - aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, - const void *beta, void *y, int incy, aclDataType dataTypeY, - aclComputeType type, aclrtStream stream); +*/ +ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, + const void *alpha, const void *a, int lda, aclDataType dataTypeA, + const void *x, int incx, aclDataType dataTypeX, + const void *beta, void *y, int incy, aclDataType dataTypeY, + aclComputeType type, aclrtStream stream); /** * @ingroup AscendCL @@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, - aclDataType dataTypeX, aclDataType dataTypeY, - aclComputeType type, aclopHandle **handle); +*/ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, + int m, + int n, + aclDataType dataTypeA, + aclDataType dataTypeX, + aclDataType dataTypeY, + aclComputeType type, + aclopHandle **handle); /** * @ingroup AscendCL @@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, - const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, - const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, + int m, + int n, + const aclFloat16 *alpha, + const aclFloat16 *a, + int lda, + const aclFloat16 *x, + int incx, + const aclFloat16 *beta, + aclFloat16 *y, + int incy, + aclComputeType type, aclrtStream stream); /** @@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, + int m, + int n, + aclComputeType type, aclopHandle **handle); /** @@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, - int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, - int incy, aclComputeType type, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, + int m, + int n, + const int32_t *alpha, + const int8_t *a, + int lda, + const int8_t *x, + int incx, + const int32_t *beta, + int32_t *y, + int incy, + aclComputeType type, + aclrtStream stream); /** * @ingroup AscendCL @@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co * @retval ACL_SUCCESS The function is successfully executed. 
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type,
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
+                                                          int m,
+                                                          int n,
+                                                          aclComputeType type,
                                                           aclopHandle **handle);
 
 /**
@@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
-                                           int k, const void *alpha, const void *matrixA, int lda,
-                                           aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
-                                           const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
-                                           aclComputeType type, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
+                                           aclTransType transB,
+                                           aclTransType transC,
+                                           int m,
+                                           int n,
+                                           int k,
+                                           const void *alpha,
+                                           const void *matrixA,
+                                           int lda,
+                                           aclDataType dataTypeA,
+                                           const void *matrixB,
+                                           int ldb,
+                                           aclDataType dataTypeB,
+                                           const void *beta,
+                                           void *matrixC,
+                                           int ldc,
+                                           aclDataType dataTypeC,
+                                           aclComputeType type,
+                                           aclrtStream stream);
+
 
 /**
 * @ingroup AscendCL
@@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
-                                                          int m, int n, int k, aclDataType dataTypeA,
-                                                          aclDataType dataTypeB, aclDataType dataTypeC,
-                                                          aclComputeType type, aclopHandle **handle);
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
+                                                          aclTransType transB,
+                                                          aclTransType transC,
+                                                          int m,
+                                                          int n,
+                                                          int k,
+                                                          aclDataType dataTypeA,
+                                                          aclDataType dataTypeB,
+                                                          aclDataType dataTypeC,
+                                                          aclComputeType type,
+                                                          aclopHandle **handle);
+
 
 /**
 * @ingroup AscendCL
@@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
-                                          int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda,
-                                          const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta,
-                                          aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
+                                          aclTransType transB,
+                                          aclTransType transC,
+                                          int m,
+                                          int n,
+                                          int k,
+                                          const aclFloat16 *alpha,
+                                          const aclFloat16 *matrixA,
+                                          int lda,
+                                          const aclFloat16 *matrixB,
+                                          int ldb,
+                                          const aclFloat16 *beta,
+                                          aclFloat16 *matrixC,
+                                          int ldc,
+                                          aclComputeType type,
+                                          aclrtStream stream);
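A minimal call-site sketch for the re-wrapped aclblasHgemm (illustrative
only: the device buffers dA/dB/dC, their row-major layout, and the
aclFloatToFloat16 converter from acl_base.h are assumptions of the example,
not content of this patch):

    /* C = alpha * A * B + beta * C on fp16 device buffers:
     * dA is m x k, dB is k x n, dC is m x n, all row-major. */
    static aclError LaunchHalfGemm(const aclFloat16 *dA, const aclFloat16 *dB,
                                   aclFloat16 *dC, int m, int n, int k,
                                   aclrtStream stream)
    {
        aclFloat16 alpha = aclFloatToFloat16(1.0f);
        aclFloat16 beta = aclFloatToFloat16(0.0f);
        return aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N, m, n, k,
                            &alpha, dA, k, /* lda */
                            dB, n,         /* ldb */
                            &beta, dC, n,  /* ldc */
                            ACL_COMPUTE_HIGH_PRECISION, stream);
    }

 
 /**
 * @ingroup AscendCL
@@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType tran
 * @retval ACL_SUCCESS The function is successfully executed.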
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclComputeType type, aclopHandle **handle); /** @@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const int32_t *alpha, const int8_t *matrixA, int lda, - const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, - int ldc, aclComputeType type, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const int32_t *alpha, + const int8_t *matrixA, + int lda, + const int8_t *matrixB, + int ldb, + const int32_t *beta, + int32_t *matrixC, + int ldc, + aclComputeType type, + aclrtStream stream); + /** * @ingroup AscendCL @@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclComputeType type, aclopHandle **handle); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index dcaa3936..90dc70e8 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output // Supported Pixel Format enum acldvppPixelFormat { - PIXEL_FORMAT_YUV_400 = 0, // 0 - PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 - PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 - PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 - PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 - PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 - PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 - PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 - PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 - PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 - PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 - PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 - PIXEL_FORMAT_RGB_888 = 12, // 12 - PIXEL_FORMAT_BGR_888 = 13, // 13 - PIXEL_FORMAT_ARGB_8888 = 14, // 14 - PIXEL_FORMAT_ABGR_8888 = 15, // 15 - PIXEL_FORMAT_RGBA_8888 = 16, // 16 - PIXEL_FORMAT_BGRA_8888 = 17, // 17 - PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 - PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 - PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 - PIXEL_FORMAT_YVU_PLANAR_422, - PIXEL_FORMAT_YVU_PLANAR_444, - PIXEL_FORMAT_RGB_444 = 23, - PIXEL_FORMAT_BGR_444, - PIXEL_FORMAT_ARGB_4444, - PIXEL_FORMAT_ABGR_4444, - PIXEL_FORMAT_RGBA_4444, - PIXEL_FORMAT_BGRA_4444, - PIXEL_FORMAT_RGB_555, - PIXEL_FORMAT_BGR_555, - PIXEL_FORMAT_RGB_565, - PIXEL_FORMAT_BGR_565, - PIXEL_FORMAT_ARGB_1555, - PIXEL_FORMAT_ABGR_1555, - PIXEL_FORMAT_RGBA_1555, 
- PIXEL_FORMAT_BGRA_1555, - PIXEL_FORMAT_ARGB_8565, - PIXEL_FORMAT_ABGR_8565, - PIXEL_FORMAT_RGBA_8565, - PIXEL_FORMAT_BGRA_8565, - PIXEL_FORMAT_RGB_BAYER_8BPP = 50, - PIXEL_FORMAT_RGB_BAYER_10BPP, - PIXEL_FORMAT_RGB_BAYER_12BPP, - PIXEL_FORMAT_RGB_BAYER_14BPP, - PIXEL_FORMAT_RGB_BAYER_16BPP, - PIXEL_FORMAT_BGR_888_PLANAR = 70, - PIXEL_FORMAT_HSV_888_PACKAGE, - PIXEL_FORMAT_HSV_888_PLANAR, - PIXEL_FORMAT_LAB_888_PACKAGE, - PIXEL_FORMAT_LAB_888_PLANAR, - PIXEL_FORMAT_S8C1, - PIXEL_FORMAT_S8C2_PACKAGE, - PIXEL_FORMAT_S8C2_PLANAR, - PIXEL_FORMAT_S16C1, - PIXEL_FORMAT_U8C1, - PIXEL_FORMAT_U16C1, - PIXEL_FORMAT_S32C1, - PIXEL_FORMAT_U32C1, - PIXEL_FORMAT_U64C1, - PIXEL_FORMAT_S64C1, - PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, - PIXEL_FORMAT_YVU_SEMIPLANAR_440, - PIXEL_FORMAT_FLOAT32, - PIXEL_FORMAT_BUTT, - PIXEL_FORMAT_UNKNOWN = 10000 + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 }; // Stream Format -enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; +enum acldvppStreamFormat { + H265_MAIN_LEVEL = 0, + H264_BASELINE_LEVEL, + H264_MAIN_LEVEL, + H264_HIGH_LEVEL +}; // Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; +enum acldvppChannelMode { + DVPP_CHNMODE_VPC = 1, + DVPP_CHNMODE_JPEGD = 2, + DVPP_CHNMODE_JPEGE = 4 +}; // Supported Border Type -enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; +enum 
acldvppBorderType { + BORDER_CONSTANT = 0, + BORDER_REPLICATE, + BORDER_REFLECT, + BORDER_REFLECT_101 +}; // Venc parameter type enum aclvencChannelDescParamType { - ACL_VENC_THREAD_ID_UINT64 = 0, - ACL_VENC_CALLBACK_PTR, - ACL_VENC_PIXEL_FORMAT_UINT32, - ACL_VENC_ENCODE_TYPE_UINT32, - ACL_VENC_PIC_WIDTH_UINT32, - ACL_VENC_PIC_HEIGHT_UINT32, - ACL_VENC_KEY_FRAME_INTERVAL_UINT32, - ACL_VENC_BUF_ADDR_PTR, - ACL_VENC_BUF_SIZE_UINT32, - ACL_VENC_RC_MODE_UINT32, - ACL_VENC_SRC_RATE_UINT32, - ACL_VENC_MAX_BITRATE_UINT32, - ACL_VENC_MAX_IP_PROP_UINT32 + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 }; // Jpeg picture format enum acldvppJpegFormat { - ACL_JPEG_CSS_444 = 0, - ACL_JPEG_CSS_422, - ACL_JPEG_CSS_420, - ACL_JPEG_CSS_GRAY, - ACL_JPEG_CSS_440, - ACL_JPEG_CSS_411, - ACL_JPEG_CSS_UNKNOWN = 1000 + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 }; /** @@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD * @retval null for failed. * @retval other success */ -ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, + uint32_t right, + uint32_t top, uint32_t bottom); /** @@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, + uint32_t left, + uint32_t right, + uint32_t top, uint32_t bottom); /** @@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, - const void *param); + aclvencChannelDescParamType paramType, size_t length, const void *param); /** * @ingroup AscendCL @@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, - size_t *paramRetSize, void *param); + aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); /** * @ingroup AscendCL @@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF * @retval ACL_SUCCESS The function is successfully executed. 
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
+                                                     uint32_t size,
+                                                     uint32_t *width,
+                                                     uint32_t *height,
                                                      int32_t *components);
 
 /**
@@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
-                                                       uint32_t *height, int32_t *components,
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
+                                                       uint32_t size,
+                                                       uint32_t *width,
+                                                       uint32_t *height,
+                                                       int32_t *components,
                                                        acldvppJpegFormat *format);
 
 /**
@@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
-                                                       const acldvppJpegeConfig *config, uint32_t *size);
+                                                       const acldvppJpegeConfig *config,
+                                                       uint32_t *size);
 
 /**
 * @ingroup AscendCL
@@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
-                                                       acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
+                                                       uint32_t dataSize,
+                                                       acldvppPixelFormat outputPixelFormat,
+                                                       uint32_t *decSize);
 
 /**
 * @ingroup AscendCL
@@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
-                                                    uint32_t *height, int32_t *components);
+ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
+                                                    uint32_t dataSize,
+                                                    uint32_t *width,
+                                                    uint32_t *height,
+                                                    int32_t *components);
 
 /**
 * @ingroup AscendCL
@@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
-                                                      acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
+                                                      uint32_t dataSize,
+                                                      acldvppPixelFormat outputPixelFormat,
+                                                      uint32_t *decSize);
 
 /**
 * @ingroup AscendCL
@@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDe
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 * | acldvppCreateResizeConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-                                                   acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
+ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
+                                                   acldvppPicDesc *inputDesc,
+                                                   acldvppPicDesc *outputDesc,
+                                                   acldvppResizeConfig *resizeConfig,
                                                    aclrtStream stream);
 
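A minimal call-site sketch for the resize entry point above (illustrative
only; the channel, picture descriptors and resize config are assumed to be
created and filled through the acldvppCreate* APIs named in the @see tags):

    /* Enqueue one VPC resize, then wait for it; like the other *Async
     * DVPP calls, acldvppVpcResizeAsync only queues work on the stream. */
    static aclError ResizeOnce(acldvppChannelDesc *channel, acldvppPicDesc *in,
                               acldvppPicDesc *out, acldvppResizeConfig *cfg,
                               aclrtStream stream)
    {
        aclError ret = acldvppVpcResizeAsync(channel, in, out, cfg, stream);
        return (ret != ACL_SUCCESS) ? ret : aclrtSynchronizeStream(stream);
    }

 /**
@@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDe
 * @retval ACL_SUCCESS The function is successfully executed.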
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, aclrtStream stream); /** @@ -1746,9 +1781,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - acldvppResizeConfig *resizeConfig, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + acldvppResizeConfig *resizeConfig, + aclrtStream stream); + /** * @ingroup AscendCL @@ -1772,9 +1811,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *chann * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], aclrtStream stream); + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + aclrtStream stream); /** * @ingroup AscendCL @@ -1799,10 +1841,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], - acldvppResizeConfig *resizeConfig, aclrtStream stream); + acldvppResizeConfig *resizeConfig, + aclrtStream stream); /** * @ingroup AscendCL @@ -1825,9 +1870,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc * * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - acldvppRoiConfig *pasteArea, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, + aclrtStream stream); /** * @ingroup AscendCL @@ -1851,10 +1899,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, +ACL_FUNC_VISIBILITY aclError 
acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc,
+                                                            acldvppPicDesc *inputDesc,
+                                                            acldvppPicDesc *outputDesc,
+                                                            acldvppRoiConfig *cropArea,
                                                             acldvppRoiConfig *pasteArea,
-                                                            acldvppResizeConfig *resizeConfig, aclrtStream stream);
+                                                            acldvppResizeConfig *resizeConfig,
+                                                            aclrtStream stream);
 
 /**
 * @ingroup AscendCL
@@ -1879,11 +1930,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *
 *
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
-                                                              acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
-                                                              uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
-                                                              acldvppRoiConfig *cropAreas[],
-                                                              acldvppRoiConfig *pasteAreas[], aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
+                                                              acldvppBatchPicDesc *srcBatchPicDescs,
+                                                              uint32_t *roiNums,
+                                                              uint32_t size,
+                                                              acldvppBatchPicDesc *dstBatchPicDescs,
+                                                              acldvppRoiConfig *cropAreas[],
+                                                              acldvppRoiConfig *pasteAreas[],
+                                                              aclrtStream stream);
 
 /**
 * @ingroup AscendCL
@@ -1909,10 +1963,16 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
 *
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
-  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
-  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
-  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelDesc *channelDesc,
+                                                                 acldvppBatchPicDesc *srcBatchPicDescs,
+                                                                 uint32_t *roiNums,
+                                                                 uint32_t size,
+                                                                 acldvppBatchPicDesc *dstBatchPicDescs,
+                                                                 acldvppRoiConfig *cropAreas[],
+                                                                 acldvppRoiConfig *pasteAreas[],
+                                                                 acldvppResizeConfig *resizeConfig,
+                                                                 aclrtStream stream);
+
 
 /**
 * @ingroup AscendCL
@@ -1940,8 +2000,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
-                                                    acldvppPicDesc *outputDesc, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
+                                                    const void *data,
+                                                    uint32_t size,
+                                                    acldvppPicDesc *outputDesc,
+                                                    aclrtStream stream);
 
 /**
 * @ingroup AscendCL
@@ -1959,8 +2022,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelD
 *
 * @see acldvppCreateChannel | acldvppCreateJpegeConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-                                                    const void *data, uint32_t *size, acldvppJpegeConfig *config,
+ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
+                                                    acldvppPicDesc *inputDesc,
+                                                    const void *data,
+                                                    uint32_t *size,
+                                                    acldvppJpegeConfig *config,
                                                     aclrtStream stream);
 
 /**
@@ -1978,8 +2044,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelD
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
-                                                   acldvppPicDesc *outputDesc, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
+                                                   
const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -2034,8 +2103,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + acldvppPicDesc *output, + aclvdecFrameConfig *config, + void *userData); /** * @ingroup AscendCL @@ -2054,8 +2126,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame */ -ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - aclvdecFrameConfig *config, void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + aclvdecFrameConfig *config, + void *userData); /** * @ingroup AscendCL @@ -2076,8 +2150,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -2099,8 +2175,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + void *reserve, + aclrtStream stream); /** * @ingroup AscendCL @@ -2112,7 +2191,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, + uint32_t mode); /** * @ingroup AscendCL @@ -2147,7 +2227,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, + uint32_t outMode); /** * @ingroup AscendCL @@ -2244,7 +2325,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, + uint32_t dim, + uint8_t **data, uint32_t *len); /** * @ingroup AscendCL @@ -2262,8 +2345,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap */ ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppLutMap *lutMap, aclrtStream stream); + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, + aclrtStream stream); /** * @ingroup AscendCL @@ -2284,7 +2369,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); * * @retval ACL_SUCCESS for success, other for failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, + uint32_t index, double value); /** @@ -2429,8 +2515,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppBorderConfig *borderConfig, aclrtStream stream); + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, + aclrtStream stream); /** * @ingroup AscendCL @@ -2447,8 +2535,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, - acldvppHist *hist, void *reserve, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *srcPicDesc, + acldvppHist *hist, + void *reserve, + aclrtStream stream); /** * @ingroup AscendCL @@ -2457,7 +2548,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel * @retval null for failed. * @retval OtherValues success. */ -ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); +ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); /** * @ingroup AscendCL @@ -2514,7 +2605,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, * * @see acldvppCreateHist | acldvppVpcCalcHistAsync */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); /** * @ingroup AscendCL @@ -2533,6 +2624,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); */ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); + /** * @ingroup AscendCL * @brief dvpp vpc batch crop, resize config and make border. 
@@ -2556,13 +2648,18 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); * * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( - acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, - acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], - acldvppResizeConfig *resizeConfig, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppBorderConfig *borderCfgs[], + acldvppResizeConfig *resizeConfig, + aclrtStream stream); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h index 4bd392c9..59f933a1 100644 --- a/inc/external/acl/ops/acl_fv.h +++ b/inc/external/acl/ops/acl_fv.h @@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult; // search operation type enum aclfvSearchType { - SEARCH_1_N, // 1:N operation type - SEARCH_N_M // N:M operation type + SEARCH_1_N, // 1:N operation type + SEARCH_N_M // N:M operation type }; /** @@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t * @retval OtherValues success. */ ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, - uint32_t featureLen, uint32_t featureCount, - uint8_t *featureData, uint32_t featureDataLen); + uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); /** * @ingroup AscendCL @@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp * @retval null for failed. OtherValues success */ ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, - uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, - uint32_t *resultOffset, float *resultDistance, - uint32_t dataLen); + uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, + uint32_t dataLen); /** * @ingroup AscendCL @@ -345,4 +343,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h index 8261adc4..969d855a 100644 --- a/inc/external/hccl/hccl.h +++ b/inc/external/hccl/hccl.h @@ -27,7 +27,7 @@ #ifdef __cplusplus extern "C" { -#endif // __cplusplus +#endif // __cplusplus /** * @brief Initialize HCCL. @@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root * @param sendBuf A pointer identifying the input data address of the operator. * @param recvBuf A pointer identifying the output data address of the operator. * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, - * float32. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. 
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
-                                HcclComm comm, aclrtStream stream);
+extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
+                                HcclReduceOp op, HcclComm comm, aclrtStream stream);
 
 /**
 * @brief Broadcast operator.
@@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc
 * @param root An integer(u32) identifying the the root rank in the operator.
 * @param comm A pointer identifying the communication resource based on
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
-                                aclrtStream stream);
+extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
+                                aclrtStream stream);
 
 /**
 * @brief ReduceScatter operator.
@@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
-                                    HcclReduceOp op, HcclComm comm, aclrtStream stream);
+extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
+                                    HcclReduceOp op, HcclComm comm, aclrtStream stream);
 
 /**
 * @brief AllGather operator.
@@ -114,16 +113,16 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
 * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
-                                aclrtStream stream);
+extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
+                                HcclComm comm, aclrtStream stream);
 
 /**
 * @brief Get the rank size of this comm.
 *
 * @param comm A pointer identifying the communication resource based on.
 * @param rankSize A pointer identifying the rank size.
- * @return HcclResult
+ * @return HcclResult
 */
 extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
 
@@ -132,7 +131,7 @@ extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
 *
 * @param comm A pointer identifying the communication resource based on.
 * @param rankSize A pointer identifying the rank id.
- * @return HcclResult
+ * @return HcclResult
 */
 extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 
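A minimal call-site sketch for the re-wrapped collectives (illustrative
only; the device buffers, comm and stream are assumed to be initialized
elsewhere, and HCCL_DATA_TYPE_FP32 / HCCL_REDUCE_SUM come from hccl_types.h
further below):

    /* Sum-reduce count float32 elements across every rank in comm.
     * count is a number of elements, not a byte size. */
    HcclResult ret = HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                                   HCCL_REDUCE_SUM, comm, stream);

 /**
@@ -140,7 +139,7 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 *
 * @param comm A pointer identifying the communication resource based on.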
* @param stream A pointer identifying the stream information. - * @return HcclResult + * @return HcclResult */ extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); @@ -155,5 +154,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm); #ifdef __cplusplus } -#endif // __cplusplus -#endif // HCCL_H_ +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h index 0e832396..50a64795 100644 --- a/inc/external/hccl/hccl_types.h +++ b/inc/external/hccl/hccl_types.h @@ -16,10 +16,10 @@ /** * @file hccl_types.h - * @brief HCCL data type definition - * + * @brief HCCL data type definition + * */ - + #ifndef HCCL_TYPES_H_ #define HCCL_TYPES_H_ @@ -27,33 +27,33 @@ #ifdef __cplusplus extern "C" { -#endif // __cplusplus +#endif // __cplusplus /** * @brief HCCL functions return value definition */ typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ } HcclResult; /** @@ -65,37 +65,37 @@ typedef void *HcclComm; * @brief HCCL Reduction opperation */ typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ } HcclReduceOp; /** * @brief HCCL data type */ typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, 
/**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ } HcclDataType; -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length /** * @brief HCCL root info */ typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; + char internal[HCCL_ROOT_INFO_BYTES]; } HcclRootInfo; #ifdef __cplusplus } -#endif // __cplusplus -#endif // HCCL_TYPES_H_ +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index a1392cc6..c5423d36 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -23,87 +23,87 @@ extern "C" { #endif -static const int32_t ACL_RT_SUCCESS = 0; // success +static const int32_t ACL_RT_SUCCESS = 0; // success -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type -static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t 
ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty 
in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error -static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout -static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception -static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception -static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t 
ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error -static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error -static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect #ifdef __cplusplus } #endif -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index f61e2939..a20272f3 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ 
b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -3154,13 +3154,13 @@ REG_OP(FusedMulAddNL2loss)
 *@brief Tests whether the input exceeds a threshold. \n

 *@par Inputs:
-*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n
+* x: A Tensor with any format. Must be one of the following types: float16, float32. \n

 *@par Attributes:
-*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
+* threshold: An optional float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n

 *@par Outputs:
-*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
+* y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
 *@par Third-party framework compatibility
 * Compatible with the Caffe operator Threshold.
 */
@@ -3203,12 +3203,11 @@ REG_OP(ArgMaxWithK)
 *@brief Multiply tensor with scale. \n

 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including:
+*x: A Tensor. Must be one of the following types: int32, int16, float16, float32.

 *@par Outputs:
-*@li y: A Tensor. Has the same type and shape as "x1". \n
+*y: A Tensor. Has the same type and shape as "x". \n

 *@par Third-party framework compatibility:
 * Compatible with the Pytorch operator muls.
@@ -3223,12 +3222,11 @@ REG_OP(Muls)
 *@brief Fill tensor with scale. \n

 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including:
+*x1: A Tensor. Must be one of the following types: int32, int16, float16, float32.

 *@par Outputs:
-*@li y: A Tensor. Has the same type and shape as "x1". \n
+*y: A Tensor. Has the same type and shape as "x1". \n

 *@par Third-party framework compatibility:
 * Compatible with the Pytorch operator fills.
@@ -3378,7 +3376,7 @@ REG_OP(TensorMove)

 *@par Inputs:
 *One input, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n
+*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n

 *@par Outputs:
 *output_x: A Tensor. Has the same type as "x". \n
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index 6909345a..28bf6228 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -585,9 +585,11 @@ REG_OP(ResizeNearestNeighborV2GradD)
 channels], The image tensor that was resized . \n

 *@par Attributes:
-*align_corners: An optional bool. Defaults to False. If true, the centers of
+*@li align_corners: An optional bool. Defaults to False. If true, the centers of
 the 4 corner pixels of the input and grad tensors are aligned. Defaults to
-false . \n
+false .
+*@li half_pixel_centers: An optional bool. Indicates if the offset coordinates are normalized. Defaults
+to false . \n

 *@par Outputs:
 *y: A Tensor. Has the same type as original_image . \n
@@ -787,9 +789,10 @@ REG_OP(SampleDistortedBoundingBoxExt2)
 The new size for the images . \n

 *@par Attributes:
-*align_corners: If true, the centers of the 4 corner pixels of the input and
+*@li align_corners: If true, the centers of the 4 corner pixels of the input and
 output tensors are aligned, preserving the values at the corner pixels. Defaults
 to false . \n
+*@li half_pixel_centers: An optional bool. Defaults to False . \n

 *@par Outputs:
 *y: 4-D with shape [batch, new_height, new_width, channels] . \n
@@ -1253,6 +1256,7 @@ REG_OP(KeepRatioResizeBilinear)
 No default value.
 *@li align_corners: An optional bool. If "true", the centers of the corner
 pixels of the input and output tensors are aligned. Defaults to "false" . \n
+*@li half_pixel_centers: An optional bool. Defaults to False . \n

 *@par Outputs:
 *y: A Tensor with the same type and format as input "images" . \n
diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h
index 319bcf70..b47ba718 100644
--- a/third_party/fwkacllib/inc/ops/math_ops.h
+++ b/third_party/fwkacllib/inc/ops/math_ops.h
@@ -670,7 +670,8 @@ REG_OP(Conj)
 *@li weight: A Tensor dtype of float32 . \n

 *@par Attributes:
-*reduction: An optional attribute. Defaults to "mean" . \n
+*@li reduction: An optional attribute. Defaults to "mean" .
+*@li ignore_index: An optional attribute. Defaults to -100 . \n

 *@par Outputs:
 *@li y: A Tensor dtype of float32.
@@ -700,7 +701,8 @@ REG_OP(NLLLoss)
 *@li total_weight: A Tensor dtype of float32 . \n

 *@par Attributes:
-*reduction: An optional attribute. Defaults to "mean" . \n
+*@li reduction: An optional attribute. Defaults to "mean" .
+*@li ignore_index: An optional attribute. Defaults to -100 . \n

 *@par Outputs:
 *x_grad: A Tensor. Must be the following type: float32 . \n
@@ -720,24 +722,24 @@ REG_OP(NLLLossGrad)
 .OP_END_FACTORY_REG(NLLLossGrad)

 /**
-*@brief The ifmr . \n
+*@brief IFMR (Input Feature Map Reconstruction). \n

 *@par Inputs:
-*@li data:A Tensor of feature map
-*@li data_min:A Tensor of min value of feature map.
-*@li data_max:A Tensor of max value of feature map.
-*@li cumsum:A Tensor of cumsum bin of data . \n
+*@li data: A Tensor of feature map.
+*@li data_min: A Tensor of min value of feature map.
+*@li data_max: A Tensor of max value of feature map.
+*@li cumsum: A Tensor of cumsum bin of data . \n

 *@par Attributes:
-*min_percentile: min init percentile.
-*max_percentile: max init percentile.
-*search_range: search range.
-*search_step: step size of searching.
-*with_offset: whether using offset . \n
+*@li min_percentile: min init percentile.
+*@li max_percentile: max init percentile.
+*@li search_range: search range.
+*@li search_step: step size of searching.
+*@li with_offset: whether using offset . \n

 *@par Outputs:
-*scale: optimal scale.
-*offset: optimal offset . \n
+*@li scale: optimal scale.
+*@li offset: optimal offset . \n

 *@par Third-party framework compatibility
 *Compatible with mindspore
@@ -758,16 +760,16 @@ REG_OP(IFMR)
 .OP_END_FACTORY_REG(IFMR)

 /**
-*@brief weights adaptive range quantization. \n
+*@brief Weights Adaptive Range Quantization. \n

 *@par Inputs:
-*@li w:A Tensor of weights. \n
-*@li w_min:A Tensor of weights reduce_min. \n
-*@li w_max:A Tensor of weights reduce_max. \n
+*@li w: A Tensor of weights. \n
+*@li w_min: A Tensor of weights reduce_min. \n
+*@li w_max: A Tensor of weights reduce_max. \n

 *@par Attributes:
-*num_bits: the bits num used for quantize.
-*offset_flag: whether using offset. \n
+*@li num_bits: the bits num used for quantize.
+*@li offset_flag: whether using offset. \n

 *@par Outputs:
 *y: fake quantized weights. \n
@@ -789,22 +791,22 @@ REG_OP(WtsARQ)
 .OP_END_FACTORY_REG(WtsARQ)

 /**
-*@brief The acts_ulq. \n
+*@brief Activations Universal Linear Quantization. \n

 *@par Inputs:
-*@li x:A Tensor of feature map
-*@li clamp _min:A Tensor of min clamp value of feature map.
-*@li clamp _max:A Tensor of max clamp value of feature map.
+*@li x: A Tensor of feature map.
+*@li clamp_min: A Tensor of min clamp value of feature map.
+*@li clamp_max: A Tensor of max clamp value of feature map.

 *@par Attributes:
-*fixed_min: fix min to zero.
-*num_bits: quant bits. \n
+*@li fixed_min: fix min to zero.
+*@li num_bits: quant bits. \n

 *@par Outputs:
-*y: output fake quant feature map.
-*clamp_min_mask: where x > clamp_min
-*clamp_min_mask: where x < clamp_max
-*x_clamped_loss: clamp loss. \n
+*@li y: output fake quant feature map.
+*@li clamp_min_mask: where x > clamp_min.
+*@li clamp_max_mask: where x < clamp_max.
+*@li x_clamped_loss: clamp loss. \n

 *@par Third-party framework compatibility
 *Compatible with mindspore
@@ -826,12 +828,12 @@ REG_OP(ActsULQ)
 .OP_END_FACTORY_REG(ActsULQ)

 /**
-*@brief The acts_ulq_input_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization. \n

 *@par Inputs:
-*@li y_grad: A Tensor of gradient
-*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed'
-*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed'
+*@li y_grad: A Tensor of gradient.
+*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
+*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.

 *@par Outputs:
 *x_grapd: The gradient of inputs. \n
@@ -851,10 +853,10 @@ REG_OP(ActsULQInputGrad)
 .OP_END_FACTORY_REG(ActsULQInputGrad)

 /**
-*@brief The act_ulq_clamp_max_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp max. \n

 *@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
 *@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
 *@li x_clamped_loss: A Tensor of gradient. \n
@@ -876,10 +878,10 @@ REG_OP(ActULQClampMaxGrad)
 .OP_END_FACTORY_REG(ActULQClampMaxGrad)

 /**
-*@brief The act_ulq_clamp_min_grad. \n
+*@brief The gradient of Activations Universal Linear Quantization clamp min. \n

 *@par Inputs:
-*@li y_grad: A Tensor of gradient
+*@li y_grad: A Tensor of gradient.
 *@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
 *@li x_clamped_loss: A Tensor of gradient. \n
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index b317be37..5341a95c 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -67,11 +67,15 @@ REG_OP(MatMul)
 * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16,
 * float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
 * @li bias: A 1D Tensor. Must be one of the following types: float16,
-* float32, int32. Has format [ND, NHWC] . \n
+* float32, int32. Has format [ND, NHWC].
+* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8. Reserved. \n

 *@par Attributes:
 *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
-*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
+*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] .
+*@li offset_x: An optional integer for quantized inference.
+*The negative offset added to the input image for int8 type. Ensure offset_x within the
+*effective range of int8 [-128, 127]. Defaults to "0". \n

 *@par Outputs:
 *y: The result matrix Tensor. 2D. Must be one of the following types: float16,
@@ -488,13 +492,13 @@ REG_OP(ScatterElements)

 *@par Inputs:
 * Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor .

 *Must be one of the following types: float16, float32, int32, int8, uint8
 *@li indices: An ND Tensor of type int32 or int64

-*@li updates: An Tensor. format:NCHW, NHWC . \n
+*@li updates: A Tensor. Format: NCHW, NHWC .

 *Must be one of the following types: float16, float32, int32, int8, uint8
@@ -925,13 +929,13 @@ REG_OP(ScatterMin)

 *@par Inputs:
 * Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor .

 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An NCHW, NHWC, or ND Tensor . \n

 *Must be one of the following types: int32 or int64

-*@li updates: An NCHW, NHWC, or ND Tensor . \n
+*@li updates: An NCHW, NHWC, or ND Tensor .

 *Must be one of the following types: float16, float, int32, int8, uint8
@@ -958,13 +962,13 @@ REG_OP(ScatterMax)

 *@par Inputs:
 * Three inputs, including:
-*@li var: An ND Tensor . \n
+*@li var: An ND Tensor .

 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An ND Tensor . \n

 *Must be one of the following types: int32 or int64

-*@li updates: An ND Tensor . \n
+*@li updates: An ND Tensor .

 *Must be one of the following types: float16, float, int32, int8, uint8
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 98473c65..4139d1c0 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -367,19 +367,19 @@ REG_OP(BiasAddGrad)
 * Gradients with respect to the output of the convolution.
 *\n
 *\n
- * The following are the supported data types and data formats:
-*@verbatim
-    | Tensor     | out_bckprop | filter  | y
-    ------------|-------------|---------|--------
-    | Data Type  | float16     | float16 | float16
-    |            |-------------|---------|--------
-    |            | float32     | float32 | float32
-    |            |-------------|---------|--------
-    |            | float64     | float64 | float64
-    ------------|-------------|---------|--------
-    | Format     | NCHW        | NCHW    | NCHW
-    |            | NHWC        | HWCN    | NHWC
-@endverbatim
+ * The following are the supported data types and data formats:\n
+ *\n
+ | Tensor     | out_backprop | filter  | y\n
+ ------------|--------------|---------|--------\n
+ | Data Type  | float16      | float16 | float16\n
+ |            |--------------|---------|--------\n
+ |            | float32      | float32 | float32\n
+ |            |--------------|---------|--------\n
+ |            | float64      | float64 | float64\n
+ ------------|--------------|---------|--------\n
+ | Format     | NCHW         | NCHW    | NCHW\n
+ |            | NHWC         | HWCN    | NHWC\n
+ *\n
 * For float32 and float64 type, the actual calculation on the chip is based on
 * float16.
 *\n
@@ -398,36 +398,37 @@ REG_OP(BiasAddGrad)
 * "NHWC". Specify the data format of the input and output data.
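/*
 * [Editor's aside - an illustrative sketch, not part of the original patch or of the
 * ACL/GE API. A caller preparing attributes for the 2-D convolution operators
 * documented here might validate the data_format string against the two feature-map
 * layouts the tables above list; the helper name is hypothetical.]
 */
#include <string.h>

static int IsSupported4DDataFormat(const char *fmt) {
  /* Only "NCHW" and "NHWC" appear as feature-map formats in the tables above. */
  return strcmp(fmt, "NCHW") == 0 || strcmp(fmt, "NHWC") == 0;
}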
*\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | input_size | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | out_backprop | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | y(fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | input_size | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | out_backprop | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | y(fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + *\n -@endverbatim * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -508,7 +509,7 @@ REG_OP(Conv2DBackpropInputD) /** *@brief Computes the Deconvolution with respect to the input. *@par Inputs: - * Three inputs: + * Two required inputs: * @li x: A Tensor of type float16 or int8. 4D with shape * [batch, out_channels, out_height, out_width]. Gradients with respect * to the output of the convolution. @@ -520,16 +521,16 @@ REG_OP(Conv2DBackpropInputD) * Type is int8. Reserved.\n *\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | filter | bias | y\n + ------------|---------|---------|---------|--------\n + | Data Type | float16 | float16 | float16 | float16\n + | |---------|---------|---------|--------\n + | | int8 | int8 | int32 | int32\n + ------------|---------|---------|---------|--------\n + | Format | NCHW | NCHW | ND | NCHW\n + *\n * For int8, a dequant or requant operator must be followed. *\n * @@ -550,35 +551,35 @@ REG_OP(Conv2DBackpropInputD) * within the effective range of int8 [-128, 127]. Defaults to "0". 
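/*
 * [Editor's aside - an illustrative sketch, not part of the original patch. The int8
 * range quoted above for the offset_x attribute can be checked before the attribute
 * is set; the function name is hypothetical.]
 */
#include <stdbool.h>
#include <stdint.h>

static bool IsValidOffsetX(int64_t offset_x) {
  /* offset_x must stay within the effective range of int8: [-128, 127]. */
  return offset_x >= -128 && offset_x <= 127;
}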
*\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | x (out_backprop) | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | Filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | y (fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | x (out_backprop) | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | Filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | y (fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | Offset_x | | [-128, 127]\n + *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * @@ -628,19 +629,19 @@ REG_OP(Deconvolution) * convolution. *\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | out_backprop | y - ------------|---------|--------------|--------- - | Data Type | float16 | float16 | float16 - | |---------|--------------|--------- - | | float32 | float32 | float32 - | |---------|--------------|--------- - | | float64 | float64 | float64 - |-----------|---------|--------------|--------- - | Format | NCHW | NCHW | NCHW - | | NHWC | NHWC | HWCN -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | out_backprop | y\n + ------------|---------|--------------|---------\n + | Data Type | float16 | float16 | float16\n + | |---------|--------------|---------\n + | | float32 | float32 | float32\n + | |---------|--------------|---------\n + | | float64 | float64 | float64\n + |-----------|---------|--------------|---------\n + | Format | NCHW | NCHW | NCHW\n + | | NHWC | NHWC | HWCN\n + *\n * For float32 and float64 type of x and outbackprop, the actual calculation on the chip * is based on float16. *\n @@ -658,39 +659,34 @@ REG_OP(Deconvolution) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. 
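/*
 * [Editor's aside - an illustrative sketch, not part of the original patch. The
 * Ascend910 note above compares the padded input extent against the dilated filter
 * extent, (filter - 1) * dilation + 1. That expression in plain C, with hypothetical
 * names:]
 */
#include <stdbool.h>
#include <stdint.h>

static int64_t DilatedKernelExtent(int64_t filter, int64_t dilation) {
  /* Effective span of a filter after dilation. */
  return (filter - 1) * dilation + 1;
}

static bool ExtentOfOneIsSupported(int64_t fmap, int64_t pad_head, int64_t pad_tail,
                                   int64_t filter, int64_t dilation) {
  /* Per the restriction above, an H or W extent of 1 is only supported when the
     padded input exactly matches the dilated filter extent. */
  return fmap + pad_head + pad_tail == DilatedKernelExtent(filter, dilation);
}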
*\n -*\n -* The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | x(fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | out_backprop | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | y | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -@endverbatim - * In Ascend910, out_backprop's H and W not support 1 when - * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 *\n - * + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | x(fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Filter Size | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | out_backprop | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | y | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. *\n @@ -780,16 +776,14 @@ REG_OP(Conv2DBackpropFilterD) *\n *\n * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | | float32 | float32 | float32 | float32 - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim +*\n +| Tensor | x | filter | bias | y |\n +| :-------: | :-----: | :-----: | :-----: | :-----: |\n +| Data Type | float16 | float16 | float16 | float16 |\n +| | float32 | float32 | float32 | float32 |\n +| | int8 | int8 | int32 | int32 |\n +| Format | NCHW | NCHW | ND | NCHW |\n +| | NHWC | HWCN | | NHWC |\n * For float32 type, the actual calculation on the chip is based on * float16. 
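/*
 * [Editor's aside - an illustrative sketch, not part of the original patch. Because
 * float32 inputs are computed internally in float16 per the note above, a caller
 * validating results against a float32 reference may want a float16-scale relative
 * tolerance. The constant below is an assumption, not an official figure.]
 */
static const float kFp16RelativeTolerance = 1.0e-3f; /* float16 keeps ~10 mantissa bits */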
*\n
@@ -813,35 +807,28 @@ REG_OP(Conv2DBackpropFilterD)
 *\n
 *\n
 * The following value range restrictions must be met:
-*@verbatim
-    | Name             | Field    | Scope
-    -------------------|----------|--------------
-    | Input Image Size | H        | [1, 100000]
-    |                  | W        | [1, 4096]
-    -------------------|----------|--------------
-    | Filter Size      | H        | [1, 255]
-    |                  | W        | [1, 255]
-    -------------------|----------|--------------
-    | Stride           | H        | [1, 63]
-    |                  | W        | [1, 63]
-    -------------------|----------|--------------
-    | Padding          | Top      | [0, 255]
-    |                  | Bottom   | [0, 255]
-    |                  | Left     | [0, 255]
-    |                  | Right    | [0, 255]
-    -------------------|----------|--------------
-    | Dilation         | H        | [1, 255]
-    |                  | W        | [1, 255]
-    -------------------|----------|--------------
-    | Offset_x         |          | [-128, 127]
-
-@endverbatim
+*\n
+| Name             | Field    | Scope       |\n
+| :--------------: | :------: | :---------: |\n
+| Input Image Size | H        | [1, 100000] |\n
+|                  | W        | [1, 4096]   |\n
+| Filter Size      | H        | [1, 255]    |\n
+|                  | W        | [1, 255]    |\n
+| Stride           | H        | [1, 63]     |\n
+|                  | W        | [1, 63]     |\n
+| Padding          | Top      | [0, 255]    |\n
+|                  | Bottom   | [0, 255]    |\n
+|                  | Left     | [0, 255]    |\n
+|                  | Right    | [0, 255]    |\n
+| Dilation         | H        | [1, 255]    |\n
+|                  | W        | [1, 255]    |\n
+| Offset_x         | -        | [-128, 127] |\n
 * The W dimension of the input image supports cases exceeding 4096, but it may
 * cause compilation errors.
 *\n
 *
 *@par Outputs:
-*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the
+* y: A 4D Tensor of output feature map. Has the same type as "x". With the
 * format "NHWC", the data is stored in the order of: [batch, out_height,
 * out_width, out_channels].
 *\n
@@ -956,16 +943,13 @@ REG_OP(Conv2DCompress)
 *\n
 *\n
 * The following are the supported data types and data formats:
-*@verbatim
-    | Tensor     | x       | filter  | offsets | bias     | y
-    ------------|---------|---------|---------|----------|--------
-    | Data Type  | float16 | float16 | float16 | float16  | float16
-    |            |---------|---------|---------|----------|--------
-    |            | float32 | float32 | float32 | float32  | float32
-    ------------|---------|---------|---------|----------|--------
-    | Format     | NCHW    | NCHW    | NCHW    | ND       | NCHW
-    |            | NHWC    | HWCN    | NHWC    |          | NHWC
-@endverbatim
+*\n
+| Tensor    | x       | filter  | offsets | bias    | y       |\n
+| :-------: | :-----: | :-----: | :-----: | :-----: | :-----: |\n
+| Data Type | float16 | float16 | float16 | float16 | float16 |\n
+|           | float32 | float32 | float32 | float32 | float32 |\n
+| Format    | NCHW    | NCHW    | NCHW    | ND      | NCHW    |\n
+|           | NHWC    | HWCN    | NHWC    |         | NHWC    |\n
 * For float32 type, the actual convolution calculation part on the chip is
 * based on float16.
 *\n
@@ -992,19 +976,17 @@ REG_OP(Conv2DCompress)
 *\n
 *\n
 * The following value range restrictions must be met:
-*@verbatim
-    | Name               | Field  | Scope
-    --------------------|--------|----------------------------
-    | Input Image Size   | H      | [1, 100000 / filter_height]
-    |                    | W      | [1, 4096 / filter_width]
-    --------------------|--------|----------------------------
-    | Filter Size        | H      | [1, 63]
-    |                    | W      | [1, 63]
-@endverbatim
+*\n
+| Name             | Field    | Scope                       |\n
+| :--------------: | :------: | :-------------------------: |\n
+| Input Image Size | H        | [1, 100000 / filter_height] |\n
+|                  | W        | [1, 4096 / filter_width]    |\n
+| Filter Size      | H        | [1, 63]                     |\n
+|                  | W        | [1, 63]                     |\n
 *\n
 *
 *@par Outputs:
-*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the
+* y: A 4D Tensor of output feature map. Has the same type as "x". With the
 * format "NHWC", the data is stored in the order of: [batch, out_height,
 * out_width, out_channels].
 *\n
@@ -1042,41 +1024,38 @@ REG_OP(DeformableConv2D)
 /**
 *@brief Computes a 3D convolution given 5D "x" and "filter" tensors.
- *@par Inputs:
+
+*@par Inputs:
 * @li x: A 5D tensor. Must be one of the following types: float16,
 * (Currently does not support int8). The format of x is NCDHW or NDHWC.
 * @li filter: A 5D tensor of the same type as "x".
 * (Currently does not support int8).
- * The format is NCDHW, NDHWC or DHWCN . \n
-
-*@par Optional input:
- * @li bias: An optional 1D tensor of the same type as "x".
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
+ * The format is NCDHW, NDHWC or DHWCN.
+ * @li bias: Optional. A 1D tensor of the same type as "x".
+ * @li offset_w: Optional. A 1D tensor for quantized deconvolution. Reserved. \n

-*@par Required Attributes:
- * @li strides: A list of 5 integers. Specifies the stride of the sliding window
+*@par Attributes:
+ * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
 * for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A list of 6 integers.
+ * @li pads: Required. A list of 6 integers.
 * Supports only padding along the D, H and W dimensions in sequence of head,
- * tail, top, bottom, left and right . \n
-
-*@par Attributes:
- * @li groups: Number of blocked connections from input channels to output
+ * tail, top, bottom, left and right.
+ * @li dilations: Optional. A list of 5 integers. Specifies the dilation factor for each
+ * dimension of "x".
+ * The N, C and D dimensions must be 1. Has the same format as "x".
+ * @li groups: Optional. Number of blocked connections from input channels to output
 * channels.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
 * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li dilations: A list of 5 integers. Specifies the dilation factor for each
- * dimension of "x".
- * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li offset_x: An optional int. Input offset, used for quantized inference.
- * Defaults to 0. Reserved . \n
+ * @li offset_x: Optional. An int. Input offset, used for quantized inference.
+ * Defaults to 0. Reserved. \n

 *@par Outputs:
- *y: A Tensor. Has the same type and data format as "x". \n
+ * y: A Tensor. Has the same type and data format as "x". \n

 *@attention Constraints:
- *The image size after padding is greater than the filter size . \n
+ * The image size after padding is greater than the filter size. \n

 *@par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator conv3d.
@@ -1099,8 +1078,8 @@ REG_OP(Conv3D)
 /**
 *@brief Computes the gradients of convolution 3d with respect to the input.
+
 *@par Inputs:
- * Three inputs:
 * @li input_size: A Tensor of type int32, int64. An integer vector representing
 * the shape of input, where input is a 5-D tensor
 * [batch, depth, height, width, channels] or
@@ -1110,28 +1089,25 @@ REG_OP(Conv3D)
 * @li out_backprop: A Tensor. Must have the same type as filter.
 * 5-D with shape [batch, depth, out_height, out_width, out_channels]
 * or [batch, out_channels, depth, out_height, out_width]. Gradients with
- * respect to the output of the convolution . \n
+ * respect to the output of the convolution. \n

-*@par Required Attributes:
- * @li strides: A list of 5 integers. Specifies the stride of the sliding window
+*@par Attributes:
+ * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
 * for each dimension of "out_backprop".
 * The N and C dimensions must be 1. Has the same format as "out_backprop".
- * @li pads: A list of 6 integers.
+ * @li pads: Required. A list of 6 integers.
 * Supports only padding along the D, H and W dimensions in sequence of head,
- * tail, top, bottom, left and right . \n
-
-*@par Attributes:
- * Three attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
+ * tail, top, bottom, left and right.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
 * dimension of the input.
 * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
+ * @li groups: Optional. Number of blocked connections from input channels to output
+ * channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

 *@par Outputs:
- * y: A Tensor. Has the same type as filter,and has same format as "input_size"
+ * y: A Tensor. Has the same type as filter, and has the same format as "input_size". \n

 *@par Third-party framework compatibility
 * Compatible with Tensorflow's conv3d_backprop_input
@@ -1150,40 +1126,39 @@ REG_OP(Conv3DBackpropInput)
 /**
 *@brief Computes the gradients of convolution 3d with respect to the input.
+
 *@par Inputs:
- * Two inputs:
 * @li filter: A Tensor whose type is float16. The format of filter is NCDHW,
 * NDHWC or DHWCN.
 * @li out_backprop: A Tensor. Must have the same type as filter. The format is
- * NDHWC or NCDHW. \n
+ * NDHWC or NCDHW. \n

-*@par Required Attributes:
- * @li strides: A list of 5 integers. Specifies the stride of the sliding window
+*@par Attributes:
+ * @li input_size: Required. A tuple/list of type int32, int64. An integer vector
+ * representing the shape of input, where input is a 5-D tensor
+ * [batch, depth, height, width, channels] or
+ * [batch, channels, depth, height, width].
+ * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
 * for each dimension of "out_backprop".
 * The N and C dimensions must be 1. Has the same format as "out_backprop".
- * @li pads: A list of 6 integers. Supports only padding along the D, H and W
+ * @li pads: Required. A list of 6 integers. Supports only padding along the D, H and W
 * dimensions in sequence of head, tail, top, bottom, left and right.
- * @li input_size: A tuple/list of type int32, int64. An integer vector
- * representing the shape of input, where input is a 5-D tensor
- * [batch, depth, height, width, channels] or
- * [batch, channels, depth, height, width] . \n
-
-*@par Attributes:
- * Three attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
 * dimension of input.
 * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
+ * @li groups: Optional. Number of blocked connections from input channels to output
+ * channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n
+
 *@par Outputs:
- * y: A Tensor. Has the same type and data format as "out_backprop".
+ * y: A Tensor. Has the same type and data format as "out_backprop". \n
+
 *@par Third-party framework compatibility
- * Compatible with Tensorflow's conv3d_backprop_input
+ * Compatible with Tensorflow's conv3d_backprop_input. \n

 *@par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead.
+ * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead.
 */
 REG_OP(Conv3DBackpropInputD)
     .INPUT(filter, TensorType({DT_FLOAT16}))
     .INPUT(out_backprop, TensorType({DT_FLOAT16}))
@@ -1242,8 +1217,8 @@ REG_OP(LSTM)
 /**
 *@brief Computes the gradients of convolution3D with respect to the filter
+
 *@par Inputs:
- * Three inputs:
 * @li x: A Tensor. Must be one of the following types: float16, float32.
 * Currently does not support double.
 * 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
@@ -1258,26 +1233,23 @@ REG_OP(LSTM)
 * or [batch, out_channels, out_depth, out_height, out_width].
 * Gradients with respect to the output of the convolution. \n

-*@par Required Attributes:
- * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
+*@par Attributes:
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
 * window for each dimension of "x". The N and C dimensions must be 1.
 * Has the same format as "x".
- * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right]
- * pads on feature map . \n
-
-*@par Attributes:
- * Three attributes:
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
+ * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
+ * pads on feature map.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
 * dimension of input.
 * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li groups: Number of blocked connections from input channels to output
+ * @li groups: Optional. Number of blocked connections from input channels to output
 * channels.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

 *@par Outputs:
- * y: A Tensor that has the same type as "x"
- * and the format is NDHWC, NCDHW or DHWCN.
+ * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW or DHWCN. \n
+
 *@par Third-party framework compatibility
 * Compatible with Tensorflow's conv3d_backprop_filter
 */
@@ -1295,8 +1267,8 @@ REG_OP(Conv3DBackpropFilter)
 /**
 *@brief Computes the gradients of convolution with respect to the filter.
+
 *@par Inputs:
- * Two inputs:
 * @li x: A Tensor of type float16.
 * 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
 * or [batch, in_channels, in_depth, in_height, in_width].
@@ -1305,37 +1277,34 @@ REG_OP(Conv3DBackpropFilter)
 * or [batch, out_channels, out_depth, out_height, out_width].
 * Gradients with respect to the output of the convolution. \n

-*@par Required Attributes:
- * @li filter_size: A tuple/list of type integers. An integer vector
+*@par Attributes:
+ * @li filter_size: Required. A tuple/list of integers. An integer vector
 * representing the tensor shape of filter, where filter is a 5-D tensor
 * [filter_depth, filter_height, filter_width, in_channels, out_channels],
 * [out_channels, filter_depth, filter_height, filter_width, in_channels]
 * or [out_channels, in_channels, filter_depth, filter_height, filter_width].
- * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
 * window for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right]
- * pads on feature map. \n
-
-*@par Attributes:
- * Three attributes:
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
+ * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
+ * pads on feature map.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
 * dimension of input.
 * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li groups: Number of blocked connections from input channels to output
+ * @li groups: Optional. Number of blocked connections from input channels to output
 * channels.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
- * Defaults to "NDHWC". Specify the data format of the input and output data.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
+ * Defaults to "NDHWC". Specify the data format of the input and output data. \n

 *@par Outputs:
- * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN.
+ * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. \n
+
 *@par Third-party framework compatibility
- * Compatible with Tensorflow's conv3d_backprop_filter
+ * Compatible with Tensorflow's conv3d_backprop_filter. \n
+
 *@par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead.
+ * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead.
 */
-
-
 REG_OP(Conv3DBackpropFilterD)
     .INPUT(x, TensorType({DT_FLOAT16}))
     .INPUT(out_backprop, TensorType({DT_FLOAT16}))
@@ -1350,37 +1319,32 @@ REG_OP(Conv3DBackpropFilterD)
 /**
 *@brief Computes the transpose of convolution 3d with respect to the input.
+
 *@par Inputs:
- * Three inputs:
 * @li input_size: A Tensor of type int32. An integer vector representing the
 * shape of input.
 * @li x: A Tensor of type float16, currently does not support int8. The format
 * is NDHWC or NCDHW.
 * @li filter: A Tensor of type float16, currently does not support int8.
 * The format is NDHWC, NCDHW or DHWCN.
+ * @li bias: Optional. A 1D tensor of the same type as "x". Reserved.
+ * @li offset_w: Optional. A 1D tensor for quantized deconvolution. Reserved. \n

-*@par Optional input:
- * Two optional inputs
- * @li bias: An optional 1D tensor of the same type as "x". Reserved.
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
-
-*@par Required Attributes:
- * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
+*@par Attributes:
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
 * window for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A tuple/list of 6 integers
-
-*@par Attributes:
- * Five attributes:
- * @li groups: Number of blocked connections from input channels to output
- * channels.
- * @li dilations: A tuple/list of 5 integers,
+ * @li pads: Required. A tuple/list of 6 integers.
+ * @li dilations: Optional. A tuple/list of 5 integers,
 * The dilation factor for each dimension of input.
 * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
+ * @li groups: Optional. Number of blocked connections from input channels to output
+ * channels.
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
 * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li output_padding: The size will be added in the output shape.
- * @li offset_x: Input offset_x value. Reserved.
+ * @li output_padding: Optional. The size will be added in the output shape.
+ * @li offset_x: Optional. Input offset_x value. Reserved. \n
+
 *@par Outputs:
 * y: A Tensor. Has the same type and format as "x".
 */
 REG_OP(Conv3DTranspose)
     .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
@@ -1402,39 +1366,37 @@ REG_OP(Conv3DTranspose)
 /**
 *@brief Computes the transpose of convolution 3d with respect to the input.
+
 *@par Inputs:
 * @li x: A Tensor of type float16, currently does not support int8.
 * The format is NDHWC or NCDHW.
 * @li filter: A Tensor of type float16, currently does not support int8.
 * The format is NDHWC, NCDHW or DHWCN.
+ * @li bias: Optional. A 1D tensor of the same type as "x". Reserved.
+ * @li offset_w: Optional. A 1D tensor for quantized deconvolution. Reserved. \n

-*@par Optional inputs:
- * @li bias: An optional 1D tensor of the same type as "x". Reserved.
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
-
-*@par Required Attributes:
- * @li input_size: A tuple/list of type int32.
- * An integer vector representing the shape of input
- * @li strides: A tuple/list of 5 integers.
+*@par Attributes:
+ * @li input_size: Required. A tuple/list of type int32.
+ * An integer vector representing the shape of input.
+ * @li strides: Required. A tuple/list of 5 integers.
 * Specifies the stride of the sliding window for each dimension of "x".
 * The N and C dimensions must be 1. Has the same format as "x".
- * @li pads: A tuple/list of 6 integers . \n
-
-*@par Attributes:
- * Five attributes:
- * @li dilations: A tuple/list of 5 integers, The dilation factor for each
+ * @li pads: Required. A tuple/list of 6 integers.
+ * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
 * dimension of input.
 * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li groups: Number of blocked connections from input channels to output
+ * @li groups: Optional. Number of blocked connections from input channels to output
 * channels.
- * @li data_format: An optional string from: "NDHWC", "NCDHW".
+ * @li data_format: Optional. A string from: "NDHWC", "NCDHW".
 * Defaults to "NDHWC". Specify the data format of the input and output data.
- * @li output_padding: The size will be added in the output shape.
- * @li offset_x: Input offset_x value. Reserved.
+ * @li output_padding: Optional. The size will be added in the output shape.
+ * @li offset_x: Optional. Input offset_x value. Reserved. \n
+
 *@par Outputs:
- * y: A Tensor. Has the same type and format as "x".
+ * y: A Tensor. Has the same type and format as "x". \n
+
 *@par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
+ * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. */ REG_OP(Conv3DTransposeD) .INPUT(x, TensorType({DT_FLOAT16})) @@ -1469,17 +1431,17 @@ REG_OP(Conv3DTransposeD) * @li offset_w: An optional 1D tensor for quantized inference. Reserved. *\n *\n - * The following are the supported data types and data formats: -*@verbatim - | Tensor | x | filter | bias | y - ------------|---------|---------|---------|-------- - | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- - | | int8 | int8 | int32 | int32 - ------------|---------|---------|---------|-------- - | Format | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | NHWC -@endverbatim + * The following are the supported data types and data formats:\n + *\n + | Tensor | x | filter | bias | y\n + ------------|---------|---------|---------|--------\n + | Data Type | float16 | float16 | float16 | float16\n + | |---------|---------|---------|--------\n + | | int8 | int8 | int32 | int32\n + ------------|---------|---------|---------|--------\n + | Format | NCHW | NCHW | ND | NCHW\n + | | NHWC | HWCN | | NHWC\n + *\n * For int8, a dequant or requant operator must be followed. *\n * @@ -1504,38 +1466,38 @@ REG_OP(Conv3DTransposeD) * within the effective range of int8 [-128, 127]. Defaults to "0". *\n *\n - * The following value range restrictions must be met: -*@verbatim - | Name | Field | Scope - -------------------|----------|-------------- - | input_size | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | x (out_backprop) | H*strideH| [1, 4096] - | | W*strideW| [1, 4096] - -------------------|----------|-------------- - | filter | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | y (fmap) | H | [1, 4096] - | | W | [1, 4096] - -------------------|----------|-------------- - | Stride | H | [1, 63] - | | W | [1, 63] - -------------------|----------|-------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - -------------------|----------|-------------- - | Dilation | H | [1, 255] - | | W | [1, 255] - -------------------|----------|-------------- - | Offset_x | | [-128, 127] - -@endverbatim + * The following value range restrictions must be met:\n + *\n + | Name | Field | Scope\n + -------------------|----------|--------------\n + | input_size | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | x (out_backprop) | H*strideH| [1, 200000]\n + | | W*strideW| [1, 4096]\n + -------------------|----------|--------------\n + | filter | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | y (fmap) | H | [1, 200000]\n + | | W | [1, 4096]\n + -------------------|----------|--------------\n + | Stride | H | [1, 63]\n + | | W | [1, 63]\n + -------------------|----------|--------------\n + | Padding | Top | [0, 255]\n + | | Bottom | [0, 255]\n + | | Left | [0, 255]\n + | | Right | [0, 255]\n + -------------------|----------|--------------\n + | Dilation | H | [1, 255]\n + | | W | [1, 255]\n + -------------------|----------|--------------\n + | Offset_x | | [-128, 127]\n + *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * and filter_width > fmap_width * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 *\n * diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h 
b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 5fa40ad6..9f35e27a 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -417,7 +417,7 @@ REG_OP(PSROIPooling) *@brief Returns detection result . \n *@par Inputs: -* Four inputs, including: +* Five inputs, including: *@li rois: An NCHW tensor of type floa16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput. *@li bbox_delta: An NCHWC0 tensor of type floa16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI. *@li score: An NCHWC0 tensor of type floa16 or float32, specifying the probability of each class. Class 0 is the background class. @@ -474,7 +474,6 @@ REG_OP(FSRDetectionOutput) *@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3 *@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1 *@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold -*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". *@par Outputs: *@li out_boxnum: A tensor of type int32, specifying the number of output boxes. *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box. @@ -989,26 +988,26 @@ REG_OP(SPP) * feature map . \n *@attention Constraints: -*@li For the feature map input: -(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50. -(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60. -(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70. -(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70. -(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80. -(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80. -(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80. -(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70. -(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70. -(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70. -(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70. -(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70. -(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70. -(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70. -(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70. -(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50. -(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40. -(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40. -(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40. +* For the feature map input: +*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50. +*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60. +*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70. +*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80. +*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80. 
+*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
+*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
+*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
+*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
+*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
+*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
@@ -1429,9 +1428,9 @@ REG_OP(NormalizeBBox)
 * @li anchors: A Tensor. Must be int32.
 *
 *@par Attributes:
-* @li scales: optional, listfloat, .
+* @li scales: optional, listfloat.
 * @li decode_clip: optional, float, threahold of decode process.
-* @li reversed_boxes: optional, bool,.
+* @li reversed_boxes: optional, bool.
 *
 *@par Outputs:
 * y: A Tensor. Must have the same type as box_predictions.
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index b44c0780..10047d55 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -127,9 +127,10 @@ REG_OP(SoftmaxGrad)
 *@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n

 *@par Inputs:
-* Two inputs, including:
+* Three inputs, including:
 *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value.
-*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n
+*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value .
+*@li dout: A multi-dimensional Tensor of type float16 or float32, specifying the gradient transferred from the upper layer. \n

 *@par Outputs:
 *loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n
@@ -1198,13 +1199,11 @@ REG_OP(INInferV2D)
 * @li epsilon: An attribute of type Float. \n

 * @par Outputs:
-*Three outputs, including:
+* Three outputs, including:
 * @li y: A Tensor. Has the same type as "x". \n
 * @li mean: A Tensor. Has the same type as "x". \n
 * @li variance: A Tensor. Has the same type as "x". \n

-* @par Third-party framework compatibility
-* Can be used by onnx InstanceNormalization
 */
REG_OP(InstanceNorm)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1218,24 +1217,22 @@ REG_OP(InstanceNorm)
    .OP_END_FACTORY_REG(InstanceNorm)

/**
-*@brief InstanceNormGrad operator interface implementation.
+* @brief InstanceNormGrad operator interface implementation.

-*@par Inputs:
-*Five inputs, including:
+* @par Inputs:
+* Five inputs, including:
 * @li dy: A Tensor. Must be one of the following types: float16, float32.
 * @li x: A Tensor. Must be one of the following types: float16, float32.
 * @li variance: A Tensor. Must be one of the following types: float16, float32.
 * @li mean: A Tensor. Must be one of the following types: float16, float32.
 * @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

-*@par Outputs:
-*Three outputs, including:
+* @par Outputs:
+* Three outputs, including:
 * @li pd_x: A Tensor. Must be one of the following types: float16, float32.
 * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
 * @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
REG_OP(InstanceNormGrad)
    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1249,58 +1246,6 @@ REG_OP(InstanceNormGrad)
    .OP_END_FACTORY_REG(InstanceNormGrad)

/**
-*@brief InstanceNormXBackprop operator interface implementation.
-
-*@par Inputs:
-*Five inputs, including:
-* @li dy: A Tensor. Must be one of the following types: float16, float32.
-* @li x: A Tensor. Must be one of the following types: float16, float32.
-* @li variance: A Tensor. Must be one of the following types: float16, float32.
-* @li mean: A Tensor. Must be one of the following types: float16, float32.
-* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n
-
-*@par Outputs:
-*Two outputs, including:
-* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
-* @li res_for_gamma: A Tensor. Must be one of the following types: float32.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
-*/
-REG_OP(InstanceNormXBackprop)
-    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
-    .OP_END_FACTORY_REG(InstanceNormXBackprop)
-
-/**
-*@brief InstanceNormBetaGammaBackprop operator interface implementation.
-
-*@par Inputs:
-*Two inputs, including:
-* @li dy: A Tensor. Must be one of the following types: float16, float32.
-* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n
-
-*@par Outputs:
-*Two outputs, including:
-* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
-* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
-*/
-REG_OP(InstanceNormBetaGammaBackprop)
-    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(res_for_gamma, TensorType({DT_FLOAT}))
-    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop)
-
-/**
 * @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n

 * @par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index 80a21333..31cede4f 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -50,6 +50,7 @@ namespace ge {
 *dilation[2]: An optional int32, specifying the left dilation. Defaults to "1".
 *dilation[3]: An optional int32, specifying the right dilation. Defaults to "1".
 *@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0".
+*@li data_format: An optional string, specifying the data format of the input and output data. Defaults to "NCHW".

 *@par Outputs:
 *y: An NCHW tensor of type float16, float32, int32.
 *@attention Constraints:
@@ -635,7 +636,8 @@ REG_OP(MaxPoolV2)
 *@li strides: A required list of int8, int16, int32, or int64 values,
 * specifying the stride of the sliding window for each dimension of
 * the input tensor. No default value.
-*@li padding: A required string. No default value . \n
+*@li padding: A required string. No default value .
+*@li Targmax: An optional int with default value 7 . \n

 *@par Outputs:
 *@li y: A Tensor. Has the same type and format as input "x".
@@ -645,7 +647,7 @@ REG_OP(MaxPoolV2)
 * ksize[1] * ksize[2] <= 255.
 *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
 * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
-*@li "padding" is either "SAME" or "VALID" . \n
+*@li "padding" is either "SAME" or "VALID" .

 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator MaxPoolWithArgmax.
@@ -710,14 +712,15 @@ REG_OP(MaxPoolGradWithArgmax)
 *@brief Performs transform mask to argmax . \n

 *@par Inputs:
-* Two input:
-*x: An NC1HWC0 Tensor of type float16.
-*mask: An NC1HWC0 Tensor of type uint16 . \n
+* Two inputs:
+*@li x: An NC1HWC0 Tensor of type float16.
+*@li mask: An NC1HWC0 Tensor of type uint16 . \n

 *@par Attributes:
 *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
 *@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
-*@li padding: A required string. No default value . \n
+*@li padding: A required string. No default value .
+*@li originshape: A required list of int8, int16, int32, or int64 values. No default value. \n

 *@par Outputs:
 *argmax: An NC1HWC0 Tensor of type int32 . \n
@@ -931,11 +934,11 @@ REG_OP(AvgPoolV2GradD)
    .OP_END_FACTORY_REG(AvgPoolV2GradD)

/**
-*@brief :upsample the layer
+*@brief Upsamples the layer, similar to the nearest-neighbor scaling algorithm.

 *@par Inputs:
 * one input, including:
-*@li x: A tensor of type float16 or float32.
+* x: A tensor of type float16 or float32.

 *@par Attributes:
 *@li scale: A optional float32, scale factor of x. Defaults to "1.0".
 *@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2".
@@ -1419,7 +1422,7 @@ REG_OP(MaxPoolV3)
 * the floor function will be used. Default False \n

 * @par Outputs:
-* y: A mutable tensor. Has the same shape and type as "x1" . \n
+* out_grad: A mutable tensor. Has the same shape and type as "x1" . \n

 * @attention Constraints:
 * @li Computing gradients of global pooling is not supported, which means
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index ca1c24eb..b9df706b 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -28,8 +28,8 @@ namespace ge {
 *@brief Computes the for the gelu of "x" . \n

 *@par Inputs:
-*Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32

 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
@@ -66,8 +66,8 @@ REG_OP(GeluGrad)
 *@brief Computes the for the fast_gelu of "x" . \n

 *@par Inputs:
-*Two inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32
+*One input, including:
+*x: A Tensor.
Must be one of the following types: float16, float32

 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
@@ -83,7 +83,7 @@ REG_OP(FastGelu)
 *@brief Computes the gradient for the fast_gelu of "x" . \n

 *@par Inputs:
-*Three inputs, including:
+*Two inputs, including:
 * @li dy: A Tensor. Must be one of the following types: float16, float32
 * @li x: A Tensor of the same type as "dy" . \n

diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index b65a68f1..afa3bb45 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -505,15 +505,15 @@ REG_OP(RandomChoiceWithMask)

 *@par Inputs:
 *Inputs including:
-* @li x: A required Tensor. Must be one of the following types:
- float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
+* x: A required Tensor. Must be one of the following types:
+ float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n

 *@par Attributes:
-*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n
+* group: An optional int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n

 *@par Outputs:
-*y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
- float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
+* y: A Tensor. Has the same type and shape as "x". Must be one of the following types:
+ float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n

 *@attention Constraints:
 *@li "group" must be greater than 0 and must evenly divide the channel dimension size.
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 97c7b8e1..5ccac582 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -576,7 +576,7 @@ REG_OP(ReduceAll)
 *@li axis: A mutable Tensor. The dimensions to reduce . \n

 *@par Attributes:
-*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n
+*keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n

 *@par Outputs:
 *y: A Tensor. Has the same type and format as input "x" . \n
@@ -967,9 +967,9 @@ REG_OP(EuclideanNormD)
 Defaults to "0.00001" . \n

 *@par Outputs:
-*y: A Tensor of type float16 or float32 for the normalized "x".
-*batch_mean: A Tensor of type float32 for the result mean.
-*batch_ variance: A Tensor of type float32 for the result variance . \n
+*@li y: A Tensor of type float16 or float32 for the normalized "x".
+*@li batch_mean: A Tensor of type float32 for the result mean.
+*@li batch_variance: A Tensor of type float32 for the result variance . \n

 *@attention Constraints:
 *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction.
@@ -987,7 +987,7 @@ REG_OP(INInferV2)
    .OP_END_FACTORY_REG(INInferV2)

/**
-*@brief Performs reduced instance normalization . \n
+*@brief Performs the reduction step of instance normalization. \n

 *@par Inputs:
 *x: A Tensor of type float16 or float32. \n
@@ -1008,32 +1008,31 @@ REG_OP(INTrainingReduceV2)


/**
-*@brief Performs update instance normalization . \n
+*@brief Performs the update step of instance normalization. \n

 *@par Inputs:
-* Seven inputs, including: (NC1HWC0supported)
+* Seven inputs, including:
 *@li x: A Tensor of type float16 or float32.
 *@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
 *@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
 *@li gamma: A Tensor of type float32, for the scaling gamma.
 *@li beta: A Tensor of type float32, for the scaling beta.
 *@li mean: A Tensor of type float32, for the updated mean.
-*@li variance: A Tensor of type float32, for the updated variance . \n
+*@li variance: A Tensor of type float32, for the updated variance. \n

 *@par Attributes:
 *@li momentum: A required float32, specifying the momentum to update mean and var.
-*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n
+*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n

 *@par Outputs:
 * Three outputs
 *@li y: A Tensor of type float16 or float32, for normalized "x".
 *@li batch_mean: A Tensor of type float32, for the updated mean.
-*@li batch_variance: A Tensor of type float32, for the updated variance . \n
+*@li batch_variance: A Tensor of type float32, for the updated variance. \n

 *@attention Constraints:
-*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
+* This operator is an InstanceNorm fusion operator for updating the moving averages for training.
 * This operator is used in conjunction with INTrainingReduceV2.
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
 */
REG_OP(INTrainingUpdateV2)
    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -1052,6 +1051,80 @@ REG_OP(INTrainingUpdateV2)


/**
+*@brief Performs the backpropagation of InstanceNorm. \n
+
+*@par Inputs:
+* Seven inputs, including:
+*@li dy: A Tensor of type float16 or float32.
+*@li x: A Tensor of type float16 or float32.
+*@li variance: A Tensor of type float32, for the variance of "x".
+*@li mean: A Tensor of type float32, for the mean of "x".
+*@li res_gamma: A Tensor of type float32.
+*@li res_beta: A Tensor of type float32.
+*@li gamma: A Tensor of type float32. \n
+
+*@par Outputs:
+*pd_x: A Tensor of type float16 or float32, for the gradient of "x". \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be INTrainingUpdateGrad. \n
+*/
+REG_OP(INTrainingReduceGrad)
+    .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .INPUT(res_gamma, TensorType({DT_FLOAT}))
+    .INPUT(res_beta, TensorType({DT_FLOAT}))
+    .INPUT(gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OP_END_FACTORY_REG(INTrainingReduceGrad)
+
+/**
+*@brief Performs the backpropagation of InstanceNorm. \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li dy: A Tensor of type float16 or float32, for the gradient.
+*@li x: A Tensor of type float16 or float32.
+*@li variance: A Tensor of type float32, for the variance of "x".
+*@li mean: A Tensor of type float32, for the mean of "x". \n
+
+*@par Outputs:
+*@li res_gamma: A Tensor of type float32.
+*@li res_beta: A Tensor of type float32.
\n
+
+*/
+REG_OP(INTrainingUpdateGrad)
+    .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .OUTPUT(res_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(res_beta, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(INTrainingUpdateGrad)
+
+/**
+*@brief Performs the backpropagation of InstanceNorm. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li res_gamma: A Tensor of type float32.
+*@li res_beta: A Tensor of type float32. \n
+
+*@par Outputs:
+*@li pd_gamma: A Tensor of type float32.
+*@li pd_beta: A Tensor of type float32. \n
+
+*/
+REG_OP(INTrainingUpdateGradGammaBeta)
+    .INPUT(res_gamma, TensorType({DT_FLOAT}))
+    .INPUT(res_beta, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta)
+
+/**
 *@brief Performs reduced group normalization . \n

 *@par Inputs:
@@ -1063,7 +1136,7 @@ REG_OP(INTrainingUpdateV2)


 *@par Attributes:
-*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n
+*num_groups: A required Int, specifying the number of groups. Must be the same as in GNTrainingUpdate . \n

 *@attention Constraints:
 * This operator is a GroupNorm fusion operator for updating the moving averages for training.
@@ -1081,7 +1154,7 @@ REG_OP(GNTrainingReduce)
 *@brief Performs update group normalization . \n

 *@par Inputs:
-* Eight inputs, including: (NCHW NHWC supported)
+* Seven inputs, including: (NCHW and NHWC supported)
 *@li x: A Tensor of type float16 or float32.
 *@li sum: A 5D Tensor of type float32,
 shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 80546860..bad3f790 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -491,7 +491,6 @@ REG_OP(DynamicLSTMV2)
 *ten inputs: \n
 *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -508,6 +507,7 @@ REG_OP(DynamicLSTMV2)
 *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dgate: A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*/ REG_OP(LSTMInputGrad) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -571,13 +571,13 @@ REG_OP(DynamicLSTMGradCell) .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(forget_bias, Float, 1) - .ATTR(activation, String, "") - .ATTR(direction, String, "Forward") + .ATTR(forget_bias, Float, 1.0) + .ATTR(activation, String, "tanh") + .ATTR(direction, String, "UNIDIRECTIONAL") .ATTR(gate_order, String, "ijfo") .OP_END_FACTORY_REG(DynamicLSTMGradCell) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 1c26e033..4b76fa66 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1968,17 +1968,14 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride . \n +*@brief Read data by stride. *@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n - -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride . \n +*x: A Tensor. Must be one of the following types: float16, int8. \n *@par Attributes: -*@li stride: A required int32, specifying the value of reading stride . \n +*@li axis: A required int32, specifying the index of axis to read by stride. \n +*@li stride: A required int32, specifying the value of reading stride. \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -1991,16 +1988,14 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief: Write data by stride . \n +*@brief Write data by stride. *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8 . \n - -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride . \n +*x: A Tensor. Must be one of the following types: float16, int8. \n *@par Attributes: -*@li stride: A required int32, specifying the value of writing stride . \n +*@li axis: A required int32, specifying the index of axis to write by stride. \n +*@li stride: A required int32, specifying the value of writing stride. \n *@par Outputs: *y: A Tensor. Has the same type as "x". diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index fe25a46f..98d4d111 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -161,14 +161,11 @@ REG_OP(SplitVD) /** *@brief Concatenates a list of N tensors along the first dimension. *@par Inputs: -* Two inputs, including: -* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, +* One input, including: +* values: A list of Tensors. Must be one of the following types: int8, int16, int32, * int64, uint8, uint16, uint32, uint64, float16, float32. * Tensors to be concatenated. All must have size 1 in the first dimension and same shape. -* It's a dynamic input. -* @li shape: A Tensor of the same type as "x". -* The final shape of the result. Should be equal to the shapes of any input -* but with the number of input values in the first dimension . \n +* It's a dynamic input. \n *@par Attributes: * @li shape: A required list of ints. 
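A usage note on the selection_ops changes above: the revised StridedRead doc now lists "axis" and "stride" as its two required attributes. Below is a minimal sketch, not part of this patch, of wiring those attributes through GE's generated operator classes. The set_input_*/set_attr_* helpers are the ones REG_OP conventionally generates; the header paths, the Data source op, the graph names, and the attribute values are illustrative assumptions.

// Illustrative sketch only, not part of this patch. Assumes the
// set_input_*/set_attr_* helpers conventionally generated by REG_OP;
// header paths and all names/values below are hypothetical.
#include "all_ops.h"        // assumed location of the generated op classes
#include "graph/graph.h"

ge::Graph BuildStridedReadGraph() {
  auto x = ge::op::Data("x");                   // float16 or int8 input, per the doc above
  auto read = ge::op::StridedRead("strided_read");
  read.set_input_x(x);
  read.set_attr_axis(1);                        // required: index of the axis to read by stride
  read.set_attr_stride(16);                     // required: reading stride
  ge::Graph graph("strided_read_graph");
  graph.SetInputs({x}).SetOutputs({read});
  return graph;
}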
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 4a46e35f..40fcf911 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -909,7 +909,7 @@ output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this o *@li either pool_strings or pool_int64s attributes must be present but not both. */ -REG_OP(TfidVectorizer) +REG_OP(TfIdfVectorizer) .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING})) .OUTPUT(output, TensorType({DT_FLOAT})) .REQUIRED_ATTR(max_gram_length, Int) @@ -921,7 +921,7 @@ REG_OP(TfidVectorizer) .ATTR(pool_int64s, ListInt, {}) .ATTR(pool_strings, ListString, {}) .ATTR(weights, ListFloat, {}) - .OP_END_FACTORY_REG(TfidVectorizer) + .OP_END_FACTORY_REG(TfIdfVectorizer) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h index 597ee8fa..bae5a54d 100755 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -181,5 +181,4 @@ RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t st #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif - #endif // __CCE_RUNTIME_FFTS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index a5befa3b..a7618b45 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -50,7 +50,6 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, RT_MODEL_TASK_STREAM_LABEL_GOTO, RT_MODEL_TASK_MODEL_EXIT, - RT_MODEL_TASK_FFTS_TASK, RT_MODEL_TASK_ALL_KERNEL, RT_MODEL_TASK_PROFILER_TRACE_EX, RT_MODEL_TASK_FFTS_TASK,
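The closing rt_model.h hunk drops the earlier of the two RT_MODEL_TASK_FFTS_TASK entries and leaves the one at the end of the list; two enumerators with the same name in one enum would not even compile. One side effect worth calling out: if tagModelTaskType relies on implicit numbering, deleting a mid-enum entry shifts every later enumerator down by one, so any consumer that persists or transmits raw task-type values must be rebuilt against the new header. A self-contained sketch with hypothetical names:

// Illustrative sketch only (hypothetical enum names): removing an entry from
// the middle of an implicitly numbered enum renumbers every later entry.
#include <iostream>

enum DemoTaskBefore { DEMO_EXIT, DEMO_FFTS, DEMO_ALL_KERNEL };  // DEMO_ALL_KERNEL == 2
enum DemoTaskAfter { DEMO_EXIT2, DEMO_ALL_KERNEL2 };            // DEMO_ALL_KERNEL2 == 1

int main() {
  std::cout << DEMO_ALL_KERNEL << " " << DEMO_ALL_KERNEL2 << std::endl;  // prints "2 1"
  return 0;
}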