
!1882 code_sync_0626_inc

Merge pull request !1882 from mindspore_ding/code_sync_0626
tags/v1.3.0
i-robot committed 4 years ago
commit b4b7c454e6
32 changed files with 1669 additions and 1347 deletions
  1. inc/external/acl/acl.h (+5 -5)
  2. inc/external/acl/acl_base.h (+49 -42)
  3. inc/external/acl/acl_mdl.h (+193 -168)
  4. inc/external/acl/acl_op.h (+82 -37)
  5. inc/external/acl/acl_op_compiler.h (+33 -21)
  6. inc/external/acl/acl_prof.h (+60 -23)
  7. inc/external/acl/acl_rt.h (+80 -62)
  8. inc/external/acl/acl_tdt.h (+19 -12)
  9. inc/external/acl/error_codes/rt_error_codes.h (+75 -75)
  10. inc/external/acl/ops/acl_cblas.h (+138 -41)
  11. inc/external/acl/ops/acl_dvpp.h (+261 -164)
  12. inc/external/acl/ops/acl_fv.h (+6 -8)
  13. inc/external/hccl/hccl.h (+19 -20)
  14. inc/external/hccl/hccl_types.h (+42 -42)
  15. inc/external/runtime/rt_error_codes.h (+75 -75)
  16. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+10 -12)
  17. third_party/fwkacllib/inc/ops/image_ops.h (+7 -3)
  18. third_party/fwkacllib/inc/ops/math_ops.h (+40 -38)
  19. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+12 -8)
  20. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+304 -342)
  21. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+23 -24)
  22. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+9 -64)
  23. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+12 -9)
  24. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+5 -5)
  25. third_party/fwkacllib/inc/ops/random_ops.h (+5 -5)
  26. third_party/fwkacllib/inc/ops/reduce_ops.h (+87 -14)
  27. third_party/fwkacllib/inc/ops/rnn.h (+5 -5)
  28. third_party/fwkacllib/inc/ops/selection_ops.h (+8 -13)
  29. third_party/fwkacllib/inc/ops/split_combination_ops.h (+3 -6)
  30. third_party/fwkacllib/inc/ops/transformation_ops.h (+2 -2)
  31. third_party/fwkacllib/inc/runtime/rt_ffts.h (+0 -1)
  32. third_party/fwkacllib/inc/runtime/rt_model.h (+0 -1)

inc/external/acl/acl.h (+5 -5)

@@ -26,9 +26,9 @@ extern "C" {
#endif

// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
* @ingroup AscendCL
@@ -72,11 +72,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg();

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_
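
Usage sketch for the version APIs above (include path, setup, and error handling are illustrative, not taken from this diff):

#include <stdio.h>
#include "acl/acl.h"

int main(void) {
    int32_t major = 0, minor = 0, patch = 0;
    /* Compare the headers' compile-time version with the runtime actually loaded. */
    if (aclrtGetVersion(&major, &minor, &patch) != ACL_SUCCESS) {
        fprintf(stderr, "aclrtGetVersion failed: %s\n", aclGetRecentErrMsg());
        return 1;
    }
    printf("headers %d.%d.%d, runtime %d.%d.%d\n",
           ACL_MAJOR_VERSION, ACL_MINOR_VERSION, ACL_PATCH_VERSION,
           major, minor, patch);
    return 0;
}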

inc/external/acl/acl_base.h (+49 -42)

@@ -136,49 +136,50 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005;
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
} aclDataType;

typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
} aclFormat;

typedef enum {
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
} aclLogLevel;

typedef enum {
ACL_MEMTYPE_DEVICE = 0,
ACL_MEMTYPE_HOST = 1,
} aclMemType;


/**
* @ingroup AscendCL
* @brief Converts data of type aclFloat16 to data of type float
@@ -311,7 +312,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
* @retval aclTensorDesc pointer.
* @retval nullptr if param is invalid or run out of memory
*/
-ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
+ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
+int numDims,
+const int64_t *dims,
aclFormat format);

/**
@@ -333,7 +336,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
+ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc,
+size_t dimsCount,
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);

/**
@@ -430,7 +434,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
+size_t index,
+size_t dimRangeNum,
int64_t *dimRange);

/**
@@ -467,7 +473,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
aclTensorDesc **dstDesc);

/**
* @ingroup AscendCL
@@ -555,7 +561,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu
*
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

/**
@@ -566,7 +572,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc,
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

/**
@@ -618,7 +624,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy
* @param ... [IN] the value of current log
*/
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);

/**
* @ingroup AscendCL
@@ -626,13 +632,14 @@ ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY const char *aclrtGetSocName();

-#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+#define ACL_APP_LOG(level, fmt, ...) \
+aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_
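
A short sketch of the descriptor APIs reflowed above: create a descriptor, attach a shape range for a dynamic batch dimension, then destroy it (include path assumed):

#include "acl/acl_base.h"

void describe_input(void) {
    int64_t dims[4] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    if (desc == NULL) {
        return;  /* invalid param or out of memory, per the doc comment above */
    }
    /* One [min, max] pair per dimension; here only the batch dim is dynamic. */
    int64_t range[4][ACL_TENSOR_SHAPE_RANGE_NUM] = {
        {1, 8}, {3, 3}, {224, 224}, {224, 224}};
    (void)aclSetTensorShapeRange(desc, 4, range);
    aclDestroyTensorDesc(desc);
}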

inc/external/acl/acl_mdl.h (+193 -168)

@@ -27,19 +27,19 @@
extern "C" {
#endif

#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
#define ACL_MDL_LOAD_FROM_FILE 1
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
#define ACL_MDL_LOAD_FROM_MEM 3
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6

#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
@@ -52,123 +52,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo;
typedef struct aclmdlConfigHandle aclmdlConfigHandle;

typedef enum {
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
} aclAippInputFormat;

typedef enum {
ACL_MDL_PRIORITY_INT32 = 0,
ACL_MDL_LOAD_TYPE_SIZET,
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
ACL_MDL_MEM_SIZET,
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
ACL_MDL_WEIGHT_SIZET,
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */
ACL_MDL_WORKSPACE_SIZET,
ACL_MDL_INPUTQ_NUM_SIZET,
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
ACL_MDL_OUTPUTQ_NUM_SIZET,
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
} aclmdlConfigAttr;

typedef enum {
ACL_DATA_WITHOUT_AIPP = 0,
ACL_DATA_WITH_STATIC_AIPP,
ACL_DATA_WITH_DYNAMIC_AIPP,
ACL_DYNAMIC_AIPP_NODE
} aclmdlInputAippType;

typedef struct aclmdlIODims {
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
} aclmdlIODims;

typedef struct aclAippDims {
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
} aclAippDims;

typedef struct aclmdlBatch {
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
} aclmdlBatch;

typedef struct aclmdlHW {
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
} aclmdlHW;

typedef struct aclAippInfo {
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
} aclAippInfo;

/**
@@ -292,7 +292,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc,
+ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset,
+aclTensorDesc *tensorDesc,
size_t index);

/**
@@ -354,7 +355,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
+uint32_t *modelId);

/**
* @ingroup AscendCL
@@ -376,8 +378,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
-size_t workSize, void *weightPtr, size_t weightSize);
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
+uint32_t *modelId, void *workPtr, size_t workSize,
+void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -400,9 +403,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
-void *workPtr, size_t workSize, void *weightPtr,
-size_t weightSize);
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize,
+uint32_t *modelId, void *workPtr, size_t workSize,
+void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -437,8 +440,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
-const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
-size_t outputQNum);
+const uint32_t *inputQ, size_t inputQNum,
+const uint32_t *outputQ, size_t outputQNum);

/**
* @ingroup AscendCL
@@ -468,8 +471,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem
*/
-ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
-aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input,
+aclmdlDataset *output, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -644,7 +647,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc,
* @param modelDesc [IN] model description
* @param opName [IN] op name
* @param attr [IN] attr name
*
* @retval the attr value
*/
ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
@@ -856,11 +859,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
-int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
-int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
-int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch,
+int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2,
+int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2,
+int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);
@@ -876,7 +879,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);

/**
@@ -890,7 +893,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);

/**
@@ -905,7 +908,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
int32_t srcImageSizeH);

@@ -925,10 +928,14 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
-int32_t scfInputSizeH, int32_t scfOutputSizeW,
-int32_t scfOutputSizeH, uint64_t batchIndex);
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
+int8_t scfSwitch,
+int32_t scfInputSizeW,
+int32_t scfInputSizeH,
+int32_t scfOutputSizeW,
+int32_t scfOutputSizeH,
+uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -946,9 +953,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
-int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
+int8_t cropSwitch,
+int32_t cropStartPosW,
+int32_t cropStartPosH,
+int32_t cropSizeW,
+int32_t cropSizeH,
uint64_t batchIndex);

/**
@@ -967,7 +978,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
int32_t paddingSizeTop, int32_t paddingSizeBottom,
int32_t paddingSizeLeft, int32_t paddingSizeRight,
@@ -988,10 +999,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
-int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
-int16_t dtcPixelMeanChn3, uint64_t batchIndex);
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
+int16_t dtcPixelMeanChn0,
+int16_t dtcPixelMeanChn1,
+int16_t dtcPixelMeanChn2,
+int16_t dtcPixelMeanChn3,
+uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1008,10 +1022,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
-float dtcPixelMinChn1, float dtcPixelMinChn2,
-float dtcPixelMinChn3, uint64_t batchIndex);
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
+float dtcPixelMinChn0,
+float dtcPixelMinChn1,
+float dtcPixelMinChn2,
+float dtcPixelMinChn3,
+uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1028,10 +1045,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
-float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
-float dtcPixelVarReciChn3, uint64_t batchIndex);
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
+float dtcPixelVarReciChn0,
+float dtcPixelVarReciChn1,
+float dtcPixelVarReciChn2,
+float dtcPixelVarReciChn3,
+uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1047,8 +1067,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
+aclmdlDataset *dataset,
+size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1065,8 +1087,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId,
+aclmdlDataset *dataset,
+size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1084,8 +1108,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
-ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
+ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId,
+size_t index,
+aclmdlInputAippType *type,
size_t *dynamicAttachedDataIndex);

/**
@@ -1102,7 +1128,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);

/**
@@ -1121,11 +1147,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind
*
* @retval ACL_SUCCESS The function is successfully executed
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
-char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
-size_t *numInputs, aclTensorDesc **outputDesc,
-size_t *numOutputs);
+ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId,
+uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs,
+aclTensorDesc **outputDesc, size_t *numOutputs);

/**
* @ingroup AscendCL
@@ -1133,7 +1158,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlInitDump();

/**
@@ -1144,7 +1169,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);

/**
@@ -1153,7 +1178,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();

/**
@@ -1165,7 +1190,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);

/**
@@ -1175,7 +1200,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand
* @retval the aclmdlConfigHandle pointer
*
* @see aclmdlDestroyConfigHandle
*/
ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();

/**
@@ -1204,7 +1229,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
const void *attrValue, size_t valueSize);

/**
* @ingroup AscendCL
@@ -1222,4 +1247,4 @@ ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelD
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_
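
A minimal load-and-run sketch for the loader/executor signatures reflowed above. aclmdlUnload and the dataset setup are assumed from unchanged parts of this header; error handling is abbreviated:

#include "acl/acl_mdl.h"

aclError run_once(const char *omPath, aclmdlDataset *input, aclmdlDataset *output) {
    uint32_t modelId = 0;
    aclError ret = aclmdlLoadFromFile(omPath, &modelId);  /* ACL_MDL_LOAD_FROM_FILE path */
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclmdlExecute(modelId, input, output);  /* synchronous variant */
    (void)aclmdlUnload(modelId);                  /* assumed cleanup call */
    return ret;
}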

inc/external/acl/acl_op.h (+82 -37)

@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length);
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

typedef enum aclEngineType {
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
} aclopEngineType;

/**
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
const uint8_t *values);

/**
* @ingroup AscendCL
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
const int64_t *values);

/**
* @ingroup AscendCL
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
const float *values);

/**
* @ingroup AscendCL
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
const char **values);

/**
* @ingroup AscendCL
@@ -208,8 +208,11 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
-const int *numValues, const int64_t *const values[]);
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
+const char *attrName,
+int numLists,
+const int *numValues,
+const int64_t *const values[]);

/**
* @ingroup AscendCL
@@ -239,10 +242,15 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
-ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
-const aclDataBuffer *const inputs[], int numOutputs,
-const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
-const aclopAttr *attr, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
+int numInputs,
+const aclTensorDesc *const inputDesc[],
+const aclDataBuffer *const inputs[],
+int numOutputs,
+const aclTensorDesc *const outputDesc[],
+aclDataBuffer *const outputs[],
+const aclopAttr *attr,
+aclrtStream stream);

/**
* @ingroup AscendCL
@@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
-aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
-aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
+int numInputs,
+aclTensorDesc *inputDesc[],
+aclDataBuffer *inputs[],
+int numOutputs,
+aclTensorDesc *outputDesc[],
+aclDataBuffer *outputs[],
+aclopAttr *attr,
+aclrtStream stream);

/**
* @ingroup AscendCL
@@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
-const aclTensorDesc *const inputDesc[], int numOutputs,
-const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
+int numInputs,
+const aclTensorDesc *const inputDesc[],
+int numOutputs,
+const aclTensorDesc *const outputDesc[],
+const aclopAttr *opAttr,
aclopHandle **handle);

/**
@@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
*
* @see aclopCreateHandle | aclCreateDataBuffer
*/
-ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
-const aclDataBuffer *const inputs[], int numOutputs,
-aclDataBuffer *const outputs[], aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
+int numInputs,
+const aclDataBuffer *const inputs[],
+int numOutputs,
+aclDataBuffer *const outputs[],
+aclrtStream stream);

/**
* @ingroup AscendCL
@@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
-const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
+ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
+const aclDataBuffer *srcBuffer,
+const aclTensorDesc *dstDesc,
+aclDataBuffer *dstBuffer,
+uint8_t truncate,
aclrtStream stream);

/**
@@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
+ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
+aclTensorDesc *dstDesc,
+uint8_t truncate,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac
*
* @see aclopCompile
*/
-ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
-void *binData, int binSize, aclopEngineType enginetype,
+ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
+const char *kernelId,
+const char *kernelName,
+void *binData,
+int binSize,
+aclopEngineType enginetype,
aclDataDeallocator deallocator);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
-const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+typedef aclError (*aclopCompileFunc)(int numInputs,
+const aclTensorDesc *const inputDesc[],
+int numOutputs,
+const aclTensorDesc *const outputDesc[],
+const aclopAttr *opAttr,
aclopKernelDesc *aclopKernelDesc);

/**
@@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
-const void *args, uint32_t argSize);
+ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
+const char *kernelId,
+uint32_t blockDim,
+const void *args,
+uint32_t argSize);

/**
* @ingroup AscendCL
@@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
-const aclTensorDesc *const inputDesc[], int numOutputs,
-const aclTensorDesc *const outputDesc[], const aclopAttr *attr);
+ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
+int numInputs,
+const aclTensorDesc *const inputDesc[],
+int numOutputs,
+const aclTensorDesc *const outputDesc[],
+const aclopAttr *attr);

/**
* @ingroup AscendCL
@@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
-aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
+ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
+int numInputs,
+aclTensorDesc *inputDesc[],
+aclDataBuffer *inputs[],
+int numOutputs,
+aclTensorDesc *outputDesc[],
aclopAttr *attr);


#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_
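
Sketch of the single-op cast path whose signature was rewrapped above (FP32 to FP16 on a stream; buffers are assumed to be device memory wrapped with aclCreateDataBuffer):

#include "acl/acl_op.h"

aclError cast_fp32_to_fp16(aclDataBuffer *srcBuf, aclDataBuffer *dstBuf,
                           int64_t *dims, int numDims, aclrtStream stream) {
    aclTensorDesc *src = aclCreateTensorDesc(ACL_FLOAT, numDims, dims, ACL_FORMAT_ND);
    aclTensorDesc *dst = aclCreateTensorDesc(ACL_FLOAT16, numDims, dims, ACL_FORMAT_ND);
    /* truncate = 0; exact semantics of the flag are inferred from the parameter name only */
    aclError ret = aclopCast(src, srcBuf, dst, dstBuf, 0, stream);
    aclDestroyTensorDesc(src);
    aclDestroyTensorDesc(dst);
    return ret;
}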

inc/external/acl/acl_op_compiler.h (+33 -21)

@@ -24,22 +24,28 @@
extern "C" {
#endif

-typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;
+typedef enum aclCompileType {
+ACL_COMPILE_SYS,
+ACL_COMPILE_UNREGISTERED
+} aclopCompileType;

typedef enum {
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR,
ACL_OP_PERFORMANCE_MODE
} aclCompileOpt;

-typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;
+typedef enum aclCompileFlag {
+ACL_OP_COMPILE_DEFAULT,
+ACL_OP_COMPILE_FUZZ
+} aclOpCompileFlag;

/**
* @ingroup AscendCL
@@ -59,10 +65,15 @@ typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclO
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
-int numOutputs, const aclTensorDesc *const outputDesc[],
-const aclopAttr *attr, aclopEngineType engineType,
-aclopCompileType compileFlag, const char *opPath);
+ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
+int numInputs,
+const aclTensorDesc *const inputDesc[],
+int numOutputs,
+const aclTensorDesc *const outputDesc[],
+const aclopAttr *attr,
+aclopEngineType engineType,
+aclopCompileType compileFlag,
+const char *opPath);

/**
* @ingroup AscendCL
@@ -85,10 +96,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
-const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
-int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
-aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
+int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
+const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag,
+const char *opPath, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -118,4 +130,4 @@ ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
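
Sketch combining the enums and the rewrapped aclopCompile above: switch on fuzz compilation, then compile one op from the built-in op store (the NULL attr and opPath arguments are assumptions made for brevity):

#include "acl/acl_op_compiler.h"

aclError compile_cast(aclTensorDesc *inDesc, aclTensorDesc *outDesc) {
    (void)aclopSetCompileFlag(ACL_OP_COMPILE_FUZZ);
    const aclTensorDesc *inputs[1] = {inDesc};
    const aclTensorDesc *outputs[1] = {outDesc};
    return aclopCompile("Cast", 1, inputs, 1, outputs,
                        NULL,            /* attr: none in this sketch */
                        ACL_ENGINE_SYS,  /* engine enum from acl_op.h */
                        ACL_COMPILE_SYS, /* built-in op store */
                        NULL);           /* opPath: assumed unused for ACL_COMPILE_SYS */
}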

inc/external/acl/acl_prof.h (+60 -23)

@@ -23,30 +23,37 @@
extern "C" {
#endif

#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

/**
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead
*/
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

typedef enum {
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

typedef enum {
ACL_STEP_START = 0, // step start
ACL_STEP_END = 1 // step end
} aclprofStepTag;


typedef struct aclprofConfig aclprofConfig;
typedef struct aclprofStopConfig aclprofStopConfig;
typedef struct aclprofAicoreEvents aclprofAicoreEvents;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
typedef struct aclprofStepInfo aclprofStepInfo;

/**
* @ingroup AscendCL
@@ -101,8 +108,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
* @see aclprofDestroyConfig
*/
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
-aclprofAicoreMetrics aicoreMetrics,
-aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
+aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

/**
* @ingroup AscendCL
@@ -142,7 +148,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
*
* @see aclprofModelUnSubscribe
*/
-ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
+ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId,
+const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
@@ -170,7 +177,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
* @see aclprofDestroySubscribeConfig
*/
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
aclprofAicoreMetrics aicoreMetrics, void *fd);

/**
* @ingroup AscendCL
@@ -222,7 +229,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index,
size_t *opTypeLen);

/**
* @ingroup AscendCL
@@ -237,8 +244,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opIn
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
-size_t opTypeLen);
+ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index,
+char *opType, size_t opTypeLen);

/**
* @ingroup AscendCL
@@ -253,7 +260,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index,
size_t *opNameLen);

/**
* @ingroup AscendCL
@@ -268,8 +275,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opIn
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
-size_t opNameLen);
+ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index,
+char *opName, size_t opNameLen);

/**
* @ingroup AscendCL
@@ -322,8 +329,38 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI
*/
ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief
*
* @param stepInfo [IN] pointer to stepInfo data
* @param aclprofstepTag [IN] start or end flag
* @param stream [IN] steam info
*
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo* stepInfo, aclprofStepTag tag, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create pointer to aclprofStepInfo data
*
*
* @retval aclprofStepInfo pointer
*/
ACL_FUNC_VISIBILITY aclprofStepInfo* aclprofCreateStepInfo();

/**
* @ingroup AscendCL
* @brief destroy aclprofStepInfo pointer
*
*
* @retval void
*/
ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo* stepinfo);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_PROF_H_
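
Sketch of the step-tagging APIs added in this file: bracket one step on a stream with START/END timestamps (profiling init and stream creation are elided):

#include "acl/acl_prof.h"

void tag_one_step(aclrtStream stream) {
    aclprofStepInfo *step = aclprofCreateStepInfo();
    if (step == NULL) {
        return;
    }
    (void)aclprofGetStepTimestamp(step, ACL_STEP_START, stream);
    /* ... launch this step's tasks on the stream here ... */
    (void)aclprofGetStepTimestamp(step, ACL_STEP_END, stream);
    aclprofDestroyStepInfo(step);
}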

inc/external/acl/acl_rt.h (+80 -62)

@@ -28,63 +28,63 @@ extern "C" {
#define ACL_EVENT_TIME_LINE 0x00000008u

typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;

typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;

typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;

typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
} aclrtMemMallocPolicy;

typedef enum aclrtMemAttr {
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
} aclrtMemAttr;

typedef enum aclrtGroupAttr {
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT,
ACL_GROUP_GROUPID_INT
} aclrtGroupAttr;

typedef struct tagRtGroupInfo aclrtGroupInfo;
@@ -487,7 +487,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Queries an event's status
*
@@ -549,7 +549,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start,
*
* @see aclrtFree | acldvppMalloc | aclrtMallocCached
*/
-ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
+size_t size,
+aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -572,7 +574,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal
*
* @see aclrtFree | aclrtMalloc
*/
-ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr,
+size_t size,
+aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -663,7 +667,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
-ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
+ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
+size_t destMax,
+const void *src,
+size_t count,
aclrtMemcpyKind kind);

/**
@@ -710,31 +717,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t
*
* @see aclrtSynchronizeStream
*/
-ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
-aclrtMemcpyKind kind, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
+size_t destMax,
+const void *src,
+size_t count,
+aclrtMemcpyKind kind,
+aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of byte to set
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
-ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
+ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
+size_t maxCount,
+int32_t value,
+size_t count,
aclrtStream stream);

/**
@@ -880,8 +894,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo
*/
-ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex,
-aclrtGroupAttr attr, void *attrValue, size_t valueLen,
+ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo,
+int32_t groupIndex,
+aclrtGroupAttr attr,
+void *attrValue,
+size_t valueLen,
size_t *paramRetSize);

/**
@@ -955,4 +972,5 @@ ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout);
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_
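
Sketch of the rewrapped memory APIs: allocate device memory, copy host data in, and free on failure (device/context setup via aclrtSetDevice is assumed done elsewhere):

#include "acl/acl_rt.h"

aclError upload(const void *hostBuf, size_t size, void **devPtrOut) {
    void *devPtr = NULL;
    aclError ret = aclrtMalloc(&devPtr, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclrtMemcpy(devPtr, size, hostBuf, size, ACL_MEMCPY_HOST_TO_DEVICE);
    if (ret != ACL_SUCCESS) {
        (void)aclrtFree(devPtr);  /* aclrtFree per the @see note above */
        return ret;
    }
    *devPtrOut = devPtr;
    return ACL_SUCCESS;
}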


inc/external/acl/acl_tdt.h (+19 -12)

@@ -24,10 +24,10 @@ extern "C" {
#endif

enum acltdtTensorType {
ACL_TENSOR_DATA_UNDEFINED = -1,
ACL_TENSOR_DATA_TENSOR,
ACL_TENSOR_DATA_END_OF_SEQUENCE,
ACL_TENSOR_DATA_ABNORMAL
};

typedef struct acltdtDataItem acltdtDataItem;
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

/**
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

/**
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

/**
@@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte
*
* @see acltdtDestroyDataItem
*/
-ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
-aclDataType dataType, void *data, size_t size);
+ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType,
+const int64_t *dims,
+size_t dimNum,
+aclDataType dataType,
+void *data,
+size_t size);

/**
* @ingroup AscendCL
@@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
*
* @see acltdtReceiveTensor
*/
-ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
+ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
+const acltdtDataset *dataset,
int32_t timeout);

/**
@@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
*
* @see acltdtSendTensor
*/
-ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
+ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle,
+acltdtDataset *dataset,
int32_t timeout);

#ifdef __cplusplus
}
#endif

-#endif // INC_EXTERNAL_ACL_ACL_TDT_H_
+#endif //INC_EXTERNAL_ACL_ACL_TDT_H_
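
Sketch of the rewrapped send path: wrap one host tensor in a data item and push it down a channel. acltdtCreateDataset, acltdtAddDataItem and acltdtDestroyDataset are assumed from unchanged parts of this header, and the timeout semantics are an assumption:

#include "acl/acl_tdt.h"

aclError send_one(const acltdtChannelHandle *channel, void *data, size_t size,
                  const int64_t *dims, size_t dimNum) {
    acltdtDataItem *item = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, dims, dimNum,
                                                ACL_FLOAT, data, size);
    if (item == NULL) {
        return 1;  /* illustrative failure code */
    }
    acltdtDataset *dataset = acltdtCreateDataset();  /* assumed helper */
    (void)acltdtAddDataItem(dataset, item);          /* assumed helper */
    aclError ret = acltdtSendTensor(channel, dataset, -1 /* assumed: block */);
    (void)acltdtDestroyDataset(dataset);             /* assumed helper */
    (void)acltdtDestroyDataItem(item);               /* per @see above */
    return ret;
}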


inc/external/acl/error_codes/rt_error_codes.h (+75 -75)

@@ -23,87 +23,87 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
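
The numeric bands above follow a fixed scheme: 107xxx codes flag invalid arguments or API misuse, 207xxx codes flag unsupported features or exhausted resources, and 507xxx (plus the driver-level codes near 507900) flag runtime or device faults. A minimal triage helper, sketched here as an illustration (the function name is hypothetical; only constants declared in this header are used), might look like:

// Hypothetical helper: coarse triage of an AscendCL runtime error code.
// 107xxx -> caller bug, 207xxx -> resource shortage, 507xxx -> runtime fault.
static const char *AclRtErrorBand(int32_t code) {
    if (code == ACL_RT_SUCCESS) {
        return "success";
    }
    if (code >= ACL_ERROR_RT_PARAM_INVALID && code <= ACL_ERROR_RT_WAIT_TIMEOUT) {
        return "invalid parameter or API misuse";
    }
    if (code >= ACL_ERROR_RT_FEATURE_NOT_SUPPORT && code <= ACL_ERROR_RT_NO_CDQ_RESOURCE) {
        return "feature or resource unavailable";
    }
    return "runtime or driver fault"; // 507xxx and the 5079xx driver codes
}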

+ 138
- 41
inc/external/acl/ops/acl_cblas.h View File

@@ -23,9 +23,17 @@
extern "C" {
#endif

typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType;
typedef enum aclTransType {
ACL_TRANS_N,
ACL_TRANS_T,
ACL_TRANS_NZ,
ACL_TRANS_NZ_T
} aclTransType;

typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType;
typedef enum aclComputeType {
ACL_COMPUTE_HIGH_PRECISION,
ACL_COMPUTE_LOW_PRECISION
} aclComputeType;

/**
* @ingroup AscendCL
@@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda,
aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
const void *alpha, const void *a, int lda, aclDataType dataTypeA,
const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);
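
As a concrete usage sketch (not part of this header): the call below computes y = A * x on fp16 device buffers; devA, devX, devY and stream are assumed to have been prepared elsewhere with aclrtMalloc and aclrtCreateStream, and the leading dimension is illustrative.

/* Sketch only: y = 1.0 * A * x + 0.0 * y, all operands fp16 on the device. */
aclFloat16 alpha = aclFloatToFloat16(1.0f);
aclFloat16 beta = aclFloatToFloat16(0.0f);
aclError ret = aclblasGemvEx(ACL_TRANS_N, m, n,
                             &alpha, devA, n /* lda, illustrative */, ACL_FLOAT16,
                             devX, 1, ACL_FLOAT16,
                             &beta, devY, 1, ACL_FLOAT16,
                             ACL_COMPUTE_HIGH_PRECISION, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream); /* results are valid only after sync */
}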

/**
* @ingroup AscendCL
@@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA,
aclDataType dataTypeX, aclDataType dataTypeY,
aclComputeType type, aclopHandle **handle);
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
int m,
int n,
aclDataType dataTypeA,
aclDataType dataTypeX,
aclDataType dataTypeY,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
@@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha,
const aclFloat16 *a, int lda, const aclFloat16 *x, int incx,
const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
int m,
int n,
const aclFloat16 *alpha,
const aclFloat16 *a,
int lda,
const aclFloat16 *x,
int incx,
const aclFloat16 *beta,
aclFloat16 *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
@@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
@@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a,
int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y,
int incy, aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
int m,
int n,
const int32_t *alpha,
const int8_t *a,
int lda,
const int8_t *x,
int incx,
const int32_t *beta,
int32_t *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
@@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const void *alpha, const void *matrixA, int lda,
aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const void *alpha,
const void *matrixA,
int lda,
aclDataType dataTypeA,
const void *matrixB,
int ldb,
aclDataType dataTypeB,
const void *beta,
void *matrixC,
int ldc,
aclDataType dataTypeC,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclDataType dataTypeA,
aclDataType dataTypeB, aclDataType dataTypeC,
aclComputeType type, aclopHandle **handle);
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclDataType dataTypeA,
aclDataType dataTypeB,
aclDataType dataTypeC,
aclComputeType type,
aclopHandle **handle);


/**
* @ingroup AscendCL
@@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda,
const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta,
aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const aclFloat16 *alpha,
const aclFloat16 *matrixA,
int lda,
const aclFloat16 *matrixB,
int ldb,
const aclFloat16 *beta,
aclFloat16 *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);
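
For reference, a hedged call sketch (ours, not from the header): C = A * B for fp16 matrices already resident on the device, with leading dimensions chosen to match the untransposed row-major shapes.

aclFloat16 one = aclFloatToFloat16(1.0f);
aclFloat16 zero = aclFloatToFloat16(0.0f);
/* (m x k) times (k x n) -> (m x n); devA/devB/devC and stream prepared elsewhere. */
aclError ret = aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N,
                            m, n, k,
                            &one, devA, k,
                            devB, n,
                            &zero, devC, n,
                            ACL_COMPUTE_HIGH_PRECISION, stream);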

/**
* @ingroup AscendCL
@@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType tran
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

/**
@@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const int32_t *alpha, const int8_t *matrixA, int lda,
const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC,
int ldc, aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const int32_t *alpha,
const int8_t *matrixA,
int lda,
const int8_t *matrixB,
int ldb,
const int32_t *beta,
int32_t *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);
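
The Create...Handle variants exist so a fixed-shape GEMM or GEMV can be prepared once and replayed cheaply. A rough sketch of that pattern, assuming aclopExecWithHandle from acl_op.h and with buffer assembly elided (numInputs/inputs/numOutputs/outputs are placeholders):

aclopHandle *handle = NULL;
aclError ret = aclblasCreateHandleForS8gemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N,
                                            m, n, k, ACL_COMPUTE_LOW_PRECISION, &handle);
if (ret == ACL_SUCCESS) {
    /* inputs/outputs are aclDataBuffer arrays assembled elsewhere. */
    ret = aclopExecWithHandle(handle, numInputs, inputs, numOutputs, outputs, stream);
}
aclopDestroyHandle(handle); /* release the cached op once done */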

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

+ 261
- 164
inc/external/acl/ops/acl_dvpp.h View File

@@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output

// Supported Pixel Format
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
};

// Stream Format
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
H264_BASELINE_LEVEL,
H264_MAIN_LEVEL,
H264_HIGH_LEVEL
};

// Supported Channel Mode
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };
enum acldvppChannelMode {
DVPP_CHNMODE_VPC = 1,
DVPP_CHNMODE_JPEGD = 2,
DVPP_CHNMODE_JPEGE = 4
};

// Supported Border Type
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };
enum acldvppBorderType {
BORDER_CONSTANT = 0,
BORDER_REPLICATE,
BORDER_REFLECT,
BORDER_REFLECT_101
};

// Venc parameter type
enum aclvencChannelDescParamType {
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
};

// Jpeg picture format
enum acldvppJpegFormat {
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
};

/**
@@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
* @retval null for failed.
* @retval other success
*/
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);

/**
@@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);

/**
@@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length,
const void *param);
aclvencChannelDescParamType paramType, size_t length, const void *param);

/**
* @ingroup AscendCL
@@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length,
size_t *paramRetSize, void *param);
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
@@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
@@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
uint32_t *height, int32_t *components,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components,
acldvppJpegFormat *format);

/**
@@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
const acldvppJpegeConfig *config, uint32_t *size);
const acldvppJpegeConfig *config,
uint32_t *size);

/**
* @ingroup AscendCL
@@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
uint32_t *height, int32_t *components);
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
uint32_t dataSize,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
* @ingroup AscendCL
@@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe
* @see acldvppCreateChannel | acldvppCreatePicDesc
* | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);
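
Putting the pieces together, a condensed resize flow might read as below; descriptor field setup is elided, and the usual DVPP stride and buffer-size alignment rules still apply.

acldvppChannelDesc *chDesc = acldvppCreateChannelDesc();
acldvppCreateChannel(chDesc); /* the channel must exist before any VPC call */
acldvppResizeConfig *cfg = acldvppCreateResizeConfig();
/* inDesc/outDesc are acldvppPicDesc objects whose data/size/format/stride
 * fields were filled via the acldvppSetPicDesc* setters. */
aclError ret = acldvppVpcResizeAsync(chDesc, inDesc, outDesc, cfg, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream); /* output is ready only after sync */
}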

/**
@@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
aclrtStream stream);
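
A crop differs from a resize only in that the region of interest comes from an acldvppRoiConfig. A sketch for a 224 x 224 window (DVPP conventionally expects even left/top and odd right/bottom offsets, hence the 0..223 bounds):

acldvppRoiConfig *roi = acldvppCreateRoiConfig(0, 223, 0, 223); /* 224 x 224 */
aclError ret = acldvppVpcCropAsync(chDesc, inDesc, outDesc, roi, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream);
}
acldvppDestroyRoiConfig(roi);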

/**
@@ -1746,9 +1781,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppResizeConfig *resizeConfig, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -1772,9 +1811,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *chann
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[], aclrtStream stream);
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1799,10 +1841,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1825,9 +1870,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1851,10 +1899,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
acldvppResizeConfig *resizeConfig, aclrtStream stream);
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1879,11 +1930,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[], aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1909,10 +1963,16 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -1940,8 +2000,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);
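
The decode path is typically sized first with acldvppJpegPredictDecSize, then backed with DVPP-reachable memory from acldvppMalloc. A sketch, with jpegData/jpegSize, chDesc and a prepared outDesc assumed in hand:

uint32_t decSize = 0;
acldvppJpegPredictDecSize(jpegData, jpegSize, PIXEL_FORMAT_YUV_SEMIPLANAR_420, &decSize);
void *decBuf = NULL;
acldvppMalloc(&decBuf, decSize); /* DVPP-accessible device memory */
acldvppSetPicDescData(outDesc, decBuf);
acldvppSetPicDescSize(outDesc, decSize);
aclError ret = acldvppJpegDecodeAsync(chDesc, jpegData, jpegSize, outDesc, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream);
}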

/**
* @ingroup AscendCL
@@ -1959,8 +2022,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreateJpegeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
const void *data, uint32_t *size, acldvppJpegeConfig *config,
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
const void *data,
uint32_t *size,
acldvppJpegeConfig *config,
aclrtStream stream);

/**
@@ -1978,8 +2044,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2034,8 +2103,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
acldvppPicDesc *output,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -2054,8 +2126,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
aclvdecFrameConfig *config, void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -2076,8 +2150,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2099,8 +2175,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
void *reserve,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2112,7 +2191,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
uint32_t mode);

/**
* @ingroup AscendCL
@@ -2147,7 +2227,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
uint32_t outMode);

/**
* @ingroup AscendCL
@@ -2244,7 +2325,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
uint32_t dim,
uint8_t **data,
uint32_t *len);
/**
* @ingroup AscendCL
@@ -2262,8 +2345,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap, aclrtStream stream);
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2284,7 +2369,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
*
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
uint32_t index,
double value);

/**
@@ -2429,8 +2515,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig, aclrtStream stream);
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2447,8 +2535,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
acldvppHist *hist, void *reserve, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *srcPicDesc,
acldvppHist *hist,
void *reserve,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2457,7 +2548,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();

/**
* @ingroup AscendCL
@@ -2514,7 +2605,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
*
* @see acldvppCreateHist | acldvppVpcCalcHistAsync
*/
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);

/**
* @ingroup AscendCL
@@ -2533,6 +2624,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
*/
ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);


/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop, resize config and make border.
@@ -2556,13 +2648,18 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_

+ 6
- 8
inc/external/acl/ops/acl_fv.h View File

@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult;

// search operation type
enum aclfvSearchType {
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
};

/**
@@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
uint32_t featureLen, uint32_t featureCount,
uint8_t *featureData, uint32_t featureDataLen);
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);

/**
* @ingroup AscendCL
@@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1,
uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);

/**
* @ingroup AscendCL
@@ -345,4 +343,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

+ 19
- 20
inc/external/hccl/hccl.h View File

@@ -27,7 +27,7 @@

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief Initialize HCCL.
@@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
* float32.
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource to operate on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
HcclComm comm, aclrtStream stream);
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);
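
A hedged single-call sketch (ours): sum-reduce `count` fp32 elements across every rank of an existing communicator. sendBuf/recvBuf are device buffers, and the stream must be synchronized before the result is read.

HcclResult ret = HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                               HCCL_REDUCE_SUM, comm, stream);
if (ret == HCCL_SUCCESS) {
    aclrtSynchronizeStream(stream); /* allreduce result is valid after sync */
}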

/**
* @brief Broadcast operator.
@@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc
* @param root An integer(u32) identifying the root rank in the operator.
* @param comm A pointer identifying the communication resource to operate on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
aclrtStream stream);

/**
* @brief ReduceScatter operator.
@@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource to operate on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);

/**
* @brief AllGather operator.
@@ -114,16 +113,16 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param comm A pointer identifying the communication resource to operate on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
aclrtStream stream);
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
HcclComm comm, aclrtStream stream);
/**
* @brief Get the rank size of this comm.
*
* @param comm A pointer identifying the communication resource to operate on.
* @param rankSize A pointer identifying the rank size.
* @return HcclResult
*/
extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);

@@ -132,7 +131,7 @@ extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
*
* @param comm A pointer identifying the communication resource to operate on.
* @param rank A pointer identifying the rank id.
* @return HcclResult
*/
extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
/**
@@ -140,7 +139,7 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
*
* @param comm A pointer identifying the communication resource to operate on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);

@@ -155,5 +154,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_H_

+ 42
- 42
inc/external/hccl/hccl_types.h View File

@@ -16,10 +16,10 @@

/**
* @file hccl_types.h
* @brief HCCL data type definition
*
*/
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_

@@ -27,33 +27,33 @@

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus

/**
* @brief HCCL functions return value definition
*/
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
} HcclResult;

/**
@@ -65,37 +65,37 @@ typedef void *HcclComm;
* @brief HCCL Reduction operation
*/
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
} HcclReduceOp;

/**
* @brief HCCL data type
*/
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length

/**
* @brief HCCL root info
*/
typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;
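
The struct is an opaque 4108-byte blob whose only job is to bootstrap a communicator. A typical sequence, sketched under the assumption that nRanks/myRank and some out-of-band transport for sharing the blob already exist:

HcclRootInfo rootInfo;
if (myRank == 0) {
    HcclGetRootInfo(&rootInfo); /* only the root rank generates it */
}
/* ...ship rootInfo's HCCL_ROOT_INFO_BYTES bytes to every other rank,
 * e.g. over MPI or a shared file... */
HcclComm comm;
HcclResult ret = HcclCommInitRootInfo(nRanks, &rootInfo, myRank, &comm);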

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_

+ 75
- 75
inc/external/runtime/rt_error_codes.h View File

@@ -23,87 +23,87 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
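Because these status values are plain int32_t constants grouped by numeric prefix, a caller can triage a returned code by range before matching exact constants. A minimal sketch under that assumption (check_status and its messages are illustrative glue, not part of the API):

#include <cstdint>
#include <cstdio>

// Illustrative triage by the numeric prefixes used above:
// 107xxx = bad arguments/context, 207xxx = resource or feature limits,
// 507xxx = internal runtime/driver errors.
static void check_status(int32_t status) {
  if (status >= 507000) {
    std::printf("internal error %d: collect logs and report\n", status);
  } else if (status >= 207000) {
    std::printf("resource error %d: release resources and retry\n", status);
  } else if (status >= 107000) {
    std::printf("usage error %d: fix call arguments or context\n", status);
  }
}

int main() {
  check_status(507011);  // ACL_ERROR_RT_MODEL_EXECUTE, from the table above
  return 0;
}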

+ 10
- 12
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -3154,13 +3154,13 @@ REG_OP(FusedMulAddNL2loss)
*@brief Tests whether the input exceeds a threshold. \n

*@par Inputs:
*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n
* x: A Tensor with any format. Must be one of the following types: float16, float32. \n

*@par Attributes:
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
* threshold: An optional float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n

*@par Outputs:
*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
* y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
*@par Third-party framework compatibility
* Compatible with the Caffe operator Threshold.
*/
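As a plain-C++ restatement of the element-wise rule documented above (a sketch of the semantics, not the operator implementation):

#include <vector>

// Element-wise Threshold as documented: y = 1 if x > threshold, else 0.
// The real op keeps the input's type; float is used here for brevity.
std::vector<float> threshold_op(const std::vector<float>& x, float threshold = 0.0f) {
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = (x[i] > threshold) ? 1.0f : 0.0f;
  }
  return y;
}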
@@ -3203,12 +3203,11 @@ REG_OP(ArgMaxWithK)
*@brief Multiply tensor with scale. \n

*@par Inputs:
*Five inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
* @li x2: A scale. Must be float. \n
*One input, including:
*x: A Tensor. Must be one of the following types: int32, int16, float16, float32.

*@par Outputs:
*@li y: A Tensor. Has the same type and shape as "x1". \n
*y: A Tensor. Has the same type and shape as "x1". \n

*@par Third-party framework compatibility:
* Compatible with the Pytorch operator muls.
@@ -3223,12 +3222,11 @@ REG_OP(Muls)
*@brief Fill tensor with scale. \n

*@par Inputs:
*Five inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
* @li x2: A scale. Must be float. \n
*One input, including:
*x1: A Tensor. Must be one of the following types: int32, int16, float16, float32.

*@par Outputs:
*@li y: A Tensor. Has the same type and shape as "x1". \n
*y: A Tensor. Has the same type and shape as "x1". \n

*@par Third-party framework compatibility:
* Compatible with the Pytorch operator fills.
@@ -3378,7 +3376,7 @@ REG_OP(TensorMove)

*@par Inputs:
*One input, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n

*@par Outputs:
*output_x: A Tensor. Has the same type as "x". \n


+ 7
- 3
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -585,9 +585,11 @@ REG_OP(ResizeNearestNeighborV2GradD)
channels], The image tensor that was resized . \n

*@par Attributes:
*align_corners: An optional bool. Defaults to False. If true, the centers of
*@li align_corners: An optional bool. Defaults to False. If true, the centers of
the 4 corner pixels of the input and grad tensors are aligned. Defaults to
false . \n
false .
*@li half_pixel_centers: indicates if the offset coordinates are normalized. Defaults
to false . \n

*@par Outputs:
*y: A Tensor. Has the same type as original_image . \n
@@ -787,9 +789,10 @@ REG_OP(SampleDistortedBoundingBoxExt2)
The new size for the images . \n

*@par Attributes:
*align_corners: If true, the centers of the 4 corner pixels of the input and
*@li align_corners: If true, the centers of the 4 corner pixels of the input and
output tensors are aligned, preserving the values at the corner pixels.
Defaults to false . \n
*@li half_pixel_centers: An optional bool. Defaults to False . \n

*@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n
@@ -1253,6 +1256,7 @@ REG_OP(KeepRatioResizeBilinear)
No default value.
*@li align_corners: An optional bool. If "true", the centers of the corner
pixels of the input and output tensors are aligned. Defaults to "false" . \n
*@li half_pixel_centers: An optional bool. Defaults to False . \n

*@par Outputs:
*y: A Tensor with the same type and format as input "images" . \n
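The half_pixel_centers attribute being added alongside align_corners selects the resize coordinate mapping. A hedged sketch of the conventional TensorFlow-style mapping these flags normally denote (standard convention, not quoted from this header):

#include <algorithm>

// Maps a destination pixel index to a source coordinate when resizing.
// scale is in_size / out_size, or (in_size - 1) / (out_size - 1) when
// align_corners is true.
float source_coord(int dst, float scale, bool half_pixel_centers) {
  if (half_pixel_centers) {
    // Sample at pixel centers: shift by half a pixel on both sides.
    return std::max(0.0f, (dst + 0.5f) * scale - 0.5f);
  }
  return dst * scale;  // plain scaling (align_corners folds into scale)
}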


+ 40
- 38
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -670,7 +670,8 @@ REG_OP(Conj)
*@li weight: A Tensor dtype of float32 . \n

*@par Attributes:
*reduction: An optional attribute. Defaults to "mean" . \n
*@li reduction: An optional attribute. Defaults to "mean" .
*@li ignore_index: An optional attribute. Defaults to -100. \n

*@par Outputs:
*@li y: A Tensor dtype of float32.
@@ -700,7 +701,8 @@ REG_OP(NLLLoss)
*@li total_weight:A Tensor dtype of float32 . \n

*@par Attributes:
*reduction: An optional attribute. Defaults to "mean" . \n
*@li reduction: An optional attribute. Defaults to "mean" .
*@li ignore_index: An optional attribute. Defaults to -100. \n

*@par Outputs:
*x_grad: A Tensor. Must be the following type: float32 . \n
@@ -720,24 +722,24 @@ REG_OP(NLLLossGrad)
.OP_END_FACTORY_REG(NLLLossGrad)
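The reduction and ignore_index attributes documented here follow the usual NLL-loss convention: targets equal to ignore_index contribute neither loss nor count. A hedged scalar sketch of the "mean" reduction (names illustrative, not the kernel):

#include <vector>

// Mean-reduced negative log-likelihood over per-sample log-probabilities.
// Samples whose target equals ignore_index are skipped entirely.
float nll_loss_mean(const std::vector<std::vector<float>>& log_probs,
                    const std::vector<int>& target, int ignore_index = -100) {
  float loss = 0.0f;
  int counted = 0;
  for (size_t i = 0; i < target.size(); ++i) {
    if (target[i] == ignore_index) continue;
    loss -= log_probs[i][target[i]];
    ++counted;
  }
  return counted > 0 ? loss / counted : 0.0f;
}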

/**
*@brief The ifmr . \n
*@brief IFMR (Input Feature Map Reconstruction). \n

*@par Inputs:
*@li data:A Tensor of feature map
*@li data_min:A Tensor of min value of feature map.
*@li data_max:A Tensor of max value of feature map.
*@li cumsum:A Tensor of cumsum bin of data . \n
*@li data: A Tensor of feature map.
*@li data_min: A Tensor of min value of feature map.
*@li data_max: A Tensor of max value of feature map.
*@li cumsum: A Tensor of cumsum bin of data . \n

*@par Attributes:
*min_percentile: min init percentile.
*max_percentile: max init percentile.
*search_range: search range.
*search_step: step size of searching.
*with_offset: whether using offset . \n
*@li min_percentile: min init percentile.
*@li max_percentile: max init percentile.
*@li search_range: search range.
*@li search_step: step size of searching.
*@li with_offset: whether to use an offset . \n

*@par Outputs:
*scale: optimal scale.
*offset: optimal offset . \n
*@li scale: optimal scale.
*@li offset: optimal offset . \n

*@par Third-party framework compatibility
*Compatible with mindspore
@@ -758,16 +760,16 @@ REG_OP(IFMR)
.OP_END_FACTORY_REG(IFMR)

/**
*@brief weights adaptive range quantization. \n
*@brief Weights Adaptive Range Quantization. \n

*@par Inputs:
*@li w:A Tensor of weights. \n
*@li w_min:A Tensor of weights reduce_min. \n
*@li w_max:A Tensor of weights reduce_max. \n
*@li w: A Tensor of weights. \n
*@li w_min: A Tensor of weights reduce_min. \n
*@li w_max: A Tensor of weights reduce_max. \n

*@par Attributes:
*num_bits: the bits num used for quantize.
*offset_flag: whether using offset. \n
*@li num_bits: the number of bits used for quantization.
*@li offset_flag: whether to use an offset. \n

*@par Outputs:
*y: fake quantized weights. \n
@@ -789,22 +791,22 @@ REG_OP(WtsARQ)
.OP_END_FACTORY_REG(WtsARQ)

/**
*@brief The acts_ulq. \n
*@brief Activations Universal Linear Quantization. \n

*@par Inputs:
*@li x:A Tensor of feature map
*@li clamp _min:A Tensor of min clamp value of feature map.
*@li clamp _max:A Tensor of max clamp value of feature map.
*@li x: A Tensor of feature map.
*@li clamp_min: A Tensor of min clamp value of feature map.
*@li clamp_max: A Tensor of max clamp value of feature map.

*@par Attributes:
*fixed_min: fix min to zero.
*num_bits: quant bits. \n
*@li fixed_min: fix min to zero.
*@li num_bits: quant bits. \n

*@par Outputs:
*y: output fake quant feature map.
*clamp_min_mask: where x > clamp_min
*clamp_min_mask: where x < clamp_max
*x_clamped_loss: clamp loss. \n
*@li y: output fake quant feature map.
*@li clamp_min_mask: where x > clamp_min.
*@li clamp_max_mask: where x < clamp_max.
*@li x_clamped_loss: clamp loss. \n

*@par Third-party framework compatibility
*Compatible with mindspore
@@ -826,12 +828,12 @@ REG_OP(ActsULQ)
.OP_END_FACTORY_REG(ActsULQ)

/**
*@brief The acts_ulq_input_grad. \n
*@brief The gradient of Activations Universal Linear Quantization. \n

*@par Inputs:
*@li y_grad: A Tensor of gradient
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed'
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed'
*@li y_grad: A Tensor of gradient.
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.

*@par Outputs:
*x_grad: The gradient of input. \n
@@ -851,10 +853,10 @@ REG_OP(ActsULQInputGrad)
.OP_END_FACTORY_REG(ActsULQInputGrad)

/**
*@brief The act_ulq_clamp_max_grad. \n
*@brief The gradient of Activations Universal Linear Quantization clamp max. \n

*@par Inputs:
*@li y_grad: A Tensor of gradient
*@li y_grad: A Tensor of gradient.
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n

@@ -876,10 +878,10 @@ REG_OP(ActULQClampMaxGrad)
.OP_END_FACTORY_REG(ActULQClampMaxGrad)

/**
*@brief The act_ulq_clamp_min_grad. \n
*@brief The gradient of Activations Universal Linear Quantization clamp min. \n

*@par Inputs:
*@li y_grad: A Tensor of gradient
*@li y_grad: A Tensor of gradient.
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. \n
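The ActsULQ family above implements linear fake quantization between learned clamp bounds. A hedged sketch of the forward math, assuming the conventional ULQ formulation (the real kernel additionally emits the clamp masks and clamp loss used by the gradient ops):

#include <algorithm>
#include <cmath>

// Universal linear fake quantization: clamp x to [lo, hi], then snap it to
// one of 2^num_bits evenly spaced levels within that range.
float fake_quant_ulq(float x, float lo, float hi, int num_bits) {
  const int steps = (1 << num_bits) - 1;  // e.g. 255 for 8 bits
  const float scale = (hi - lo) / steps;
  const float clamped = std::min(std::max(x, lo), hi);
  return lo + std::round((clamped - lo) / scale) * scale;
}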



+ 12
- 8
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -67,11 +67,15 @@ REG_OP(MatMul)
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
* @li bias: A 1D Tensor. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC] . \n
* float32, int32. Has format [ND, NHWC]
* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8. Reserved. \n

*@par Attributes:
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] .
*@li offset_x: An optional integer for quantized deconvolution.
*The negative offset added to the input image for int8 type. Ensure offset_x within the
*effective range of int8 [-128, 127]. Defaults to "0". \n

*@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float16,
@@ -488,13 +492,13 @@ REG_OP(ScatterElements)

*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor . \n
*@li var: An ND Tensor .

*Must be one of the following types: float16, float32, int32, int8, uint8
*@li indices: An ND Tensor of type int32 or int64


*@li updates: An Tensor. format:NCHW, NHWC . \n
*@li updates: A Tensor. Format: NCHW, NHWC .

*Must be one of the following types: float16, float32, int32, int8, uint8

@@ -925,13 +929,13 @@ REG_OP(ScatterMin)

*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor . \n
*@li var: An ND Tensor .

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An NCHW, NHWC, or ND Tensor . \n

*Must be one of the following types: int32 or int64
*@li updates: An NCHW, NHWC, or ND Tensor . \n
*@li updates: An NCHW, NHWC, or ND Tensor .

*Must be one of the following types: float16, float, int32, int8, uint8

@@ -958,13 +962,13 @@ REG_OP(ScatterMax)

*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor . \n
*@li var: An ND Tensor .

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor . \n

*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor . \n
*@li updates: An ND Tensor .

*Must be one of the following types: float16, float, int32, int8, uint8



+ 304
- 342
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -367,19 +367,19 @@ REG_OP(BiasAddGrad)
* Gradients with respect to the output of the convolution.
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | out_bckprop | filter | y
------------|-------------|---------|--------
| Data Type | float16 | float16 | float16
| |-------------|---------|--------
| | float32 | float32 | float32
| |-------------|---------|--------
| | float64 | float64 | float64
------------|-------------|---------|--------
| Format | NCHW | NCHW | NCHW
| | NHWC | HWCN | NHWC
@endverbatim
* The following are the supported data types and data formats:\n
*\n
| Tensor | out_bckprop | filter | y\n
------------|-------------|---------|--------\n
| Data Type | float16 | float16 | float16\n
| |-------------|---------|--------\n
| | float32 | float32 | float32\n
| |-------------|---------|--------\n
| | float64 | float64 | float64\n
------------|-------------|---------|--------\n
| Format | NCHW | NCHW | NCHW\n
| | NHWC | HWCN | NHWC\n
*\n
* For float32 and float64 type, the actual calculation on the chip is based on
* float16.
*\n
@@ -398,36 +398,37 @@ REG_OP(BiasAddGrad)
* "NHWC". Specify the data format of the input and output data.
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| input_size | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Filter | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| out_backprop | H*strideH| [1, 4096]
| | W*strideW| [1, 4096]
-------------------|----------|--------------
| y(fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| input_size | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Filter | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| out_backprop | H*strideH| [1, 200000]\n
| | W*strideW| [1, 4096]\n
-------------------|----------|--------------\n
| y(fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
*\n

@endverbatim
* In Ascend910, the H and W dimensions of fmap or out_backprop do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
* and filter_width > fmap_width.
* If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be < 4096.
*\n
*
@@ -508,7 +509,7 @@ REG_OP(Conv2DBackpropInputD)
/**
*@brief Computes the Deconvolution with respect to the input.
*@par Inputs:
* Three inputs:
* Two required inputs:
* @li x: A Tensor of type float16 or int8. 4D with shape
* [batch, out_channels, out_height, out_width]. Gradients with respect
* to the output of the convolution.
@@ -520,16 +521,16 @@ REG_OP(Conv2DBackpropInputD)
* Type is int8. Reserved.\n
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | filter | bias | y
------------|---------|---------|---------|--------
| Data Type | float16 | float16 | float16 | float16
| |---------|---------|---------|--------
| | int8 | int8 | int32 | int32
------------|---------|---------|---------|--------
| Format | NCHW | NCHW | ND | NCHW
@endverbatim
* The following are the supported data types and data formats:\n
*\n
| Tensor | x | filter | bias | y\n
------------|---------|---------|---------|--------\n
| Data Type | float16 | float16 | float16 | float16\n
| |---------|---------|---------|--------\n
| | int8 | int8 | int32 | int32\n
------------|---------|---------|---------|--------\n
| Format | NCHW | NCHW | ND | NCHW\n
*\n
* For int8, a dequant or requant operator must be followed.
*\n
*
@@ -550,35 +551,35 @@ REG_OP(Conv2DBackpropInputD)
* within the effective range of int8 [-128, 127]. Defaults to "0".
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| x (out_backprop) | H*strideH| [1, 4096]
| | W*strideW| [1, 4096]
-------------------|----------|--------------
| Filter | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| y (fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| Offset_x | | [-128, 127]

@endverbatim
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| x (out_backprop) | H*strideH| [1, 200000]\n
| | W*strideW| [1, 4096]\n
-------------------|----------|--------------\n
| Filter | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| y (fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| Offset_x | | [-128, 127]\n
*\n
* In Ascend910, the H and W dimensions of fmap or out_backprop do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
* and filter_width > fmap_width.
* If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be < 4096.
*\n
*
@@ -628,19 +629,19 @@ REG_OP(Deconvolution)
* convolution.
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | out_backprop | y
------------|---------|--------------|---------
| Data Type | float16 | float16 | float16
| |---------|--------------|---------
| | float32 | float32 | float32
| |---------|--------------|---------
| | float64 | float64 | float64
|-----------|---------|--------------|---------
| Format | NCHW | NCHW | NCHW
| | NHWC | NHWC | HWCN
@endverbatim
* The following are the supported data types and data formats:\n
*\n
| Tensor | x | out_backprop | y\n
------------|---------|--------------|---------\n
| Data Type | float16 | float16 | float16\n
| |---------|--------------|---------\n
| | float32 | float32 | float32\n
| |---------|--------------|---------\n
| | float64 | float64 | float64\n
|-----------|---------|--------------|---------\n
| Format | NCHW | NCHW | NCHW\n
| | NHWC | NHWC | HWCN\n
*\n
* For float32 and float64 type of x and outbackprop, the actual calculation on the chip
* is based on float16.
*\n
@@ -658,39 +659,34 @@ REG_OP(Deconvolution)
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
* "NHWC". Specify the data format of the input and output data.
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| x(fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Filter Size | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| out_backprop | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| y | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]

@endverbatim
* In Ascend910, out_backprop's H and W not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
*\n
*
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| x(fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Filter Size | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| out_backprop | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| y | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
*\n
*@par Outputs:
* y: A Tensor. Has the same type as x, has the same format as filter_size.
*\n
@@ -780,16 +776,14 @@ REG_OP(Conv2DBackpropFilterD)
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | filter | bias | y
------------|---------|---------|---------|--------
| Data Type | float16 | float16 | float16 | float16
| | float32 | float32 | float32 | float32
| | int8 | int8 | int32 | int32
------------|---------|---------|---------|--------
| Format | NCHW | NCHW | ND | NCHW
| | NHWC | HWCN | | NHWC
@endverbatim
*\n
| Tensor | x | filter | bias | y |\n
| :-------: | :-----: | :-----: | :-----: | :-----: |\n
| Data Type | float16 | float16 | float16 | float16 |\n
| | float32 | float32 | float32 | float32 |\n
| | int8 | int8 | int32 | int32 |\n
| Format | NCHW | NCHW | ND | NCHW |\n
| | NHWC | HWCN | | NHWC |\n
* For float32 type, the actual calculation on the chip is based on
* float16.
*\n
@@ -813,35 +807,28 @@ REG_OP(Conv2DBackpropFilterD)
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| Input Image Size | H | [1, 100000]
| | W | [1, 4096]
-------------------|----------|--------------
| Filter Size | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| Offset_x | | [-128, 127]

@endverbatim
*\n
| Name | Field | Scope |\n
| :--------------: | :------: | :---------: |\n
| Input Image Size | H | [1, 100000] |\n
| | W | [1, 4096] |\n
| Filter Size | H | [1, 255] |\n
| | W | [1, 255] |\n
| Stride | H | [1, 63] |\n
| | W | [1, 63] |\n
| Padding | Top | [0, 255] |\n
| | Bottom | [0, 255] |\n
| | Left | [0, 255] |\n
| | Right | [0, 255] |\n
| Dilation | H | [1, 255] |\n
| | W | [1, 255] |\n
| Offset_x | - | [-128, 127] |\n
* The W dimension of the input image supports cases exceeding 4096, but it may
* cause compilation errors.
*\n
*
*@par Outputs:
*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the
* y: A 4D Tensor of output feature map. Has the same type as "x". With the
* format "NHWC", the data is stored in the order of: [batch, out_height,
* out_width, out_channels].
*\n
@@ -956,16 +943,13 @@ REG_OP(Conv2DCompress)
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | filter | offsets | bias | y
------------|---------|---------|---------|----------|--------
| Data Type | float16 | float16 | float16 | float16 | float16
| |---------|---------|---------|----------|--------
| | float32 | float32 | float32 | float32 | float32
------------|---------|---------|---------|----------|--------
| Format | NCHW | NCHW | NCHW | ND | NCHW
| | NHWC | HWCN | NHWC | | NHWC
@endverbatim
*\n
| Tensor | x | filter | offsets | bias | y |\n
| :-------: | :-----: | :-----: | :-----: | :-----: | :-----: |\n
| Data Type | float16 | float16 | float16 | float16 | float16 |\n
| | float32 | float32 | float32 | float32 | float32 |\n
| Format | NCHW | NCHW | NCHW | ND | NCHW |\n
| | NHWC | HWCN | NCHW | | NHWC |\n
* For float32 type, the actual convolution calculation part on the chip is
* based on float16.
*\n
@@ -992,19 +976,17 @@ REG_OP(Conv2DCompress)
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
--------------------|--------|----------------------------
| Input Image Size | H | [1, 100000 / filter_height]
| | W | [1, 4096 / filter_width]
--------------------|--------|----------------------------
| Filter Size | H | [1, 63]
| | W | [1, 63]
@endverbatim
*\n
| Name | Field | Scope |\n
| :--------------: | :------: | :-------------------------: |\n
| Input Image Size | H | [1, 100000 / filter_height] |\n
| | W | [1, 4096 / filter_width] |\n
| Filter Size | H | [1, 63] |\n
| | W | [1, 63] |\n
*\n
*
*@par Outputs:
*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the
* y: A 4D Tensor of output feature map. Has the same type as "x". With the
* format "NHWC", the data is stored in the order of: [batch, out_height,
* out_width, out_channels].
*\n
@@ -1042,41 +1024,38 @@ REG_OP(DeformableConv2D)

/**
*@brief Computes a 3D convolution given 5D "x" and "filter" tensors.
*@par Inputs:

*@par Inputs:
* @li x: A 5D tensor. Must be one of the following types: float16,
* (Currently does not support int8). The format of x is NCDHW or NDHWC.
* @li filter: A 5D tensor of the same type as "x".
* (Currently does not support int8).
* The format is NCDHW, NDHWC or DHWCN . \n

*@par Optional input:
* @li bias: An optional 1D tensor of the same type as "x".
* @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n
* The format is NCDHW, NDHWC or DHWCN.
* @li bias: Optional. A 1D tensor of the same type as "x".
* @li offset_w: Optional. A 1D tensor for quantized deconvolution. Reserved. \n

*@par Required Attributes:
* @li strides: A list of 5 integers. Specifies the stride of the sliding window
*@par Attributes:
* @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
* for each dimension of "x".
* The N and C dimensions must be 1. Has the same format as "x".
* @li pads: A list of 6 integers.
* @li pads: Required. A list of 6 integers.
* Supports only padding along the D, H and W dimensions in sequence of head,
* tail, top, bottom, left and right . \n
*@par Attributes:
* @li groups: Number of blocked connections from input channels to output
* tail, top, bottom, left and right.
* @li dilations: Optional. A list of 5 integers. Specifies the dilation factor for each
* dimension of "x".
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A list of 5 integers. Specifies the dilation factor for each
* dimension of "x".
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li offset_x: An optional int. Input offset, used for quantized inference.
* Defaults to 0. Reserved . \n
* @li offset_x: Optional. An int. Input offset, used for quantized inference.
* Defaults to 0. Reserved. \n

*@par Outputs:
*y: A Tensor. Has the same type and data format as "x". \n
* y: A Tensor. Has the same type and data format as "x". \n

*@attention Constraints:
*The image size after padding is greater than the filter size . \n
* The image size after padding is greater than the filter size. \n

*@par Third-party framework compatibility
* @li Compatible with the TensorFlow operator conv3d.
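For reference, the strides, pads and dilations attributes combine into the usual convolution size relation, applied independently to each spatial dimension (D, H, W). A hedged helper (the standard formula, not quoted from this header):

// Output extent of a forward convolution along one spatial dimension.
int conv_out_dim(int in, int stride, int kernel, int dilation,
                 int pad_before, int pad_after) {
  const int eff_kernel = dilation * (kernel - 1) + 1;  // dilated kernel extent
  return (in + pad_before + pad_after - eff_kernel) / stride + 1;
}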
@@ -1099,8 +1078,8 @@ REG_OP(Conv3D)

/**
*@brief Computes the gradients of convolution 3d with respect to the input.

*@par Inputs:
* Three inputs:
* @li input_size: A Tensor of type int32, int64. An integer vector representing
* the shape of input, where input is a 5-D tensor
* [batch, depth, height, width, channels] or
@@ -1110,28 +1089,25 @@ REG_OP(Conv3D)
* @li out_backprop: A Tensor. Must have the same type as filter.
* 5-D with shape [batch, depth, out_height, out_width, out_channels]
* or [batch, out_channels, depth, out_height, out_width]. Gradients with
* respect to the output of the convolution . \n
* respect to the output of the convolution. \n

*@par Required Attributes:
* @li strides: A list of 5 integers. Specifies the stride of the sliding window
*@par Attributes:
* @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
* for each dimension of "out_backprop".
* The N and C dimensions must be 1. Has the same format as "out_backprop".
* @li pads: A list of 6 integers.
* @li pads: Required. A list of 6 integers.
* Supports only padding along the D, H and W dimensions in sequence of head,
* tail, top, bottom, left and right . \n

*@par Attributes:
* Three attributes:
* @li groups: Number of blocked connections from input channels to output
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
* tail, top, bottom, left and right.
* @li dilations: Optional. A tuple/list of 5 integers. The dilation factor for each
* dimension of the input.
* The N, C and D dimensions must be 1. Has the same format as "out_backprop".
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
* y: A Tensor. Has the same type as filter,and has same format as "input_size"
* y: A Tensor. Has the same type as filter, and has the same format as "input_size". \n

*@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_input
@@ -1150,40 +1126,39 @@ REG_OP(Conv3DBackpropInput)

/**
*@brief Computes the gradients of convolution 3d with respect to the input.

*@par Inputs:
* Two inputs:
* @li filter: A Tensor whose type is float16. The format of filter is NCDHW,
* NDHWC or DHWCN.
* @li out_backprop: A Tensor. Must have the same type as filter. The format is
* NDHWC or NCDHW. \n
* NDHWC or NCDHW. \n

*@par Required Attributes:
* @li strides: A list of 5 integers. Specifies the stride of the sliding window
*@par Attributes:
* @li input_size: Required. A tuple/list of type int32, int64. An integer vector
* representing the shape of input, where input is a 5-D tensor
* [batch, depth, height, width, channels] or
* [batch, channels, depth, height, width].
* @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window
* for each dimension of "out_backprop".
* The N and C dimensions must be 1. Has the same format as "out_backprop".
* @li pads: A list of 6 integers. Supports only padding along the D, H and W
* @li pads: Required. A list of 6 integers. Supports only padding along the D, H and W
* dimensions in sequence of head, tail, top, bottom, left and right.
* @li input_size: A tuple/list of type int32, int64. An integer vector
* representing the shape of input, where input is a 5-D tensor
* [batch, depth, height, width, channels] or
* [batch, channels, depth, height, width] . \n

*@par Attributes:
* Three attributes:
* @li groups: Number of blocked connections from input channels to output
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
* @li dilations: Optional. A tuple/list of 5 integers. The dilation factor for each
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "out_backprop".
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
* y: A Tensor. Has the same type and data format as "out_backprop".
* y: A Tensor. Has the same type and data format as "out_backprop". \n

*@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_input
* Compatible with Tensorflow's conv3d_backprop_input. \n

*@par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead.
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead.
*/
REG_OP(Conv3DBackpropInputD)
.INPUT(filter, TensorType({DT_FLOAT16}))
@@ -1242,8 +1217,8 @@ REG_OP(LSTM)

/**
*@brief Computes the gradients of convolution3D with respect to the filter

*@par Inputs:
* Three inputs:
* @li x: A Tensor. Must be one of the following types: float16, float32.
* Currently does not support double.
* 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
@@ -1258,26 +1233,23 @@ REG_OP(LSTM)
* or [batch, out_channels, out_depth, out_height, out_width].
* Gradients with respect to the output of the convolution. \n

*@par Required Attributes:
* @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
*@par Attributes:
* @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
* window for each dimension of "x". The N and C dimensions must be 1.
* Has the same format as "x".
* @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right]
* pads on feature map . \n

*@par Attributes:
* Three attributes:
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
* @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
* pads on feature map.
* @li dilations: Optional. A tuple/list of 5 integers. The dilation factor for each
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
* y: A Tensor that has the same type as "x"
* and the format is NDHWC, NCDHW or DHWCN.
* y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW or DHWCN. \n
*@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_filter
*/
@@ -1295,8 +1267,8 @@ REG_OP(Conv3DBackpropFilter)

/**
*@brief Computes the gradients of convolution with respect to the filter.

*@par Inputs:
* Two inputs:
* @li x: A Tensor of type float16.
* 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
* or [batch, in_channels, in_depth, in_height, in_width].
@@ -1305,37 +1277,34 @@ REG_OP(Conv3DBackpropFilter)
* or [batch, out_channels, out_depth, out_height, out_width].
* Gradients with respect to the output of the convolution. \n

*@par Required Attributes:
* @li filter_size: A tuple/list of type integers. An integer vector
*@par Attributes:
* @li filter_size: Required. A tuple/list of type integers. An integer vector
* representing the tensor shape of filter, where filter is a 5-D tensor
* [filter_depth, filter_height, filter_width, in_channels, out_channels],
* [out_channels, filter_depth, filter_height, filter_width, in_channels]
* or [out_channels, in_channels, filter_depth, filter_height, filter_width].
* @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
* @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
* window for each dimension of "x".
* The N and C dimensions must be 1. Has the same format as "x".
* @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right]
* pads on feature map. \n

*@par Attributes:
* Three attributes:
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
* @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
* pads on feature map.
* @li dilations: Optional. A tuple/list of 5 integers. The dilation factor for each
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. \n

*@par Outputs:
* y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN.
* y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. \n

*@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_filter
* Compatible with Tensorflow's conv3d_backprop_filter. \n

*@par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead.
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead.
*/


REG_OP(Conv3DBackpropFilterD)
.INPUT(x, TensorType({DT_FLOAT16}))
.INPUT(out_backprop, TensorType({DT_FLOAT16}))
@@ -1350,37 +1319,32 @@ REG_OP(Conv3DBackpropFilterD)

/**
*@brief Computes the transpose of convolution 3d with respect to the input.

*@par Inputs:
* Three inputs:
* @li input_size: A Tensor of type int32. An integer vector representing the
* shape of input.
* @li x: A Tensor of type float16, currently does not support int8. The format
* is NDHWC or NCDHW.
* @li filter: A Tensor of type float16, currently does not support int8.
* The format is NDHWC, NCDHW or DHWCN.
* @li bias: Optional. An optional 1D tensor of the same type as "x". Reserved.
* @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. Reserved. \n

*@par Optional input:
* Two optional inputs
* @li bias: An optional 1D tensor of the same type as "x". Reserved.
* @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n

*@par Required Attributes:
* @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding
*@par Attributes:
* @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
* window for each dimension of "x".
* The N and C dimensions must be 1. Has the same format as "x".
* @li pads: A tuple/list of 6 integers

*@par Attributes:
* Five attributes:
* @li groups: Number of blocked connections from input channels to output
* channels.
* @li dilations: A tuple/list of 5 integers,
* @li pads: Required. A tuple/list of 6 integers.
* @li dilations: Optional. A tuple/list of 5 integers.
* The dilation factor for each dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li output_padding: The size will be added in the output shape.
* @li offset_x: Input offset_x value. Reserved.
* @li output_padding: Optional. The size will be added in the output shape.
* @li offset_x: Optional. Input offset_x value. Reserved. \n

*@par Outputs:
* y: A Tensor. Has the same type and format as "x".
*/
@@ -1402,39 +1366,37 @@ REG_OP(Conv3DTranspose)

/**
*@brief Computes the transpose of convolution 3d with respect to the input.

*@par Inputs:
* @li x: A Tensor of type float16, currently does not support int8.
* The format is NDHWC or NCDHW.
* @li filter: A Tensor of type float16, currently does not support int8.
* The format is NDHWC, NCDHW or DHWCN.
* @li bias: Optional. An 1D tensor of the same type as "x". Reserved.
* @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n

*@par Optional inputs:
* @li bias: An optional 1D tensor of the same type as "x". Reserved.
* @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n

*@par Required Attributes:
* @li input_size: A tuple/list of type int32.
* An integer vector representing the shape of input
* @li strides: A tuple/list of 5 integers.
*@par Attributes:
* @li input_size: Required. A tuple/list of type int32.
* An integer vector representing the shape of input.
* @li strides: Required. A tuple/list of 5 integers.
* Specifies the stride of the sliding window for each dimension of "x".
* The N and C dimensions must be 1. Has the same format as "x".
* @li pads: A tuple/list of 6 integers . \n

*@par Attributes:
* Five attributes:
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
* @li pads: Required. A tuple/list of 6 integers.
* @li dilations: Optional. A tuple/list of 5 integers. The dilation factor for each
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
* @li groups: Optional. Number of blocked connections from input channels to output
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* @li data_format: Optional. A string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li output_padding: The size will be added in the output shape.
* @li offset_x: Input offset_x value. Reserved.
* @li output_padding: Optional. The size will be added in the output shape.
* @li offset_x: Optional. Input offset_x value. Reserved. \n

*@par Outputs:
* y: A Tensor. Has the same type and format as "x".
* y: A Tensor. Has the same type and format as "x". \n

*@par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
*/
REG_OP(Conv3DTransposeD)
.INPUT(x, TensorType({DT_FLOAT16}))
@@ -1469,17 +1431,17 @@ REG_OP(Conv3DTransposeD)
* @li offset_w: An optional 1D tensor for quantized inference. Reserved.
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | filter | bias | y
------------|---------|---------|---------|--------
| Data Type | float16 | float16 | float16 | float16
| |---------|---------|---------|--------
| | int8 | int8 | int32 | int32
------------|---------|---------|---------|--------
| Format | NCHW | NCHW | ND | NCHW
| | NHWC | HWCN | | NHWC
@endverbatim
* The following are the supported data types and data formats:\n
*\n
| Tensor | x | filter | bias | y\n
------------|---------|---------|---------|--------\n
| Data Type | float16 | float16 | float16 | float16\n
| |---------|---------|---------|--------\n
| | int8 | int8 | int32 | int32\n
------------|---------|---------|---------|--------\n
| Format | NCHW | NCHW | ND | NCHW\n
| | NHWC | HWCN | | NHWC\n
*\n
* For int8, a dequant or requant operator must be followed.
*\n
*
@@ -1504,38 +1466,38 @@ REG_OP(Conv3DTransposeD)
* within the effective range of int8 [-128, 127]. Defaults to "0".
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| input_size | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| x (out_backprop) | H*strideH| [1, 4096]
| | W*strideW| [1, 4096]
-------------------|----------|--------------
| filter | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| y (fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| Offset_x | | [-128, 127]

@endverbatim
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| input_size | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| x (out_backprop) | H*strideH| [1, 200000]\n
| | W*strideW| [1, 4096]\n
-------------------|----------|--------------\n
| filter | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| y (fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| Offset_x | | [-128, 127]\n
*\n
* In Ascend910, the H and W dimensions of fmap or out_backprop do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
* and filter_width > fmap_width.
* If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be < 4096.
*\n
*
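The H*strideH entries in these tables couple input and output extents through the transposed-convolution size relation, the inverse of the forward formula sketched earlier. A hedged helper for sanity-checking a configuration:

// Output extent of a transposed convolution along one spatial dimension.
int conv_transpose_out_dim(int in, int stride, int kernel, int dilation,
                           int pad_before, int pad_after,
                           int output_padding = 0) {
  const int eff_kernel = dilation * (kernel - 1) + 1;
  return (in - 1) * stride + eff_kernel - pad_before - pad_after + output_padding;
}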


+ 23
- 24
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -417,7 +417,7 @@ REG_OP(PSROIPooling)
*@brief Returns detection result . \n

*@par Inputs:
* Four inputs, including:
* Five inputs, including:
*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
@@ -474,7 +474,6 @@ REG_OP(FSRDetectionOutput)
*@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3
*@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1
*@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold
*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output".
*@par Outputs:
*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
*@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box.
@@ -989,26 +988,26 @@ REG_OP(SPP)
* feature map . \n

*@attention Constraints:
*@li For the feature map input:
(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
* For the feature map input:
*@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
*@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
*@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
*@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
*@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
*@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
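The pooled_h = pooled_w limits listed above amount to a small lookup table. The sketch below is a hypothetical helper (not part of the op definition) that returns the maximum feature-map size for a given pooled size, or -1 when the pooled size falls outside the documented 2..20 range.

// Hypothetical lookup for the constraint list above; not part of the op.
int MaxFeatureMapSizeForPooledSize(int pooled) {
  if (pooled < 2 || pooled > 20) return -1;  // outside the documented range
  static const int kLimits[] = {
      50, 60, 70, 70, 80, 80, 80,  // pooled = 2..8
      70, 70, 70, 70, 70, 70, 70,  // pooled = 9..15
      70, 50, 40, 40, 40};         // pooled = 16..20
  return kLimits[pooled - 2];
}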
@@ -1429,9 +1428,9 @@ REG_OP(NormalizeBBox)
* @li anchors: A Tensor. Must be int32.
*
*@par Attributes:
* @li scales: optional, listfloat, .
* @li scales: optional, listfloat.
* @li decode_clip: optional, float, threshold of the decode process.
* @li reversed_boxes: optional, bool,.
* @li reversed_boxes: optional, bool.
*
*@par Outputs:
* y: A Tensor. Must have the same type as box_predictions.


+ 9
- 64
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -127,9 +127,10 @@ REG_OP(SoftmaxGrad)
*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n

*@par Inputs:
* Two inputs, including:
* Three inputs, including:
*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value.
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value.
*@li dout: A multi-dimensional Tensor of type float16 or float32, specifying the gradient transferred from the upper layer. \n

*@par Outputs:
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n
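Given the dout input, this hunk documents the gradient variant of the loss. Below is a minimal element-wise sketch, assuming the usual logits-based formulation d(loss)/d(predict) = (sigmoid(predict) - target) * dout; the function name is hypothetical and the actual kernel may differ.

#include <cmath>
#include <vector>

// Illustrative element-wise backward pass; names are hypothetical.
std::vector<float> SigmoidCrossEntropyGradSketch(const std::vector<float> &predict,
                                                 const std::vector<float> &target,
                                                 const std::vector<float> &dout) {
  std::vector<float> grad(predict.size());
  for (size_t k = 0; k < predict.size(); ++k) {
    const float sig = 1.0f / (1.0f + std::exp(-predict[k]));
    grad[k] = (sig - target[k]) * dout[k];
  }
  return grad;
}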
@@ -1198,13 +1199,11 @@ REG_OP(INInferV2D)
* @li epsilon: An attribute of type Float. \n

* @par Outputs:
*Three outputs, including:
* Three outputs, including:
* @li y: A Tensor. Has the same type as "x". \n
* @li mean: A Tensor. Has the same type as "x". \n
* @li variance: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* Can be used by onnx InstanceNormalization
*/
REG_OP(InstanceNorm)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1218,24 +1217,22 @@ REG_OP(InstanceNorm)
.OP_END_FACTORY_REG(InstanceNorm)
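A minimal NCHW sketch of the forward computation registered above: mean and variance are taken per (n, c) over the spatial dimensions, then gamma and beta are applied per channel. Illustrative only; the helper name and the NCHW-only layout are assumptions, while the epsilon handling follows the attribute documented above.

#include <cmath>
#include <vector>

// Minimal NCHW instance-norm forward sketch; per-(n, c) statistics over H*W.
void InstanceNormNCHWSketch(const std::vector<float> &x, std::vector<float> &y,
                            int N, int C, int H, int W,
                            const std::vector<float> &gamma,  // size C
                            const std::vector<float> &beta,   // size C
                            float epsilon) {
  const int hw = H * W;
  y.resize(x.size());
  for (int n = 0; n < N; ++n) {
    for (int c = 0; c < C; ++c) {
      const size_t base = (static_cast<size_t>(n) * C + c) * hw;
      float mean = 0.0f, var = 0.0f;
      for (int i = 0; i < hw; ++i) mean += x[base + i];
      mean /= hw;
      for (int i = 0; i < hw; ++i) {
        const float d = x[base + i] - mean;
        var += d * d;
      }
      var /= hw;
      const float inv = 1.0f / std::sqrt(var + epsilon);
      for (int i = 0; i < hw; ++i) {
        y[base + i] = gamma[c] * (x[base + i] - mean) * inv + beta[c];
      }
    }
  }
}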

/**
*@brief InstanceNormGrad operator interface implementation.
* @brief InstanceNormGrad operator interface implementation.

*@par Inputs:
*Five inputs, including:
* @par Inputs:
* Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Three outputs, including:
* @par Outputs:
* Three outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormGrad)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1249,58 +1246,6 @@ REG_OP(InstanceNormGrad)
.OP_END_FACTORY_REG(InstanceNormGrad)

/**
*@brief InstanceNormXBackprop operator interface implementation.

*@par Inputs:
*Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Two outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormXBackprop)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(InstanceNormXBackprop)

/**
*@brief InstanceNormBetaGammaBackprop operator interface implementation.

*@par Inputs:
*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n

*@par Outputs:
*Two outputs, including:
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormBetaGammaBackprop)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop)

/**
* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n

* @par Inputs:


+ 12
- 9
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -50,6 +50,7 @@ namespace ge {
*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1".
*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1".
*@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0".
*@li data_format: An optional string, specifying the data format of the input and output data. Defaults to "NCHW".
*@par Outputs:
*y: An NCHW tensor of type float16, float32, int32.
*@attention Constraints:
@@ -635,7 +636,8 @@ REG_OP(MaxPoolV2)
*@li strides: A required list of int8, int16, int32, or int64 values,
* specifying the stride of the sliding window for each dimension of
* the input tensor. No default value.
*@li padding: A required string. No default value . \n
*@li padding: A required string. No default value .
*@li Targmax: An optional int with default value 7 . \n

*@par Outputs:
*@li y: A Tensor. Has the same type and format as input "x".
@@ -645,7 +647,7 @@ REG_OP(MaxPoolV2)
* ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
*@li "padding" is either "SAME" or "VALID" . \n
*@li "padding" is either "SAME" or "VALID" .

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmax.
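Below is a minimal single-channel sketch of the y/argmax pairing described above, using VALID padding and a flat input index for argmax. The index encoding of the real kernel is not specified in this header, so treat it as an assumption.

#include <cstdint>
#include <vector>

// Minimal single-channel max-pool-with-argmax sketch (VALID padding).
// argmax stores a flat index into the input plane, which is an assumption.
void MaxPoolWithArgmax2DSketch(const std::vector<float> &x, int H, int W,
                               int kh, int kw, int sh, int sw,
                               std::vector<float> &y, std::vector<int32_t> &argmax) {
  const int outH = (H - kh) / sh + 1;
  const int outW = (W - kw) / sw + 1;
  y.assign(static_cast<size_t>(outH) * outW, 0.0f);
  argmax.assign(static_cast<size_t>(outH) * outW, 0);
  for (int oh = 0; oh < outH; ++oh) {
    for (int ow = 0; ow < outW; ++ow) {
      int best = (oh * sh) * W + (ow * sw);
      for (int i = 0; i < kh; ++i) {
        for (int j = 0; j < kw; ++j) {
          const int idx = (oh * sh + i) * W + (ow * sw + j);
          if (x[idx] > x[best]) best = idx;
        }
      }
      y[oh * outW + ow] = x[best];
      argmax[oh * outW + ow] = best;
    }
  }
}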
@@ -710,14 +712,15 @@ REG_OP(MaxPoolGradWithArgmax)
*@brief Performs transform mask to argmax . \n

*@par Inputs:
* Two input:
*x: An NC1HWC0 Tensor of type float16.
*mask: An NC1HWC0 Tensor of type uint16 . \n
* Two inputs:
*@li x: An NC1HWC0 Tensor of type float16.
*@li mask: An NC1HWC0 Tensor of type uint16 . \n

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li padding: A required string. No default value . \n
*@li padding: A required string. No default value .
*@li originshape: A required list of int8, int16, int32, or int64 values. No default value. \n

*@par Outputs:
*argmax: An NC1HWC0 Tensor of type int32 . \n
@@ -931,11 +934,11 @@ REG_OP(AvgPoolV2GradD)
.OP_END_FACTORY_REG(AvgPoolV2GradD)

/**
*@brief :upsample the layer
*@brief Upsamples the layer, similar to the nearest-neighbor difference scaling algorithm.

*@par Inputs:
* one input, including:
*@li x: A tensor of type float16 or float32.
* x: A tensor of type float16 or float32.
*@par Attributes:
*@li scale: An optional float32, scale factor of x. Defaults to "1.0".
*@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2".
@@ -1419,7 +1422,7 @@ REG_OP(MaxPoolV3)
* the floor function will be used. Default False \n

* @par Outputs:
* y: A mutable tensor. Has the same shape and type as "x1" . \n
* out_grad: A mutable tensor. Has the same shape and type as "x1" . \n

* @attention Constraints:
* @li Computing gradients of global pooling is not supported, which means


+ 5
- 5
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -28,8 +28,8 @@ namespace ge {
*@brief Computes the gelu of "x" . \n

*@par Inputs:
*Two inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32
*One input, including:
*x: A Tensor. Must be one of the following types: float16, float32

*@par Outputs:
*y: A Tensor. Has the same type as "x".
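The header does not pin down the exact kernel, so as an illustration, here is the common tanh-approximation form of GELU; treat the constant and the formulation as assumptions rather than the op's definition.

#include <cmath>

// Common tanh-approximation GELU; an illustration, not the op's stated kernel.
float GeluApproxSketch(float x) {
  const float kSqrt2OverPi = 0.7978845608028654f;  // sqrt(2 / pi)
  const float inner = kSqrt2OverPi * (x + 0.044715f * x * x * x);
  return 0.5f * x * (1.0f + std::tanh(inner));
}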
@@ -66,8 +66,8 @@ REG_OP(GeluGrad)
*@brief Computes the fast_gelu of "x" . \n

*@par Inputs:
*Two inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32
*One input, including:
*x: A Tensor. Must be one of the following types: float16, float32

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -83,7 +83,7 @@ REG_OP(FastGelu)
*@brief Computes the gradient for the fast_gelu of "x" . \n

*@par Inputs:
*Three inputs, including:
*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "dy" . \n



+ 5
- 5
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -505,15 +505,15 @@ REG_OP(RandomChoiceWithMask)

*@par Inputs:
*Inputs including:
* @li x: A required Tensor. Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
* x: A required Tensor. Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n

*@par Attributes:
*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n
* group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n

*@par Outputs:
*y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
* y: A required Tensor. Has same type and shape as "x". Must be one of the following types:
float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n

*@attention Constraints:
*@li "group" must be greater than 0 and must evenly divide the channel dimension size.


+ 87
- 14
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -576,7 +576,7 @@ REG_OP(ReduceAll)
*@li axis: A mutable Tensor. The dimensions to reduce . \n

*@par Attributes:
*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n
*keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x" . \n
@@ -967,9 +967,9 @@ REG_OP(EuclideanNormD)
Defaults to "0.00001" . \n

*@par Outputs:
*y: A Tensor of type float16 or float32 for the normalized "x".
*batch_mean: A Tensor of type float32 for the result mean.
*batch_variance: A Tensor of type float32 for the result variance . \n
*@li y: A Tensor of type float16 or float32 for the normalized "x".
*@li batch_mean: A Tensor of type float32 for the result mean.
*@li batch_variance: A Tensor of type float32 for the result variance . \n

*@attention Constraints:
*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction.
@@ -987,7 +987,7 @@ REG_OP(INInferV2)
.OP_END_FACTORY_REG(INInferV2)

/**
*@brief Performs reduced instance normalization . \n
*@brief Performs reduce instance normalization. \n

*@par Inputs:
*x: A Tensor of type float16 or float32. \n
@@ -1008,32 +1008,31 @@ REG_OP(INTrainingReduceV2)


/**
*@brief Performs update instance normalization . \n
*@brief Performs update instance normalization. \n

*@par Inputs:
* Seven inputs, including: (NC1HWC0supported)
* Seven inputs, including:
*@li x: A Tensor of type float16 or float32.
*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li gamma: A Tensor of type float32, for the scaling gamma.
*@li beta: A Tensor of type float32, for the scaling beta.
*@li mean: A Tensor of type float32, for the updated mean.
*@li variance: A Tensor of type float32, for the updated variance . \n
*@li variance: A Tensor of type float32, for the updated variance. \n

*@par Attributes:
*@li momentum: A required float32, specifying the momentum to update mean and var.
*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n
*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n

*@par Outputs:
* Three outputs
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance . \n
*@li batch_variance: A Tensor of type float32, for the updated variance. \n

*@attention Constraints:
*@li This operator is an InstanceNorm fusion operator for updating the moving averages for training.
* This operator is an InstanceNorm fusion operator for updating the moving averages for training.
* This operator is used in conjunction with INTrainingReduceV2.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/
REG_OP(INTrainingUpdateV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -1052,6 +1051,80 @@ REG_OP(INTrainingUpdateV2)


/**
*@brief Performs the backpropagation of InstanceNorm. \n

*@par Inputs:
* Seven inputs, including:
*@li dy: A Tensor of type float16 or float32.
*@li x: A Tensor of type float16 or float32.
*@li variance: A Tensor of type float32, for the variance of "x".
*@li mean: A Tensor of type float32, for the mean of "x".
*@li res_gamma: A Tensor of type float32.
*@li res_beta: A Tensor of type float32.
*@li gamma: A Tensor of type float32. \n

*@par Outputs:
*pd_x: A Tensor of type float16 or float32, for the gradient of "x". \n

*@attention Constraints:
* The preceding layer of this operator must be INTrainingUpdateGrad. \n
*/
REG_OP(INTrainingReduceGrad)
.INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT}))
.INPUT(res_gamma, TensorType({DT_FLOAT}))
.INPUT(res_beta, TensorType({DT_FLOAT}))
.INPUT(gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingReduceGrad)

/**
*@brief Performs the backpropagation of InstanceNorm. \n

*@par Inputs:
* Four inputs, including:
*@li dy: A Tensor of type float16 or float32, for the gradient.
*@li x: A Tensor of type float16 or float32.
*@li variance: A Tensor of type float32, for the variance of "x".
*@li mean: A Tensor of type float32, for the mean of "x". \n

*@par Outputs:
*@li res_gamma: A Tensor of type float32.
*@li res_beta: A Tensor of type float32. \n

*/
REG_OP(INTrainingUpdateGrad)
.INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT}))
.OUTPUT(res_gamma, TensorType({DT_FLOAT}))
.OUTPUT(res_beta, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingUpdateGrad)

/**
*@brief Performs the backpropagation of InstanceNorm. \n

*@par Inputs:
* Two inputs, including:
*@li res_gamma: A Tensor of type float32.
*@li res_beta: A Tensor of type float32. \n

*@par Outputs:
*@li pd_gamma: A Tensor of type float32.
*@li pd_beta: A Tensor of type float32. \n

*/
REG_OP(INTrainingUpdateGradGammaBeta)
.INPUT(res_gamma, TensorType({DT_FLOAT}))
.INPUT(res_beta, TensorType({DT_FLOAT}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta)
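Taken together, the three new ops form a backward chain: INTrainingUpdateGrad produces the per-instance partials res_gamma/res_beta, which feed both INTrainingReduceGrad (yielding pd_x) and INTrainingUpdateGradGammaBeta (yielding pd_gamma and pd_beta). Below is a hedged numeric sketch of that final stage, assuming res_gamma/res_beta are [N, C] partials reduced over the batch axis; the exact shapes and reduction axes are not stated in the header.

#include <vector>

// Hedged sketch: reduce per-instance partials over the batch axis.
// Shapes [N, C] for res_gamma/res_beta are an assumption, not stated above.
void INTrainingUpdateGradGammaBetaSketch(const std::vector<float> &res_gamma,
                                         const std::vector<float> &res_beta,
                                         int N, int C,
                                         std::vector<float> &pd_gamma,
                                         std::vector<float> &pd_beta) {
  pd_gamma.assign(C, 0.0f);
  pd_beta.assign(C, 0.0f);
  for (int n = 0; n < N; ++n) {
    for (int c = 0; c < C; ++c) {
      pd_gamma[c] += res_gamma[static_cast<size_t>(n) * C + c];
      pd_beta[c] += res_beta[static_cast<size_t>(n) * C + c];
    }
  }
}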

/**
*@brief Performs reduced group normalization . \n

*@par Inputs:
@@ -1063,7 +1136,7 @@ REG_OP(INTrainingUpdateV2)


*@par Attributes:
*@li num_groups: A required int, specifying the number of groups; must be the same as in GNTrainingUpdate . \n
*num_groups: A required int, specifying the number of groups; must be the same as in GNTrainingUpdate . \n

*@attention Constraints:
* This operator is a GroupNorm fusion operator for updating the moving averages for training.
@@ -1081,7 +1154,7 @@ REG_OP(GNTrainingReduce)
*@brief Performs update group normalization . \n

*@par Inputs:
* Eight inputs, including: (NCHW NHWC supported)
* Seven inputs, including: (NCHW NHWC supported)
*@li x: A Tensor of type float16 or float32.
*@li sum: A 5D Tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC


+ 5
- 5
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -491,7 +491,6 @@ REG_OP(DynamicLSTMV2)
*ten inputs: \n
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -508,6 +507,7 @@ REG_OP(DynamicLSTMV2)
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*/
REG_OP(LSTMInputGrad)
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -571,13 +571,13 @@ REG_OP(DynamicLSTMGradCell)
.INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(t_state, TensorType({DT_INT32, DT_INT32}))
.INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(forget_bias, Float, 1)
.ATTR(activation, String, "")
.ATTR(direction, String, "Forward")
.ATTR(forget_bias, Float, 1.0)
.ATTR(activation, String, "tanh")
.ATTR(direction, String, "UNIDIRECTIONAL")
.ATTR(gate_order, String, "ijfo")
.OP_END_FACTORY_REG(DynamicLSTMGradCell)
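For orientation, here is a scalar sketch of one LSTM-cell backward step consistent with the "ijfo" gate_order named above (i: input gate, j: candidate, f: forget gate, o: output gate). It assumes the conventional sigmoid/tanh gate math with activation derivatives folded into the gradients; it is not the kernel itself, and the helper names are hypothetical.

#include <cmath>

// Scalar sketch of one LSTM-cell backward step; conventional gate math
// with activation derivatives folded in. Assumptions, not the kernel.
struct LstmCellGradSketch {
  float di, dj, df, dout_gate, dct_1;  // pre-activation gate grads, "ijfo" order
};

LstmCellGradSketch DynamicLstmGradCellSketch(float dy, float dc, float c_prev,
                                             float i, float j, float f, float o,
                                             float tanhct) {
  const float dc_total = dc + dy * o * (1.0f - tanhct * tanhct);
  LstmCellGradSketch g;
  g.di = dc_total * j * i * (1.0f - i);        // sigmoid'(i) folded in
  g.dj = dc_total * i * (1.0f - j * j);        // tanh'(j) folded in
  g.df = dc_total * c_prev * f * (1.0f - f);   // sigmoid'(f) folded in
  g.dout_gate = dy * tanhct * o * (1.0f - o);  // sigmoid'(o) folded in
  g.dct_1 = dc_total * f;                      // gradient to previous cell state
  return g;
}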



+ 8
- 13
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -1968,17 +1968,14 @@ REG_OP(WriteSelect)
.OP_END_FACTORY_REG(WriteSelect)

/**
*@brief Read data by stride . \n
*@brief Read data by stride.

*@par Inputs:
*One input:
*x: A Tensor. Must be one of the following types: float16, int8 . \n

*@par Attributes:
*@li axis: A required int32, specifying the index of axis to read by stride . \n
*x: A Tensor. Must be one of the following types: float16, int8. \n

*@par Attributes:
*@li stride: A required int32, specifying the value of reading stride . \n
*@li axis: A required int32, specifying the index of axis to read by stride. \n
*@li stride: A required int32, specifying the value of reading stride. \n

*@par Outputs:
*y: A Tensor of the same type as "x".
@@ -1991,16 +1988,14 @@ REG_OP(StridedRead)
.OP_END_FACTORY_REG(StridedRead)

/**
*@brief: Write data by stride . \n
*@brief Write data by stride.

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, int8 . \n

*@par Attributes:
*@li axis: A required int32, specifying the index of axis to write by stride . \n
*x: A Tensor. Must be one of the following types: float16, int8. \n

*@par Attributes:
*@li stride: A required int32, specifying the value of writing stride . \n
*@li axis: A required int32, specifying the index of axis to write by stride. \n
*@li stride: A required int32, specifying the value of writing stride. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x".
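Below is a minimal sketch of the read side, assuming "stride" means one slice is kept out of every "stride" slices along the chosen axis (folded here into the outermost dimension for brevity). The helper is hypothetical and the exact slicing semantics are an assumption.

#include <vector>

// Hypothetical strided-read sketch: keep one contiguous slice of "inner"
// elements out of every "stride" slices. Semantics are an assumption.
std::vector<float> StridedReadSketch(const std::vector<float> &x,
                                     size_t inner, size_t stride) {
  std::vector<float> y;
  for (size_t base = 0; base + inner <= x.size(); base += stride * inner) {
    y.insert(y.end(), x.begin() + base, x.begin() + base + inner);
  }
  return y;
}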


+ 3
- 6
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -161,14 +161,11 @@ REG_OP(SplitVD)
/**
*@brief Concatenates a list of N tensors along the first dimension.
*@par Inputs:
* Two inputs, including:
* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32,
* One input, including:
* values: A list of Tensors. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64, float16, float32.
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape.
* It's a dynamic input.
* @li shape: A Tensor of the same type as "x".
* The final shape of the result. Should be equal to the shapes of any input
* but with the number of input values in the first dimension . \n
* It's a dynamic input. \n

*@par Attributes:
* @li shape: A required list of ints.


+ 2
- 2
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -909,7 +909,7 @@ output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this o
*@li either pool_strings or pool_int64s attributes must be present but not both.
*/

REG_OP(TfidVectorizer)
REG_OP(TfIdfVectorizer)
.INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(output, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(max_gram_length, Int)
@@ -921,7 +921,7 @@ REG_OP(TfidVectorizer)
.ATTR(pool_int64s, ListInt, {})
.ATTR(pool_strings, ListString, {})
.ATTR(weights, ListFloat, {})
.OP_END_FACTORY_REG(TfidVectorizer)
.OP_END_FACTORY_REG(TfIdfVectorizer)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_

+ 0
- 1
third_party/fwkacllib/inc/runtime/rt_ffts.h View File

@@ -181,5 +181,4 @@ RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t st
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif

#endif // __CCE_RUNTIME_FFTS_H

+ 0
- 1
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -50,7 +50,6 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_FFTS_TASK,
RT_MODEL_TASK_ALL_KERNEL,
RT_MODEL_TASK_PROFILER_TRACE_EX,
RT_MODEL_TASK_FFTS_TASK,

