Browse Source

code_sync_0617_inc

tags/v1.3.0
dingpeifei 4 years ago
parent
commit
ac2ce17a9a
34 changed files with 1568 additions and 1176 deletions
  1. +4
    -4
      inc/external/acl/acl.h
  2. +42
    -49
      inc/external/acl/acl_base.h
  3. +168
    -193
      inc/external/acl/acl_mdl.h
  4. +37
    -82
      inc/external/acl/acl_op.h
  5. +21
    -33
      inc/external/acl/acl_op_compiler.h
  6. +23
    -23
      inc/external/acl/acl_prof.h
  7. +62
    -80
      inc/external/acl/acl_rt.h
  8. +12
    -19
      inc/external/acl/acl_tdt.h
  9. +75
    -72
      inc/external/acl/error_codes/rt_error_codes.h
  10. +41
    -138
      inc/external/acl/ops/acl_cblas.h
  11. +164
    -261
      inc/external/acl/ops/acl_dvpp.h
  12. +13
    -16
      inc/external/acl/ops/acl_fv.h
  13. +20
    -19
      inc/external/hccl/hccl.h
  14. +42
    -42
      inc/external/hccl/hccl_types.h
  15. +75
    -72
      inc/external/runtime/rt_error_codes.h
  16. +107
    -0
      scripts/format_source_code.sh
  17. +10
    -9
      third_party/fwkacllib/inc/cce/taskdown_common.hpp
  18. +18
    -18
      third_party/fwkacllib/inc/hccl/base.h
  19. +14
    -0
      third_party/fwkacllib/inc/hccl/hcom.h
  20. +3
    -3
      third_party/fwkacllib/inc/ops/array_ops.h
  21. +29
    -0
      third_party/fwkacllib/inc/ops/bitwise_ops.h
  22. +1
    -1
      third_party/fwkacllib/inc/ops/control_flow_ops.h
  23. +20
    -2
      third_party/fwkacllib/inc/ops/data_flow_ops.h
  24. +1
    -1
      third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  25. +72
    -0
      third_party/fwkacllib/inc/ops/hcom_ops.h
  26. +26
    -0
      third_party/fwkacllib/inc/ops/math_ops.h
  27. +150
    -10
      third_party/fwkacllib/inc/ops/nn_norm_ops.h
  28. +42
    -8
      third_party/fwkacllib/inc/ops/reduce_ops.h
  29. +34
    -0
      third_party/fwkacllib/inc/ops/selection_ops.h
  30. +3
    -3
      third_party/fwkacllib/inc/ops/split_combination_ops.h
  31. +39
    -0
      third_party/fwkacllib/inc/ops/string_ops.h
  32. +115
    -17
      third_party/fwkacllib/inc/ops/transformation_ops.h
  33. +56
    -0
      third_party/fwkacllib/inc/runtime/rt_stars.h
  34. +29
    -1
      third_party/fwkacllib/inc/toolchain/prof_callback.h

+ 4
- 4
inc/external/acl/acl.h View File

@@ -26,9 +26,9 @@ extern "C" {
#endif

// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
* @ingroup AscendCL
@@ -79,4 +79,4 @@ ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg();
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_
#endif // INC_EXTERNAL_ACL_ACL_H_

+ 42
- 49
inc/external/acl/acl_base.h View File

@@ -136,50 +136,49 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005;
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
} aclDataType;

typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
} aclFormat;

typedef enum {
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
} aclLogLevel;

typedef enum {
ACL_MEMTYPE_DEVICE = 0,
ACL_MEMTYPE_HOST = 1,
ACL_MEMTYPE_DEVICE = 0,
ACL_MEMTYPE_HOST = 1,
} aclMemType;


/**
* @ingroup AscendCL
* @brief Converts data of type aclFloat16 to data of type float
@@ -312,9 +311,7 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
* @retval aclTensorDesc pointer.
* @retval nullptr if param is invalid or run out of memory
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
int numDims,
const int64_t *dims,
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
aclFormat format);

/**
@@ -336,8 +333,7 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc,
size_t dimsCount,
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);

/**
@@ -434,9 +430,7 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
size_t index,
size_t dimRangeNum,
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
int64_t *dimRange);

/**
@@ -473,7 +467,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
aclTensorDesc **dstDesc);
aclTensorDesc **dstDesc);

/**
* @ingroup AscendCL
@@ -561,7 +555,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu
*
* @retval null for failed.
* @retval OtherValues success.
*/
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

/**
@@ -572,7 +566,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc,
*
* @retval null for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

/**
@@ -624,7 +618,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy
* @param ... [IN] the value of current log
*/
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);
const char *fmt, ...);

/**
* @ingroup AscendCL
@@ -632,14 +626,13 @@ ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const
*
* @retval null for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY const char *aclrtGetSocName();

#define ACL_APP_LOG(level, fmt, ...) \
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_
#endif // INC_EXTERNAL_ACL_ACL_BASE_H_

+ 168
- 193
inc/external/acl/acl_mdl.h View File

@@ -27,19 +27,19 @@
extern "C" {
#endif

#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
#define ACL_MDL_LOAD_FROM_FILE 1
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
#define ACL_MDL_LOAD_FROM_MEM 3
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6
#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
#define ACL_MDL_LOAD_FROM_FILE 1
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
#define ACL_MDL_LOAD_FROM_MEM 3
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6

#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
@@ -52,123 +52,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo;
typedef struct aclmdlConfigHandle aclmdlConfigHandle;

typedef enum {
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
} aclAippInputFormat;

typedef enum {
ACL_MDL_PRIORITY_INT32 = 0,
ACL_MDL_LOAD_TYPE_SIZET,
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
ACL_MDL_MEM_SIZET,
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
ACL_MDL_WEIGHT_SIZET,
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */
ACL_MDL_WORKSPACE_SIZET,
ACL_MDL_INPUTQ_NUM_SIZET,
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
ACL_MDL_OUTPUTQ_NUM_SIZET,
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
ACL_MDL_PRIORITY_INT32 = 0,
ACL_MDL_LOAD_TYPE_SIZET,
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
ACL_MDL_MEM_SIZET,
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
ACL_MDL_WEIGHT_SIZET,
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */
ACL_MDL_WORKSPACE_SIZET,
ACL_MDL_INPUTQ_NUM_SIZET,
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
ACL_MDL_OUTPUTQ_NUM_SIZET,
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
} aclmdlConfigAttr;

typedef enum {
ACL_DATA_WITHOUT_AIPP = 0,
ACL_DATA_WITH_STATIC_AIPP,
ACL_DATA_WITH_DYNAMIC_AIPP,
ACL_DYNAMIC_AIPP_NODE
ACL_DATA_WITHOUT_AIPP = 0,
ACL_DATA_WITH_STATIC_AIPP,
ACL_DATA_WITH_DYNAMIC_AIPP,
ACL_DYNAMIC_AIPP_NODE
} aclmdlInputAippType;

typedef struct aclmdlIODims {
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
} aclmdlIODims;

typedef struct aclAippDims {
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
} aclAippDims;

typedef struct aclmdlBatch {
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
} aclmdlBatch;

typedef struct aclmdlHW {
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
} aclmdlHW;

typedef struct aclAippInfo {
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
} aclAippInfo;

/**
@@ -292,8 +292,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset,
aclTensorDesc *tensorDesc,
ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc,
size_t index);

/**
@@ -355,8 +354,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
uint32_t *modelId);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);

/**
* @ingroup AscendCL
@@ -378,9 +376,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelS
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
size_t workSize, void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -403,9 +400,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
void *workPtr, size_t workSize, void *weightPtr,
size_t weightSize);

/**
* @ingroup AscendCL
@@ -440,8 +437,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
const uint32_t *inputQ, size_t inputQNum,
const uint32_t *outputQ, size_t outputQNum);
const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
size_t outputQNum);

/**
* @ingroup AscendCL
@@ -471,8 +468,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem
*/
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input,
aclmdlDataset *output, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -647,7 +644,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc,
* @param modelDesc [IN] model description
* @param opName [IN] op name
* @param attr [IN] attr name
*
*
* @retval the attr value
*/
ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
@@ -859,11 +856,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch,
int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2,
int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2,
int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);
@@ -879,7 +876,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);

/**
@@ -893,7 +890,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);

/**
@@ -908,7 +905,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
int32_t srcImageSizeH);

@@ -928,14 +925,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
int8_t scfSwitch,
int32_t scfInputSizeW,
int32_t scfInputSizeH,
int32_t scfOutputSizeW,
int32_t scfOutputSizeH,
uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
int32_t scfInputSizeH, int32_t scfOutputSizeW,
int32_t scfOutputSizeH, uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -953,13 +946,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
int8_t cropSwitch,
int32_t cropStartPosW,
int32_t cropStartPosH,
int32_t cropSizeW,
int32_t cropSizeH,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
uint64_t batchIndex);

/**
@@ -978,7 +967,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
int32_t paddingSizeTop, int32_t paddingSizeBottom,
int32_t paddingSizeLeft, int32_t paddingSizeRight,
@@ -999,13 +988,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1,
int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3,
uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3, uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1022,13 +1008,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
float dtcPixelMinChn0,
float dtcPixelMinChn1,
float dtcPixelMinChn2,
float dtcPixelMinChn3,
uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
float dtcPixelMinChn1, float dtcPixelMinChn2,
float dtcPixelMinChn3, uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1045,13 +1028,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1,
float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3,
uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3, uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1067,10 +1047,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1087,10 +1065,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1108,10 +1084,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId,
size_t index,
aclmdlInputAippType *type,
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
size_t *dynamicAttachedDataIndex);

/**
@@ -1128,7 +1102,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);

/**
@@ -1147,10 +1121,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind
*
* @retval ACL_SUCCESS The function is successfully executed
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId,
uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs,
aclTensorDesc **outputDesc, size_t *numOutputs);
*/
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
size_t *numInputs, aclTensorDesc **outputDesc,
size_t *numOutputs);

/**
* @ingroup AscendCL
@@ -1158,7 +1133,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlInitDump();

/**
@@ -1169,7 +1144,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);

/**
@@ -1178,7 +1153,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();

/**
@@ -1190,7 +1165,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);

/**
@@ -1200,7 +1175,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand
* @retval the aclmdlConfigHandle pointer
*
* @see aclmdlDestroyConfigHandle
*/
*/
ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();

/**
@@ -1229,7 +1204,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
const void *attrValue, size_t valueSize);
const void *attrValue, size_t valueSize);

/**
* @ingroup AscendCL
@@ -1247,4 +1222,4 @@ ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelD
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_
#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_

+ 37
- 82
inc/external/acl/acl_op.h View File

@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length);
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

typedef enum aclEngineType {
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
} aclopEngineType;

/**
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
const uint8_t *values);
const uint8_t *values);

/**
* @ingroup AscendCL
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
const int64_t *values);
const int64_t *values);

/**
* @ingroup AscendCL
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
const float *values);
const float *values);

/**
* @ingroup AscendCL
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
const char **values);
const char **values);

/**
* @ingroup AscendCL
@@ -208,11 +208,8 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
const char *attrName,
int numLists,
const int *numValues,
const int64_t *const values[]);
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
const int *numValues, const int64_t *const values[]);

/**
* @ingroup AscendCL
@@ -242,15 +239,10 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
aclDataBuffer *const outputs[],
const aclopAttr *attr,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[], int numOutputs,
const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -280,15 +272,9 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[],
aclopAttr *attr,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -306,12 +292,9 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
aclopHandle **handle);

/**
@@ -343,12 +326,9 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
*
* @see aclopCreateHandle | aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
int numInputs,
const aclDataBuffer *const inputs[],
int numOutputs,
aclDataBuffer *const outputs[],
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
const aclDataBuffer *const inputs[], int numOutputs,
aclDataBuffer *const outputs[], aclrtStream stream);

/**
* @ingroup AscendCL
@@ -364,11 +344,8 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc,
aclDataBuffer *dstBuffer,
uint8_t truncate,
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
aclrtStream stream);

/**
@@ -383,12 +360,9 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
aclTensorDesc *dstDesc,
uint8_t truncate,
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -407,15 +381,10 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
*
* @see aclopCompile
*/
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
const char *kernelId,
const char *kernelName,
void *binData,
int binSize,
aclopEngineType enginetype,
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
void *binData, int binSize, aclopEngineType enginetype,
aclDataDeallocator deallocator);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -430,11 +399,8 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
typedef aclError (*aclopCompileFunc)(int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
aclopKernelDesc *aclopKernelDesc);

/**
@@ -475,11 +441,8 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
const char *kernelId,
uint32_t blockDim,
const void *args,
uint32_t argSize);
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
const void *args, uint32_t argSize);

/**
* @ingroup AscendCL
@@ -510,12 +473,9 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr);
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *attr);

/**
* @ingroup AscendCL
@@ -533,17 +493,12 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
aclopAttr *attr);


#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_
#endif // INC_EXTERNAL_ACL_ACL_OP_H_

+ 21
- 33
inc/external/acl/acl_op_compiler.h View File

@@ -24,28 +24,22 @@
extern "C" {
#endif

typedef enum aclCompileType {
ACL_COMPILE_SYS,
ACL_COMPILE_UNREGISTERED
} aclopCompileType;
typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;

typedef enum {
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR,
ACL_OP_PERFORMANCE_MODE
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR,
ACL_OP_PERFORMANCE_MODE
} aclCompileOpt;

typedef enum aclCompileFlag {
ACL_OP_COMPILE_DEFAULT,
ACL_OP_COMPILE_FUZZ
} aclOpCompileFlag;
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;

/**
* @ingroup AscendCL
@@ -65,15 +59,10 @@ typedef enum aclCompileFlag {
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr,
aclopEngineType engineType,
aclopCompileType compileFlag,
const char *opPath);
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
int numOutputs, const aclTensorDesc *const outputDesc[],
const aclopAttr *attr, aclopEngineType engineType,
aclopCompileType compileFlag, const char *opPath);

/**
* @ingroup AscendCL
@@ -96,11 +85,10 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag,
const char *opPath, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -130,4 +118,4 @@ ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_

+ 23
- 23
inc/external/acl/acl_prof.h View File

@@ -23,24 +23,24 @@
extern "C" {
#endif

#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008
#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

/**
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead
*/
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

typedef enum {
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

typedef struct aclprofConfig aclprofConfig;
@@ -101,7 +101,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
* @see aclprofDestroyConfig
*/
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
aclprofAicoreMetrics aicoreMetrics,
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

/**
* @ingroup AscendCL
@@ -141,8 +142,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
*
* @see aclprofModelUnSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId,
const aclprofSubscribeConfig *profSubscribeConfig);
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
@@ -170,7 +170,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
* @see aclprofDestroySubscribeConfig
*/
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
aclprofAicoreMetrics aicoreMetrics, void *fd);
aclprofAicoreMetrics aicoreMetrics, void *fd);

/**
* @ingroup AscendCL
@@ -222,7 +222,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index,
size_t *opTypeLen);
size_t *opTypeLen);

/**
* @ingroup AscendCL
@@ -237,8 +237,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opIn
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opType, size_t opTypeLen);
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
size_t opTypeLen);

/**
* @ingroup AscendCL
@@ -253,7 +253,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index,
size_t *opNameLen);
size_t *opNameLen);

/**
* @ingroup AscendCL
@@ -268,8 +268,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opIn
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opName, size_t opNameLen);
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
size_t opNameLen);

/**
* @ingroup AscendCL
@@ -326,4 +326,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe
}
#endif

#endif // INC_EXTERNAL_ACL_PROF_H_
#endif // INC_EXTERNAL_ACL_PROF_H_

+ 62
- 80
inc/external/acl/acl_rt.h View File

@@ -28,63 +28,63 @@ extern "C" {
#define ACL_EVENT_TIME_LINE 0x00000008u

typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;

typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;

typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;

typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
} aclrtMemMallocPolicy;

typedef enum aclrtMemAttr {
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
} aclrtMemAttr;

typedef enum aclrtGroupAttr {
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT,
ACL_GROUP_GROUPID_INT
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT,
ACL_GROUP_GROUPID_INT
} aclrtGroupAttr;

typedef struct tagRtGroupInfo aclrtGroupInfo;
@@ -487,7 +487,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
/**
* @ingroup AscendCL
* @brief Queries an event's status
*
@@ -549,9 +549,7 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start,
*
* @see aclrtFree | acldvppMalloc | aclrtMallocCached
*/
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -574,9 +572,7 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
*
* @see aclrtFree | aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -667,10 +663,7 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
size_t destMax,
const void *src,
size_t count,
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
aclrtMemcpyKind kind);

/**
@@ -717,38 +710,31 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
aclrtMemcpyKind kind, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*
* @par Function
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of byte to set
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
size_t maxCount,
int32_t value,
size_t count,
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of byte to set
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
aclrtStream stream);

/**
@@ -894,11 +880,8 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo,
int32_t groupIndex,
aclrtGroupAttr attr,
void *attrValue,
size_t valueLen,
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex,
aclrtGroupAttr attr, void *attrValue, size_t valueLen,
size_t *paramRetSize);

/**
@@ -972,5 +955,4 @@ ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout);
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_

#endif // INC_EXTERNAL_ACL_ACL_RT_H_

+ 12
- 19
inc/external/acl/acl_tdt.h View File

@@ -24,10 +24,10 @@ extern "C" {
#endif

enum acltdtTensorType {
ACL_TENSOR_DATA_UNDEFINED = -1,
ACL_TENSOR_DATA_TENSOR,
ACL_TENSOR_DATA_END_OF_SEQUENCE,
ACL_TENSOR_DATA_ABNORMAL
ACL_TENSOR_DATA_UNDEFINED = -1,
ACL_TENSOR_DATA_TENSOR,
ACL_TENSOR_DATA_END_OF_SEQUENCE,
ACL_TENSOR_DATA_ABNORMAL
};

typedef struct acltdtDataItem acltdtDataItem;
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *
*
* @retval null for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

/**
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt
*
* @retval 0 for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

/**
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI
*
* @retval 0 for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

/**
@@ -118,12 +118,8 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte
*
* @see acltdtDestroyDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType,
const int64_t *dims,
size_t dimNum,
aclDataType dataType,
void *data,
size_t size);
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
aclDataType dataType, void *data, size_t size);

/**
* @ingroup AscendCL
@@ -254,8 +250,7 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
*
* @see acltdtReceiveTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
const acltdtDataset *dataset,
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
int32_t timeout);

/**
@@ -271,13 +266,11 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
*
* @see acltdtSendTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle,
acltdtDataset *dataset,
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
int32_t timeout);

#ifdef __cplusplus
}
#endif

#endif //INC_EXTERNAL_ACL_ACL_TDT_H_

#endif // INC_EXTERNAL_ACL_ACL_TDT_H_

+ 75
- 72
inc/external/acl/error_codes/rt_error_codes.h View File

@@ -23,84 +23,87 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success
static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 41
- 138
inc/external/acl/ops/acl_cblas.h View File

@@ -23,17 +23,9 @@
extern "C" {
#endif

typedef enum aclTransType {
ACL_TRANS_N,
ACL_TRANS_T,
ACL_TRANS_NZ,
ACL_TRANS_NZ_T
} aclTransType;
typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType;

typedef enum aclComputeType {
ACL_COMPUTE_HIGH_PRECISION,
ACL_COMPUTE_LOW_PRECISION
} aclComputeType;
typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType;

/**
* @ingroup AscendCL
@@ -61,12 +53,11 @@ typedef enum aclComputeType {
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
const void *alpha, const void *a, int lda, aclDataType dataTypeA,
const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda,
aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -83,15 +74,10 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
int m,
int n,
aclDataType dataTypeA,
aclDataType dataTypeX,
aclDataType dataTypeY,
aclComputeType type,
aclopHandle **handle);
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA,
aclDataType dataTypeX, aclDataType dataTypeY,
aclComputeType type, aclopHandle **handle);

/**
* @ingroup AscendCL
@@ -115,18 +101,9 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
int m,
int n,
const aclFloat16 *alpha,
const aclFloat16 *a,
int lda,
const aclFloat16 *x,
int incx,
const aclFloat16 *beta,
aclFloat16 *y,
int incy,
aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha,
const aclFloat16 *a, int lda, const aclFloat16 *x, int incx,
const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type,
aclrtStream stream);

/**
@@ -142,10 +119,7 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
int m,
int n,
aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type,
aclopHandle **handle);

/**
@@ -171,19 +145,9 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
int m,
int n,
const int32_t *alpha,
const int8_t *a,
int lda,
const int8_t *x,
int incx,
const int32_t *beta,
int32_t *y,
int incy,
aclComputeType type,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a,
int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y,
int incy, aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -198,10 +162,7 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
int m,
int n,
aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type,
aclopHandle **handle);

/**
@@ -233,26 +194,11 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const void *alpha,
const void *matrixA,
int lda,
aclDataType dataTypeA,
const void *matrixB,
int ldb,
aclDataType dataTypeB,
const void *beta,
void *matrixC,
int ldc,
aclDataType dataTypeC,
aclComputeType type,
aclrtStream stream);

ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const void *alpha, const void *matrixA, int lda,
aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -274,18 +220,10 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclDataType dataTypeA,
aclDataType dataTypeB,
aclDataType dataTypeC,
aclComputeType type,
aclopHandle **handle);

ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclDataType dataTypeA,
aclDataType dataTypeB, aclDataType dataTypeC,
aclComputeType type, aclopHandle **handle);

/**
* @ingroup AscendCL
@@ -313,22 +251,10 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const aclFloat16 *alpha,
const aclFloat16 *matrixA,
int lda,
const aclFloat16 *matrixB,
int ldb,
const aclFloat16 *beta,
aclFloat16 *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda,
const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta,
aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -346,13 +272,8 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclComputeType type,
aclopHandle **handle);

/**
@@ -381,23 +302,10 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const int32_t *alpha,
const int8_t *matrixA,
int lda,
const int8_t *matrixB,
int ldb,
const int32_t *beta,
int32_t *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);

ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const int32_t *alpha, const int8_t *matrixA, int lda,
const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC,
int ldc, aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -415,17 +323,12 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclComputeType type,
aclopHandle **handle);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

+ 164
- 261
inc/external/acl/ops/acl_dvpp.h View File

@@ -53,123 +53,109 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output

// Supported Pixel Format
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
};

// Stream Format
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
H264_BASELINE_LEVEL,
H264_MAIN_LEVEL,
H264_HIGH_LEVEL
};
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };

// Supported Channel Mode
enum acldvppChannelMode {
DVPP_CHNMODE_VPC = 1,
DVPP_CHNMODE_JPEGD = 2,
DVPP_CHNMODE_JPEGE = 4
};
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };

// Supported Border Type
enum acldvppBorderType {
BORDER_CONSTANT = 0,
BORDER_REPLICATE,
BORDER_REFLECT,
BORDER_REFLECT_101
};
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };

// Venc parameter type
enum aclvencChannelDescParamType {
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
};

// Jpeg picture format
enum acldvppJpegFormat {
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
};

/**
@@ -523,9 +509,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
* @retval null for failed.
* @retval other success
*/
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
uint32_t right,
uint32_t top,
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
uint32_t bottom);

/**
@@ -604,10 +588,7 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
uint32_t left,
uint32_t right,
uint32_t top,
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
uint32_t bottom);

/**
@@ -1096,7 +1077,8 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, const void *param);
aclvencChannelDescParamType paramType, size_t length,
const void *param);

/**
* @ingroup AscendCL
@@ -1245,7 +1227,8 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);
aclvencChannelDescParamType paramType, size_t length,
size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
@@ -1545,10 +1528,7 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
int32_t *components);

/**
@@ -1565,11 +1545,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
uint32_t *height, int32_t *components,
acldvppJpegFormat *format);

/**
@@ -1584,8 +1561,7 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
const acldvppJpegeConfig *config,
uint32_t *size);
const acldvppJpegeConfig *config, uint32_t *size);

/**
* @ingroup AscendCL
@@ -1599,10 +1575,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1617,11 +1591,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
uint32_t dataSize,
uint32_t *width,
uint32_t *height,
int32_t *components);
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
uint32_t *height, int32_t *components);

/**
* @ingroup AscendCL
@@ -1635,10 +1606,8 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1702,10 +1671,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe
* @see acldvppCreateChannel | acldvppCreatePicDesc
* | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig,
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
@@ -1741,10 +1708,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
aclrtStream stream);

/**
@@ -1781,13 +1746,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1811,12 +1772,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *chann
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
aclrtStream stream);
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[], aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1841,13 +1799,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1870,12 +1825,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1899,13 +1851,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1930,14 +1879,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[], aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1963,16 +1909,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2000,11 +1940,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2022,11 +1959,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreateJpegeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
const void *data,
uint32_t *size,
acldvppJpegeConfig *config,
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
const void *data, uint32_t *size, acldvppJpegeConfig *config,
aclrtStream stream);

/**
@@ -2044,11 +1978,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2103,11 +2034,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
acldvppPicDesc *output,
aclvdecFrameConfig *config,
void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData);

/**
* @ingroup AscendCL
@@ -2126,10 +2054,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
aclvdecFrameConfig *config,
void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
aclvdecFrameConfig *config, void *userData);

/**
* @ingroup AscendCL
@@ -2150,10 +2076,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2175,11 +2099,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
void *reserve,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2191,8 +2112,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
uint32_t mode);
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);

/**
* @ingroup AscendCL
@@ -2227,8 +2147,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
uint32_t outMode);
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);

/**
* @ingroup AscendCL
@@ -2325,9 +2244,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
uint32_t dim,
uint8_t **data,
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
uint32_t *len);
/**
* @ingroup AscendCL
@@ -2345,10 +2262,8 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap,
aclrtStream stream);
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2369,8 +2284,7 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
*
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
uint32_t index,
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
double value);

/**
@@ -2515,10 +2429,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig,
aclrtStream stream);
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2535,11 +2447,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *srcPicDesc,
acldvppHist *hist,
void *reserve,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
acldvppHist *hist, void *reserve, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2548,7 +2457,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();
ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();

/**
* @ingroup AscendCL
@@ -2605,7 +2514,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
*
* @see acldvppCreateHist | acldvppVpcCalcHistAsync
*/
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);

/**
* @ingroup AscendCL
@@ -2624,7 +2533,6 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);
*/
ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);


/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop, resize config and make border.
@@ -2648,18 +2556,13 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_

+ 13
- 16
inc/external/acl/ops/acl_fv.h View File

@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult;

// search operation type
enum aclfvSearchType {
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
};

/**
@@ -104,7 +104,8 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);
uint32_t featureLen, uint32_t featureCount,
uint8_t *featureData, uint32_t featureDataLen);

/**
* @ingroup AscendCL
@@ -233,8 +234,9 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1,
uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);

/**
* @ingroup AscendCL
@@ -285,12 +287,11 @@ ACL_FUNC_VISIBILITY aclError aclfvRelease();
*
* @param type [IN] repo add type
* @param featureInfo [IN] add feature information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo);

/**
* @ingroup AscendCL
@@ -298,36 +299,33 @@ ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo
*
* @param type [IN] repo delete type
* @param repoRange [IN] repo range information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange);

/**
* @ingroup AscendCL
* @brief fv accurate del.
*
* @param featureInfo [IN] accurate delete feature information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo);

/**
* @ingroup AscendCL
* @brief fv accurate modify.
*
* @param featureInfo [IN] accurate modify feature information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo);

/**
* @ingroup AscendCL
@@ -336,16 +334,15 @@ ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStr
* @param type [IN] search type
* @param searchInput [IN] search input
* @param searchRst [OUT] search result
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput,
aclfvSearchResult *searchRst, aclrtStream stream);
aclfvSearchResult *searchRst);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

+ 20
- 19
inc/external/hccl/hccl.h View File

@@ -27,7 +27,7 @@

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus

/**
* @brief Initialize HCCL.
@@ -66,14 +66,15 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
* float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
HcclComm comm, aclrtStream stream);

/**
* @brief Broadcast operator.
@@ -84,10 +85,10 @@ HcclReduceOp op, HcclComm comm, aclrtStream stream);
* @param root An integer(u32) identifying the the root rank in the operator.
* @param comm A pointer identifying the communication resource based on
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
aclrtStream stream);
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
aclrtStream stream);

/**
* @brief ReduceScatter operator.
@@ -99,10 +100,10 @@ aclrtStream stream);
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);

/**
* @brief AllGather operator.
@@ -113,16 +114,16 @@ HcclReduceOp op, HcclComm comm, aclrtStream stream);
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
HcclComm comm, aclrtStream stream);
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
aclrtStream stream);
/**
* @brief Get the rank size of this comm.
*
* @param comm A pointer identifying the communication resource based on.
* @param rankSize A pointer identifying the rank size.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);

@@ -131,7 +132,7 @@ extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
*
* @param comm A pointer identifying the communication resource based on.
* @param rankSize A pointer identifying the rank id.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
/**
@@ -139,7 +140,7 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
*
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @return HcclResult
*/
extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);

@@ -154,5 +155,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_H_
#endif // __cplusplus
#endif // HCCL_H_

+ 42
- 42
inc/external/hccl/hccl_types.h View File

@@ -16,10 +16,10 @@

/**
* @file hccl_types.h
* @brief HCCL data type definition
*
* @brief HCCL data type definition
*
*/
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_

@@ -27,33 +27,33 @@

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus

/**
* @brief HCCL functions return value definition
*/
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
} HcclResult;

/**
@@ -65,37 +65,37 @@ typedef void *HcclComm;
* @brief HCCL Reduction opperation
*/
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
} HcclReduceOp;

/**
* @brief HCCL data type
*/
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length

/**
* @brief HCCL root info
*/
typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_
#endif // __cplusplus
#endif // HCCL_TYPES_H_

+ 75
- 72
inc/external/runtime/rt_error_codes.h View File

@@ -23,84 +23,87 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success
static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 107
- 0
scripts/format_source_code.sh View File

@@ -0,0 +1,107 @@
#!/bin/bash
# Copyright 2019-2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

set -e

CLANG_FORMAT=$(which clang-format) || (echo "Please install 'clang-format' tool first"; exit 1)

version=$("${CLANG_FORMAT}" --version | sed -n "s/.*\ \([0-9]*\)\.[0-9]*\.[0-9]*.*/\1/p")
if [[ "${version}" -lt "8" ]]; then
echo "clang-format's version must be at least 8.0.0"
exit 1
fi

CURRENT_PATH=$(pwd)
SCRIPTS_PATH=$(dirname "$0")

echo "CURRENT_PATH=${CURRENT_PATH}"
echo "SCRIPTS_PATH=${SCRIPTS_PATH}"

# print usage message
function usage()
{
echo "Format the specified source files to conform the code style."
echo "Usage:"
echo "bash $0 [-a] [-c] [-l] [-h]"
echo "e.g. $0 -c"
echo ""
echo "Options:"
echo " -a format of all files"
echo " -c format of the files changed compared to last commit, default case"
echo " -l format of the files changed in last commit"
echo " -h Print usage"
}

# check and set options
function checkopts()
{
# init variable
mode="changed" # default format changed files

# Process the options
while getopts 'aclh' opt
do
case "${opt}" in
a)
mode="all"
;;
c)
mode="changed"
;;
l)
mode="lastcommit"
;;
h)
usage
exit 0
;;
*)
echo "Unknown option ${opt}!"
usage
exit 1
esac
done
}

# init variable
# check options
checkopts "$@"

# switch to project root path, which contains clang-format config file '.clang-format'
cd "${SCRIPTS_PATH}/.." || exit 1

FMT_FILE_LIST='__format_files_list__'

if [[ "X${mode}" == "Xall" ]]; then
find src -type f -name "*" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true
find inc -type f -name "*" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true
elif [[ "X${mode}" == "Xchanged" ]]; then
# --diff-filter=ACMRTUXB will ignore deleted files in commit
git diff --diff-filter=ACMRTUXB --name-only | grep "^inc\|^src" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true
else # "X${mode}" == "Xlastcommit"
git diff --diff-filter=ACMRTUXB --name-only HEAD~ HEAD | grep "^inc\|^src" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true
fi

while read line; do
if [ -f "${line}" ]; then
${CLANG_FORMAT} -i "${line}"
fi
done < "${FMT_FILE_LIST}"

rm "${FMT_FILE_LIST}"
cd "${CURRENT_PATH}" || exit 1

echo "Specified cpp source files have been format successfully."

+ 10
- 9
third_party/fwkacllib/inc/cce/taskdown_common.hpp View File

@@ -27,15 +27,16 @@ namespace cce {
#define CC_FUSION_OP_MAX 32

typedef enum tagccKernelType {
CCE_AI_CORE = 0, /* cce aicore */
CCE_AI_CPU = 1, /* cce aicpu */
TE = 2, /* te operator*/
CUSTOMIZED = 3, /* customized operator */
TE_AI_CORE = 4, /* te aicore operator*/
TE_AI_CPU = 5, /* te aicpu operator */
AI_CPU = 6, /* aicpu */
CUST_AI_CPU = 7, /* custom aicpu*/
INVALID = 8, /* unknown kernel type */
CCE_AI_CORE = 0, /* cce aicore */
CCE_AI_CPU = 1, /* cce aicpu */
TE = 2, /* te operator*/
CUSTOMIZED = 3, /* customized operator */
TE_AI_CORE = 4, /* te aicore operator*/
TE_AI_CPU = 5, /* te aicpu operator */
AI_CPU = 6, /* aicpu */
CUST_AI_CPU = 7, /* custom aicpu*/
HOST_CPU = 8, /* host cpu */
INVALID = 10000 /* unknown kernel type */
} ccKernelType;

typedef struct tagOpContext {


+ 18
- 18
third_party/fwkacllib/inc/hccl/base.h View File

@@ -124,27 +124,27 @@ struct HcomRemoteAccessAddrInfo {
};

struct HcomAllToAllVParams {
void *sendbuf;
void *sendcounts;
void *sdispls;
HcclDataType sendtype;
void *recvbuf;
void *recvcounts;
void *rdispls;
HcclDataType recvtype;
const char *group;
void *sendbuf; // device mem
void *sendcounts; // device mem; Type: uint_64
void *sdispls; // device mem; Type: uint_64
HcclDataType sendtype;
void *recvbuf; // device mem
void *recvcounts; // device mem; Type: uint_64
void *rdispls; // device mem; Type: uint_64
HcclDataType recvtype;
const char *group; // not used now
};

struct HcomGatherAllToAllVParams {
void *addrInfo;
void *addrInfoCountPerRank;
void *recvbuf;
void *recvcounts;
void *rdispls;
void *gatheredbuf;
s32 addrLength;
HcclDataType recvtype;
const char *group;
void *addrInfo; // device mem; contains host VA[uint_64]: [addr, length, addr, length, addr, length, ...]
void *addrInfoCountPerRank; // device mem; length: ranksize; contains addrInfoCounts for every rank
void *recvbuf; // device mem
void *recvcounts; // device mem; Type: uint_64
void *rdispls; // device mem; Type: uint_64
void *gatheredbuf; // device mem
s32 addrLength;
HcclDataType recvtype;
const char *group; // not used now
};

#ifdef __cplusplus


+ 14
- 0
third_party/fwkacllib/inc/hccl/hcom.h View File

@@ -164,8 +164,22 @@ HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

/**
* @brief Put alltoallv communication operation into hcom executor.
*
* @param params information about alltoallv communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback);

/**
* @brief Put agther alltoallv communication operation into hcom executor.
*
* @param params information about agther alltoallv communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params,
std::function<void(HcclResult status)> callback);



+ 3
- 3
third_party/fwkacllib/inc/ops/array_ops.h View File

@@ -626,7 +626,7 @@ REG_OP(StopGradient)
*x: A tensor. \n

*@par Outputs:
*y: A tensor. \n
*y: A tensor with the same shape and contents as input. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Identity.
@@ -666,7 +666,7 @@ REG_OP(IdentityN)
*@li axis: The dimension index at which to expand. \n

*@par Outputs:
*y: A tensor. \n
*y: A tensor with the same data as input, with an additional dimension inserted at the index specified by axis. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ExpandDims.
@@ -713,7 +713,7 @@ REG_OP(Unsqueeze)
*@par Outputs:
*y: A tensor. \n

*@par Attention:
*@attention Constraints:
*This operator cannot be directly called by the acllopExecute API. \n

*@par Third-party framework compatibility


+ 29
- 0
third_party/fwkacllib/inc/ops/bitwise_ops.h View File

@@ -26,6 +26,35 @@
namespace ge {

/**
*@brief Element-wise computes the bitwise left-shift of x and y . \n

*@par Inputs:
*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper"
are 0D scalars.
* @li x: A Tensor. Must be one of the following types: int8, int16, int32,
int64, uint8, uint16, uint32, uint64.
* @li y: A Tensor. Has the same type as "x". \n

*@par Outputs:
* z: A Tensor. Has the same type as "x". \n

*@attention Constraints:
*Unique runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator LeftShift.
*/

REG_OP(LeftShift)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
.INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
.OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
.OP_END_FACTORY_REG(LeftShift)

/**
*@brief Element-wise computes the bitwise right-shift of x and y . \n

*@par Inputs:


+ 1
- 1
third_party/fwkacllib/inc/ops/control_flow_ops.h View File

@@ -96,7 +96,7 @@ REG_OP(RefMerge)
* Otherwise, the data is forwarded to "output_false" . \n

*@par Inputs:
*@li data: The tensor to be forwarded. \ n
*@li data: The tensor to be forwarded. \n
* Must be one of the following types: float16, float32, float64,
* int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
*@li pred: A boolean scalar. The output port that will receive data . \n


+ 20
- 2
third_party/fwkacllib/inc/ops/data_flow_ops.h View File

@@ -2340,9 +2340,9 @@ REG_OP(CacheAllIndexToLocal)
*@par Attributes:
*output_types: types of all outputs
*output_shapes: shapes of all outputs
*_dynamic_graph_execute_mode: dynamic graph execution mode,
*_dynamic_graph_execute_mode: dynamic graph execution mode,
value is one of lazy_recompile and dynamic_execute
*_getnext_inputs_shape_range: shape ranges of outputs,
*_getnext_inputs_shape_range: shape ranges of outputs,
it works where _dynamic_graph_execute_mode is dynamic_execute
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -2355,6 +2355,24 @@ REG_OP(DynamicGetNext)
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
.ATTR(_getnext_inputs_shape_range, String, "")
.OP_END_FACTORY_REG(DynamicGetNext)

/**
*@brief AdpGetNext
*@par Outputs:
*y: the data in iterator, all types are available
*@par Attributes:
*output_types: types of all outputs
*output_shapes: shapes of all outputs
*queue_name: cdqm queue name
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(AdpGetNext)
.DYNAMIC_OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {{}, {}})
.ATTR(queue_name, String, "")
.OP_END_FACTORY_REG(AdpGetNext)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -3250,7 +3250,7 @@ REG_OP(Fills)
*value: A scale. Must be float. \n

*@par Outputs:
*@li y: A Tensor. Has the same type and shape as "x1". \n
*y: A Tensor. Has the same type and shape as "x1". \n

*@par Third-party framework compatibility:
* Compatible with the Pytorch operator adds.


+ 72
- 0
third_party/fwkacllib/inc/ops/hcom_ops.h View File

@@ -280,5 +280,77 @@ REG_OP(HcomRemoteScatterWrite)
.OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64}))
.OP_END_FACTORY_REG(HcomRemoteScatterWrite)

/**
* @brief All ranks send different amount of data to, and receive different
amount of data from, all ranks.
* @par Inputs:
* Five inputs, including:
* @li send_data: A tensor. the memory to send.
* @li send_counts: A list, where entry i specifies the number of elements in
send_data to send to rank i.
* @li send_displacements: A list, where entry i specifies the displacement
(offset from sendbuf) from which to send data to rank i.
* @li recv_counts: A list, where entry i specifies the number of
elements to receive from rank i.
* @li recv_displacements: A list, , where entry i specifies the displacement
(offset from recv_data) to which data from rank i should be written.
* @par Outputs:
* recv_data: A Tensor has same element type as send_data.
* @par Attributes:
* @li group: A string identifying the group name of ranks participating in
the op.
* @attention all ranks participating in the op should be full-mesh networking
using the RDMA.
*/
REG_OP(HcomAllToAllV)
.INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.INPUT(send_counts, TensorType({DT_INT64}))
.INPUT(send_displacements, TensorType({DT_INT64}))
.INPUT(recv_counts, TensorType({DT_INT64}))
.INPUT(recv_displacements, TensorType({DT_INT64}))
.OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(group, String)
.OP_END_FACTORY_REG(HcomAllToAllV)

/**
* @brief All ranks send different amount of data to, and receive different
amount of data from, all ranks. And concat all data descripting by addrinfo
togather into output gathered.
* @par Inputs:
* Four inputs, including:
* @li addrinfo: A tensor, descripting the memory info(address, length) to send.
* @li addrinfo_count_per_rank: A list, where entry i specifies the number of
elements in send_data to send to rank i.
* @li recv_counts: A list, where entry i specifies the number of
elements to receive from rank i.
* @li recv_displacements: A list, , where entry i specifies the displacement
(offset from recv_data) to which data from rank i should be written.
* @par Outputs:
* Two outputs, including:
* @li recv_data: A Tensor has same element type as dtype.
* @li gathered: A Tensor has same element type as dtype.
* @par Attributes:
* @li group: A string identifying the group name of ranks participating in
the op.
* @li dtype: Datatype of send buffer elements.
* @li addr_length: descripting the element memory length in the addrinfo.
-2: all element memory length in the addrinfo is the same, but it is unknown.
-1: all element memory length is unknown.
>0: all element memory length in the addrinfo is the same. the attr value is the memory length.
* @attention all ranks participating in the op should be full-mesh networking
using the RDMA.
*/
REG_OP(HcomGatherAllToAllV)
.INPUT(addrinfo, TensorType({DT_UINT64}))
.INPUT(addrinfo_count_per_rank, TensorType({DT_INT64}))
.INPUT(recv_counts, TensorType({DT_INT64}))
.INPUT(recv_displacements, TensorType({DT_INT64}))
.OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(group, String)
.REQUIRED_ATTR(dtype, Type)
.REQUIRED_ATTR(addr_length, Int)
.OP_END_FACTORY_REG(HcomGatherAllToAllV)

} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_

+ 26
- 0
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -1006,6 +1006,32 @@ REG_OP(SoftMarginLossGrad)
.OP_END_FACTORY_REG(SoftMarginLossGrad)

/**
*@brief Calculate the cross product of two tensors. \n

*@par Inputs:
*One inputs, including:
* @li x1: A tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8, int16. \n
* @li x2: A tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8, int16. \n

*@par Attributes:
*@li dim: the dimination of compute.Defaults to -65530. \n

*@par Outputs:
*y: A Tensor with the same type and shape of x1's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator cross. \n
*/
REG_OP(Cross)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
.ATTR(dim, Int, -65530)
.OP_END_FACTORY_REG(Cross)

/**
*@brief Computes batched the p-norm distance between each pair of
*the two collections of row vectors. \n



+ 150
- 10
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -337,6 +337,41 @@ REG_OP(SoftmaxV2)
.OP_END_FACTORY_REG(SoftmaxV2)

/**
*@brief Function softmax with dropoutDoMaskV3D

*@par Inputs:
*Two inputs, including:
* @li x: A mutable Tensor. The type only support float16.
* @li mask: A mutable Tensor. Must met all of the following rules:
* shape of mask should be 1D.
* dtype of mask should be uint8.
* value of shape should met the following algorithm:
* value = (size(x) + 128 - 1) // 128 * 128

*@par Attributes:
* @li keep_prob: A mutable Tensor. Must met all of the following rules:
* shape of "keep_prob" should be (1,) or [1,].
* Has the same type as "x" . \n
* @li axes: A list of int. The dimension softmax would be performed on. Defaults
* to "[-1]" . \n

*@par Outputs:
*y1: A mutable Tensor. Has the same type as "x".
*y2: A mutable Tensor. Has the same type as "x". \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(SoftmaxV2WithDropOutDoMaskV3D)
.INPUT(x, TensorType({DT_FLOAT16}))
.INPUT(mask, TensorType({DT_UINT8}))
.OUTPUT(y1, TensorType({DT_FLOAT16}))
.OUTPUT(y2, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(keep_prob, Float)
.ATTR(axes, ListInt, {-1})
.OP_END_FACTORY_REG(SoftmaxV2WithDropOutDoMaskV3D)

/**
*@brief Computes log softmax activations . \n

*@par Inputs:
@@ -1150,22 +1185,23 @@ REG_OP(INInferV2D)
.OP_END_FACTORY_REG(INInferV2D)

/**
* @brief perform instance normalization to x. \n
* @brief InstanceNorm operator interface implementation.

* @par Inputs:
* Three inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0.
* @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND.
* @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32.
* @li beta: A Tensor. Must be one of the following types: float16, float32.

* @par Attributes:
* @li data_format: An attribute of type String \n
* @li epsilon: An attribute of type Float, . \n
* @li epsilon: An attribute of type Float. \n

* @par Outputs:
* @li y: A Tensor. Has the same type as "x", format is NC1HWC0. \n
* @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n
* @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n
*Three outputs, including:
* @li y: A Tensor. Has the same type as "x". \n
* @li mean: A Tensor. Has the same type as "x". \n
* @li variance: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* Can be used by onnx InstanceNormalization
@@ -1177,11 +1213,94 @@ REG_OP(InstanceNorm)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(data_format, String)
.REQUIRED_ATTR(epsilon, Float)
.ATTR(data_format, String, "NDHWC")
.ATTR(epsilon, Float, 1e-6)
.OP_END_FACTORY_REG(InstanceNorm)

/**
*@brief InstanceNormGrad operator interface implementation.

*@par Inputs:
*Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Three outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormGrad)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(InstanceNormGrad)

/**
*@brief InstanceNormXBackprop operator interface implementation.

*@par Inputs:
*Five inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li variance: A Tensor. Must be one of the following types: float16, float32.
* @li mean: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*Two outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormXBackprop)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(InstanceNormXBackprop)

/**
*@brief InstanceNormBetaGammaBackprop operator interface implementation.

*@par Inputs:
*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32.
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n

*@par Outputs:
*Two outputs, including:
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InstanceNormBetaGammaBackprop)
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(res_for_gamma, TensorType({DT_FLOAT}))
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop)

/**
* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n

* @par Inputs:
@@ -1571,6 +1690,27 @@ REG_OP(MultilabelMarginLoss)
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(MultilabelMarginLoss)

/**
*@brief Performs batch normalization . \n
*@par Inputs:
* Two inputs
*@li input_x: A Tensor. Support float32. shape (n, c, d).
*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n
*@par Attributes:
*@li normalize_type: Str. Support "per_feature" or "all_features".
*@li epsilon: An optional float32, specifying the small value added to
variance to avoid dividing by zero. Defaults to "0.00001" . \n
*@par Outputs:
* One outputs
*@li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n
*/
REG_OP(NormalizeBatch)
.INPUT(input_x, TensorType({ DT_FLOAT }))
.INPUT(seq_len, TensorType({ DT_INT32 }))
.OUTPUT(output_y, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(normalize_type, String)
.ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(NormalizeBatch)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

+ 42
- 8
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -990,7 +990,7 @@ REG_OP(INInferV2)
*@brief Performs reduced instance normalization . \n

*@par Inputs:
*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n
*x: A Tensor of type float16 or float32. \n

*@par Outputs:
*@li sum: A Tensor of type float32 for SUM reduced "x".
@@ -1013,19 +1013,19 @@ REG_OP(INTrainingReduceV2)
*@par Inputs:
* Seven inputs, including: (NC1HWC0supported)
*@li x: A Tensor of type float16 or float32.
*@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2.
*@li square_sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean.
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n
*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li gamma: A Tensor of type float32, for the scaling gamma.
*@li beta: A Tensor of type float32, for the scaling beta.
*@li mean: A Tensor of type float32, for the updated mean.
*@li variance: A Tensor of type float32, for the updated variance . \n

*@par Attributes:
*@li momentum: A required float32, specifying the momentum to update mean and var.
*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n

*@par Outputs:
* Three outputs, including: (NC1HWC0 supported)
* Three outputs
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance . \n
@@ -1191,6 +1191,40 @@ REG_OP(ReduceStd)
.ATTR(unbiased, Bool, true)
.ATTR(keepdim, Bool, false)
.OP_END_FACTORY_REG(ReduceStd)

/**
* @brief Calculates the standard deviation of Tensors.

* @par Inputs:
* include:
* @li x: A Tensor. Must be one of the following types: float16, float32. \n
* @li mean: A Tensor. It's the mean of X. Must be one of the following types: float16, float32. \n


* @par Attributes:
* Three Attributes, including:
* @li dim: An optional listint, Defaults to "None". \n
* @li unbiased: An optional bool. Defaults to "True".
* If "True", Use Bessel Correction.
* If "False", Do not use Bessel Correction. \n
* @li keepdim: An optional bool. Defaults to "False".
* If "True", Keep the original tensor dimension.
* If "False", Do not keep the original tensor dimension. \n

* @par Outputs:
* @li y: A Tensor. It's the std of X. Has the same type as "x".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator ReduceStdWithMean.
*/
REG_OP(ReduceStdWithMean)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(dim, ListInt, {})
.ATTR(unbiased, Bool, true)
.ATTR(keepdim, Bool, false)
.OP_END_FACTORY_REG(ReduceStdWithMean)
} //namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_

+ 34
- 0
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -2273,6 +2273,40 @@ REG_OP(AddRowRanges)
.OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(AddRowRanges)

/**
*@brief masked fill tensor along with one axis by range.
* boxes. It is a customized masked fill range operator . \n

*@par Inputs:
* Four inputs, including:
*@li x: input tensor. A ND Tensor of float32/float16/int32/int8 with shapes
* 1-D (D,), 2-D(N, D), 3-D(N, C, D)
*@li start: masked fill start pos. A 3D Tensor of int32 with
* shape (num, N). "num" indicates the number of loop masked fill, and the value N
* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. \n
*@li end: masked fill end pos. A 3D Tensor of int32 with
* shape (num, N). "num" indicates the number of loop masked fill, and the value N
* indicates the batch of ND Tensor. \n
*@li value: masked fill value. A 2D Tensor of float32/float16/int32/int8 with
* shape (num,). "num" indicates the number of loop masked fill

*@par Attributes:
*@li axis: axis with masked fill of int32. Defaults to -1.

*@par Outputs:
*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D)

* @par Restrictions:
* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024.
*/
REG_OP(MaskedFillRange)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
.INPUT(start, TensorType({DT_INT32}))
.INPUT(end, TensorType({DT_INT32}))
.INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
.REQUIRED_ATTR(axis, Int)
.OP_END_FACTORY_REG(MaskedFillRange)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

+ 3
- 3
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -317,15 +317,15 @@ REG_OP(Concat)
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n

*@par Attributes:
*@li axis: A optional int, defaultvalue is 0.
*@li axis: A optional int, default value is 0.
* Dimension along which to pack. The range is [-(R+1), R+1).
*@li N: A required int. Number of tensors . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x".

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Pack.
It's a dynamic output.
* Compatible with the TensorFlow operator Pack.
*/
REG_OP(Pack)
.DYNAMIC_INPUT(x, TensorType::BasicType())


+ 39
- 0
third_party/fwkacllib/inc/ops/string_ops.h View File

@@ -896,6 +896,45 @@ REG_OP(DecodeBase64)
.INPUT(x, TensorType({DT_STRING}))
.OUTPUT(y, TensorType({DT_STRING}))
.OP_END_FACTORY_REG(DecodeBase64)

/**
*@brief StringNormalization performs string operations for basic cleaning . \n

*@par Inputs:
*@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n

*@par Outputs:
*@li output: UTF-8 strings tensor after cleaning . \n

*@par Attributes:
*@li stopwords : list of strings (default is empty).
*List of stop words. If not set, no word would be removed from input strings
tensor.

*@li is_case_sensitive : bool (default is false).
*Boolean. Whether the identification of stop words in input strings tensor is
case-sensitive. Default is false.

*@li case_change_action : string (default is "NONE").
*string enum that cases output to be lowercased/uppercases/unchanged. Valid
values are "LOWER", "UPPER", "NONE". Default is "NONE".

*@li local : string (default is "en_US").
*Environment dependent string that denotes the locale according to which output
strings needs to be upper/lowercased.Default en_US or platform specific equivalent
as decided by the implementation . \n

*@attention Constraints:
*@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C].
*/
REG_OP(StringNormalizer)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(stopwords, ListString, {})
.ATTR(is_case_sensitive, Bool, false)
.ATTR(case_change_action, String, "NONE")
.ATTR(local, String, "en_US")
.OP_END_FACTORY_REG(StringNormalizer)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_

+ 115
- 17
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -130,21 +130,20 @@ REG_OP(Transpose)
.OP_END_FACTORY_REG(Transpose)

/**
*@brief Doing format_transfer for various data format only
support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW"
"NCHW" to "FRACTAL_Zn" or "FRACTAL_Zn" to "NCHW".
"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN" . \n
*@brief Do format transfer for various data format.
* In general, the framework will insert it atomatically . \n

*@par Inputs:
*src: A Tensor dtype of all types . \n
*src: A Tensor. For all branches can be types: float16, float32, int32, int8, bool.
* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n

*@par Attributes:
*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc.
*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc.
*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Z" etc.
*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Z" etc.
*@li group: A optional int32, default value is 1. \n

*@par Outputs:
*dst: A Tensor dtype of all types.
*dst: A Tensor. Has the same type as "src".
*/
REG_OP(TransData)
.INPUT(src, TensorType::BasicType())
@@ -174,21 +173,27 @@ REG_OP(Permute)
.OP_END_FACTORY_REG(Permute)

/**
*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors
* along axis 1 . \n
*@brief Flattens the inputs tensor into a 2D matrix. If input tensor has shape (d_0, d_1,..., d_n),
* then the output will have shape (d_0 X d_1 ... d_(axis-1), d_axis X d_(axis + 1)...X d_n)\n

*@par Inputs:
*One input:
*x: A multi-dimensional Tensor. Must be one of the following types:
* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . \n
* One input:
* x: A multi-dimensional Tensor. Must be one of the following types:
* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32.

*@par Outputs:
*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors
* along axis 1). Must be one of the following data types: int8, uint8, int16,
* uint16, int32, uint32, int64,uint64, float16, float32 . \n
* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened
* to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output.
* Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 .

*@par Attributes:
* axis: A optional int32, default value is 1. Indicate up to which input dimensions (exclusive) should be flattened
* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of
* the input tensor. Negative value means counting dimensions from the back. When axis = 0, the shape of
* the output tensor is (1, (d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n).

*@par Third-party framework compatibility
* Compatible with TensorFlow operator Flatten.
* Compatible with TensorFlow / ONNX operator Flatten.
*/
REG_OP(Flatten)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64,
@@ -197,6 +202,7 @@ REG_OP(Flatten)
.OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64,
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64,
DT_FLOAT, DT_FLOAT16}))
.ATTR(axis, Int, 1)
.OP_END_FACTORY_REG(Flatten)

/**
@@ -824,6 +830,98 @@ REG_OP(AffineGrid)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(AffineGrid)

/**
*@brief Make memory of a view be contiguous. \n

*@par Inputs:
*Four inputs, including:
*@li x: The input tensor.
*@li size: The shape of output tensor.
*@li stride: The stride of output tensor.
*@li storage_offset: The offset in the underlying storage of the output tensor. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
*Compatible with the pytorch operator as_strided.
*/
REG_OP(AsStrided)
.INPUT(x, TensorType::BasicType())
.INPUT(size, TensorType::IndexNumberType())
.INPUT(stride, TensorType::IndexNumberType())
.INPUT(storage_offset, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(AsStrided)

/**
*@brief This transform extracts n-grams from the input sequence and save them as a
vector. \n

*@par Inputs:
*@li input: can be either a 1-D or 2-D tensor for n-gram extraction, It is ether string UTF-8 or int32/int64 . \n

*@par Attributes:
*@li max_gram_length : int (required)
*Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output .
*@li max_skip_count : int (required)
*Maximum number of items (integers/strings) to be skipped when constructing an n-gram from X.
If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams
with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1.
*@li min_gram_length : int (required)
*Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of
2-grams and 3-grams.
*@li mode : string (required)
*The weighting criteria. It can be one of "TF" (term frequency), "IDF" (inverse document frequency),
and "TFIDF" (the combination of TF and IDF).
*@li ngram_counts : list of ints (required)
*The starting indexes of 1-grams, 2-grams, and so on in pool. It is useful when determining the boundary
between two consecutive collections of n-grams. For example, if ngram_counts is [0, 17, 36],
the first index (zero-based) of 1-gram/2-gram/3-gram in pool are 0/17/36. This format is essentially identical
to CSR (or CSC) sparse matrix format, and we choose to use this due to its popularity.
*@li ngram_indexes : list of ints (required)
*list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute. The i-th element
in ngram_indexes indicate the coordinate of the i-th n-gram in the output tensor.
*@li pool_int64s : list of ints
*List of int64 n-grams learned from the training set. Either this or pool_strings attributes must be present but not both.
It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element
in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector.
*@li pool_strings : list of strings
*List of strings n-grams learned from the training set. Either this or pool_int64s attributes must be present but not both.
It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element
in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector.
*@li weights : list of floats
*list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights is the weight of
the i-th n-gram in pool. Its length equals to the size of ngram_indexes. By default, weights is an all-one tensor.This attribute
is used when mode is "IDF" or "TFIDF" to scale the associated word counts. \n

*@par Outputs:
*@li output: tensor(float)
*For 1-D input, output is the n-gram representation of that input. For 2-D input, the output is also a 2-D tensor
whose i-th row is the n-gram representation of the i-th input row. More specifically, if input shape is [C], the corresponding
output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor. \n

*@attention Constraints:
*@li input can be either a 1-D or 2-D tensor, shape is [C] or [N, C].
*@li max(ngram_indexes) + 1 == len(weights), len(y) == len(weights).
*@li ngram_counts and pool(pool_int64s or pool_strings) must match.
*@li either pool_strings or pool_int64s attributes must be present but not both.
*/

REG_OP(TfidVectorizer)
.INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(output, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(max_gram_length, Int)
.REQUIRED_ATTR(max_skip_count, Int)
.REQUIRED_ATTR(min_gram_length, Int)
.REQUIRED_ATTR(mode, String)
.REQUIRED_ATTR(ngram_counts, ListInt)
.REQUIRED_ATTR(ngram_indexes, ListInt)
.ATTR(pool_int64s, ListInt, {})
.ATTR(pool_strings, ListString, {})
.ATTR(weights, ListFloat, {})
.OP_END_FACTORY_REG(TfidVectorizer)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_

+ 56
- 0
third_party/fwkacllib/inc/runtime/rt_stars.h View File

@@ -23,6 +23,62 @@ extern "C" {
*/
RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream);

/**
* @ingroup rt_stars
* @brief create cdq instance.
* @param [in] batchNum batch number
* @param [in] batchSize batch size
* @param [in] queName cdq name
* @return RT_ERROR_NONE for ok, ACL_ERROR_RT_NO_CDQ_RESOURCE for no cdq resources
*/
RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char *queName);

/**
* @ingroup rt_stars
* @brief destroy cdq instance.
* @param [in] queName cdq name
* @return RT_ERROR_NONE for ok, others failed
*/
RTS_API rtError_t rtCdqDestroy(const char *queName);

/**
* @ingroup rt_stars
* @brief get free batch in the queue.
* @param [in] queName cdq name
* @param [in] timeout batch size
* @param [out] batchId batch index
* @return RT_ERROR_NONE for ok, ACL_ERROR_RT_WAIT_TIMEOUT for timeout
*/
RTS_API rtError_t rtCdqAllocBatch(const char *queName, int32_t timeout, uint32_t *batchId);

/**
* @ingroup rt_stars
* @brief launch a write_cdqm task on the stream.
* When the task is executed, the data information will be inserted into the cdqe index position of the queue.
* @param [in] queName cdq name
* @param [in] cdqeIndex cdqe index
* @param [in] data cdqe infomation
* @param [in] dataSize data size
* @param [in] stream launch task on the stream
* @return RT_ERROR_NONE for ok, others failed
*/
RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize,
rtStream_t stream);

/**
* @ingroup rt_stars
* @brief launch a write_cdqm task on the stream.
* When the task is executed, the data information will be inserted into the cdqe index position of the queue.
* @param [in] queName cdq name
* @param [in] cdqeIndex cdqe index
* @param [in] data cdqe infomation
* @param [in] dataSize data size
* @param [in] stream launch task on the stream
* @return RT_ERROR_NONE for ok, others failed
*/
RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr,
rtStream_t stream);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif


+ 29
- 1
third_party/fwkacllib/inc/toolchain/prof_callback.h View File

@@ -108,7 +108,18 @@ enum MsprofCtrlCallbackType {
MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options
MSPROF_CTRL_FINALIZE // stop profiling
MSPROF_CTRL_FINALIZE, // stop profiling
MSPROF_CTRL_REPORT_FUN_P, // for report callback
MSPROF_CTRL_PROF_SWITCH // for prof switch
};

#define MSPROF_MAX_DEV_NUM (64)

struct MsprofCommandHandle {
uint64_t profSwitch;
uint32_t devNums; // length of device id list
uint32_t devIdList[MSPROF_MAX_DEV_NUM];
uint32_t modelId;
};

/**
@@ -129,6 +140,23 @@ typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len);
*/
typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice);

/*
* @name MsprofInit
* @brief Profiling module init
* @param [in] dataType: profiling type: ACL Env/ACL Json/GE Option
* @param [in] data: profiling switch data
* @param [in] dataLen: Length of data
* @return 0:SUCCESS, >0:FAILED
*/
int32_t MsprofInit(uint32_t dataType, void *data, uint32_t dataLen);

/*
* @name AscendCL
* @brief Finishing Profiling
* @param NULL
* @return 0:SUCCESS, >0:FAILED
*/
int32_t MsprofFinalize();
#ifdef __cplusplus
}
#endif


Loading…
Cancel
Save