
!1561 code_sync_0420_inc

From: @ding_fei_fei
Reviewed-by: @xsmq,@liucunwei
Signed-off-by: @liucunwei
tags/v1.3.0
mindspore-ci-bot committed 4 years ago
parent commit 708f523f05
38 changed files with 2051 additions and 391 deletions
1. inc/external/acl/acl.h (+13 -4)
2. inc/external/acl/acl_base.h (+9 -0)
3. inc/external/acl/acl_mdl.h (+15 -0)
4. inc/external/acl/acl_op_compiler.h (+19 -1)
5. inc/external/acl/acl_rt.h (+11 -0)
6. inc/external/acl/error_codes/rt_error_codes.h (+1 -0)
7. inc/external/acl/ops/acl_dvpp.h (+261 -164)
8. inc/external/hccl/hccl.h (+9 -0)
9. inc/external/runtime/rt_error_codes.h (+1 -0)
10. third_party/fwkacllib/inc/external/runtime/rt_error_codes.h (+0 -0)
11. third_party/fwkacllib/inc/mmpa/mmpa_api.h (+1 -0)
12. third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h (+4 -0)
13. third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h (+4 -0)
14. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+36 -0)
15. third_party/fwkacllib/inc/ops/image_ops.h (+98 -0)
16. third_party/fwkacllib/inc/ops/linalg_ops.h (+47 -0)
17. third_party/fwkacllib/inc/ops/math_ops.h (+23 -0)
18. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+106 -48)
19. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+183 -106)
20. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+139 -1)
21. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+102 -7)
22. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+26 -3)
23. third_party/fwkacllib/inc/ops/pad_ops.h (+2 -2)
24. third_party/fwkacllib/inc/ops/parsing_ops.h (+240 -0)
25. third_party/fwkacllib/inc/ops/quantize_ops.h (+20 -0)
26. third_party/fwkacllib/inc/ops/random_ops.h (+33 -0)
27. third_party/fwkacllib/inc/ops/reduce_ops.h (+25 -2)
28. third_party/fwkacllib/inc/ops/rnn.h (+118 -0)
29. third_party/fwkacllib/inc/ops/selection_ops.h (+36 -9)
30. third_party/fwkacllib/inc/ops/string_ops.h (+337 -0)
31. third_party/fwkacllib/inc/ops/transformation_ops.h (+2 -6)
32. third_party/fwkacllib/inc/runtime/base.h (+8 -2)
33. third_party/fwkacllib/inc/runtime/event.h (+12 -0)
34. third_party/fwkacllib/inc/runtime/rt.h (+1 -0)
35. third_party/fwkacllib/inc/runtime/rt_model.h (+9 -0)
36. third_party/fwkacllib/inc/runtime/rt_stars.h (+29 -0)
37. third_party/fwkacllib/inc/toolchain/prof_reporter.h (+36 -34)
38. third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h (+35 -2)

inc/external/acl/acl.h (+13 -4)

@@ -26,9 +26,9 @@ extern "C" {
#endif

// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0


/**
* @ingroup AscendCL
@@ -66,8 +66,17 @@ ACL_FUNC_VISIBILITY aclError aclFinalize();
*/
ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);


/**
* @ingroup AscendCL
* @brief get recent error message
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg();

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_
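Reviewer note: a minimal usage sketch for the new aclGetRecentErrMsg() interface, not part of the diff. It assumes the conventional "acl/acl.h" include path and an already-initialized AscendCL process; aclrtSetDevice is used only as an example of a call that can fail.

#include <stdio.h>
#include "acl/acl.h"

void ReportLastAclError(void) {
    aclError ret = aclrtSetDevice(0);            // any failing ACL call
    if (ret != ACL_SUCCESS) {
        const char *msg = aclGetRecentErrMsg();  // new API; may return NULL when nothing is recorded
        fprintf(stderr, "set device failed, ret = %d, msg = %s\n", ret, (msg != NULL) ? msg : "none");
    }
}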

inc/external/acl/acl_base.h (+9 -0)

@@ -626,6 +626,15 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);


/**
* @ingroup AscendCL
* @brief get soc name
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY const char *aclrtGetSocName();

#define ACL_APP_LOG(level, fmt, ...) \
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
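Reviewer note: a hedged sketch of the new aclrtGetSocName() interface; the include path and the assumption that the returned string is owned by the runtime are mine, not stated in the diff.

#include <stdio.h>
#include "acl/acl_base.h"

void PrintSocName(void) {
    const char *soc = aclrtGetSocName();   // NULL on failure, per the new doc comment
    printf("SoC version: %s\n", (soc != NULL) ? soc : "unknown");
}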




inc/external/acl/acl_mdl.h (+15 -0)

@@ -282,6 +282,21 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD


/**
* @ingroup AscendCL
* @brief Set aclTensorDesc to aclmdlDataset
*
* @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added
* @param tensorDesc [IN] aclTensorDesc address to be added
* @param index [IN] index of tensorDesc which to be added
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset,
aclTensorDesc *tensorDesc,
size_t index);

/**
* @ingroup AscendCL
* @brief Get the number of aclDataBuffer in aclmdlDataset
*
* @param dataset [IN] aclmdlDataset pointer
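Reviewer note: sketch of the new aclmdlSetDatasetTensorDesc() call, assuming aclCreateTensorDesc and the ACL_FLOAT16 / ACL_FORMAT_NCHW constants from acl_base.h; the shape is an arbitrary example.

#include "acl/acl.h"
#include "acl/acl_mdl.h"

aclError SetFirstInputDesc(aclmdlDataset *dataset) {
    int64_t dims[] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT16, 4, dims, ACL_FORMAT_NCHW);
    // Index 0 attaches the description to the first buffer in the dataset.
    aclError ret = aclmdlSetDatasetTensorDesc(dataset, desc, 0);
    // desc lifetime is not handled here; release it with aclDestroyTensorDesc when no longer needed.
    return ret;
}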


inc/external/acl/acl_op_compiler.h (+19 -1)

@@ -38,9 +38,15 @@ typedef enum {
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR,
ACL_OP_PERFORMANCE_MODE
} aclCompileOpt;


typedef enum aclCompileFlag {
ACL_OP_COMPILE_DEFAULT,
ACL_OP_COMPILE_FUZZ
} aclOpCompileFlag;

/**
* @ingroup AscendCL
* @brief compile op
@@ -108,6 +114,18 @@ ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
*/
ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value);


/**
* @ingroup AscendCL
* @brief set compile flag
*
* @param flag [IN] compile flag, ACL_OP_COMPILE_DEFAULT means compile with default mode
* ACL_OP_COMPILE_FUZZ means compile with fuzz mode
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);

#ifdef __cplusplus
}
#endif
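Reviewer note: sketch combining the two additions in this file. The "high" value passed for ACL_OP_PERFORMANCE_MODE is an assumption for illustration only; the diff does not document the accepted option values.

#include "acl/acl_op_compiler.h"

aclError ConfigureOpCompiler(void) {
    aclError ret = aclSetCompileopt(ACL_OP_PERFORMANCE_MODE, "high");  // value string assumed
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    return aclopSetCompileFlag(ACL_OP_COMPILE_FUZZ);                   // new flag-setting API
}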


inc/external/acl/acl_rt.h (+11 -0)

@@ -957,6 +957,17 @@ ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId);
*/
ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total);


/**
* @ingroup AscendCL
* @brief Set the timeout interval for op waiting
*
* @param timeout [IN] op wait timeout
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout);

#ifdef __cplusplus
}
#endif
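Reviewer note: sketch of the new aclrtSetOpWaitTimeout() call. The diff does not state the unit of the timeout; seconds is assumed here.

#include "acl/acl_rt.h"

aclError ConfigureOpWait(void) {
    return aclrtSetOpWaitTimeout(60U);   // assumed to mean 60 seconds
}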


inc/external/acl/error_codes/rt_error_codes.h (+1 -0)

@@ -94,6 +94,7 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device


static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect


#ifdef __cplusplus
}


inc/external/acl/ops/acl_dvpp.h (+261 -164)

@@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output


// Supported Pixel Format
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
};

// Stream Format
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
H264_BASELINE_LEVEL,
H264_MAIN_LEVEL,
H264_HIGH_LEVEL
};

// Supported Channel Mode
enum acldvppChannelMode {
DVPP_CHNMODE_VPC = 1,
DVPP_CHNMODE_JPEGD = 2,
DVPP_CHNMODE_JPEGE = 4
};

// Supported Border Type
enum acldvppBorderType {
BORDER_CONSTANT = 0,
BORDER_REPLICATE,
BORDER_REFLECT,
BORDER_REFLECT_101
};

// Venc parameter type
enum aclvencChannelDescParamType {
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
};

// Jpeg picture format
enum acldvppJpegFormat {
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
};


/**
@@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
* @retval null for failed.
* @retval other success
*/
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);

/**
@@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);


/**
@@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, const void *param);

/**
* @ingroup AscendCL
@@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);


/**
* @ingroup AscendCL
@@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
@@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components,
acldvppJpegFormat *format);

/**
@@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
const acldvppJpegeConfig *config,
uint32_t *size);


/**
* @ingroup AscendCL
@@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
uint32_t dataSize,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
* @ingroup AscendCL
@@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);


/**
* @ingroup AscendCL
@@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDe
* @see acldvppCreateChannel | acldvppCreatePicDesc
* | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
@@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
aclrtStream stream);

/**
@@ -1746,9 +1781,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);



/**
* @ingroup AscendCL
@@ -1772,9 +1811,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChann
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1799,10 +1841,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChanne
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -1825,9 +1870,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1851,10 +1899,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppCha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -1879,11 +1930,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1909,10 +1963,16 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);



/**
* @ingroup AscendCL
@@ -1940,8 +2000,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1959,8 +2022,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelD
*
* @see acldvppCreateChannel | acldvppCreateJpegeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
const void *data,
uint32_t *size,
acldvppJpegeConfig *config,
aclrtStream stream);

/**
@@ -1978,8 +2044,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -2034,8 +2103,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDe
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
acldvppPicDesc *output,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -2054,8 +2126,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -2076,8 +2150,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannel
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2099,8 +2175,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppCha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
void *reserve,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -2112,7 +2191,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
uint32_t mode);

/**
* @ingroup AscendCL
@@ -2147,7 +2227,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
uint32_t outMode);


/**
* @ingroup AscendCL
@@ -2244,7 +2325,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
uint32_t dim,
uint8_t **data,
uint32_t *len);

/**
* @ingroup AscendCL
@@ -2262,8 +2345,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -2284,7 +2369,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
*
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
uint32_t index,
double value);

/**
@@ -2429,8 +2515,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2447,8 +2535,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *srcPicDesc,
acldvppHist *hist,
void *reserve,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -2457,7 +2548,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannel
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();

/**
* @ingroup AscendCL
@@ -2514,7 +2605,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
*
* @see acldvppCreateHist | acldvppVpcCalcHistAsync
*/
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);

/**
* @ingroup AscendCL
@@ -2533,6 +2624,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
*/
ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);

/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop, resize config and make border.
@@ -2556,13 +2648,18 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
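Reviewer note: a small sketch of the reflowed ROI-config APIs (create, rewrite, destroy). acldvppDestroyRoiConfig is assumed to exist alongside the create call, and DVPP even/odd alignment rules for ROI coordinates are not handled here.

#include "acl/ops/acl_dvpp.h"

aclError DemoRoi(void) {
    acldvppRoiConfig *roi = acldvppCreateRoiConfig(0, 255, 0, 255);    // left, right, top, bottom
    if (roi == NULL) {
        return 1;                                                      // placeholder error value
    }
    aclError ret = acldvppSetRoiConfig(roi, 16, 271, 16, 271);         // move the crop window
    (void)acldvppDestroyRoiConfig(roi);                                // assumed counterpart to create
    return ret;
}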

inc/external/hccl/hccl.h (+9 -0)

@@ -119,6 +119,15 @@ extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount
HcclComm comm, aclrtStream stream);

/**
* @brief Barrier operator.
*
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);

/**
* @brief Destroy HCCL comm
*
* @param comm A pointer identifying the communication resource targeting
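Reviewer note: minimal sketch of the new HcclBarrier() operator; the communicator and stream are assumed to be created elsewhere (e.g. via HcclCommInitRootInfo and aclrtCreateStream).

#include "hccl/hccl.h"

HcclResult SyncAllRanks(HcclComm comm, aclrtStream stream) {
    // Blocks until every rank in comm has reached the barrier on the given stream.
    return HcclBarrier(comm, stream);
}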


inc/external/runtime/rt_error_codes.h (+1 -0)

@@ -94,6 +94,7 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device


static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect


#ifdef __cplusplus
}


third_party/fwkacllib/inc/external/runtime/rt_error_codes.h (+0 -0)


third_party/fwkacllib/inc/mmpa/mmpa_api.h (+1 -0)

@@ -56,6 +56,7 @@
#include <dirent.h>
#include <getopt.h>
#include <libgen.h>
#include <malloc.h>


#include <linux/types.h>
#include <linux/hdreg.h>


third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h (+4 -0)

@@ -550,6 +550,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra);

MMPA_FUNC_VISIBILITY mmSize mmGetPageSize();
MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize);
MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr);
#define MMPA_DLL_API


#ifdef __cplusplus
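Reviewer note: sketch of the new mmpa helpers added on both the Linux and Windows sides; the pairing of mmAlignMalloc with mmAlignFree follows the declarations above.

#include "mmpa/mmpa_api.h"

VOID *AllocPageAlignedBuffer(mmSize bytes) {
    mmSize page = mmGetPageSize();
    return mmAlignMalloc(bytes, page);   // NULL on failure; release with mmAlignFree()
}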


third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h (+4 -0)

@@ -557,6 +557,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra);

MMPA_FUNC_VISIBILITY mmSize mmGetPageSize();
MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize);
MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr);
#ifdef __cplusplus
#if __cplusplus
}


third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+36 -0)

@@ -146,6 +146,8 @@ REG_OP(Cast)


/**
*@brief Returns the truth value of (x1 >= x2) element-wise. \n
*when input is int32 and (x2 - x1) > 2**31 or < -2**31
*aicore accuracy is not guaranteed \n

*@par Inputs:
*Two inputs, including:
@@ -167,6 +169,8 @@ REG_OP(GreaterEqual)


/**
*@brief Returns the truth value of (x1 < x2) element-wise. \n
*when input is int32 and (x2 - x1) > 2**31 or < -2**31
*aicore accuracy is not guaranteed \n

*@par Inputs:
*Two inputs, including:
@@ -567,6 +571,8 @@ REG_OP(InvGrad)


/**
*@brief: Returns the truth value of (x <= y) element-wise. \n
*when input is int32 and (x2 - x1) > 2**31 or < -2**31
*aicore accuracy is not guaranteed \n

*@par Inputs:
* Two inputs, including:
@@ -1464,6 +1470,8 @@ REG_OP(ReciprocalGrad)


/**
*@brief Returns the truth value of (x1 > x2) element-wise. \n
*when input is int32 and (x2 - x1) > 2**31 or < -2**31
*aicore accuracy is not guaranteed \n

*@par Inputs:
*@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8,
@@ -3801,6 +3809,34 @@ REG_OP(ArgMaxGradD)
.OP_END_FACTORY_REG(ArgMaxGradD)

/**
*@brief Calculates the reversed outputs of the function "AddMatMatElements"
* c = c * beta + alpha * a * b

*@par Inputs:
*Three inputs, including:
* @li c: A mutable Tensor. Must be one of the following types:
* float16, float32.
* @li a: A mutable Tensor of the same type as "c".
* @li b: A mutable Tensor of the same type as "c".
* @li beta: A mutable scalar of the same type as "c".
* @li alpha: A mutable scalar of the same type as "c". \n

*@par Outputs:
* @li c: A mutable Tensor. Has the same type as "c". \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AddMatMatElements.
*/
REG_OP(AddMatMatElements)
.INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(c, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(AddMatMatElements)

/**
*@brief Returns cosine similarity between x1 and x2, computed along dim. \n

*@par Inputs:


third_party/fwkacllib/inc/ops/image_ops.h (+98 -0)

@@ -24,6 +24,22 @@
#include "graph/operator_reg.h" #include "graph/operator_reg.h"


namespace ge { namespace ge {
/**
*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n

*@par Inputs:
*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n

*@par Outputs:
*image:A Tensor of type uint8. \n

*@par Third-party framework compatibility
*Compatible with tensorflow DecodeGif operator.
*/
REG_OP(DecodeGif)
.INPUT(contents, TensorType({DT_STRING}))
.OUTPUT(image, TensorType({DT_UINT8}))
.OP_END_FACTORY_REG(DecodeGif)


/**
*@brief Adjust the hue of one or more images . \n
@@ -1071,6 +1087,88 @@ REG_OP(EncodePng)
.ATTR(compression, Int, -1)
.OP_END_FACTORY_REG(EncodePng)



/**
*@brief PNG-decode an image.
*@par Inputs:
*contents: 0-D. PNG-decoded image .

*@par Attributes:
*channels: graph channels \n
*dtype: type of image

*@par Outputs:
*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels]
where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB;
4: for RGBA . \n

*@par Third-party framework compatibility
*Compatible with tensorflow DecodePng operator.
*/
REG_OP(DecodePng)
.INPUT(contents, TensorType({DT_STRING}))
.OUTPUT(image, TensorType({DT_UINT8, DT_UINT16}))
.ATTR(dtype, Type, DT_UINT8)
.ATTR(channels, Int, 0)
.OP_END_FACTORY_REG(DecodePng)

/**
*@brief Bmp-decode an image. \n

*@par Inputs:
*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n

*@par Attributes:
*@li channels: Decode the desired number of color channels of the image. \n

*@par Outputs:
*image: A Tensor dtype of uint8.

* @par Third-party framework compatibility
* Compatible with tensorflow DecodeBmp operator.
*/

REG_OP(DecodeBmp)
.INPUT(contents, TensorType({DT_STRING}))
.OUTPUT(image, TensorType({DT_UINT8}))
.ATTR(channels, Int, 0)
.OP_END_FACTORY_REG(DecodeBmp)

/*
*@brief Function parse image from string to int. \n

*@par Inputs:
*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n
*@li crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. \n

*@par Attributes:
*@li channels: An optional int. Defaults to 0. Number of color channels for the
*decoded image.
*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower
*but nicer upscaling of the chroma planes
*@li try_recover_truncated: An optional bool. Defaults to False. If true try to
*recover an image from truncated input.
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required
fraction of lines before a truncated input is accepted.
*@li dct_method: An optional string. Defaults to "". string specifying a hint
*about the algorithm used for decompression. \n

*@par Outputs:
*image: A Tensor dtype of uint8.
*/
REG_OP(DecodeAndCropJpeg)
.INPUT(contents, TensorType({DT_STRING}))
.INPUT(crop_window, TensorType({DT_INT32}))
.OUTPUT(image, TensorType({DT_UINT8}))
.ATTR(channels, Int, 0)
.ATTR(ratio, Int, 1)
.ATTR(fancy_upscaling, Bool, true)
.ATTR(try_recover_truncated, Bool, false)
.ATTR(acceptable_fraction, Float, 1.0)
.ATTR(dct_method, String, "")
.OP_END_FACTORY_REG(DecodeAndCropJpeg)

/**
*@brief Resizes "images" to "size" using bilinear interpolation . \n




third_party/fwkacllib/inc/ops/linalg_ops.h (+47 -0)

@@ -83,6 +83,25 @@ REG_OP(Cholesky)
.OP_END_FACTORY_REG(Cholesky)

/**
*@brief Computes the outer product of two 1D vectors . \n

*@par Inputs:
*The input x1 and x2 has to be a 1D vector.Inputs include:
*@li x1:A Tensor. Must be one of the following types: float16, float32.
Shape is [N] . \n
*@li x2:A Tensor. Must have the same type as x. Shape is [M] . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n
*/

REG_OP(Ger)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(Ger)

/**
*@brief Computes the sign and the log of the absolute value of the determinant
of one or more square matrices . \n


@@ -328,6 +347,34 @@ REG_OP(SelfAdjointEig)
.OP_END_FACTORY_REG(SelfAdjointEig)

/**
*@brief Computes the sign and the log of the absolute value of the determinant
of one or more square matrices . \n

*@par Inputs:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor. Must be one of the following types: double, float32, float16
Shape is [..., M, M] . \n

*@par Outputs:
*@li y:A Tensor. Has the same type as x.
*@li sign:A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow LogMatrixDeterminant operator.
*/

REG_OP(Slogdet)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(sign, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(Slogdet)

/**
*@brief Computes the singular value decompositions of one or more matrices . \n

*@par Inputs:


third_party/fwkacllib/inc/ops/math_ops.h (+23 -0)

@@ -534,6 +534,29 @@ REG_OP(NextAfter)
.OP_END_FACTORY_REG(NextAfter)

/**
*@brief Calculate the P-norm distance between vectors function. \n

*@par Inputs:
*One inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
*@li p: An optional float.Defaults to 2. \n

*@par Outputs:
*y: A Tensor with the same type and shape of input_x's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Pdist. \n
*/
REG_OP(Pdist)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Float, 2.0)
.OP_END_FACTORY_REG(Pdist)
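Reviewer note: hedged sketch of building a Pdist node with the GE graph API. It assumes the usual REG_OP-generated pattern (a ge::op::Pdist class with set_input_x / set_attr_p accessors exposed through all_ops.h); none of those generated names appear in this diff.

#include "all_ops.h"   // assumed umbrella header for generated operator classes

ge::op::Pdist BuildPdist(ge::Operator &input) {
    ge::op::Pdist pdist("pdist_node");
    pdist.set_input_x(input);     // float16/float32 tensor per the op doc above
    pdist.set_attr_p(2.0f);       // p = 2 -> Euclidean pairwise distance
    return pdist;
}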

/**
*@brief Compute element-wise finiteness, return a boolean tensor.

*@par Inputs:


third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+106 -48)

@@ -91,6 +91,36 @@ REG_OP(MatMulV2)
.ATTR(offset_x, Int, 0)
.OP_END_FACTORY_REG(MatMulV2)


/**
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n

*@par Inputs:
*Two inputs, including:
* @li x1: A matrix Tensor. 2D. Must be one of the following types: int8.
* @li x2: A matrix Tensor. 2D. Must be one of the following types: int8.
* @li compress_index: A compress index matrix of type int8.
* @li bias: A 1D Tensor. Must be one of the following types: int32, float16.

*@par Attributes:
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n

*@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float16,
* int32. \n

*/
REG_OP(MatMulV2Compress)
.INPUT(x1, TensorType({DT_INT8}))
.INPUT(x2, TensorType({DT_INT8}))
.INPUT(compress_index, TensorType({DT_INT8}))
.OPTIONAL_INPUT(bias, TensorType({DT_INT32, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_INT32, DT_FLOAT16}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.ATTR(transpose_x1, Bool, false)
.ATTR(transpose_x2, Bool, false)
.ATTR(offset_x, Int, 0)
.OP_END_FACTORY_REG(MatMulV2Compress)


/**
*@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n
@@ -189,8 +219,8 @@ REG_OP(BatchMatMul)
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n

* @par Attributes:
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n

* @par Outputs:
* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
@@ -201,15 +231,16 @@ REG_OP(BatchMatMul)
*/


REG_OP(BatchMatMulV2)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.ATTR(adj_x1, Bool, false)
.ATTR(adj_x2, Bool, false)
.ATTR(offset_x, Int, 0)
.OP_END_FACTORY_REG(BatchMatMulV2)



/**
*@brief Computes half the L2 norm of a tensor without the sqrt . \n


@@ -369,7 +400,7 @@ REG_OP(MatrixSetDiagD)
* int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32,
* uint64
*@li indices: An ND Tensor.
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor.
*Must be one of the following types: float16, float32, int8, uint8, double,
* int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32,
@@ -429,7 +460,7 @@ REG_OP(TensorScatterUpdate)
*@li var: An ND Tensor . \n

*Must be one of the following types: float16, float32, int32, int8, uint8
*@li indices: An ND Tensor of type int32 or int64

*@li updates: A Tensor. format:NCHW, NHWC . \n
@@ -447,10 +478,10 @@ REG_OP(TensorScatterUpdate)
* Compatible with the TensorFlow operator ScatterAdd.
*/
REG_OP(ScatterAdd)
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterAdd)


@@ -463,7 +494,7 @@ REG_OP(ScatterAdd)
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8


*@li indices: An ND Tensor. *@li indices: An ND Tensor.
*Must be one of the following types: int32
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor. *@li updates: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8


@@ -478,10 +509,10 @@ REG_OP(ScatterAdd)
* Compatible with the TensorFlow operator ScatterDiv. * Compatible with the TensorFlow operator ScatterDiv.
*/ */
REG_OP(ScatterDiv) REG_OP(ScatterDiv)
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterDiv) .OP_END_FACTORY_REG(ScatterDiv)


@@ -493,7 +524,7 @@ REG_OP(ScatterDiv)
*@li var: An ND Tensor. *@li var: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor. *@li indices: An ND Tensor.
*Must be one of the following types: int32
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor. *@li updates: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@par Attributes: *@par Attributes:
@@ -507,10 +538,10 @@ REG_OP(ScatterDiv)
* Compatible with the TensorFlow operator ScatterNdAdd. * Compatible with the TensorFlow operator ScatterNdAdd.
*/ */
REG_OP(ScatterNdAdd) REG_OP(ScatterNdAdd)
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType()) .INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterNdAdd) .OP_END_FACTORY_REG(ScatterNdAdd)


@@ -550,7 +581,7 @@ REG_OP(TensorScatterAdd)
*@li var: An ND Tensor. *@li var: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor. *@li indices: An ND Tensor.
*Must be one of the following types: int32, int64
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor. *@li updates: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8


@@ -565,10 +596,10 @@ REG_OP(TensorScatterAdd)
* Compatible with the TensorFlow operator ScatterNdSub. * Compatible with the TensorFlow operator ScatterNdSub.
*/ */
REG_OP(ScatterNdSub) REG_OP(ScatterNdSub)
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType()) .INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterNdSub) .OP_END_FACTORY_REG(ScatterNdSub)


@@ -608,7 +639,7 @@ REG_OP(TensorScatterSub)
*@li var: An ND Tensor. *@li var: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor. *@li indices: An ND Tensor.
*Must be one of the following types: int32, int64
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor. *@li updates: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@par Attributes: *@par Attributes:
@@ -622,10 +653,10 @@ REG_OP(TensorScatterSub)
* Compatible with the TensorFlow operator ScatterSub. * Compatible with the TensorFlow operator ScatterSub.
*/ */
REG_OP(ScatterSub) REG_OP(ScatterSub)
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType()) .INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterSub) .OP_END_FACTORY_REG(ScatterSub)


@@ -796,7 +827,7 @@ REG_OP(ConfusionMatrix)
*@li var: An ND Tensor. *@li var: An ND Tensor.
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor. *@li indices: An ND Tensor.
*Must be one of the following types: int32
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor . \n *@li updates: An ND Tensor . \n


*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
@@ -813,7 +844,7 @@ REG_OP(ConfusionMatrix)
*/ */
REG_OP(ScatterMul) REG_OP(ScatterMul)
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
@@ -826,13 +857,13 @@ REG_OP(ScatterMul)
*@par Inputs: *@par Inputs:
* Three inputs, including: * Three inputs, including:
*@li var: An ND Tensor. *@li var: An ND Tensor.
*Must be one of the following types: float16, float, int32
*Must be one of the following types: float16, float, int32, int8, uint8


*@li indices: An ND Tensor. *@li indices: An ND Tensor.
*Must be one of the following types: int32
*Must be one of the following types: int32 or int64


*@li updates: An ND Tensor. *@li updates: An ND Tensor.
*Must be one of the following types: float16, float, int32
*Must be one of the following types: float16, float, int32, int8, uint8


*@par Attributes: *@par Attributes:
*use_locking: An optional bool. Defaults to "False". If "True", the operation *use_locking: An optional bool. Defaults to "False". If "True", the operation
@@ -845,10 +876,10 @@ REG_OP(ScatterMul)
* Compatible with the TensorFlow operator ScatterMin. * Compatible with the TensorFlow operator ScatterMin.
*/ */
REG_OP(ScatterMin) REG_OP(ScatterMin)
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterMin) .OP_END_FACTORY_REG(ScatterMin)


@@ -859,13 +890,13 @@ REG_OP(ScatterMin)
* Three inputs, including: * Three inputs, including:
*@li var: An ND Tensor . \n *@li var: An ND Tensor . \n


*Must be one of the following types: float16, float, int32
*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An NCHW, NHWC, or ND Tensor . \n *@li indices: An NCHW, NHWC, or ND Tensor . \n


*Must be one of the following types: int32
*Must be one of the following types: int32 or int64
*@li updates: An NCHW, NHWC, or ND Tensor . \n *@li updates: An NCHW, NHWC, or ND Tensor . \n


*Must be one of the following types: float16, float, int32
*Must be one of the following types: float16, float, int32, int8, uint8


*@par Attributes: *@par Attributes:
*use_locking: An optional bool. Defaults to "False". *use_locking: An optional bool. Defaults to "False".
@@ -878,10 +909,10 @@ REG_OP(ScatterMin)
* Compatible with the TensorFlow operator ScatterMax. * Compatible with the TensorFlow operator ScatterMax.
*/ */
REG_OP(ScatterMax) REG_OP(ScatterMax)
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterMax) .OP_END_FACTORY_REG(ScatterMax)


@@ -895,7 +926,7 @@ REG_OP(ScatterMax)
*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor . \n *@li indices: An ND Tensor . \n


*Must be one of the following types: int32
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor . \n *@li updates: An ND Tensor . \n


*Must be one of the following types: float16, float, int32, int8, uint8 *Must be one of the following types: float16, float, int32, int8, uint8
@@ -911,10 +942,10 @@ REG_OP(ScatterMax)
* Compatible with the TensorFlow operator ScatterUpdate. * Compatible with the TensorFlow operator ScatterUpdate.
*/ */
REG_OP(ScatterUpdate) REG_OP(ScatterUpdate)
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.ATTR(use_locking, Bool, false) .ATTR(use_locking, Bool, false)
.OP_END_FACTORY_REG(ScatterUpdate) .OP_END_FACTORY_REG(ScatterUpdate)


@@ -1096,6 +1127,33 @@ REG_OP(EinSum)
.REQUIRED_ATTR(equation, String) .REQUIRED_ATTR(equation, String)
.REQUIRED_ATTR(tensor_size, Int) .REQUIRED_ATTR(tensor_size, Int)
.OP_END_FACTORY_REG(EinSum) .OP_END_FACTORY_REG(EinSum)

/**
*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n

*@par Inputs:
*No inputs

*@par Attributes:
*@li num_rows: A required int. \n
*@li num_columns: An optional int. Defaults to 0. \n
*@li batch_shape: An optional ListInt. Defaults to []. \n
*@li dtype: An optional int. Defaults to 0. \n

*@par Outputs:
*y: A Tensor with targeted type and shape. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Eye. \n
*/
REG_OP(Eye)
.OUTPUT(y, TensorType::BasicType()) /* "Result, has targeted element type" */
.REQUIRED_ATTR(num_rows, Int)
.ATTR(num_columns, Int, 0)
.ATTR(batch_shape, ListInt, {})
.ATTR(dtype, Int, 0)
.OP_END_FACTORY_REG(Eye)
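// Illustrative sketch of the Eye semantics (not the Ascend kernel): a
// num_rows x num_columns matrix with ones on the main diagonal and zeros elsewhere.
// Treating num_columns <= 0 as "same as num_rows" and omitting batch_shape/dtype
// handling are assumptions of this example.
// #include <vector>
// #include <cstdint>
// std::vector<float> EyeRef(int64_t num_rows, int64_t num_columns) {
//   if (num_columns <= 0) {
//     num_columns = num_rows;  // assumption: default 0 falls back to a square matrix
//   }
//   std::vector<float> y(static_cast<size_t>(num_rows * num_columns), 0.0f);
//   for (int64_t i = 0; i < num_rows && i < num_columns; ++i) {
//     y[static_cast<size_t>(i * num_columns + i)] = 1.0f;
//   }
//   return y;
// }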

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 183
- 106
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -254,22 +254,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
*@par Third-party framework compatibility *@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(PriorBox)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(min_size, ListFloat)
.REQUIRED_ATTR(max_size, ListFloat)
.REQUIRED_ATTR(aspect_ratio, ListFloat)
.ATTR(img_h, Int, 0)
.ATTR(img_w, Int, 0)
.ATTR(step_h, Float, 0.0)
.ATTR(step_w, Float, 0.0)
.ATTR(flip, Bool, true)
.ATTR(clip, Bool, false)
.ATTR(offset, Float, 0.5)
.ATTR(variance, ListFloat, {0.1})
.OP_END_FACTORY_REG(PriorBox);
REG_OP(PriorBox)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(min_size, ListFloat)
.REQUIRED_ATTR(max_size, ListFloat)
.REQUIRED_ATTR(aspect_ratio, ListFloat)
.ATTR(img_h, Int, 0)
.ATTR(img_w, Int, 0)
.ATTR(step_h, Float, 0.0)
.ATTR(step_w, Float, 0.0)
.ATTR(flip, Bool, true)
.ATTR(clip, Bool, false)
.ATTR(offset, Float, 0.5)
.ATTR(variance, ListFloat, {0.1})
.OP_END_FACTORY_REG(PriorBox);


/** /**
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
@@ -306,25 +306,25 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
*@par Restrictions: *@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/ */
REG_OP(PriorBoxD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(min_size, ListFloat)
.REQUIRED_ATTR(max_size, ListFloat)
.ATTR(img_h, Int, 0)
.ATTR(img_w, Int, 0)
.ATTR(step_h, Float, 0.0)
.ATTR(step_w, Float, 0.0)
.ATTR(flip, Bool, true)
.ATTR(clip, Bool, false)
.ATTR(offset, Float, 0.5)
.ATTR(variance, ListFloat, {0.1})
.OP_END_FACTORY_REG(PriorBoxD);
REG_OP(PriorBoxD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(min_size, ListFloat)
.REQUIRED_ATTR(max_size, ListFloat)
.ATTR(img_h, Int, 0)
.ATTR(img_w, Int, 0)
.ATTR(step_h, Float, 0.0)
.ATTR(step_w, Float, 0.0)
.ATTR(flip, Bool, true)
.ATTR(clip, Bool, false)
.ATTR(offset, Float, 0.5)
.ATTR(variance, ListFloat, {0.1})
.OP_END_FACTORY_REG(PriorBoxD);


/** /**
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
@@ -358,22 +358,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
*@par Restrictions: *@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
*/ */
REG_OP(PriorBoxDV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(min_size, ListFloat)
.REQUIRED_ATTR(max_size, ListFloat)
.ATTR(img_h, Int, 0)
.ATTR(img_w, Int, 0)
.ATTR(step_h, Float, 0.0)
.ATTR(step_w, Float, 0.0)
.ATTR(flip, Bool, true)
.ATTR(clip, Bool, false)
.ATTR(offset, Float, 0.5)
.ATTR(variance, ListFloat, {0.1})
.OP_END_FACTORY_REG(PriorBoxDV2);
REG_OP(PriorBoxDV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(min_size, ListFloat)
.REQUIRED_ATTR(max_size, ListFloat)
.ATTR(img_h, Int, 0)
.ATTR(img_w, Int, 0)
.ATTR(step_h, Float, 0.0)
.ATTR(step_w, Float, 0.0)
.ATTR(flip, Bool, true)
.ATTR(clip, Bool, false)
.ATTR(offset, Float, 0.5)
.ATTR(variance, ListFloat, {0.1})
.OP_END_FACTORY_REG(PriorBoxDV2);


/** /**
*@brief Performs Position Sensitive ROI Pooling . \n *@brief Performs Position Sensitive ROI Pooling . \n
@@ -531,10 +531,10 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(Yolo) REG_OP(Yolo)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(boxes, Int, 3) .ATTR(boxes, Int, 3)
.ATTR(coords, Int, 4) .ATTR(coords, Int, 4)
.ATTR(classes, Int, 80) .ATTR(classes, Int, 80)
@@ -584,10 +584,10 @@ REG_OP(Yolo)
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(YoloV2DetectionOutput) REG_OP(YoloV2DetectionOutput)
.INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases, ListFloat) .REQUIRED_ATTR(biases, ListFloat)
.ATTR(boxes, Int, 5) .ATTR(boxes, Int, 5)
.ATTR(coords, Int, 4) .ATTR(coords, Int, 4)
@@ -598,7 +598,7 @@ REG_OP(YoloV2DetectionOutput)
.ATTR(score_threshold, Float, 0.5) .ATTR(score_threshold, Float, 0.5)
.ATTR(iou_threshold, Float, 0.45) .ATTR(iou_threshold, Float, 0.45)
.ATTR(pre_nms_topn, Int, 512) .ATTR(pre_nms_topn, Int, 512)
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32})) .OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV2DetectionOutput) .OP_END_FACTORY_REG(YoloV2DetectionOutput)


@@ -647,12 +647,12 @@ REG_OP(YoloV2DetectionOutput)
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
*/ */
REG_OP(YoloV2DetectionOutputD) REG_OP(YoloV2DetectionOutputD)
.INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases, ListFloat) .REQUIRED_ATTR(biases, ListFloat)
.ATTR(boxes, Int, 5) .ATTR(boxes, Int, 5)
.ATTR(coords, Int, 4) .ATTR(coords, Int, 4)
@@ -663,7 +663,7 @@ REG_OP(YoloV2DetectionOutputD)
.ATTR(score_threshold, Float, 0.5) .ATTR(score_threshold, Float, 0.5)
.ATTR(iou_threshold, Float, 0.45) .ATTR(iou_threshold, Float, 0.45)
.ATTR(pre_nms_topn, Int, 512) .ATTR(pre_nms_topn, Int, 512)
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32})) .OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV2DetectionOutputD) .OP_END_FACTORY_REG(YoloV2DetectionOutputD)


@@ -707,16 +707,16 @@ REG_OP(YoloV2DetectionOutputD)
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(YoloV3DetectionOutput) REG_OP(YoloV3DetectionOutput)
.INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_low, ListFloat)
.REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat)
.REQUIRED_ATTR(biases_high, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat)
@@ -729,7 +729,7 @@ REG_OP(YoloV3DetectionOutput)
.ATTR(score_threshold, Float, 0.5) .ATTR(score_threshold, Float, 0.5)
.ATTR(iou_threshold, Float, 0.45) .ATTR(iou_threshold, Float, 0.45)
.ATTR(pre_nms_topn, Int, 512) .ATTR(pre_nms_topn, Int, 512)
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32})) .OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV3DetectionOutput) .OP_END_FACTORY_REG(YoloV3DetectionOutput)


@@ -776,22 +776,22 @@ s
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
*/ */
REG_OP(YoloV3DetectionOutputD) REG_OP(YoloV3DetectionOutputD)
.INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_low, ListFloat)
.REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat)
.REQUIRED_ATTR(biases_high, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat)
@@ -804,7 +804,7 @@ REG_OP(YoloV3DetectionOutputD)
.ATTR(score_threshold, Float, 0.5) .ATTR(score_threshold, Float, 0.5)
.ATTR(iou_threshold, Float, 0.45) .ATTR(iou_threshold, Float, 0.45)
.ATTR(pre_nms_topn, Int, 512) .ATTR(pre_nms_topn, Int, 512)
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32})) .OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV3DetectionOutputD) .OP_END_FACTORY_REG(YoloV3DetectionOutputD)


@@ -848,7 +848,7 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol
* It is a custom operator. It has no corresponding operator in Caffe. * It is a custom operator. It has no corresponding operator in Caffe.
*/ */
REG_OP(YoloV3DetectionOutputV2) REG_OP(YoloV3DetectionOutputV2)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases, ListFloat) .REQUIRED_ATTR(biases, ListFloat)
.ATTR(boxes, Int, 3) .ATTR(boxes, Int, 3)
.ATTR(coords, Int, 4) .ATTR(coords, Int, 4)
@@ -862,7 +862,7 @@ REG_OP(YoloV3DetectionOutputV2)
.ATTR(N, Int, 10) .ATTR(N, Int, 10)
.ATTR(resize_origin_img_to_net, Bool, false) .ATTR(resize_origin_img_to_net, Bool, false)
.ATTR(out_box_dim, Int, 3) .ATTR(out_box_dim, Int, 3)
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32})) .OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV3DetectionOutputV2) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)


@@ -910,9 +910,9 @@ REG_OP(YoloV3DetectionOutputV2)
* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
*/ */
REG_OP(YoloV3DetectionOutputV2D) REG_OP(YoloV3DetectionOutputV2D)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases, ListFloat) .REQUIRED_ATTR(biases, ListFloat)
.ATTR(boxes, Int, 3) .ATTR(boxes, Int, 3)
.ATTR(coords, Int, 4) .ATTR(coords, Int, 4)
@@ -926,7 +926,7 @@ REG_OP(YoloV3DetectionOutputV2D)
.ATTR(N, Int, 10) .ATTR(N, Int, 10)
.ATTR(resize_origin_img_to_net, Bool, false) .ATTR(resize_origin_img_to_net, Bool, false)
.ATTR(out_box_dim, Int, 3) .ATTR(out_box_dim, Int, 3)
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32})) .OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)


@@ -1466,9 +1466,9 @@ REG_OP(NormalizeBBox)
* y: A Tensor. Must have the same type as box_predictions. * y: A Tensor. Must have the same type as box_predictions.
*/ */
REG_OP(DecodeBboxV2) REG_OP(DecodeBboxV2)
.INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
.ATTR(decode_clip, Float, 0.0) .ATTR(decode_clip, Float, 0.0)
.ATTR(reversed_box, Bool, false) .ATTR(reversed_box, Bool, false)
@@ -1592,7 +1592,6 @@ selected indices from the boxes tensor, where M <= max_output_size. \n
*Compatible with onnx NonMaxSuppression operator. *Compatible with onnx NonMaxSuppression operator.
*/ */



REG_OP(NonMaxSuppressionV7) REG_OP(NonMaxSuppressionV7)
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1641,7 +1640,7 @@ REG_OP(RoiExtractor)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(finest_scale, Int, 56) .ATTR(finest_scale, Int, 56)
.ATTR(roi_scale_factor, Float, 0) .ATTR(roi_scale_factor, Float, 0)
.ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 })
.ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32})
.ATTR(pooled_height, Int, 7) .ATTR(pooled_height, Int, 7)
.ATTR(pooled_width, Int, 7) .ATTR(pooled_width, Int, 7)
.ATTR(sample_num, Int, 0) .ATTR(sample_num, Int, 0)
@@ -1649,6 +1648,84 @@ REG_OP(RoiExtractor)
.ATTR(aligned, Bool, true) .ATTR(aligned, Bool, true)
.OP_END_FACTORY_REG(RoiExtractor) .OP_END_FACTORY_REG(RoiExtractor)


/**
*@brief Performs Position Sensitive PS ROI Pooling . \n

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
 * map; dimension C1 must be equal to
 * (int(output_dim+15)/C0)*group_size*group_size.
*@li rois: A tensor of type float16 or float32, with shape
 * [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
 * elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
 * the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
 * greater than or equal to "0.0" . \n

*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
* NC1HWC0: the channel dimension must be group_size squared, and rois_num must be a multiple of 16
*/
REG_OP(PSROIPoolingV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(spatial_scale, Float)
.REQUIRED_ATTR(output_dim, Int)
.REQUIRED_ATTR(group_size, Int)
.OP_END_FACTORY_REG(PSROIPoolingV2)

/**
*@brief Performs Position Sensitive PS ROI Pooling Grad . \n

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result
* feature map . \n
*@li rois: A tensor of type float16 or float32, with shape
 * [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
 * elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
 * the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
 * greater than or equal to "0.0" . \n

*@par Attributes:
*@li output_dim: A required int32, specifying the number of output channels,
* must be greater than 0.
*@li group_size: A required int32, specifying the number of groups to encode
* position-sensitive score maps, must be within the range (0, 128).
*@li spatial_scale: A required float32, scaling factor for mapping the input
* coordinates to the ROI coordinates . \n
*@li input_size: A required ListInt, specifying the (H, W) size of the gradient input.

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the feature
 * map; dimension C1 must be equal to
 * (int(output_dim+15)/C0)*group_size*group_size.

*@attention Constraints:
* NC1HWC0: the channel dimension must be group_size squared, and rois_num must be a multiple of 16
*/
REG_OP(PSROIPoolingGradV2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(spatial_scale, Float)
.REQUIRED_ATTR(output_dim, Int)
.REQUIRED_ATTR(group_size, Int)
.REQUIRED_ATTR(input_size, ListInt)
.OP_END_FACTORY_REG(PSROIPoolingGradV2D)

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

+ 139
- 1
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -526,6 +526,31 @@ REG_OP(LayerNorm)
.OP_END_FACTORY_REG(LayerNorm) .OP_END_FACTORY_REG(LayerNorm)


/** /**
*@brief Returns a tensor where each sub-tensor of input along dimension
* dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n

*@par Inputs:
*One input, including:
* @li x: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Attributes:
* @li p: A float, the order of the L_p norm.
* @li dim: An int, the dimension along which each sub-tensor is normalized.
* @li maxnorm: A float, the maximum allowed norm. \n

*@par Outputs:
*One output, including:
* @li y: A Tensor with the same shape and dtype as the input.
*/
REG_OP(Renorm)
.INPUT(x, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.REQUIRED_ATTR(p, Float)
.REQUIRED_ATTR(dim, Int)
.REQUIRED_ATTR(maxnorm, Float)
.OP_END_FACTORY_REG(Renorm)
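// Illustrative sketch of the Renorm semantics on a 2-D tensor (not the Ascend kernel):
// any row whose L_p norm exceeds maxnorm is rescaled so that its norm equals maxnorm,
// and rows already within the threshold are left unchanged. Operating on rows
// corresponds to one particular choice of "dim" and is an assumption of this example.
// #include <cmath>
// #include <vector>
// #include <cstddef>
// void RenormRowsRef(std::vector<float> &x, size_t rows, size_t cols, float p, float maxnorm) {
//   for (size_t r = 0; r < rows; ++r) {
//     float norm = 0.0f;
//     for (size_t c = 0; c < cols; ++c) {
//       norm += std::pow(std::fabs(x[r * cols + c]), p);  // sum of |x|^p over the row
//     }
//     norm = std::pow(norm, 1.0f / p);
//     if (norm > maxnorm && norm > 0.0f) {
//       float scale = maxnorm / norm;
//       for (size_t c = 0; c < cols; ++c) {
//         x[r * cols + c] *= scale;  // shrink the row so its p-norm equals maxnorm
//       }
//     }
//   }
// }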

/**
*@brief LayerNormGrad operator interface implementation *@brief LayerNormGrad operator interface implementation
* calculating: dy, x, variance, mean, gamma * calculating: dy, x, variance, mean, gamma
* pd_xl = data_dy*data_gamma * pd_xl = data_dy*data_gamma
@@ -683,7 +708,68 @@ REG_OP(DropOutDoMask)
.INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DropOutDoMask) .OP_END_FACTORY_REG(DropOutDoMask)

/**
*@brief Return "output" according to the algorithm of dropout_do_mask:
* scale_x = x *(1 / keep_prob)
* output = select(mask == 1, scale_x, 0)

*@par Inputs:
*Three inputs, including:
* @li x: A mutable Tensor. Must be one of the following types:
* float16, float32
* @li mask: A mutable Tensor. Must meet all of the following rules:
* shape of mask should be 1D.
* dtype of mask should be uint8.
* size of mask should meet the following algorithm:
* value = (size(x) + 128 - 1) // 128 * 128
* @li keep_prob: A mutable Tensor. Must meet all of the following rules:
* shape of "keep_prob" should be (1,) or [1,].
* Has the same type as "x" . \n

*@par Output:
*y: A mutable Tensor. Has the same type as "x".
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DropOutDoMaskV3)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mask, TensorType({DT_UINT8}))
.INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DropOutDoMaskV3)
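// Illustrative sketch of the dropout_do_mask algorithm described above (not the Ascend
// kernel): kept elements are rescaled by 1/keep_prob, dropped elements become 0. The
// real op takes a uint8 mask padded up to a multiple of 128; assuming one mask byte
// per element here is a simplification for readability.
// #include <vector>
// #include <cstdint>
// std::vector<float> DropOutDoMaskRef(const std::vector<float> &x,
//                                     const std::vector<uint8_t> &mask, float keep_prob) {
//   std::vector<float> y(x.size(), 0.0f);
//   for (size_t i = 0; i < x.size(); ++i) {
//     // output = select(mask == 1, x * (1 / keep_prob), 0)
//     y[i] = (mask[i] == 1) ? x[i] * (1.0f / keep_prob) : 0.0f;
//   }
//   return y;
// }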

/**
*@brief Return "output" according to the algorithm of dropout_do_mask:
* scale_x = x *(1 / keep_prob)
* output = select(mask == 1, scale_x, 0)

*@par Inputs:
*Two inputs, including:
* @li x: A mutable Tensor. Must be one of the following types:
* float16, float32
* @li mask: A mutable Tensor. Must meet all of the following rules:
* shape of mask should be 1D.
* dtype of mask should be uint8.
* size of mask should meet the following algorithm:
* value = (size(x) + 128 - 1) // 128 * 128
*@par Attributes:
* @li keep_prob: A required float, the probability that each element is
* kept, as used in "scale_x = x * (1 / keep_prob)" . \n

*@par Output:
*y: A mutable Tensor. Has the same type as "x".
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DropOutDoMaskV3D)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mask, TensorType({DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(keep_prob, Float)
.OP_END_FACTORY_REG(DropOutDoMaskV3D)

/** /**
*@brief Scales the input . \n *@brief Scales the input . \n


@@ -1356,6 +1442,58 @@ REG_OP(PoissonNllLoss)
.ATTR(eps, Float, 1e-8) .ATTR(eps, Float, 1e-8)
.ATTR(reduction, String, "mean") .ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(PoissonNllLoss) .OP_END_FACTORY_REG(PoissonNllLoss)
/**
*@brief Generates a sequence mask (rnn_gen_mask) according to the valid length of each batch.
* @par Inputs:
* @li seq_length: An ND Tensor of type int32, recording the current length of each batch.\n
*
* @par Attributes:
* @li num_step: A required int.\n
* @li hidden_size: A required int. \n
*
*
* @par Output:
* seq_mask: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n
*
*/
REG_OP(RnnGenMask)
.INPUT(seq_length, TensorType({DT_INT32}))
.OUTPUT(seq_mask, TensorType({DT_INT32}))
.REQUIRED_ATTR(num_step, Int)
.REQUIRED_ATTR(hidden_size, Int)
.OP_END_FACTORY_REG(RnnGenMask)
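// Illustrative sketch of the RnnGenMask semantics (not the Ascend kernel): for batch b,
// time steps t < seq_length[b] are marked 1 and the rest 0, broadcast over hidden_size
// into a [num_step, batch_size, hidden_size] mask. The int32 output dtype mirrors the
// registration above; the exact layout is an assumption of this example.
// #include <vector>
// #include <cstdint>
// std::vector<int32_t> RnnGenMaskRef(const std::vector<int32_t> &seq_length,
//                                    int32_t num_step, int32_t hidden_size) {
//   const int32_t batch_size = static_cast<int32_t>(seq_length.size());
//   std::vector<int32_t> mask(static_cast<size_t>(num_step) * batch_size * hidden_size, 0);
//   for (int32_t t = 0; t < num_step; ++t) {
//     for (int32_t b = 0; b < batch_size; ++b) {
//       const int32_t value = (t < seq_length[b]) ? 1 : 0;  // valid step or padding
//       for (int32_t h = 0; h < hidden_size; ++h) {
//         mask[(static_cast<size_t>(t) * batch_size + b) * hidden_size + h] = value;
//       }
//     }
//   }
//   return mask;
// }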

/**
* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss)
* between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n
* @par Inputs:
* Two inputs, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n
* @li target: A tensor. Must be of the following type:
* int32. \n

* @par Attributes:
* @li reduction: An optional string. Defaults to "mean" \n

* @par Outputs:
* y: A Tensor with the same element type as input x. \n
* is_target: A Tensor with the same element type as input target. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator MultiLabelMarginLoss. \n
*/
REG_OP(MultilabelMarginLoss)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(target, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(is_target, TensorType({DT_INT32}))
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(MultilabelMarginLoss)

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

+ 102
- 7
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -397,8 +397,8 @@ No default value.
specifying the stride of the sliding window for each dimension of specifying the stride of the sliding window for each dimension of
the input tensor. No default value. the input tensor. No default value.
*@li padding: A required string type of float16. *@li padding: A required string type of float16.
*@li pads: A list type of int32. Default value {0, 0, 0}.
*@li dilation: A list type of int32. Default value {1, 1, 1}.
*@li pads: A list type of int32. Default value {0,0,0,0,0,0}.
*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}.
*@li ceil_mode: A ceil mode number of int32 . Default value 0. *@li ceil_mode: A ceil mode number of int32 . Default value 0.
*@li data_format: An optional string. Defaults to "NDHWC" . \n *@li data_format: An optional string. Defaults to "NDHWC" . \n


@@ -421,8 +421,8 @@ REG_OP(MaxPool3D)
.REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(padding, String) .REQUIRED_ATTR(padding, String)
.ATTR(pads, ListInt, {0,0,0})
.ATTR(dilation, ListInt, {1,1,1})
.ATTR(pads, ListInt, {0,0,0,0,0,0})
.ATTR(dilation, ListInt, {1,1,1,1,1,1})
.ATTR(ceil_mode, Int, 0) .ATTR(ceil_mode, Int, 0)
.ATTR(data_format, String, "NDHWC") .ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(MaxPool3D) .OP_END_FACTORY_REG(MaxPool3D)
@@ -1184,6 +1184,7 @@ REG_OP(MaxPool3DGrad)
.OUTPUT(y, TensorType::RealNumberType()) .OUTPUT(y, TensorType::RealNumberType())
.REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(strides, ListInt)
.ATTR(padding, String, "SAME")
.REQUIRED_ATTR(pads, ListInt) .REQUIRED_ATTR(pads, ListInt)
.ATTR(data_format, String, "NDHWC") .ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(MaxPool3DGrad) .OP_END_FACTORY_REG(MaxPool3DGrad)
@@ -1440,12 +1441,11 @@ REG_OP(MaxPoolV3Grad)
.OP_END_FACTORY_REG(MaxPoolV3Grad) .OP_END_FACTORY_REG(MaxPoolV3Grad)


/** /**
*@brief Performs dilation2d on the input . \n
*@brief Performs Dilation2D on the input . \n


*@par Inputs: *@par Inputs:
*x: A tensor of shape is 4d, format is support NHWC. *x: A tensor of shape is 4d, format is support NHWC.
*filter: A tensor of shape is 3d, the type is same with x,
and the c dimension is same with x. \n
*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n


*@par Attributes: *@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
@@ -1474,6 +1474,84 @@ REG_OP(Dilation2D)
.OP_END_FACTORY_REG(Dilation2D) .OP_END_FACTORY_REG(Dilation2D)


/** /**
*@brief Performs Dilation2DBackpropFilter on the input. \n

*@par Inputs:
*x: A 4D tensor. The supported format is NHWC.
*filter: A 3D tensor with the same type as "x"; its C dimension is the same as that of "x".
*out_backprop: Has the same type and format as input "x"; its C dimension is the same as that of "x". \n

*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1.
*@li padding_mode: An optional string. Defaults to "SAME". Supported values are "SAME" and "VALID".
*@li pads: An optional list of 4 ints.
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n

*@par Outputs:
*y: The output tensor. Has the same type and format as input "filter" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Dilation2DBackpropFilter.
*/

REG_OP(Dilation2DBackpropFilter)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.INPUT(filter,
TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.INPUT(out_backprop,
TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.OUTPUT(y,
TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(rates, ListInt)
.ATTR(padding_mode, String, "SAME")
.ATTR(pads, ListInt, {0, 0, 0, 0})
.ATTR(ceil_mode, Bool, false)
.ATTR(data_format, String, "NHWC")
.OP_END_FACTORY_REG(Dilation2DBackpropFilter)

/**
*@brief Performs Dilation2DBackpropInput on the input. \n

*@par Inputs:
*x: A 4D tensor. The supported format is NHWC.
*filter: A 3D tensor with the same type as "x"; its C dimension is the same as that of "x".
*out_backprop: Has the same type and format as input "x"; its C dimension is the same as that of "x". \n

*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1.
*@li padding_mode: An optional string. Defaults to "SAME". Supported values are "SAME" and "VALID".
*@li pads: An optional list of 4 ints.
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n

*@par Outputs:
*y: The output tensor. Has the same type and format as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Dilation2DBackpropInput.
*/

REG_OP(Dilation2DBackpropInput)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.INPUT(filter,
TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.INPUT(out_backprop,
TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.OUTPUT(y,
TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(rates, ListInt)
.ATTR(padding_mode, String, "SAME")
.ATTR(pads, ListInt, {0, 0, 0, 0})
.ATTR(ceil_mode, Bool, false)
.ATTR(data_format, String, "NHWC")
.OP_END_FACTORY_REG(Dilation2DBackpropInput)

/**
* @brief Applies a 2D adaptive average pooling over * @brief Applies a 2D adaptive average pooling over
* an input signal composed of several input planes. \n * an input signal composed of several input planes. \n


@@ -1604,5 +1682,22 @@ REG_OP(MaxPoolWithArgmaxV1)
.ATTR(ceil_mode, Bool, false) .ATTR(ceil_mode, Bool, false)
.OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)


// SubSample
REG_OP(SubSample)
.INPUT(labels, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(batch_size_per_images, Int)
.REQUIRED_ATTR(positive_fraction, Float)
.OP_END_FACTORY_REG(SubSample)

// SubSampleLabels
REG_OP(SubSampleLabels)
.INPUT(labels, TensorType({DT_INT32}))
.INPUT(shuffle_matrix, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(batch_size_per_images, Int)
.REQUIRED_ATTR(positive_fraction, Float)
.OP_END_FACTORY_REG(SubSampleLabels)

} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H

+ 26
- 3
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -836,7 +836,7 @@ REG_OP(HardShrink)
*backprops: A Tensor with the same type and shape of features's. \n *backprops: A Tensor with the same type and shape of features's. \n
* *
*@par Attributes: *@par Attributes:
*@li lambda: An optional float.Defaults to 0.5. \n
*@li lambd: An optional float.Defaults to 0.5. \n
* *
*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the Pytorch operator Hardshrink_backward. \n *Compatible with the Pytorch operator Hardshrink_backward. \n
@@ -845,7 +845,7 @@ REG_OP(HardShrink)
.INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambda, Float, 0.5)
.ATTR(lambd, Float, 0.5)
.OP_END_FACTORY_REG(HardShrinkGrad) .OP_END_FACTORY_REG(HardShrinkGrad)


/** /**
@@ -920,7 +920,30 @@ REG_OP(SoftShrinkGrad)
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambd, Float, 0.5) .ATTR(lambd, Float, 0.5)
.OP_END_FACTORY_REG(SoftShrinkGrad) .OP_END_FACTORY_REG(SoftShrinkGrad)

/**
*@brief Calculate the gradient of log sigmoid. \n

*@par Inputs:
*Two inputs, including:
* @li grads: A tensor, gradient of previous layer. Must be one of the following types:
* float16, float32. \n
* @li features: A tensor, input of log sigmoid. Must be one of the following types:
* float16, float32. \n

*@par Outputs:
*One output, including:
* @li backprops: A tensor with the same type and shape as grads. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoidBackward. \n
*/
REG_OP(LogSigmoidGrad)
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(LogSigmoidGrad)
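// Illustrative sketch of the LogSigmoidGrad math (not the Ascend kernel): since
// log_sigmoid(x) = -ln(1 + e^(-x)), its derivative is 1 / (1 + e^x), so
// backprops = grads * 1 / (1 + e^x). The literal formula is kept for clarity; a
// production kernel would use a numerically safer rewriting.
// #include <cmath>
// #include <vector>
// #include <cstddef>
// std::vector<float> LogSigmoidGradRef(const std::vector<float> &grads,
//                                      const std::vector<float> &features) {
//   std::vector<float> backprops(grads.size(), 0.0f);
//   for (size_t i = 0; i < grads.size(); ++i) {
//     backprops[i] = grads[i] * (1.0f / (1.0f + std::exp(features[i])));  // chain rule
//   }
//   return backprops;
// }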

/** /**
*@brief Calculate -ln(1+e^(-x)). \n *@brief Calculate -ln(1+e^(-x)). \n




+ 2
- 2
third_party/fwkacllib/inc/ops/pad_ops.h View File

@@ -418,7 +418,7 @@ REG_OP(EmbeddingRankId)
*/ */
REG_OP(FillV2) REG_OP(FillV2)
.INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.ATTR(value, Float, 0) .ATTR(value, Float, 0)
.OP_END_FACTORY_REG(FillV2) .OP_END_FACTORY_REG(FillV2)


@@ -437,7 +437,7 @@ REG_OP(FillV2)
* Compatible with the ONNX operator ConstantOfShape. * Compatible with the ONNX operator ConstantOfShape.
*/ */
REG_OP(FillV2D) REG_OP(FillV2D)
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
.ATTR(value, Float, 0) .ATTR(value, Float, 0)
.REQUIRED_ATTR(dims, ListInt) .REQUIRED_ATTR(dims, ListInt)
.OP_END_FACTORY_REG(FillV2D) .OP_END_FACTORY_REG(FillV2D)


+ 240
- 0
third_party/fwkacllib/inc/ops/parsing_ops.h View File

@@ -51,6 +51,246 @@ REG_OP(StringToNumber)
.ATTR(out_type, Type, DT_FLOAT) .ATTR(out_type, Type, DT_FLOAT)
.OP_END_FACTORY_REG(StringToNumber) .OP_END_FACTORY_REG(StringToNumber)


/**
*@brief Parses a single serialized tensorflow.Example prototype into typed tensors.
*@par Input:
*serialized: A Tensor of type string.
*dense_defaults: A dynamic input Tensor of type string, float or int64. \n

*@par Attributes:
*num_sparse: An int, the number of sparse features (sparse_indices, sparse_values, sparse_shapes)
*out_type: The output type
*sparse_keys: A ListString, the keys of the sparse features
*sparse_types: The types of sparse_values
*dense_keys: A ListString, the keys of the dense features
*dense_shapes: The shapes of the dense_values outputs
*dense_types: The types of the dense_values outputs \n

*@par Outputs:
*sparse_indices: A Tensor of type int64.
*sparse_values: Has the same type as sparse_types.
*sparse_shapes: A Tensor of type int64
*dense_values: Has the same type as dense_defaults.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
**/
REG_OP(ParseSingleExample)
.INPUT(serialized, TensorType({DT_STRING}))
.DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
.DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(sparse_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
.DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(dense_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
.ATTR(num_sparse, Int, 0)
.ATTR(sparse_keys, ListString, {})
.ATTR(dense_keys, ListString, {})
.ATTR(sparse_types, ListType, {})
.ATTR(dense_types, ListType, {})
.ATTR(dense_shapes, ListListInt, {})
.OP_END_FACTORY_REG(ParseSingleExample)

/**
*@brief Decodes the raw bytes of a string into a tensor . \n
*@par Input:
*bytes: A Tensor of type string.

*@par Attributes:
*little_endian: An optional bool. Defaults to true
*out_type: The output type

*@par Outputs:
*output: A Tensor of type out_type
**/
REG_OP(DecodeRaw)
.INPUT(bytes, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT,
DT_INT64,DT_INT32,DT_INT8,DT_UINT8,DT_INT16,
DT_UINT16,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(out_type, Type, DT_FLOAT)
.ATTR(little_endian, Bool, true)
.OP_END_FACTORY_REG(DecodeRaw)

/**
*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. \n

*@par Inputs:
*serialized: A Tensor of string type. Scalar string containing serialized
*TensorProto prototype. \n

*@par Attributes:
*out_type: The type of the serialized tensor. The provided type must match the
*type of the serialized tensor and no implicit conversion will take place. \n

*@par Outputs:
*output: A Tensor of type out_type. \n

*@attention Constraints:
*The implementation for ParseTensor on Ascend uses AICPU,
*with bad performance. \n

*@par Third-party framework compatibility
*@li compatible with tensorflow ParseTensor operator.
*/
REG_OP(ParseTensor)
.INPUT(serialized, TensorType({DT_STRING}))
    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING,
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(out_type, Type, DT_FLOAT)
.OP_END_FACTORY_REG(ParseTensor)

/**
*@brief Converts CSV records to tensors. Each column maps to one tensor . \n

*@par Inputs:
*Inputs include:
*records: Each string is a record/row in the csv and all records should have the
*same format. \n
*record_defaults: One tensor per column of the input record, with either a
*scalar default value for that column or an empty vector if the column is
*required. \n

*@par Attributes:
*OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n
*field_delim: char delimiter to separate fields in a record. \n
*use_quote_delim: If false, treats double quotation marks as regular characters
*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n
*na_value: Additional string to recognize as NA/NaN. \n

*@par Outputs:
*output: Tensors. Each column has the same type as the corresponding record_defaults tensor . \n

*@attention Constraints:
*The implementation for DecodeCSV on Ascend uses AICPU, with bad
*performance. \n

*@par Third-party framework compatibility
*@li compatible with tensorflow DecodeCSV operator.
*/
REG_OP(DecodeCSV)
.INPUT(records, TensorType({DT_STRING}))
.DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_STRING, DT_RESOURCE}))
.DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_STRING, DT_RESOURCE}))
.ATTR(OUT_TYPE, ListType, {})
.ATTR(field_delim, String, ",")
.ATTR(use_quote_delim, Bool, true)
.ATTR(na_value, String, ",")
.ATTR(select_cols, ListInt, {})
.OP_END_FACTORY_REG(DecodeCSV)

/**
*@brief Parses serialized tensorflow.Example prototypes into typed tensors.
*@par Input:
*serialized: A Tensor of type string. \n
*name:A Tensor of type string. \n
*sparse_keys: Dynamic input tensor of string. \n
*dense_keys: Dynamic input tensor of string \n
*dense_defaults: Dynamic input tensor type as string, float, int64. \n

*@par Attributes:
*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n
*Ndense: Number of dense_keys \n
*sparse_types: types of sparse_values \n
*Tdense: Type of dense_defaults dense_defaults and dense_values \n
*dense_shapes: output of dense_defaults shape \n

*@par Outputs:
*sparse_indices: A Tensor of type int64. \n
*sparse_values: Has the same type as sparse_types. \n
*sparse_shapes: A Tensor of type int64 \n
*dense_values: Has the same type as dense_defaults. \n
*@par Third-party framework compatibility \n
*@li compatible with tensorflow ParseExample operator. \n
*/
REG_OP(ParseExample)
.INPUT(serialized, TensorType({DT_STRING}))
.INPUT(name, TensorType({DT_STRING}))
.DYNAMIC_INPUT(sparse_keys, TensorType({DT_STRING}))
.DYNAMIC_INPUT(dense_keys, TensorType({DT_STRING}))
.DYNAMIC_INPUT(dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.ATTR(Nsparse, Int, 0)
.ATTR(Ndense, Int, 0)
.ATTR(sparse_types, ListType, {})
.ATTR(Tdense, ListType, {})
.ATTR(dense_shapes, ListListInt, {})
.OP_END_FACTORY_REG(ParseExample)

/**
*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed
*tensors.
*@par Inputs:
*serialized: A Tensor of type string. \n
*feature_list_dense_missing_assumed_empty:A Tensor of type string. \n
*context_sparse_keys: Dynamic input tensor of string. \n
*context_dense_keys: Dynamic input tensor of string \n
*feature_list_sparse_keys: Dynamic input tensor of string \n
*feature_list_dense_keys: Dynamic input tensor of string \n
*context_dense_defaults: Dynamic input tensor of string, float, int64 \n
*debug_name: A Tensor of type string. \n

*@par Attributes:
*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n
*Ncontext_dense: Number of context_dense_keys \n
*Nfeature_list_sparse: Number of feature_list_sparse_keys \n
*Nfeature_list_dense: Number of feature_list_dense_keys \n
*context_sparse_types: Types of context_sparse_values \n
*Tcontext_dense: Types of context_dense_values \n
*feature_list_dense_types: Types of feature_list_dense_values \n
*context_dense_shapes: Shape of context_dense \n
*feature_list_sparse_types: Type of feature_list_sparse_values \n
*feature_list_dense_shapes: Shape of feature_list_dense \n

*@par Outputs:
*context_sparse_indices: Dynamic output tensor of type int64. \n
*context_sparse_values: Dynamic output tensor of type string, float, int64. \n
*context_sparse_shapes: Dynamic output tensor of type int64 \n
*context_dense_values: Dynamic output tensor of type string, float, int64. \n
*feature_list_sparse_indices: Dynamic output tensor of type int64. \n
*feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n
*feature_list_sparse_shapes: Dynamic output tensor of type int64 \n
*feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n
*@par Third-party framework compatibility \n
*@li compatible with tensorflow ParseSingleSequenceExample operator. \n
*/
REG_OP(ParseSingleSequenceExample)
.INPUT(serialized, TensorType({DT_STRING}))
.INPUT(feature_list_dense_missing_assumed_empty, TensorType({DT_STRING}))
.DYNAMIC_INPUT(context_sparse_keys, TensorType({DT_STRING}))
.DYNAMIC_INPUT(context_dense_keys, TensorType({DT_STRING}))
.DYNAMIC_INPUT(feature_list_sparse_keys, TensorType({DT_STRING}))
.DYNAMIC_INPUT(feature_list_dense_keys, TensorType({DT_STRING}))
.DYNAMIC_INPUT(context_dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.INPUT(debug_name, TensorType({DT_STRING}))
.DYNAMIC_OUTPUT(context_sparse_indices, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(context_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(context_sparse_shapes, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(context_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(feature_list_sparse_indices, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(feature_list_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.DYNAMIC_OUTPUT(feature_list_sparse_shapes, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(feature_list_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
.ATTR(Ncontext_sparse, Int, 0)
.ATTR(Ncontext_dense, Int, 0)
.ATTR(Nfeature_list_sparse, Int, 0)
.ATTR(Nfeature_list_dense, Int, 0)
.REQUIRED_ATTR(context_sparse_types, ListType)
.REQUIRED_ATTR(Tcontext_dense, ListType)
.REQUIRED_ATTR(feature_list_dense_types, ListType)
.REQUIRED_ATTR(context_dense_shapes, ListListInt)
.REQUIRED_ATTR(feature_list_sparse_types, ListType)
.REQUIRED_ATTR(feature_list_dense_shapes, ListListInt)
.OP_END_FACTORY_REG(ParseSingleSequenceExample)

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_

+ 20
- 0
third_party/fwkacllib/inc/ops/quantize_ops.h View File

@@ -62,6 +62,26 @@ REG_OP(Dequantize)


/** /**
*@brief Quantizes the input . \n *@brief Quantizes the input . \n
*@par Inputs:
*x: The input tensor to be quantized, of type float16 or float32. \n
*scales: A float tensor of quantization scales. \n
*zero_points: A tensor of zero points, with the same type as the output. \n
*@par Attributes:
*@li dtype: A required string specifying the quantized output type. \n
*@li axis: The dimension along which per-channel quantization is applied. Defaults to 1. \n
*@par Outputs:
*y: The quantized output, with the same shape as "x" and the dtype specified by "dtype" . \n
*/
REG_OP(Quantize)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scales, TensorType({DT_FLOAT}))
.INPUT(zero_points, TensorType({DT_INT8,DT_UINT8,DT_INT32}))
.OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT32}))
.REQUIRED_ATTR(dtype, String)
.ATTR(axis, Int, 1)
.OP_END_FACTORY_REG(Quantize)
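For reference, the affine quantization this registration describes can be sketched on the host as below. This is a minimal, illustrative C++ sketch of the usual y = round(x / scale) + zero_point semantics for an int8 output, not the device kernel; the saturation range is an assumption.

// Illustrative sketch only: per-tensor affine quantization to int8,
// assuming y = round(x / scale) + zero_point with saturation.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

std::vector<int8_t> QuantizeToInt8(const std::vector<float> &x, float scale, int32_t zero_point) {
  std::vector<int8_t> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    int32_t q = static_cast<int32_t>(std::nearbyint(x[i] / scale)) + zero_point;
    y[i] = static_cast<int8_t>(std::min(127, std::max(-128, q)));  // saturate to the int8 range
  }
  return y;
}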

/**
*@brief Quantizes the input . \n


*@par Inputs: *@par Inputs:
*x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n *x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n


+ 33
- 0
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -356,6 +356,39 @@ REG_OP(DropOutGenMask)
.ATTR(seed2, Int, 0) .ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(DropOutGenMask) .OP_END_FACTORY_REG(DropOutGenMask)



/**
*@brief Generate random uint8 mask for dropout v3 . \n

*@par Inputs:
include:
*@li shape: The shape of the output tensor.
*@li prob: A 0-D tensor. Probability that each element is 1 . \n

*@par Attributes:
*@li seed: If either seed or seed2 is set to a non-zero value, the random number
*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: A second seed to avoid seed collision . \n

*@par Outputs:
*y: A 1-D output tensor of random values in uint8 format . \n

*@attention Constraints:
*The output length is aligned to 16

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.

*@see DropOutGenMaskV3()
*/
REG_OP(DropOutGenMaskV3)
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
.INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
.OUTPUT(y, TensorType({ DT_UINT8 }))
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(DropOutGenMaskV3)

/** /**
*@brief Generates values in an interval . \n *@brief Generates values in an interval . \n




+ 25
- 2
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -898,14 +898,14 @@ REG_OP(Reduction)
*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n *@brief Computes the euclidean norm of elements across dimensions of a tensor . \n


*@par Inputs: *@par Inputs:
*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32.
*@li x: A Tensor. Must be one of the following types: float16, float32, int32.
*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n *@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n


*@par Attributes: *@par Attributes:
*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n *keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n


*@par Outputs: *@par Outputs:
*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n
*y: A Tensor. Must be one of the following types: float16, float32, int32 . \n


*@attention Constraints: *@attention Constraints:
* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n
@@ -1134,6 +1134,29 @@ REG_OP(GNTrainingUpdate)
.OP_END_FACTORY_REG(GNTrainingUpdate) .OP_END_FACTORY_REG(GNTrainingUpdate)


/** /**
*@brief Joins a string Tensor across the given dimensions. \n

*@par Inputs:
include:
*@li input: A Tensor of type string. The text to be joined.
*@li reduction_indices: A Tensor of type int. The dimensions to reduce over.

*@par Attributes:
*@li keep_dims: An optional bool. Defaults to true. If true, retain reduced dimensions with length 1.
*@li separator: An optional string. The separator to use when joining. Defaults to "".

*@par Outputs:
*@li output: A Tensor of type string.
*/
REG_OP(ReduceJoin)
.INPUT(input, TensorType({DT_STRING}))
.INPUT(reduction_indices, TensorType({DT_INT32}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(keep_dims, Bool, true)
.ATTR(separator, String, "")
.OP_END_FACTORY_REG(ReduceJoin)
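As a rough illustration of the join semantics along one reduced dimension, the sketch below joins the strings of each row of a 2-D input with the given separator (the single-axis case described above). It assumes a simple nested-vector layout and is not tied to the actual kernel.

// Minimal sketch: join strings along axis 1 of a [rows, cols] string matrix,
// analogous to ReduceJoin with reduction_indices = {1}.
#include <string>
#include <vector>

std::vector<std::string> ReduceJoinRows(const std::vector<std::vector<std::string>> &input,
                                        const std::string &separator) {
  std::vector<std::string> output;
  output.reserve(input.size());
  for (const auto &row : input) {
    std::string joined;
    for (size_t i = 0; i < row.size(); ++i) {
      if (i > 0) joined += separator;  // insert the separator between elements only
      joined += row[i];
    }
    output.push_back(joined);
  }
  return output;
}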

/**
* @brief Calculates the standard deviation and average value of Tensors. * @brief Calculates the standard deviation and average value of Tensors.


* @par Inputs: * @par Inputs:


+ 118
- 0
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -257,6 +257,83 @@ REG_OP(DynamicRNN)
.OP_END_FACTORY_REG(DynamicRNN) .OP_END_FACTORY_REG(DynamicRNN)


/** /**
*@brief: DynamicRNNV3 calculation.
*@par Inputs:
*twelve inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Attributes:
*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
*@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
*@li use_peephole:A bool identifying whether to use peepholes in the op. Defaults to false.
*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
*@li num_proj:An integer identifying the num projection in the op. Defaults to 0.
*@li time_major:A bool identifying the time major in the op. Defaults to true.
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true . \n

*@par Outputs:
*eight outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@par Third-party framework compatibility:
* Compatible with the TF operator LSTM.
*/
REG_OP(DynamicRNNV3)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
.OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(cell_type, String, "LSTM")
.ATTR(direction, String, "UNIDIRECTIONAL")
.ATTR(cell_depth, Int, 1)
.ATTR(use_peephole, Bool, false)
.ATTR(keep_prob, Float, 1.0)
.ATTR(cell_clip, Float, -1.0)
.ATTR(num_proj, Int, 0)
.ATTR(time_major, Bool, true)
.ATTR(activation, String, "tanh")
.ATTR(forget_bias, Float, 0.0)
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(DynamicRNNV3)

/**
*@brief: DynamicLSTMV2 calculation. *@brief: DynamicLSTMV2 calculation.
*@par Inputs: *@par Inputs:
*ten inputs: *ten inputs:
@@ -960,6 +1037,47 @@ REG_OP(CommonGRU)
.REQUIRED_ATTR(hidden_size, Int) .REQUIRED_ATTR(hidden_size, Int)
.ATTR(linear_before_reset , Int, 0) .ATTR(linear_before_reset , Int, 0)
.OP_END_FACTORY_REG(CommonGRU) .OP_END_FACTORY_REG(CommonGRU)
/**
* @brief Calculates the reversed outputs of the function "embedding". \n

* @par Inputs:
* Four inputs, including:
* @li weight: A mutable Tensor of embedding weights. Must be one of the following types:
* float32.
* @li indices: A mutable word index Tensor of the int32 type.\n
* @li offsets: A mutable word index Tensor of the int32 type.\n
* @li per_sample_weights: An optional float Tensor. If specified, it must have
* exactly the same shape as indices and is treated as having the same offsets,
* if those are not None; if not specified, all weights are taken to be 1.
* Only supported for mode="sum".\n

* @par Attributes:
* @li mode: A string from "sum", "mean" or "max", specifying the way to reduce each bag. \n

* @li scale_grad_by_freq: An optional bool. Defaults to "False".
* If "True", "grad_weight" will be scaled by the word frequency.
* If "False", "grad_weight" will not be scaled by the word frequency. \n
* @li sparse: If True, the gradient w.r.t. the weight matrix will be a sparse tensor. \n
* @li include_last_offset: If True, the offsets input has one additional element, where the last element
* is equivalent to the size of indices. This matches the CSR format. \n

* @par Outputs:
* @li y: An output Tensor of the same type as "weight". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator EmbeddingBag.
*/
REG_OP(EmbeddingBag)
.INPUT(weight, TensorType({ DT_FLOAT32 }))
.INPUT(indices, TensorType({ DT_INT32 }))
.OPTIONAL_INPUT(offsets, TensorType({DT_INT32}))
.OPTIONAL_INPUT(per_sample_weights, TensorType({DT_FLOAT32}))
.OUTPUT(y, TensorType({ DT_FLOAT32 }))
.ATTR(mode, String, "mean")
.ATTR(scale_grad_by_freq, Bool, false)
.ATTR(sparse, Bool, false)
.ATTR(include_last_offset, Bool, false)
.OP_END_FACTORY_REG(EmbeddingBag)
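A rough host-side sketch of the bag reduction that the PyTorch EmbeddingBag operator performs with mode="sum" is shown below; per_sample_weights and the other modes are omitted, and the convention that each offsets entry marks the start of a bag within indices is an assumption for illustration, not a statement about this kernel.

// Illustrative sketch: EmbeddingBag with mode="sum".
// weight: [num_embeddings][dim], indices: flat word ids, offsets: start of each bag.
#include <cstdint>
#include <vector>

std::vector<std::vector<float>> EmbeddingBagSum(const std::vector<std::vector<float>> &weight,
                                                const std::vector<int32_t> &indices,
                                                const std::vector<int32_t> &offsets) {
  const size_t dim = weight.empty() ? 0 : weight[0].size();
  std::vector<std::vector<float>> y(offsets.size(), std::vector<float>(dim, 0.0f));
  for (size_t bag = 0; bag < offsets.size(); ++bag) {
    size_t begin = static_cast<size_t>(offsets[bag]);
    size_t end = (bag + 1 < offsets.size()) ? static_cast<size_t>(offsets[bag + 1]) : indices.size();
    for (size_t i = begin; i < end; ++i) {  // accumulate each word embedding of the bag
      for (size_t d = 0; d < dim; ++d) {
        y[bag][d] += weight[indices[i]][d];
      }
    }
  }
  return y;
}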
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_

+ 36
- 9
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -1006,9 +1006,9 @@ REG_OP(TopK)


*@par Inputs: *@par Inputs:
*Inputs including: *Inputs including:
* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8.
* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8.
* @li shape: A required list of int32, specifying the output shape.
* @li indices: A required index tensor. Must be one of the following types: int32 or int64.
* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8...
* @li shape: A required list of int32 or int64, specifying the output shape.
*@par Outputs: *@par Outputs:
*y:A output Tensor with same datatype as "updates" . \n *y:A output Tensor with same datatype as "updates" . \n


@@ -1019,7 +1019,7 @@ REG_OP(TopK)
* Compatible with the TensorFlow operator ScatterNd. * Compatible with the TensorFlow operator ScatterNd.
*/ */
REG_OP(ScatterNd) REG_OP(ScatterNd)
.INPUT(indices, TensorType::BasicType())
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(x, TensorType::BasicType()) .INPUT(x, TensorType::BasicType())
.INPUT(shape, TensorType::IndexNumberType()) .INPUT(shape, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType())
@@ -1032,11 +1032,11 @@ REG_OP(ScatterNd)
*@par Inputs: *@par Inputs:
*Inputs including: *Inputs including:
* @li indices: A required index tensor. Must be one of the following types: * @li indices: A required index tensor. Must be one of the following types:
* float, float16, int32, int16. format:ND.
* int32 or int64. format:ND.
* @li x: A required slice tensor. Must be one of the following types: * @li x: A required slice tensor. Must be one of the following types:
* float, float16, int32, int16. format:ND.
* float16, float, int32, int8, uint8. format:ND.
*@par Attributes: *@par Attributes:
* @li shape: A required list of int32, specifying the output shape.
* @li shape: A required list of int32 or int64, specifying the output shape.
*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x". format:ND . \n *y: A Tensor. Has the same type as "x". format:ND . \n


@@ -1051,8 +1051,8 @@ REG_OP(ScatterNd)
*/ */
REG_OP(ScatterNdD) REG_OP(ScatterNdD)
.INPUT(indices, TensorType::IndexNumberType()) .INPUT(indices, TensorType::IndexNumberType())
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.REQUIRED_ATTR(shape, ListInt) .REQUIRED_ATTR(shape, ListInt)
.OP_END_FACTORY_REG(ScatterNdD) .OP_END_FACTORY_REG(ScatterNdD)
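The scatter semantics shared by ScatterNd and ScatterNdD can be illustrated with the simplest 1-D case below: each row of indices picks a position in an initially zero output and the matching update is written there. This is a sketch of the TensorFlow-style behaviour these registrations claim compatibility with, not the device kernel.

// Minimal sketch of ScatterNd for a rank-1 output and indices of shape [N, 1]:
// y[indices[i][0]] += x[i]; duplicate indices accumulate, as in TensorFlow.
#include <cstdint>
#include <vector>

std::vector<float> ScatterNd1D(const std::vector<std::vector<int32_t>> &indices,
                               const std::vector<float> &x, int32_t output_size) {
  std::vector<float> y(static_cast<size_t>(output_size), 0.0f);
  for (size_t i = 0; i < indices.size(); ++i) {
    y[static_cast<size_t>(indices[i][0])] += x[i];
  }
  return y;
}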


@@ -1877,6 +1877,33 @@ REG_OP(Crop)
.OP_END_FACTORY_REG(Crop) .OP_END_FACTORY_REG(Crop)


/** /**
*@brief Returns a namedtuple (values, indices) where values is the cumulative
* minimum of elements of input in the dimension dim,
* and indices is the index location of each minimum value found in the dimension dim. \n

*@par Inputs:
*One input, including:
* @li x: A tensor . Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n

*@par Attributes:
* @li axis: Axis along which to cummin. \n

*@par Outputs:
* y: A Tensor with the same type and shape as "x". \n
* indices: A Tensor of type int32 with the same shape as "x". \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Cummin. \n
*/
REG_OP(Cummin)
.INPUT(x, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.OUTPUT(indices, TensorType::BasicType())
.REQUIRED_ATTR(axis, Int)
.OP_END_FACTORY_REG(Cummin)
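A small sketch of the cumulative-minimum semantics along a single axis is given below; it returns both the running minima and the position at which each minimum was first seen, mirroring the (values, indices) pair described above. Purely illustrative.

// Illustrative sketch: 1-D cumulative minimum with indices,
// y[i] = min(x[0..i]) and indices[i] = position of that minimum.
#include <cstdint>
#include <utility>
#include <vector>

std::pair<std::vector<float>, std::vector<int32_t>> Cummin1D(const std::vector<float> &x) {
  std::vector<float> y(x.size());
  std::vector<int32_t> indices(x.size());
  float cur_min = 0.0f;
  int32_t cur_idx = 0;
  for (size_t i = 0; i < x.size(); ++i) {
    if (i == 0 || x[i] < cur_min) {  // track the running minimum and where it occurred
      cur_min = x[i];
      cur_idx = static_cast<int32_t>(i);
    }
    y[i] = cur_min;
    indices[i] = cur_idx;
  }
  return {y, indices};
}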

/**
*@brief Extends the input with copies of data along a specified dimension. For example: *@brief Extends the input with copies of data along a specified dimension. For example:
*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); *(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2);
*(2) axis = 1; *(2) axis = 1;


+ 337
- 0
third_party/fwkacllib/inc/ops/string_ops.h View File

@@ -25,6 +25,233 @@
#include "graph/operator_reg.h" #include "graph/operator_reg.h"


namespace ge { namespace ge {
/**
*@brief Creates ngrams from ragged string data . \n

*@par Inputs:
include:
*@li data:1-D.The values tensor of the ragged string tensor to make ngrams out of.
*@li data_splits:The splits tensor of the ragged string tensor to make ngrams out of . \n

*@par Attributes:
* separator:The string to append between elements of the token. Use "" for no separator.
* ngram_widths:The sizes of the ngrams to create.
* left_pad:The string to use to pad the left side of the ngram sequence. Only used if pad_width != 0.
* right_pad:The string to use to pad the right side of the ngram sequence. Only used if pad_width != 0.
* pad_width:The number of padding elements to add to each side of each sequence.
* preserve_short_sequences: Preserve short sequences. \n

*@par Outputs:
*@li ngrams:The values tensor of the output ngrams ragged tensor.
*@li ngrams_splits:The splits tensor of the output ngrams ragged tensor. \n

*@see StringNGrams()

*@par Third-party framework compatibility
*compatible with StringNGrams op of tensorflow

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(StringNGrams)
.INPUT(data, TensorType({DT_STRING}))
.INPUT(data_splits, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(ngrams, TensorType({DT_STRING}))
.OUTPUT(ngrams_splits, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(separator, String)
.ATTR(ngram_widths, ListInt, {})
.REQUIRED_ATTR(left_pad, String)
.REQUIRED_ATTR(right_pad, String)
.REQUIRED_ATTR(pad_width, Int)
.REQUIRED_ATTR(preserve_short_sequences, Bool)
.OP_END_FACTORY_REG(StringNGrams)

/**
*@brief Decodes each string in `input` into a sequence of Unicode code points . \n

*@par Inputs:
include:
*@li input:The text to be decoded. Can have any shape. Note that the output is flattened
to a vector of char values. \n

*@par Attributes:
* input_encoding:Text encoding of the input strings. This is any of the encodings supported
by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
* errors:Error handling policy when there is invalid formatting found in the input.
The value of 'strict' will cause the operation to produce an InvalidArgument
error on any invalid input formatting. A value of 'replace' (the default) will
cause the operation to replace any invalid formatting in the input with the
`replacement_char` codepoint. A value of 'ignore' will cause the operation to
skip any invalid formatting in the input and produce no corresponding output
character.
* replacement_char:The replacement character codepoint to be used in place of any invalid
formatting in the input when `errors='replace'`. Any valid unicode codepoint may
be used. The default value is 0xFFFD (65533), the Unicode
replacement character.
* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the
`replacement_char`. Default is false. \n

*@par Outputs:
*@li row_splits:A 1D tensor containing the row splits.
*@li char_values:A 1D tensor containing the decoded codepoints.
*@li char_to_byte_starts:A 1D int64 Tensor containing the byte index in the input string where each
character in `char_values` starts. \n

*@see UnicodeDecodeWithOffsets()

*@par Third-party framework compatibility
*compatible with UnicodeDecodeWithOffsets op of tensorflow

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(UnicodeDecodeWithOffsets)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(row_splits, TensorType({DT_INT64}))
.OUTPUT(char_values, TensorType({DT_INT32}))
.OUTPUT(char_to_byte_starts, TensorType({DT_INT64}))
.REQUIRED_ATTR(input_encoding, String)
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)

/**
*@brief Decodes each string in `input` into a sequence of Unicode code points. \n

*@par Inputs:
include:
*@li input:The text to be decoded. Can have any shape. Note that the output is flattened
to a vector of char values. \n

*@par Attributes:
* input_encoding:Text encoding of the input strings. This is any of the encodings supported
by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
* errors:Error handling policy when there is invalid formatting found in the input.
The value of 'strict' will cause the operation to produce an InvalidArgument
error on any invalid input formatting. A value of 'replace' (the default) will
cause the operation to replace any invalid formatting in the input with the
`replacement_char` codepoint. A value of 'ignore' will cause the operation to
skip any invalid formatting in the input and produce no corresponding output
character.
* replacement_char:The replacement character codepoint to be used in place of any invalid
formatting in the input when `errors='replace'`. Any valid unicode codepoint may
be used. The default value is 0xFFFD (65533), the Unicode
replacement character.
* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the
`replacement_char`. Default is false. \n

*@par Outputs:
*@li row_splits:A 1D tensor containing the row splits.
*@li char_values:A 1D tensor containing the decoded codepoints. \n

*@see UnicodeDecode()

*@par Third-party framework compatibility
*compatible with UnicodeDecode op of tensorflow

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(UnicodeDecode)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(row_splits, TensorType({DT_INT64}))
.OUTPUT(char_values, TensorType({DT_INT32}))
.REQUIRED_ATTR(input_encoding, String)
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.OP_END_FACTORY_REG(UnicodeDecode)

/**
*@brief Transcode the input text from a source encoding to a destination encoding. \n

*@par Inputs:
include:
*@li input:The text to be processed. Can have any shape. \n

*@par Attributes:
* input_encoding:Text encoding of the input strings. This is any of the encodings supported
by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
Multi-byte encodings will be big-endian.
* errors:Error handling policy when there is invalid formatting found in the input.
The value of 'strict' will cause the operation to produce an InvalidArgument
error on any invalid input formatting. A value of 'replace' (the default) will
cause the operation to replace any invalid formatting in the input with the
`replacement_char` codepoint. A value of 'ignore' will cause the operation to
skip any invalid formatting in the input and produce no corresponding output
character.
* replacement_char:The replacement character codepoint to be used in place of any invalid
formatting in the input when `errors='replace'`. Any valid unicode codepoint may
be used. The default value is 0xFFFD (65533), the Unicode
replacement character.
* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the
`replacement_char`. Default is false. \n

*@par Outputs:
*@li output:A string tensor containing unicode text encoded using `output_encoding`. \n

*@see UnicodeTranscode()

*@par Third-party framework compatibility
*compatible with UnicodeTranscode op of tensorflow

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(UnicodeTranscode)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_STRING}))
.REQUIRED_ATTR(input_encoding, String)
.ATTR(output_encoding, String, "UTF-8")
.ATTR(errors, String, "replace")
.ATTR(replacement_char, Int, 65533)
.ATTR(replace_control_characters, Bool, false)
.OP_END_FACTORY_REG(UnicodeTranscode)

/**
*@brief Encode a tensor of ints into unicode strings. \n

*@par Inputs:
include:
*@li input_values:A 1D tensor containing the unicode codepoints that should be encoded.
*@li input_splits:A 1D tensor specifying how the unicode codepoints should be split into strings. \n

*@par Attributes:
* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
Multi-byte encodings will be big-endian.
* errors:Error handling policy when there is invalid formatting found in the input.
The value of 'strict' will cause the operation to produce an InvalidArgument
error on any invalid input formatting. A value of 'replace' (the default) will
cause the operation to replace any invalid formatting in the input with the
`replacement_char` codepoint. A value of 'ignore' will cause the operation to
skip any invalid formatting in the input and produce no corresponding output
character.
* replacement_char:The replacement character codepoint to be used in place of any invalid
formatting in the input when `errors='replace'`. Any valid unicode codepoint may
be used. The default value is 0xFFFD (65533), the Unicode
replacement character. \n

*@par Outputs:
*@li output:The 1-D Tensor of strings encoded from the provided unicode codepoints. \n

*@see UnicodeEncode()

*@par Third-party framework compatibility
*compatible with UnicodeEncode op of tensorflow

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(UnicodeEncode)
.INPUT(input_values, TensorType({DT_INT32}))
.INPUT(input_splits, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(errors, String, "replace")
.ATTR(output_encoding, String, "UTF-8")
.ATTR(replacement_char, Int, 65533)
.OP_END_FACTORY_REG(UnicodeEncode)


/** /**
*@brief Split elements of input based on delimiter into a SparseTensor . \n *@brief Split elements of input based on delimiter into a SparseTensor . \n
@@ -62,6 +289,116 @@ REG_OP(StringSplit)
.OP_END_FACTORY_REG(StringSplit) .OP_END_FACTORY_REG(StringSplit)


/** /**
*@brief Replaces the match of pattern in input with rewrite. \n

*@par Inputs:
include:
*@li input:A Tensor of type string. The text to be processed. \n

*@par Attributes:
*@li pattern:A string. The regular expression to match the input.
*@li rewrite:A string. The rewrite to be applied to the matched expression.
*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global,
otherwise the replacement is done only on the first match.

*@par Outputs:
*@li output: A Tensor of type string.
*/
REG_OP(StaticRegexReplace)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(pattern, String, "")
.ATTR(rewrite, String, "")
.ATTR(replace_global, Bool, true)
.OP_END_FACTORY_REG(StaticRegexReplace)
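The replace_global flag can be illustrated with std::regex on the host: replace all matches when it is true, only the first match otherwise. This is an analogy for the documented behaviour, not the AICPU implementation, and std::regex syntax differs from the RE2 syntax TensorFlow uses.

// Host-side analogy for StaticRegexReplace using std::regex.
#include <regex>
#include <string>

std::string RegexReplace(const std::string &input, const std::string &pattern,
                         const std::string &rewrite, bool replace_global) {
  std::regex re(pattern);
  auto flags = replace_global ? std::regex_constants::format_default
                              : std::regex_constants::format_first_only;
  return std::regex_replace(input, re, rewrite, flags);
}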

/**
*@brief The input is a string tensor of any shape. The pattern is the
*regular expression to be matched with every element of the input tensor.
*The boolean values (True or False) of the output tensor indicate
*if the input matches the regex pattern provided.

*@par Inputs:
include:
*@li input:A Tensor of type string. The text to be processed. \n

*@par Attributes:
*@li pattern:A string. The regular expression to match the input.

*@par Outputs:
*@li output: A bool tensor with the same shape as `input`.
*/
REG_OP(StaticRegexFullMatch)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_BOOL}))
.ATTR(pattern, String, "")
.OP_END_FACTORY_REG(StaticRegexFullMatch)

/**
*@brief Joins the elements of "input" along segments defined by "segment_ids". \n

*@par Inputs:
include:
*@li input:A Tensor of type string. The text to be processed.
*@li segment_ids:A Tensor. Must be one of the following types: int32, int64.
*A tensor whose shape is a prefix of data.shape. Negative segment ids are not supported.
*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar.

*@par Attributes:
*@li separator:An optional string. Defaults to "". The separator to use when joining.

*@par Outputs:
*@li output: A Tensor of type string.
*/
REG_OP(UnsortedSegmentJoin)
.INPUT(input, TensorType({DT_STRING}))
.INPUT(segment_ids, TensorType({DT_INT32,DT_INT64}))
.INPUT(num_segments, TensorType({DT_INT32,DT_INT64}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(separator, String, "")
.OP_END_FACTORY_REG(UnsortedSegmentJoin)

/**
*@brief Converts each string in the input Tensor to lowercase. \n

*@par Inputs:
include:
*@li input:A Tensor of type string. The text to be processed.

*@par Attributes:
*@li encoding:An optional string. Defaults to "".

*@par Outputs:
*@li output: A Tensor of type string.
*/
REG_OP(StringLower)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(encoding, String, "")
.OP_END_FACTORY_REG(StringLower)

/**
*@brief Converts each string in the input Tensor to uppercase. \n

*@par Inputs:
include:
*@li input:A Tensor of type string. The text to be processed.

*@par Attributes:
*@li encoding:An optional string. Defaults to "".

*@par Outputs:
*@li output: A Tensor of type string.
*/
REG_OP(StringUpper)
.INPUT(input, TensorType({DT_STRING}))
.OUTPUT(output, TensorType({DT_STRING}))
.ATTR(encoding, String, "")
.OP_END_FACTORY_REG(StringUpper)

/**
*@brief Split elements of source based on sep into a SparseTensor . \n *@brief Split elements of source based on sep into a SparseTensor . \n


*@par Inputs: *@par Inputs:


+ 2
- 6
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -418,12 +418,8 @@ REG_OP(BatchToSpace)
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. * Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead.
*/ */
REG_OP(BatchToSpaceD) REG_OP(BatchToSpaceD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64,
DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64,
DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
.INPUT(x, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.REQUIRED_ATTR(block_size, Int) .REQUIRED_ATTR(block_size, Int)
.REQUIRED_ATTR(crops, ListInt) .REQUIRED_ATTR(crops, ListInt)
.OP_END_FACTORY_REG(BatchToSpaceD) .OP_END_FACTORY_REG(BatchToSpaceD)


+ 8
- 2
third_party/fwkacllib/inc/runtime/base.h View File

@@ -156,6 +156,12 @@ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtSt


/** /**
* @ingroup profiling_base * @ingroup profiling_base
* @brief ts send keypoint profiler log.
*/
RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream);
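A minimal usage sketch of the new keypoint trace call might look like the following; the stream creation helper and the concrete id/tag values are assumptions for illustration.

// Sketch: emit a keypoint profiler trace on a stream (values are illustrative).
#include "rt.h"

rtError_t EmitTrace(uint64_t traceId, uint64_t modelId, uint16_t tagId) {
  rtStream_t stream = nullptr;
  rtError_t ret = rtStreamCreate(&stream, 0);  // assumed stream helper from the runtime stream API
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  ret = rtProfilerTraceEx(traceId, modelId, tagId, stream);
  (void)rtStreamDestroy(stream);
  return ret;
}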

/**
* @ingroup profiling_base
* @brief ts set profiling reporter callback. * @brief ts set profiling reporter callback.
*/ */
RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);
@@ -200,7 +206,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal


/** /**
* @ingroup dvrt_base * @ingroup dvrt_base
* @brief register callback for fail task
* @brief register callback for fail task
* @param [in] uniName unique register name, can't be null * @param [in] uniName unique register name, can't be null
* @param [in] callback fail task callback function * @param [in] callback fail task callback function
* @param [out] NA * @param [out] NA
@@ -343,7 +349,7 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_


/** /**
* @ingroup dvrt_base * @ingroup dvrt_base
* @brief get current thread last stream id and task id
* @brief get current thread last stream id and task id
* @param [out] stream id and task id * @param [out] stream id and task id
* @param [in] null * @param [in] null
* @return RT_ERROR_NONE for ok * @return RT_ERROR_NONE for ok


+ 12
- 0
third_party/fwkacllib/inc/runtime/event.h View File

@@ -183,6 +183,18 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream);


/** /**
* @ingroup dvrt_event * @ingroup dvrt_event
* @brief Wait for a notify with time out
 * @param [in] notify_ notify to wait on
* @param [in] stream_ input stream
* @param [in] timeOut input timeOut
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
* @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx
*/
RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut);
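Relative to rtNotifyWait, this variant adds a timeout argument; a hedged sketch of how a caller might use it (the timeout value and error handling are illustrative) follows.

// Sketch: wait on a notify with a bounded timeout instead of blocking indefinitely.
#include "rt.h"

rtError_t WaitWithDeadline(rtNotify_t notify, rtStream_t stream) {
  const uint32_t kTimeOut = 1000U;  // illustrative timeout value; unit per the runtime documentation
  rtError_t ret = rtNotifyWaitWithTimeOut(notify, stream, kTimeOut);
  if (ret != RT_ERROR_NONE) {
    // RT_ERROR_INVALID_VALUE / RT_ERROR_STREAM_CONTEXT are documented above; handle as needed.
    return ret;
  }
  return RT_ERROR_NONE;
}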

/**
* @ingroup dvrt_event
* @brief Name a notify * @brief Name a notify
* @param [in] notify_ notify to be named * @param [in] notify_ notify to be named
* @param [in|out] name identification name * @param [in|out] name identification name


+ 1
- 0
third_party/fwkacllib/inc/runtime/rt.h View File

@@ -27,5 +27,6 @@
#include "mem.h" #include "mem.h"
#include "rt_model.h" #include "rt_model.h"
#include "stream.h" #include "stream.h"
#include "rt_stars.h"


#endif // __CCE_RUNTIME_RT_H__ #endif // __CCE_RUNTIME_RT_H__

+ 9
- 0
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -51,6 +51,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_GOTO, RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT, RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_ALL_KERNEL, RT_MODEL_TASK_ALL_KERNEL,
RT_MODEL_TASK_PROFILER_TRACE_EX,
} rtModelTaskType_t; } rtModelTaskType_t;


typedef enum tagModelStreamType { typedef enum tagModelStreamType {
@@ -197,6 +198,13 @@ typedef struct tagProfilerTraceTaskInfo {
uint32_t reserved[6]; uint32_t reserved[6];
} rtProfilerTrace_t; } rtProfilerTrace_t;


typedef struct tagProfilerTraceExTaskInfo {
uint64_t profilerTraceId;
uint64_t modelId;
uint16_t tagId;
uint8_t reserved[22];
} rtProfilerTraceEx_t;

typedef struct tagrtMemcpyAsyncTaskInfo { typedef struct tagrtMemcpyAsyncTaskInfo {
void *dst; void *dst;
uint64_t destMax; uint64_t destMax;
@@ -272,6 +280,7 @@ typedef struct tagTaskInfo {
rtLabelSwitchTaskInfo_t labelSwitchTask; rtLabelSwitchTaskInfo_t labelSwitchTask;
rtLabelGotoTaskInfo_t labelGotoTask; rtLabelGotoTaskInfo_t labelGotoTask;
rtProfilerTrace_t profilertraceTask; rtProfilerTrace_t profilertraceTask;
rtProfilerTraceEx_t profilertraceExTask;
rtMemcpyAsyncTaskInfo_t memcpyAsyncTask; rtMemcpyAsyncTaskInfo_t memcpyAsyncTask;
rtNotifyTaskInfo_t notifyTask; rtNotifyTaskInfo_t notifyTask;
rtReduceAsyncTaskInfo_t reduceAsyncTask; rtReduceAsyncTaskInfo_t reduceAsyncTask;


+ 29
- 0
third_party/fwkacllib/inc/runtime/rt_stars.h View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
* Description:
*/

#ifndef __CCE_RUNTIME_STARS_H
#define __CCE_RUNTIME_STARS_H

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

/**
* @ingroup rt_stars
* @brief launch stars task.
 * used to send stars sqe directly.
* @param [in] taskSqe stars task sqe
* @param [in] sqeLen stars task sqe length
* @param [in] stream associated stream
* @return RT_ERROR_NONE for ok, others failed
*/
RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream);
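Because the sqe payload is opaque here, a sketch can only show the call shape: hand the runtime a caller-built sqe buffer and its length on a stream. The wrapper below is illustrative and the buffer contents are the caller's responsibility.

// Sketch: submit a caller-built stars sqe on a stream (payload is a placeholder).
#include <cstdint>
#include "rt.h"

rtError_t LaunchStarsTask(rtStream_t stream, const void *sqe, uint32_t sqeLen) {
  if (sqe == nullptr || sqeLen == 0U) {
    return RT_ERROR_INVALID_VALUE;  // reject an obviously invalid sqe buffer up front
  }
  return rtStarsTaskLaunch(sqe, sqeLen, stream);
}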

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif // __CCE_RUNTIME_STARS_H

+ 36
- 34
third_party/fwkacllib/inc/toolchain/prof_reporter.h View File

@@ -41,42 +41,44 @@ namespace Engine {
* the Reporter class .used to send data to profiling * the Reporter class .used to send data to profiling
*/ */
class MSVP_PROF_API Reporter { class MSVP_PROF_API Reporter {
public:
virtual ~Reporter() {}
public:
virtual ~Reporter() {}


public:
/**
* @ingroup reporter
* @name : Report
* @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n
The data will be firstly appended to cache, if the cache is full, data will be ignored
* @param data [IN] const ReporterData * the data send to libmsporf
* @retval PROFILING_SUCCESS 0 (success)
* @retval PROFILING_FAILED -1 (failed)
*
* @par depend:
* @li libmsprof
* @li prof_reporter.h
* @since c60
* @see Flush
*/
virtual int Report(const ReporterData *data) = 0;
public:
/**
* @ingroup reporter
* @name : Report
* @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n
The data will be firstly appended to cache, if the cache is full, data will be ignored
* @param data [IN] const ReporterData * the data send to libmsporf
* @retval PROFILING_SUCCESS 0 (success)
* @retval PROFILING_FAILED -1 (failed)
*
* @par depend:
* @li libmsprof
* @li prof_reporter.h
* @since c60
* @see Flush
*/
virtual int Report(const ReporterData *data) = 0;


/**
* @ingroup reporter
* @name : Flush
* @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n
The all datas of cache will be write to file or send to host
* @retval PROFILING_SUCCESS 0 (success)
* @retval PROFILING_FAILED -1 (failed)
*
* @par depend:
* @li libmsprof
* @li prof_reporter.h
* @since c60
* @see ProfMgrStop
*/
virtual int Flush() = 0;
/**
* @ingroup reporter
* @name : Flush
* @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n
The all datas of cache will be write to file or send to host
* @retval PROFILING_SUCCESS 0 (success)
* @retval PROFILING_FAILED -1 (failed)
*
* @par depend:
* @li libmsprof
* @li prof_reporter.h
* @since c60
* @see ProfMgrStop
*/
virtual int Flush() = 0;

virtual uint32_t GetReportDataMaxLen() = 0;
}; };


} // namespace Engine } // namespace Engine


+ 35
- 2
third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h View File

@@ -55,7 +55,40 @@ extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Gr


/** /**
* @ingroup aoe * @ingroup aoe
 * @par Description: Gradient tuning
 * @par Description: Tuning initialization
*
 * @attention None
 * @param session [IN] GE session
 * @param option [IN] Option set, containing tuning options and GE options
 * @retval #AOE_SUCCESS Execution succeeded
 * @retval #AOE_FAILED Execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package to which this interface belongs.
 * @li tune_api.h: the header file in which this interface is declared.
 * @see None
 * @since
*/
extern "C" AoeStatus AoeOnlineInitialize(ge::Session *session, const std::map<std::string, std::string> &option);

/**
* @ingroup aoe
* @par 描述: 调优去初始化
*
* @attention 无
* @param 无
* @retval #AOE_SUCCESS 执行成功
* @retval #AOE_FAILED 执行失败
* @par 依赖:
* @li tune_api.cpp:该接口所属的开发包。
* @li tune_api.h:该接口声明所在的头文件。
* @see 无
* @since
*/
extern "C" AoeStatus AoeOnlineFinalize();

/**
* @ingroup aoe
 * @par Description: Tuning processing
 * *
 * @attention None * @attention None
 * @param tuningGraph [IN] graph to tune * @param tuningGraph [IN] graph to tune
@@ -71,5 +104,5 @@ extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Gr
* @since * @since
*/ */
extern "C" AoeStatus AoeOnlineTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, extern "C" AoeStatus AoeOnlineTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
ge::Session *session, const std::map<std::string, std::string> &option);
#endif #endif
