Browse Source

update include headers 0311

tags/v1.2.0
shenwei41 4 years ago
parent
commit
54a48678ae
21 changed files with 1178 additions and 323 deletions
  1. +3
    -3
      inc/external/acl/error_codes/ge_error_codes.h
  2. +303
    -228
      inc/external/acl/ops/acl_dvpp.h
  3. +0
    -0
      scripts/format_source_code.sh
  4. +58
    -0
      third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h
  5. +19
    -0
      third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  6. +166
    -0
      third_party/fwkacllib/inc/ops/image_ops.h
  7. +274
    -0
      third_party/fwkacllib/inc/ops/list_ops.h
  8. +59
    -0
      third_party/fwkacllib/inc/ops/math_ops.h
  9. +30
    -0
      third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  10. +54
    -0
      third_party/fwkacllib/inc/ops/nn_norm_ops.h
  11. +55
    -0
      third_party/fwkacllib/inc/ops/nn_ops.h
  12. +2
    -2
      third_party/fwkacllib/inc/ops/nn_pooling_ops.h
  13. +28
    -0
      third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
  14. +1
    -1
      third_party/fwkacllib/inc/ops/pad_ops.h
  15. +56
    -0
      third_party/fwkacllib/inc/ops/selection_ops.h
  16. +1
    -1
      third_party/fwkacllib/inc/runtime/base.h
  17. +23
    -0
      third_party/fwkacllib/inc/runtime/config.h
  18. +4
    -4
      third_party/fwkacllib/inc/runtime/kernel.h
  19. +20
    -2
      third_party/fwkacllib/inc/runtime/rt_model.h
  20. +22
    -0
      third_party/fwkacllib/inc/runtime/stream.h
  21. +0
    -82
      third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h

+ 3
- 3
inc/external/acl/error_codes/ge_error_codes.h View File

@@ -53,9 +53,9 @@ static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022;
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022;
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;


+ 303
- 228
inc/external/acl/ops/acl_dvpp.h View File

@@ -53,123 +53,109 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output

// Supported Pixel Format
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
};

// Stream Format
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
H264_BASELINE_LEVEL,
H264_MAIN_LEVEL,
H264_HIGH_LEVEL
};
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };

// Supported Channel Mode
enum acldvppChannelMode {
DVPP_CHNMODE_VPC = 1,
DVPP_CHNMODE_JPEGD = 2,
DVPP_CHNMODE_JPEGE = 4
};
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };

// Supported Border Type
enum acldvppBorderType {
BORDER_CONSTANT = 0,
BORDER_REPLICATE,
BORDER_REFLECT,
BORDER_REFLECT_101
};
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };

// Venc parameter type
enum aclvencChannelDescParamType {
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
};

// Jpeg picture format
enum acldvppJpegFormat {
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
};

/**
@@ -523,9 +509,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
* @retval null for failed.
* @retval other success
*/
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
uint32_t right,
uint32_t top,
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
uint32_t bottom);

/**
@@ -604,10 +588,7 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
uint32_t left,
uint32_t right,
uint32_t top,
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
uint32_t bottom);

/**
@@ -1096,7 +1077,8 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, const void *param);
aclvencChannelDescParamType paramType, size_t length,
const void *param);

/**
* @ingroup AscendCL
@@ -1245,7 +1227,8 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);
aclvencChannelDescParamType paramType, size_t length,
size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
@@ -1545,10 +1528,7 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
int32_t *components);

/**
@@ -1565,11 +1545,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
uint32_t *height, int32_t *components,
acldvppJpegFormat *format);

/**
@@ -1584,8 +1561,7 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
const acldvppJpegeConfig *config,
uint32_t *size);
const acldvppJpegeConfig *config, uint32_t *size);

/**
* @ingroup AscendCL
@@ -1599,10 +1575,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1617,11 +1591,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
uint32_t dataSize,
uint32_t *width,
uint32_t *height,
int32_t *components);
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
uint32_t *height, int32_t *components);

/**
* @ingroup AscendCL
@@ -1635,10 +1606,8 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1702,10 +1671,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe
* @see acldvppCreateChannel | acldvppCreatePicDesc
* | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig,
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
@@ -1714,7 +1681,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe
*
* @par Function
* crop the input picture according to the specified area,
* and then store the picture in the output memory as the output picture
* and then store the picture in the output memory as the output picture
*
* @par Restriction
* Width alignment requirements:
@@ -1732,23 +1699,59 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe
* @li The height of the input image is aligned to 2.
* High stride minimum 6 and maximum 4096.
*
* @param channelDesc [IN] the channel destruction
* @param inputDesc [IN] crop input picture destruction
* @param outputDesc [IN|OUT] crop output picture destruction
* @param cropArea [IN] crop area config
* @param stream [IN] crop task stream
* @param channelDesc [IN] the channel destruction
* @param inputDesc [IN] crop input picture destruction
* @param outputDesc [IN|OUT] crop output picture destruction
* @param cropArea [IN] crop area config
* @param stream [IN] crop task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief dvpp vpc crop and resize config.
*
* @par Function
* crop the input picture with resize config according to the specified area,
* and then store the picture in the output memory as the output picture
*
* @par Restriction
* Width alignment requirements:
* @li The minimum stride is 32 and the maximum is 4096 * 4
* (that is, an image in argb format with a width of 4096);
* @li For 8K scaling, widthStride is required to be aligned to 2;
* @li For non 8K scaling, the calculation formula for widthStride
* is different for different image formats:
* @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
* @li yuv422packed: input image width * 2 and then align to 16
* @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16
* @li xrgb8888: input image width * 4, align to 16
* @li HFBC:input image width
* Height alignment requirements:
* @li The height of the input image is aligned to 2.
* High stride minimum 6 and maximum 4096.
*
* @param channelDesc [IN] the channel destruction
* @param inputDesc [IN] crop input picture destruction
* @param outputDesc [IN|OUT] crop output picture destruction
* @param cropArea [IN] crop area config
* @param resizeConfig [IN] resize config
* @param stream [IN] crop and resize config task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop.
*
* @par Function
@@ -1769,12 +1772,37 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
aclrtStream stream);
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[], aclrtStream stream);

/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop and resize config.
*
* @par Function
* crop the input batch picture with resize config according to the specified area
* as the output batch pictures
*
* @param channelDesc [IN] the channel destruction
* @param srcBatchPicDescs [IN] crop input batch picture destruction
* @param roiNums [IN] roi config numbers
* @param size [IN] roiNum size
* @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction
* @param cropAreas [IN] crop area configs
* @param resizeConfig [IN] resize config
* @param stream [IN] crop batch and resize config task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1797,12 +1825,36 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief dvpp vpc crop, resize config and paste.
*
* @par Function
* crop the input picture with resize config according to the specified area,
* and paste the picture to the specified position of the target picture
* as the output picture
*
* @param channelDesc [IN] the channel destruction
* @param inputDesc [IN] crop and paste input picture destruction
* @param outputDesc [IN|OUT] crop and paste output picture destruction
* @param cropArea [IN] crop area config
* @param pasteArea [IN] paste area config
* @param resizeConfig [IN] resize config
* @param stream [IN] crop, paste and resize task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1827,14 +1879,40 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[], aclrtStream stream);

/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop, resize config and paste.
*
* @par Function
* crop the input batch picture with resize config according to the specified area,
* and paste the pictures to the specified position of the target pictures
* as the output batch pictures
*
* @param channelDesc [IN] the channel destruction
* @param srcBatchPicDescs [IN] crop input batch picture destruction
* @param roiNums [IN] roi config numbers
* @param size [IN] roiNum size
* @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction
* @param cropAreas [IN] crop area configs
* @param pasteAreas [IN] paste area configs
* @param resizeConfig [IN] resize config
* @param stream [IN] crop batch and resize config task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1862,11 +1940,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1884,11 +1959,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreateJpegeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
const void *data,
uint32_t *size,
acldvppJpegeConfig *config,
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
const void *data, uint32_t *size, acldvppJpegeConfig *config,
aclrtStream stream);

/**
@@ -1906,11 +1978,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1965,11 +2034,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
acldvppPicDesc *output,
aclvdecFrameConfig *config,
void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData);

/**
* @ingroup AscendCL
@@ -1988,10 +2054,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
aclvdecFrameConfig *config,
void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
aclvdecFrameConfig *config, void *userData);

/**
* @ingroup AscendCL
@@ -2012,10 +2076,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2037,11 +2099,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
void *reserve,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2053,8 +2112,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
uint32_t mode);
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);

/**
* @ingroup AscendCL
@@ -2089,8 +2147,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
uint32_t outMode);
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);

/**
* @ingroup AscendCL
@@ -2187,9 +2244,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
uint32_t dim,
uint8_t **data,
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
uint32_t *len);
/**
* @ingroup AscendCL
@@ -2207,10 +2262,8 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap,
aclrtStream stream);
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2231,8 +2284,7 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
*
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
uint32_t index,
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
double value);

/**
@@ -2377,10 +2429,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig,
aclrtStream stream);
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2397,11 +2447,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *srcPicDesc,
acldvppHist *hist,
void *reserve,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
acldvppHist *hist, void *reserve, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2410,7 +2457,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();
ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();

/**
* @ingroup AscendCL
@@ -2467,7 +2514,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
*
* @see acldvppCreateHist | acldvppVpcCalcHistAsync
*/
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);

/**
* @ingroup AscendCL
@@ -2486,8 +2533,36 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);
*/
ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);

/**
* @ingroup AscendCL
* @brief dvpp vpc batch crop, resize config and make border.
*
* @par Function
* crop the input batch picture with resize config and border configs according to the specified area
* as the output batch pictures
*
* @param channelDesc [IN] the channel destruction
* @param srcBatchPicDescs [IN] crop input batch picture destruction
* @param roiNums [IN] roi config numbers
* @param size [IN] roiNum size
* @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction
* @param cropAreas [IN] crop area configs
* @param borderCfgs [IN] border configs
* @param resizeConfig [IN] resize config
* @param stream [IN] crop batch, resize config and make border task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig, aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_

+ 0
- 0
scripts/format_source_code.sh View File


+ 58
- 0
third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h View File

@@ -0,0 +1,58 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file avg_pool_1d_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
#include "graph/operator_reg.h"

namespace ge {
/**
*@brief Generate an auxiliary matrix . \n

*@par Inputs:
* @li x: A tensor. Must be one of the following types: uint8, int8, int16, int32,
int64, float16, float, double. The format must be NHWC, NCHW or NC1HWC0.

*@par Attributes:
*@li ksize: Kernel size. Input type is int.
*@li strides: Input type is int.
*@li pads: Input type is listInt .
*@li ceil_mode: Bool, default value is false.
*@li count_include_pad: Bool, default value is false. \n

*@par Outputs:
*y_tensor: A tensor with the same types as "x" . \n
*@par Third-party framework compatibility

*Compatible with the TensorFlow operator Unbatch.
*/
REG_OP(AvgPool1DAvgMatrix)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8,
DT_INT32, DT_INT64, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8,
DT_INT32, DT_INT64, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, Int)
.REQUIRED_ATTR(strides, Int)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(ceil_mode, Bool, false)
.ATTR(count_include_pad, Bool, false)
.OP_END_FACTORY_REG(AvgPool1DAvgMatrix)
}
#endif

+ 19
- 0
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -2455,6 +2455,25 @@ REG_OP(Eltwise)
.OP_END_FACTORY_REG(Eltwise)

/**
*@brief Computes the inverse error function of each element of input. \n

*@par Inputs:
*One inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Outputs:
*y: A Tensor with the same type and shape of input_x's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Erfinv. \n
*/
REG_OP(Erfinv)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(Erfinv)

/**
*@brief Computes element-wise population count. \n

*@par Inputs:


+ 166
- 0
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -1517,6 +1517,96 @@ REG_OP(DenseImageWarp)
.OP_END_FACTORY_REG(DenseImageWarp)

/**
*@brief Calculate the resize_d function. \n

*@par Inputs:
*One inputs, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
*@li sizes: An optional listInt. \n
*@li scales: An optional listFloat.
Defaults to none. \n
*@li roi: An optional listInt.
Defaults to none. \n
*@li coordinate_transformation_mode: An optional String.
Defaults to "half_pixel". \n
*@li cubic_coeff_a: An optional float.
Defaults to -0.75. \n
*@li exclude_outside: An optional int.
Defaults to 0. \n
*@li extrapolation_value: An optional float.
Defaults to 0.0. \n
*@li mode: An optional String.
Defaults to "nearest". \n
*@li nearest_mode: An optional String.
Defaults to "round_prefer_floor". \n

*@par Outputs:
*y: A Tensor with the same type of x's,
shape depends on x and sizes. \n
*/
REG_OP(ResizeD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(sizes, ListInt)
.ATTR(scales, ListFloat, {})
.ATTR(roi, ListInt, {})
.ATTR(coordinate_transformation_mode, String, "half_pixel")
.ATTR(cubic_coeff_a, Float, -0.75)
.ATTR(exclude_outside, Int, 0)
.ATTR(extrapolation_value, Float, 0.0)
.ATTR(mode, String, "nearest")
.ATTR(nearest_mode, String, "round_prefer_floor")
.OP_END_FACTORY_REG(ResizeD)

/**
*@brief Calculate the resize_grad_d function. \n

*@par Inputs:
*One inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
*@li original_size: An optional listInt. \n
*@li roi: An optional listInt.
Defaults to none. \n
*@li scales: An optional listFloat.
Defaults to none. \n
*@li coordinate_transformation_mode: An optional String.
Defaults to "half_pixel". \n
*@li cubic_coeff_a: An optional float.
Defaults to -0.75. \n
*@li exclude_outside: An optional int.
Defaults to 0. \n
*@li extrapolation_value: An optional float.
Defaults to 0.0. \n
*@li mode: An optional String.
Defaults to "nearest". \n
*@li nearest_mode: An optional String.
Defaults to "round_prefer_floor". \n

*@par Outputs:
*y: A Tensor with the same type of x's,
shape depends on x and sizes. \n
*/
REG_OP(ResizeGradD)
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(original_size, ListInt)
.ATTR(roi, ListInt, {})
.ATTR(scales, ListFloat, {})
.ATTR(coordinate_transformation_mode, String, "half_pixel")
.ATTR(cubic_coeff_a, Float, -0.75)
.ATTR(exclude_outside, Int, 0)
.ATTR(extrapolation_value, Float, 0.0)
.ATTR(mode, String, "nearest")
.ATTR(nearest_mode, String, "round_prefer_floor")
.OP_END_FACTORY_REG(ResizeGradD)

/**
*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n

*@par Inputs:
@@ -1535,5 +1625,81 @@ REG_OP(DenseImageWarpGrad)
.OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DenseImageWarpGrad)

/**
*@brief This operation samples input X by using interpolation based on flow field grid,
which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of
(x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of
input data x and y is indexing the 3rd dimension (in height dimension). The final result is
the interpolation value of the 4 nearest corner points. The output tensor shape will be [N, C, H, W].

*@par Inputs:
*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.

*@par Attributes:
*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is
supported for now .
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
*@li align_corners: An optional bool. If "true", the centers of the corner
pixels of the input and output tensors are aligned. Defaults to "false" .

*@par Outputs:
*y: Returns 4-D Tensor with the same dtype as `X`.

*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2D operator.
*/
REG_OP(GridSampler2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(interpolation_mode, String, "bilinear")
.ATTR(padding_mode, String, "zeros")
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(GridSampler2D)

/**
*@brief This operation unnormalizes the input grid, which is usually generated by affine_grid.

*@par Inputs:
*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
*@li assist: Assist matrix, a 4-D tensor of type float16.

*@par Attributes:
*@li align_corners: An optional bool. If "true", the centers of the corner
pixels of the input and output tensors are aligned. Defaults to "false" .

*@par Outputs:
*diff: Returns 4-D Tensor with the same shape and dtype as `grid`.
*position: Returns 4-D Tensor with the same shape as `grid`.
*/
REG_OP(GridUnnormal)
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(position, TensorType({DT_INT32}))
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(GridUnnormal)

/**
*@brief This operation unfolds input X based on the unnormalized grid, which is generated by GridUnnormal.

*@par Inputs:
*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`.

*@par Attributes:
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .

*@par Outputs:
*y: Returns 4-D Tensor with the same dtype as `x`.
*/
REG_OP(ImageUnfold)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(position, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(padding_mode, String, "zeros")
.OP_END_FACTORY_REG(ImageUnfold)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_

+ 274
- 0
third_party/fwkacllib/inc/ops/list_ops.h View File

@@ -225,6 +225,280 @@ REG_OP(TensorListSetItem)
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListSetItem)

/**
*@brief Push tensor to list. \n

*@par Inputs:
*@li input_handles: The input tensor lists.
*@li tensor: The tensor push into tensor list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handles: The output tensor lists. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPushBackBatch operator.
*/
REG_OP(TensorListPushBackBatch)
.INPUT(input_handles, TensorType({DT_VARIANT}))
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(output_handles, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListPushBackBatch)

/**
*@brief Stacks all tensors in the list. \n

*@par Inputs:
*@li input_handle: The input tensor list.
*@li element_shape: A shape compatible with that of elements in the tensor. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list.
*@li num_elements: The number of elements in the list. \n

*@par Outputs:
*@li tensor: The tensor of list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListStack operator.
*/
REG_OP(TensorListStack)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.ATTR(num_elements, Int, -1)
.OP_END_FACTORY_REG(TensorListStack)

/**
*@brief Concats all tensors in the list along the 0th dimension.
Requires that all tensors have the same shape except the first dimension. \n

*@par Inputs:
*@li input_handle: The input list.
*@li element_shape: The shape of the uninitialized elements in the list.
If the first dimension is not -1, it is assumed that all list elements have
the same leading dim.
*@li leading_dims: The list of leading dims of uninitialized list elements. Used if
the leading dim of input_handle.element_shape or the element_shape input arg
is not already set. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li tensor: The concated result.
*@li lengths: Output tensor containing sizes of the 0th dimension of tensors
in the list, used for computing the gradient. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListConcatV2 operator.
*/
REG_OP(TensorListConcatV2)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(leading_dims, TensorType({DT_INT64}))
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(lengths, TensorType({DT_INT64}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListConcatV2)

/**
*@brief Splits a tensor into a list. \n

*@par Inputs:
*@li tensor: The input tensor.
*@li element_shape: A shape compatible with that of elements in the tensor.
*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: The list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListSplit operator.
*/
REG_OP(TensorListSplit)
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(lengths, TensorType({DT_INT64}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListSplit)

/**
*@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n

*@par Inputs:
*@li tensor: The input tensor.
*@li element_shape: The shape of elements in the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: An output tensor list . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListFromTensor operator.
*/
REG_OP(TensorListFromTensor)
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListFromTensor)

/**
*@brief Resizes the list. \n

*@par Inputs:
*@li input_handle: The input tensor list.
*@li size: size of the output list. \n

*@par Outputs:
*@li output_handle: The output tensor list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListResize operator.
*/
REG_OP(TensorListResize)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(size, TensorType({DT_INT32}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.OP_END_FACTORY_REG(TensorListResize)

/**
*@brief Creates a Tensor by indexing into the TensorList. \n

*@par Inputs:
*@li input_handle: The input tensor list.
*@li indices: The indices used to index into the list.
*@li element_shape: The shape of elements in the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li values: The tensor. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListGather operator.
*/
REG_OP(TensorListGather)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListGather)

/**
*@brief Creates a TensorList by indexing into a Tensor. \n

*@par Inputs:
*@li tensor: The input tensor.
*@li indices: The indices used to index into the list.
*@li element_shape: The shape of the elements in the list (can be less specified than
the shape of the tensor).
*@li num_elements: The size of the output list. Must be large enough to accommodate
the largest index in indices. If -1, the list is just large enough to include
the largest index in indices. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: The TensorList. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListScatterV2 operator.
*/
REG_OP(TensorListScatterV2)
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(num_elements, TensorType({DT_INT32}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListScatterV2)

/**
*@brief Scatters tensor at indices in an input list. \n

*@par Inputs:
*@li input_handle: The input tensor list.
*@li tensor: The input tensor.
*@li indices: The indices used to index into the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: The TensorList. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListScatterIntoExistingList operator.
*/
REG_OP(TensorListScatterIntoExistingList)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.INPUT(indices, TensorType({DT_INT32}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListScatterIntoExistingList)

/**
*@brief Concat two tensor lists to a new tensor list. \n

*@par Inputs:
*@li input_a: The input tensor list A.
*@li input_b: The input tensor list B. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output: The output list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListConcatLists operator.
*/
REG_OP(TensorListConcatLists)
.INPUT(input_a, TensorType({DT_VARIANT}))
.INPUT(input_b, TensorType({DT_VARIANT}))
.OUTPUT(output, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListConcatLists)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_

+ 59
- 0
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -982,6 +982,65 @@ REG_OP(SoftMarginLossGrad)
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(SoftMarginLossGrad)

/**
*@brief Computes batched the p-norm distance between each pair of
*the two collections of row vectors. \n

*@par Inputs:
*Two inputs, including:
* @li x1: A tensor with shape BxPxM. Must be one of the following types:
* float16, float32. \n
* @li x2: A tensor with shape BxRxM. Must be one of the following types:
* float16, float32. \n

*@par Attributes:
* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n

*@par Outputs:
* y: A Tensor with the same type of x1's and with shape BxPxR. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Cdist. \n
*/
REG_OP(Cdist)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Float, 2.0)
.OP_END_FACTORY_REG(Cdist)

/**
*@brief Computes the grad of x1 in cdist. \n

*@par Inputs:
*Four inputs, including:
* @li grad: Grad with shape BxPxR. Must be one of the following types:
* float16, float32. \n
* @li x1: A tensor with shape BxPxM. Must be one of the following types:
* float16, float32. \n
* @li x2: A tensor with shape BxRxM. Must be one of the following types:
* float16, float32. \n
* @li cdist: Output tensor of cdist forward with shape BxPxR.
* Must be one of the following types: float16, float32. \n

*@par Attributes:
* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n

*@par Outputs:
* y: A Tensor with the same type and shape of x1's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Cdist Backward. \n
*/
REG_OP(CdistGrad)
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(p, Float, 2.0)
.OP_END_FACTORY_REG(CdistGrad)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

+ 30
- 0
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -1065,7 +1065,37 @@ REG_OP(Tril)
.ATTR(diagonal, Int, 0)
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Tril)
/**
*@brief Performs Einstein summation on the input tensors according to "equation".
*@par Inputs:
* One input, including:
* @li x: A dynamic list of Tensors. Must be one of the following types: int32, float16, float32.
* The operands on which the Einstein summation is performed. It's a dynamic input. \n

*@par Attributes:
*equation: The subscripts for the Einstein summation. \n
*tensor_size: tensor size of input \n

*@par Outputs:
*@li y: Sums the product of the elements of the input operands along dimensions specified
using a notation based on the Einstein summation convention. \n

*@attention Constraints:
*Input tensor_size must be Int. \n

*@par Third-party framework compatibility
*Compatible with Pytorch einsum operator.
*/
REG_OP(EinSum)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(equation, String)
.REQUIRED_ATTR(tensor_size, Int)
.OP_END_FACTORY_REG(EinSum)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 54
- 0
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -428,6 +428,33 @@ REG_OP(MVN)
.OP_END_FACTORY_REG(MVN)

/**
*@brief Normalizes the input . \n

*@par Inputs:
* One input:
*x: An NCHW tensor of type float16 or float32 . \n

*@par Attributes:
*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9" . \n
*@li axes: A list of Integers, along which axis to reduce. Defaults to "[0, 2, 3]". \n

*@par Outputs:
*y: An NCHW tensor of type float16 or float32 . \n

*@attention Constraints:
* The input tensor must have the NCHW format, whose shape length must be 4.
*@par Third-party framework compatibility
* Compatible with the ONNX operator MeanVarianceNormalization.
*/

REG_OP(MVNV2)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */
.ATTR(eps, Float, 1e-9)
.ATTR(axes, ListInt, {0, 2, 3})
.OP_END_FACTORY_REG(MVNV2)

/**
*@brief Normalizes the input "x1" . \n

*@par Inputs:
@@ -1206,6 +1233,33 @@ REG_OP(Centralization)
.OP_END_FACTORY_REG(Centralization)

/**
*@brief Roll the tensor along the given dimension(s).
* Elements that are shifted beyond the last position are re-introduced at the first position.
* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n

*@par Inputs:
*One inputs, including:
* @li x: A tensor . Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n

*@par Attributes:
* @li shifts: The number of places by which the elements of the tensor are shifted. \n
* @li dims: Axis along which to roll. \n

*@par Outputs:
* y: A Tensor with the same type and shape of x's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Roll. \n
*/
REG_OP(Roll)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
.REQUIRED_ATTR(shifts, ListInt)
.ATTR(dims, ListInt, {})
.OP_END_FACTORY_REG(Roll)

/**
*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
logistic loss between input_x and input_y (containing 1 or -1). \n



+ 55
- 0
third_party/fwkacllib/inc/ops/nn_ops.h View File

@@ -49,5 +49,60 @@ REG_OP(InTopKV2)
.INPUT(k, TensorType({IndexNumberType}))
.OUTPUT(precision, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(InTopKV2)

/**
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
operation is used for training.
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
*/
REG_OP(FusedBatchNormV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(FusedBatchNormV2)
}// namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

+ 2
- 2
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -1502,14 +1502,14 @@ REG_OP(AdaptiveAvgPool2d)
* @brief Compute gradients of adaptive averagev2 pooling function.

* @par Inputs:
* @li input_grad: A NCHW Tensor. Must be one of the following data types:
* @li input_grad: A Tensor. Must be one of the following data types:
* float16, float32.

* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32.

* @par Outputs:
* @li output_grad: A tensor with the same shape and type as "orig_input_shape".
* @li output_grad: A tensor with the same type as "input_grad".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad.


+ 28
- 0
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -531,6 +531,34 @@ REG_OP(Elu)
.OP_END_FACTORY_REG(Elu)

/**
*@brief Continuously Differentiable Exponential Linear Units:
* Performs the linear unit element-wise on the input tensor X using the formula:
* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n

*@par Inputs:
*x: A float16, float32 or double, for the input data type . \n

*@par Attributes:
*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n

*@par Outputs:
*y: A float16, float32 or double, for the normalized result . \n

*@attention Constraints:
*@li The input is of type float16 or float32 . \n

*@par Multiple batches supported or not
*Supported
*@par Third-party framework compatibility
*@li Compatible with ONNX's Celu operator
*/
REG_OP(Celu)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(Celu)

/**
*@brief Computes gradients for the exponential linear (Elu) operation.
*
*@par Inputs:


+ 1
- 1
third_party/fwkacllib/inc/ops/pad_ops.h View File

@@ -101,7 +101,7 @@ REG_OP(FillD)
*/
REG_OP(BroadcastTo)
.INPUT(x, TensorType::BasicType())
.INPUT(shape, TensorType({DT_INT32}))
.INPUT(shape, TensorType({DT_INT32,DT_INT64}))
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(BroadcastTo)



+ 56
- 0
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -240,6 +240,30 @@ REG_OP(GatherV2D)
.OP_END_FACTORY_REG(GatherV2D)

/**
*@brief Gathers values along an axis specified by dim. \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
*@li index: A Tensor. Must be one of the following types: int64 . \n

*@par Attributes:
* dim: the axis along which to index . \n

*@par Outputs:
* y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
*Compatible with the PyTorch operator Gather.
*/

REG_OP(GatherElements)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(index, TensorType({DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.ATTR(dim, Int, 0)
.OP_END_FACTORY_REG(GatherElements)

/**
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op
extracts a slice of size (end-begin)/stride from the given input tensor.
Starting at the location specified by begin the slice continues by
@@ -487,6 +511,38 @@ REG_OP(UnsortedSegmentSum)
.OP_END_FACTORY_REG(UnsortedSegmentSum)

/**
*@brief Creates a one-dimensional tensor of size "steps" whose values are evenly spaced from "start" to
* "end", inclusive, on a logarithmic scale with base "base". \n

*@par Inputs:
*One input, including:
* @li assist: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li start: A required float. Used to select the start. \n
* @li end: A required float. Used to select the end. \n
* @li steps: An optional int. Defaults to 100. \n
* @li base: An optional float. Defaults to 10.0. \n
* @li dtype: An optional int. Defaults to 1. \n

*@par Outputs:
*y: A Tensor with the same type and shape of input_x's. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator logspace. \n
*/
REG_OP(LogSpaceD)
.INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR (start, Float)
.REQUIRED_ATTR (end, Float)
.ATTR(steps, Int, 100)
.ATTR(base, Float, 10.0)
.ATTR(dtype, Int, 1)
.OP_END_FACTORY_REG(LogSpaceD)

/**
*@brief Computes the sum along segments of a tensor . \n

*@par Inputs:


+ 1
- 1
third_party/fwkacllib/inc/runtime/base.h View File

@@ -339,7 +339,7 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream);
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream);
RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream);

/**
* @ingroup dvrt_base


+ 23
- 0
third_party/fwkacllib/inc/runtime/config.h View File

@@ -132,6 +132,11 @@ typedef struct tagRtPlatformConfig {
uint32_t platformConfig;
} rtPlatformConfig_t;

// Timeout categories configurable through rtSetOpWaitTimeOut /
// rtSetOpExecuteTimeOut (declared later in this header).
typedef enum tagRTTaskTimeoutType {
  RT_TIMEOUT_TYPE_OP_WAIT = 0,  // timeout while a task waits (e.g. event wait)
  RT_TIMEOUT_TYPE_OP_EXECUTE,   // timeout while an op executes
} rtTaskTimeoutType_t;

/**
* @ingroup
* @brief get AI core count
@@ -203,6 +208,24 @@ RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
*/
RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value);

/**
* @ingroup
* @brief set event wait task timeout time.
* @param [in] timeout
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);

/**
* @ingroup
* @brief set op execute task timeout time.
* @param [in] timeout
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif


+ 4
- 4
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -188,7 +188,7 @@ typedef void (*rtCallback_t)(void *fnData);
/**
* @ingroup rt_kernel
* @brief kernel mode
*/
**/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)
@@ -211,7 +211,7 @@ RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

/**
* @ingroup rt_kernel
* @brief register device binary
* @brief register device binary with all kernel
* @param [in] bin device binary description
* @param [out] handle device binary handle
* @return RT_ERROR_NONE for ok
@@ -330,7 +330,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] handle program
* @param [in] devFunc device function description
* @param [in] devFunc device function description.
* @param [in] blockDim block dimentions
* @param [in] args argments address for kernel function
* @param [in] argsSize argements size
@@ -341,7 +341,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo);

/**
* @ingroup rt_kernel


+ 20
- 2
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -133,12 +133,13 @@ typedef struct tagAllKernelTaskInfo {
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
const void *dev_func;
void *devfunc;
void *handle;
uint8_t *smDesc;
uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;

typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@@ -263,7 +264,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allkernelTask;
rtAllKernelTaskInfo_t allKernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;
@@ -285,10 +286,27 @@ typedef struct tagTaskInfo {
} u;
} rtTaskInfo_t;

// Node view of a device label: identifies the label by a node index.
// NOTE(review): presumably nodeIdx is the node's index within the model graph — confirm against the scheduler code.
typedef struct tagNodeInfo_t {
  uint32_t nodeIdx;
  uint32_t reserved[1];  // reserved/padding for future use
} rtNodeInfo;

// HWTS (hardware task scheduler) view of a device label.
// NOTE(review): field semantics inferred from names — confirm against the HWTS documentation.
typedef struct tagHwtsInfo_t {
  uint16_t taskId;
  uint16_t sqExeHead;      // presumably submission-queue execution head
  uint16_t streamExeHead;  // presumably stream execution head
  uint16_t reserved[2];    // reserved/padding for future use
} rtHwtsInfo;

// Device-side label descriptor: ties a label to its model and stream, plus a
// scheduler-specific view (node-based or HWTS-based) carried in the union.
// NOTE(review): which union member is active presumably depends on the
// scheduler type in use — confirm against the runtime task-dispatch code.
typedef struct tagLabelDevInfo_t {
  uint16_t modelId;
  uint16_t streamId;
  uint16_t labelId;
  union {
    rtNodeInfo nodeInfo;
    rtHwtsInfo hwtsInfo;
    uint16_t reserved[5];  // sizes the union / reserved for future use
  } u;               // fixed: space after '}' for consistency with sibling typedefs
} rtLabelDevInfo;    // fixed: space after '}' (matches e.g. "} rtTaskInfo_t;")

typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo);


+ 22
- 0
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -189,6 +189,28 @@ RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream);
*/
RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr,
uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType);

/*
* @ingroup dvrt_stream
* @brief enable debug for dump overflow exception with stream
* @param [in] addr: ddr address of kernel exception dumpped
* @param [in] stream: stream handle
* @param [in] flag: debug flag
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr,
uint32_t *streamId, uint32_t *taskId);

/*
* @ingroup rt_model
* @brief disable debug for dump overflow exception with stream
* @param [in] stream: stream handle
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif


+ 0
- 82
third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h View File

@@ -11,95 +11,13 @@
/** @defgroup aoe aoe调优接口 */
#ifndef TUNE_API_H
#define TUNE_API_H
#include <vector>
#include <map>
#include <string>
#include "graph/graph.h"
#include "ge/ge_api.h"
#include "aoe_types.h"

/**
* @ingroup aoe
*
* aoe status
*/
enum MsTuneStatus {
MSTUNE_SUCCESS, /** tune success */
MSTUNE_FAILED, /** tune failed */
};

// Option keys for the training options set
const std::string MSTUNE_SELF_KEY = "mstune";
const std::string MSTUNE_GEINIT_KEY = "initialize";
const std::string MSTUNE_GESESS_KEY = "session";

#ifdef __cplusplus
extern "C" {
#endif

struct RunnerInitConfig {
// online only
std::string profPath;
std::string parserPath;
// ncs only
std::vector<uint32_t> devList;
};

struct RunnerOpInfo {
std::string opName;
uint64_t opCostTime;
uint64_t aicoreCostTime;
// gradient_split only
std::string modelName;
std::string opType;
std::vector<uint64_t> start;
std::vector<uint64_t> end;
};

struct RunnerModelInfo {
uint64_t totalCostTime;
};

struct RunnerRunResult {
std::vector<RunnerModelInfo> modelInfo;
std::vector<RunnerOpInfo> opInfo;
};

struct RunnerResult {
uint64_t totalCostTime;
std::map<std::string, uint64_t> opCostTime;
std::map<std::string, uint64_t> aicoreCostTime;
};

struct RunnerDataBuf {
void *ptr = nullptr;
size_t size = 0;
};

struct AOEBufferData {
std::shared_ptr<uint8_t> data = nullptr;
uint64_t length;
};

struct RunnerConfig {
bool isProf;
uint32_t loop;
// offline only
std::vector<RunnerDataBuf> input;
std::vector<RunnerDataBuf> output;
std::string modelPath;
RunnerDataBuf modelData;
// online only
uint32_t devId;
std::vector<std::vector<ge::Tensor>> inputs;
std::vector<ge::Graph> dependGraph; // run graph (for training)
};
#ifdef __cplusplus
}
#endif

/**
* @ingroup aoe
* @par 描述: 命令行调优
*
* @attention 无


Loading…
Cancel
Save