diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h index ef5b4772..a53d029d 100644 --- a/inc/external/acl/acl.h +++ b/inc/external/acl/acl.h @@ -26,9 +26,9 @@ extern "C" { #endif // Current version is 1.0.0 -#define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 -#define ACL_PATCH_VERSION 0 +#define ACL_MAJOR_VERSION 1 +#define ACL_MINOR_VERSION 0 +#define ACL_PATCH_VERSION 0 /** * @ingroup AscendCL @@ -66,8 +66,17 @@ ACL_FUNC_VISIBILITY aclError aclFinalize(); */ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion); +/** + * @ingroup AscendCL + * @brief get recent error message + * + * @retval null for failed + * @retval OtherValues success +*/ +ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg(); + #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_H_ +#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index 657bf67c..417a80c8 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -626,6 +626,15 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, const char *fmt, ...); +/** + * @ingroup AscendCL + * @brief get soc name + * + * @retval null for failed + * @retval OtherValues success +*/ +ACL_FUNC_VISIBILITY const char *aclrtGetSocName(); + #define ACL_APP_LOG(level, fmt, ...) 
\ aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 3a62ce32..e2f95a56 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -282,6 +282,21 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD /** * @ingroup AscendCL + * @brief Set aclTensorDesc to aclmdlDataset + * + * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added + * @param tensorDesc [IN] aclTensorDesc address to be added + * @param index [IN] index of tensorDesc which to be added + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, + aclTensorDesc *tensorDesc, + size_t index); + +/** + * @ingroup AscendCL * @brief Get the number of aclDataBuffer in aclmdlDataset * * @param dataset [IN] aclmdlDataset poiter diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h index 6bbb855c..353d2a1a 100644 --- a/inc/external/acl/acl_op_compiler.h +++ b/inc/external/acl/acl_op_compiler.h @@ -38,9 +38,15 @@ typedef enum { ACL_OP_DEBUG_LEVEL, ACL_DEBUG_DIR, ACL_OP_COMPILER_CACHE_MODE, - ACL_OP_COMPILER_CACHE_DIR + ACL_OP_COMPILER_CACHE_DIR, + ACL_OP_PERFORMANCE_MODE } aclCompileOpt; +typedef enum aclCompileFlag { + ACL_OP_COMPILE_DEFAULT, + ACL_OP_COMPILE_FUZZ +} aclOpCompileFlag; + /** * @ingroup AscendCL * @brief compile op @@ -108,6 +114,18 @@ ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, */ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value); +/** + * @ingroup AscendCL + * @brief set compile flag + * + * @param flag [IN] compile flag, ACL_OP_COMPILE_DEFAULT means compile with default mode + * ACL_OP_COMPILE_FUZZ means compile with fuzz mode + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); + #ifdef __cplusplus } #endif diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 703ca4ca..3c777ecc 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -957,6 +957,17 @@ ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); */ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); +/** + * @ingroup AscendCL + * @brief Set the timeout interval for waitting of op + * + * @param timeout [IN] op wait timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); + #ifdef __cplusplus } #endif diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 47f16d9f..02a357fc 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -94,6 +94,7 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect #ifdef __cplusplus } diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index dcaa3936..90dc70e8 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output // Supported Pixel Format enum acldvppPixelFormat { - PIXEL_FORMAT_YUV_400 = 0, // 0 - PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 - PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 - PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 - PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 - 
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 - PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 - PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 - PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 - PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 - PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 - PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 - PIXEL_FORMAT_RGB_888 = 12, // 12 - PIXEL_FORMAT_BGR_888 = 13, // 13 - PIXEL_FORMAT_ARGB_8888 = 14, // 14 - PIXEL_FORMAT_ABGR_8888 = 15, // 15 - PIXEL_FORMAT_RGBA_8888 = 16, // 16 - PIXEL_FORMAT_BGRA_8888 = 17, // 17 - PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 - PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 - PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 - PIXEL_FORMAT_YVU_PLANAR_422, - PIXEL_FORMAT_YVU_PLANAR_444, - PIXEL_FORMAT_RGB_444 = 23, - PIXEL_FORMAT_BGR_444, - PIXEL_FORMAT_ARGB_4444, - PIXEL_FORMAT_ABGR_4444, - PIXEL_FORMAT_RGBA_4444, - PIXEL_FORMAT_BGRA_4444, - PIXEL_FORMAT_RGB_555, - PIXEL_FORMAT_BGR_555, - PIXEL_FORMAT_RGB_565, - PIXEL_FORMAT_BGR_565, - PIXEL_FORMAT_ARGB_1555, - PIXEL_FORMAT_ABGR_1555, - PIXEL_FORMAT_RGBA_1555, - PIXEL_FORMAT_BGRA_1555, - PIXEL_FORMAT_ARGB_8565, - PIXEL_FORMAT_ABGR_8565, - PIXEL_FORMAT_RGBA_8565, - PIXEL_FORMAT_BGRA_8565, - PIXEL_FORMAT_RGB_BAYER_8BPP = 50, - PIXEL_FORMAT_RGB_BAYER_10BPP, - PIXEL_FORMAT_RGB_BAYER_12BPP, - PIXEL_FORMAT_RGB_BAYER_14BPP, - PIXEL_FORMAT_RGB_BAYER_16BPP, - PIXEL_FORMAT_BGR_888_PLANAR = 70, - PIXEL_FORMAT_HSV_888_PACKAGE, - PIXEL_FORMAT_HSV_888_PLANAR, - PIXEL_FORMAT_LAB_888_PACKAGE, - PIXEL_FORMAT_LAB_888_PLANAR, - PIXEL_FORMAT_S8C1, - PIXEL_FORMAT_S8C2_PACKAGE, - PIXEL_FORMAT_S8C2_PLANAR, - PIXEL_FORMAT_S16C1, - PIXEL_FORMAT_U8C1, - PIXEL_FORMAT_U16C1, - PIXEL_FORMAT_S32C1, - PIXEL_FORMAT_U32C1, - PIXEL_FORMAT_U64C1, - PIXEL_FORMAT_S64C1, - PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, - PIXEL_FORMAT_YVU_SEMIPLANAR_440, - PIXEL_FORMAT_FLOAT32, - PIXEL_FORMAT_BUTT, - PIXEL_FORMAT_UNKNOWN = 10000 + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + 
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + 
PIXEL_FORMAT_UNKNOWN = 10000 }; // Stream Format -enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; +enum acldvppStreamFormat { + H265_MAIN_LEVEL = 0, + H264_BASELINE_LEVEL, + H264_MAIN_LEVEL, + H264_HIGH_LEVEL +}; // Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; +enum acldvppChannelMode { + DVPP_CHNMODE_VPC = 1, + DVPP_CHNMODE_JPEGD = 2, + DVPP_CHNMODE_JPEGE = 4 +}; // Supported Border Type -enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; +enum acldvppBorderType { + BORDER_CONSTANT = 0, + BORDER_REPLICATE, + BORDER_REFLECT, + BORDER_REFLECT_101 +}; // Venc parameter type enum aclvencChannelDescParamType { - ACL_VENC_THREAD_ID_UINT64 = 0, - ACL_VENC_CALLBACK_PTR, - ACL_VENC_PIXEL_FORMAT_UINT32, - ACL_VENC_ENCODE_TYPE_UINT32, - ACL_VENC_PIC_WIDTH_UINT32, - ACL_VENC_PIC_HEIGHT_UINT32, - ACL_VENC_KEY_FRAME_INTERVAL_UINT32, - ACL_VENC_BUF_ADDR_PTR, - ACL_VENC_BUF_SIZE_UINT32, - ACL_VENC_RC_MODE_UINT32, - ACL_VENC_SRC_RATE_UINT32, - ACL_VENC_MAX_BITRATE_UINT32, - ACL_VENC_MAX_IP_PROP_UINT32 + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 }; // Jpeg picture format enum acldvppJpegFormat { - ACL_JPEG_CSS_444 = 0, - ACL_JPEG_CSS_422, - ACL_JPEG_CSS_420, - ACL_JPEG_CSS_GRAY, - ACL_JPEG_CSS_440, - ACL_JPEG_CSS_411, - ACL_JPEG_CSS_UNKNOWN = 1000 + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 }; /** @@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t 
acldvppGetPicDescRetCode(const acldvppPicDesc *picD * @retval null for failed. * @retval other success */ -ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, + uint32_t right, + uint32_t top, uint32_t bottom); /** @@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, + uint32_t left, + uint32_t right, + uint32_t top, uint32_t bottom); /** @@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, - const void *param); + aclvencChannelDescParamType paramType, size_t length, const void *param); /** * @ingroup AscendCL @@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, - size_t *paramRetSize, void *param); + aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); /** * @ingroup AscendCL @@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, + uint32_t size, + uint32_t *width, + uint32_t *height, int32_t *components); /** @@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width, - uint32_t *height, int32_t *components, +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, + uint32_t size, + uint32_t *width, + uint32_t *height, + int32_t *components, acldvppJpegFormat *format); /** @@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_ * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, - const acldvppJpegeConfig *config, uint32_t *size); + const acldvppJpegeConfig *config, + uint32_t *size); /** * @ingroup AscendCL @@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, uint32_t *decSize); +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, + uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, + uint32_t *decSize); /** * @ingroup AscendCL @@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_ * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, - uint32_t *height, int32_t *components); +ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, + uint32_t dataSize, + uint32_t *width, + uint32_t *height, + int32_t *components); /** * @ingroup AscendCL @@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, uint32_t *decSize); +ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, + uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, + uint32_t *decSize); /** * @ingroup AscendCL @@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe * @see acldvppCreateChannel | acldvppCreatePicDesc * | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppResizeConfig *resizeConfig, aclrtStream stream); /** @@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, aclrtStream stream); /** @@ -1746,9 +1781,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - acldvppResizeConfig *resizeConfig, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + acldvppResizeConfig *resizeConfig, + aclrtStream stream); + /** * @ingroup AscendCL @@ -1772,9 +1811,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *chann * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], aclrtStream stream); + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + aclrtStream stream); /** * @ingroup AscendCL @@ -1799,10 +1841,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig */ ACL_FUNC_VISIBILITY aclError 
acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], - acldvppResizeConfig *resizeConfig, aclrtStream stream); + acldvppResizeConfig *resizeConfig, + aclrtStream stream); /** * @ingroup AscendCL @@ -1825,9 +1870,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc * * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - acldvppRoiConfig *pasteArea, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, + aclrtStream stream); /** * @ingroup AscendCL @@ -1851,10 +1899,13 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, acldvppRoiConfig *pasteArea, - acldvppResizeConfig *resizeConfig, aclrtStream stream); + acldvppResizeConfig *resizeConfig, + aclrtStream stream); /** * @ingroup AscendCL @@ -1879,11 +1930,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc * * * 
@see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], - acldvppRoiConfig *pasteAreas[], aclrtStream stream); + ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], + aclrtStream stream); /** * @ingroup AscendCL @@ -1909,10 +1963,16 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc * * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( - acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, - acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], - acldvppResizeConfig *resizeConfig, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], + acldvppResizeConfig *resizeConfig, + aclrtStream stream); + /** * @ingroup AscendCL @@ -1940,8 +2000,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError 
acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, + const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -1959,8 +2022,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD * * @see acldvppCreateChannel | acldvppCreateJpegeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - const void *data, uint32_t *size, acldvppJpegeConfig *config, +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + const void *data, + uint32_t *size, + acldvppJpegeConfig *config, aclrtStream stream); /** @@ -1978,8 +2044,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, + const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -2034,8 +2103,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + acldvppPicDesc *output, + aclvdecFrameConfig *config, + void *userData); /** * @ingroup AscendCL @@ -2054,8 +2126,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc 
| aclvdecSendFrame */ -ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - aclvdecFrameConfig *config, void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + aclvdecFrameConfig *config, + void *userData); /** * @ingroup AscendCL @@ -2076,8 +2150,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -2099,8 +2175,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + void *reserve, + aclrtStream stream); /** * @ingroup AscendCL @@ -2112,7 +2191,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, + uint32_t mode); /** * @ingroup AscendCL @@ -2147,7 +2227,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, + uint32_t outMode); /** * @ingroup AscendCL @@ -2244,7 +2325,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, + uint32_t dim, + uint8_t **data, uint32_t *len); /** * @ingroup AscendCL @@ -2262,8 +2345,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap */ ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppLutMap *lutMap, aclrtStream stream); + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, + aclrtStream stream); /** * @ingroup AscendCL @@ -2284,7 +2369,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); * * @retval ACL_SUCCESS for success, other for failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, +ACL_FUNC_VISIBILITY aclError 
acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, + uint32_t index, double value); /** @@ -2429,8 +2515,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppBorderConfig *borderConfig, aclrtStream stream); + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, + aclrtStream stream); /** * @ingroup AscendCL @@ -2447,8 +2535,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, - acldvppHist *hist, void *reserve, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *srcPicDesc, + acldvppHist *hist, + void *reserve, + aclrtStream stream); /** * @ingroup AscendCL @@ -2457,7 +2548,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel * @retval null for failed. * @retval OtherValues success. 
*/ -ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); +ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); /** * @ingroup AscendCL @@ -2514,7 +2605,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, * * @see acldvppCreateHist | acldvppVpcCalcHistAsync */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); /** * @ingroup AscendCL @@ -2533,6 +2624,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); */ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); + /** * @ingroup AscendCL * @brief dvpp vpc batch crop, resize config and make border. @@ -2556,13 +2648,18 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); * * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( - acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, - acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], - acldvppResizeConfig *resizeConfig, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppBorderConfig *borderCfgs[], + acldvppResizeConfig *resizeConfig, + aclrtStream stream); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h index 311e78f2..cc4bea8e 100644 --- a/inc/external/hccl/hccl.h +++ b/inc/external/hccl/hccl.h @@ -119,6 +119,15 @@ extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount 
HcclComm comm, aclrtStream stream); /** + * @brief Barrier operator. + * + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); + +/** * @brief Destroy HCCL comm * * @param comm A pointer identifying the communication resource targetting diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 47f16d9f..02a357fc 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -94,6 +94,7 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index 38a689ee..f8d5ccf3 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 993f36ba..3d196e41 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -550,6 +550,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); 
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + +MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); +MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); +MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); #define MMPA_DLL_API #ifdef __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 49e97a5d..e6b6f71e 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -557,6 +557,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + +MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); +MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); +MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); #ifdef __cplusplus #if __cplusplus } diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 0ec15367..209967bd 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -146,6 +146,8 @@ REG_OP(Cast) /** *@brief Returns the truth value of (x1 >= x2) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: *Two inputs, including: @@ -167,6 +169,8 @@ REG_OP(GreaterEqual) /** *@brief Returns the truth value of (x1 < x2) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: *Two inputs, including: @@ -567,6 +571,8 @@ REG_OP(InvGrad) /** *@brief: Returns the truth value of (x <= y) element-wise. 
\n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: * Two inputs, including: @@ -1464,6 +1470,8 @@ REG_OP(ReciprocalGrad) /** *@brief Returns the truth value of (x1 > x2) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: *@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8, @@ -3801,6 +3809,34 @@ REG_OP(ArgMaxGradD) .OP_END_FACTORY_REG(ArgMaxGradD) /** +*@brief Calculates the reversed outputs of the function "AddMatMatElements" +* c = c * beta + alpha * a * b + +*@par Inputs: +*Three inputs, including: +* @li c: A mutable Tensor. Must be one of the following types: +* float16, float32. +* @li a: A mutable Tensor of the same type as "c". +* @li b: A mutable Tensor of the same type as "c". +* @li beta: A mutable scalar of the same type as "c". +* @li alpha: A mutable scalar of the same type as "c". \n + +*@par Outputs: +* @li c: A mutable Tensor. Has the same type as "c". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AddMatMatElements. +*/ +REG_OP(AddMatMatElements) + .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(c, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(AddMatMatElements) + +/** *@brief Returns cosine similarity between x1 and x2,computed along dim. \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 6ae1613c..3ef4d95e 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -24,6 +24,22 @@ #include "graph/operator_reg.h" namespace ge { +/** +*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . 
\n + +*@par Inputs: +*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n + +*@par Outputs: +*image:A Tensor of type uint8. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow DecodeGif operator. +*/ +REG_OP(DecodeGif) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .OP_END_FACTORY_REG(DecodeGif) /** *@brief Adjust the hue of one or more images . \n @@ -1071,6 +1087,88 @@ REG_OP(EncodePng) .ATTR(compression, Int, -1) .OP_END_FACTORY_REG(EncodePng) + +/** +*@brief PNG-decode an image. +*@par Inputs: +*contents: 0-D. PNG-decoded image . + +*@par Attributes: +*channels: graph channels \n +*dtype: type of image + +*@par Outputs: +*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] +where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB; +4: for RGBA . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow DecodePng operator. +*/ +REG_OP(DecodePng) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8, DT_UINT16})) + .ATTR(dtype, Type, DT_UINT8) + .ATTR(channels, Int, 0) + .OP_END_FACTORY_REG(DecodePng) + +/** +*@brief Bmp-decode an image. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n + +*@par Attributes: +*@li channels: Decode the desired number of color channels of the image. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. + +* @par Third-party framework compatibility +* Compatible with tensorflow DecodeBmp operator. +*/ + +REG_OP(DecodeBmp) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .OP_END_FACTORY_REG(DecodeBmp) + +/* +*@brief Function parse image from string to int. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n +*@li crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. 
\n + +*@par Attributes: +*@li channels: An optional int. Defaults to 0. Number of color channels for the +*decoded image. +*@li ratio: An optional int. Defaults to 1. Downscaling ratio. +*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower +*but nicer upscaling of the chroma planes +*@li try_recover_truncated: An optional bool. Defaults to False. If true try to +*recover an image from truncated input. +*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required +fraction of lines before a truncated input is accepted. +*@li dct_method: An optional string. Defaults to "". string specifying a hint +*about the algorithm used for decompression. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. +*/ +REG_OP(DecodeAndCropJpeg) + .INPUT(contents, TensorType({DT_STRING})) + .INPUT(crop_window, TensorType({DT_INT32})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .ATTR(ratio, Int, 1) + .ATTR(fancy_upscaling, Bool, true) + .ATTR(try_recover_truncated, Bool, false) + .ATTR(acceptable_fraction, Float, 1.0) + .ATTR(dct_method, String, "") + .OP_END_FACTORY_REG(DecodeAndCropJpeg) + /** *@brief Resizes "images" to "size" using bilinear interpolation . \n diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 1a59d88e..532a641d 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -83,6 +83,25 @@ REG_OP(Cholesky) .OP_END_FACTORY_REG(Cholesky) /** +*@brief Computes the outer product of two 1D vectors . \n + +*@par Inputs: +*The input x1 and x2 has to be a 1D vector.Inputs include: +*@li x1:A Tensor. Must be one of the following types: float16, float32. +Shape is [N] . \n +*@li x2:A Tensor. Must have the same type as x. Shape is [M] . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . 
\n +*/ + +REG_OP(Ger) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Ger) + +/** *@brief Computes the sign and the log of the absolute value of the determinant of one or more square matrices . \n @@ -328,6 +347,34 @@ REG_OP(SelfAdjointEig) .OP_END_FACTORY_REG(SelfAdjointEig) /** +*@brief Computes the sign and the log of the absolute value of the determinant +of one or more square matrices . \n + +*@par Inputs: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32, float16 +Shape is [..., M, M] . \n + +*@par Outputs: +*@li y:A Tensor. Has the same type as x. +*@li sign:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow LogMatrixDeterminant operator. +*/ + +REG_OP(Slogdet) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(sign, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Slogdet) + +/** *@brief Computes the singular value decompositions of one or more matrices . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 28f7f0aa..4a44d744 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -534,6 +534,29 @@ REG_OP(NextAfter) .OP_END_FACTORY_REG(NextAfter) /** +*@brief Calculate the P-norm distance between vectors function. \n + +*@par Inputs: +*One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +*@li p: An optional float.Defaults to 2. 
\n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Pdist. \n +*/ +REG_OP(Pdist) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(Pdist) + +/** *@brief Compute element-wise finiteness, return a boolean tensor. *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 49a49931..3340007c 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -91,6 +91,36 @@ REG_OP(MatMulV2) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(MatMulV2) +/** +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. +* @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. +* @li compress_index: A compress index matrix of type int8. +* @li bias: A 1D Tensor. Must be one of the following types: int32, float16. + +*@par Attributes: +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n + +*@par Outputs: +*y: The result matrix Tensor. 2D. Must be one of the following types: float16, +* int32. 
\n + +*/ +REG_OP(MatMulV2Compress) + .INPUT(x1, TensorType({DT_INT8})) + .INPUT(x2, TensorType({DT_INT8})) + .INPUT(compress_index, TensorType({DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_INT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_INT32, DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .ATTR(transpose_x1, Bool, false) + .ATTR(transpose_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(MatMulV2Compress) /** *@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n @@ -189,8 +219,8 @@ REG_OP(BatchMatMul) * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n * @par Attributes: -* @li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -* @li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n * @par Outputs: * y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, @@ -201,15 +231,16 @@ REG_OP(BatchMatMul) */ REG_OP(BatchMatMulV2) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .ATTR(adj_x1, Bool, false) .ATTR(adj_x2, Bool, false) + .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(BatchMatMulV2) - /** *@brief Computes half the L2 norm of a tensor without the sqrt . \n @@ -369,7 +400,7 @@ REG_OP(MatrixSetDiagD) * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, * uint64 *@li indices: An ND Tensor. 
-*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float32, int8, uint8, double, * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, @@ -429,7 +460,7 @@ REG_OP(TensorScatterUpdate) *@li var: An ND Tensor . \n *Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32 or int64. +*@li indices: An ND Tensor of type int32 or int64 *@li updates: An Tensor. format:NCHW, NHWC . \n @@ -447,10 +478,10 @@ REG_OP(TensorScatterUpdate) * Compatible with the TensorFlow operator ScatterAdd. */ REG_OP(ScatterAdd) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterAdd) @@ -463,7 +494,7 @@ REG_OP(ScatterAdd) *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 @@ -478,10 +509,10 @@ REG_OP(ScatterAdd) * Compatible with the TensorFlow operator ScatterDiv. 
*/ REG_OP(ScatterDiv) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterDiv) @@ -493,7 +524,7 @@ REG_OP(ScatterDiv) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: @@ -507,10 +538,10 @@ REG_OP(ScatterDiv) * Compatible with the TensorFlow operator ScatterNdAdd. */ REG_OP(ScatterNdAdd) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterNdAdd) @@ -550,7 +581,7 @@ REG_OP(TensorScatterAdd) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. 
-*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 @@ -565,10 +596,10 @@ REG_OP(TensorScatterAdd) * Compatible with the TensorFlow operator ScatterNdSub. */ REG_OP(ScatterNdSub) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterNdSub) @@ -608,7 +639,7 @@ REG_OP(TensorScatterSub) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: @@ -622,10 +653,10 @@ REG_OP(TensorScatterSub) * Compatible with the TensorFlow operator ScatterSub. 
*/ REG_OP(ScatterSub) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterSub) @@ -796,7 +827,7 @@ REG_OP(ConfusionMatrix) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 @@ -813,7 +844,7 @@ REG_OP(ConfusionMatrix) */ REG_OP(ScatterMul) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) + .INPUT(indices, TensorType::IndexNumberType()) .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) @@ -826,13 +857,13 @@ REG_OP(ScatterMul) *@par Inputs: * Three inputs, including: *@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". 
If "True", the operation @@ -845,10 +876,10 @@ REG_OP(ScatterMul) * Compatible with the TensorFlow operator ScatterMin. */ REG_OP(ScatterMin) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMin) @@ -859,13 +890,13 @@ REG_OP(ScatterMin) * Three inputs, including: *@li var: An ND Tensor . \n -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An NCHW, NHWC, or ND Tensor . \n -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An NCHW, NHWC, or ND Tensor . \n -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". @@ -878,10 +909,10 @@ REG_OP(ScatterMin) * Compatible with the TensorFlow operator ScatterMax. 
*/ REG_OP(ScatterMax) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMax) @@ -895,7 +926,7 @@ REG_OP(ScatterMax) *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor . \n -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 @@ -911,10 +942,10 @@ REG_OP(ScatterMax) * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(ScatterUpdate) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterUpdate) @@ -1096,6 +1127,33 @@ REG_OP(EinSum) .REQUIRED_ATTR(equation, String) .REQUIRED_ATTR(tensor_size, Int) .OP_END_FACTORY_REG(EinSum) + +/** +*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n + +*@par Inputs: +*No inputs + +*@par Attributes: +*@li num_rows: An required int. \n +*@li num_columns: An optional int.Defaults to 0. 
\n +*@li batch_shape: An optional ListInt.Defaults to []. \n +*@li dtype: An optional int.Defaults to 0. \n + +*@par Outputs: +*y: A Tensor with targeted type and shape. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Eye. \n +*/ +REG_OP(Eye) + .OUTPUT(y, TensorType::BasicType()) /* "Result, has targeted element type" */ + .REQUIRED_ATTR(num_rows, Int) + .ATTR(num_columns, Int, 0) + .ATTR(batch_shape, ListInt, {}) + .ATTR(dtype, Int, 0) + .OP_END_FACTORY_REG(Eye) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index af59b4e2..33148e62 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -254,22 +254,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. 
*/ - REG_OP(PriorBox) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .REQUIRED_ATTR(aspect_ratio, ListFloat) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBox); +REG_OP(PriorBox) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .REQUIRED_ATTR(aspect_ratio, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBox); /** *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n @@ -306,25 +306,25 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. 
*/ - REG_OP(PriorBoxD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBoxD); +REG_OP(PriorBoxD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBoxD); /** *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n @@ -358,22 +358,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. 
*/ - REG_OP(PriorBoxDV2) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBoxDV2); +REG_OP(PriorBoxDV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBoxDV2); /** *@brief Performs Position Sensitive ROI Pooling . \n @@ -531,10 +531,10 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(Yolo) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) .ATTR(classes, Int, 80) @@ -584,10 +584,10 @@ REG_OP(Yolo) * It is a custom operator. It has no corresponding operator in Caffe. 
*/ REG_OP(YoloV2DetectionOutput) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 5) .ATTR(coords, Int, 4) @@ -598,7 +598,7 @@ REG_OP(YoloV2DetectionOutput) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV2DetectionOutput) @@ -647,12 +647,12 @@ REG_OP(YoloV2DetectionOutput) *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. 
*/ REG_OP(YoloV2DetectionOutputD) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 5) .ATTR(coords, Int, 4) @@ -663,7 +663,7 @@ REG_OP(YoloV2DetectionOutputD) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV2DetectionOutputD) @@ -707,16 +707,16 @@ REG_OP(YoloV2DetectionOutputD) * It is a custom operator. It has no corresponding operator in Caffe. 
*/ REG_OP(YoloV3DetectionOutput) - .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat) @@ -729,7 +729,7 @@ REG_OP(YoloV3DetectionOutput) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutput) @@ -776,22 +776,22 @@ s *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. 
*/ REG_OP(YoloV3DetectionOutputD) - .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat) @@ 
-804,7 +804,7 @@ REG_OP(YoloV3DetectionOutputD) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputD) @@ -848,7 +848,7 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV3DetectionOutputV2) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) @@ -862,7 +862,7 @@ REG_OP(YoloV3DetectionOutputV2) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) .ATTR(out_box_dim, Int, 3) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) @@ -910,9 +910,9 @@ REG_OP(YoloV3DetectionOutputV2) * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. 
*/ REG_OP(YoloV3DetectionOutputV2D) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) @@ -926,7 +926,7 @@ REG_OP(YoloV3DetectionOutputV2D) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) .ATTR(out_box_dim, Int, 3) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) @@ -1466,9 +1466,9 @@ REG_OP(NormalizeBBox) * y: A Tensor. Must have the same type as box_predictions. */ REG_OP(DecodeBboxV2) - .INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) .ATTR(decode_clip, Float, 0.0) .ATTR(reversed_box, Bool, false) @@ -1592,7 +1592,6 @@ selected indices from the boxes tensor, where M <= max_output_size. \n *Compatible with onnx NonMaxSuppression operator. 
*/ - REG_OP(NonMaxSuppressionV7) .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1641,7 +1640,7 @@ REG_OP(RoiExtractor) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(finest_scale, Int, 56) .ATTR(roi_scale_factor, Float, 0) - .ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 }) + .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32}) .ATTR(pooled_height, Int, 7) .ATTR(pooled_width, Int, 7) .ATTR(sample_num, Int, 0) @@ -1649,6 +1648,84 @@ REG_OP(RoiExtractor) .ATTR(aligned, Bool, true) .OP_END_FACTORY_REG(RoiExtractor) +/** +*@brief Performs Position Sensitive PS ROI Pooling . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size. +*@li rois: A tensor of type float16 or float32, with shape +* [batch, 5, rois_num], describing the ROIs, each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates +* the index of the input feature map, "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0" . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels, +* must be greater than 0. +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps, must be within the range (0, 128). +*@li spatial_scale: A required float32, scaling factor for mapping the input +* coordinates to the ROI coordinates . \n + +*@par Outputs: +*y: An NC1HWC0 tensor of type float16 or float32, describing the result +* feature map . 
\n + +*@attention Constraints: +* NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 +*/ +REG_OP(PSROIPoolingV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(output_dim, Int) + .REQUIRED_ATTR(group_size, Int) + .OP_END_FACTORY_REG(PSROIPoolingV2) + +/** +*@brief Performs Position Sensitive PS ROI Pooling Grad . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result +* feature map . \n +*@li rois: A tensor of type float16 or float32, with shape +* [batch, 5, rois_num], describing the ROIs, each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates +* the index of the input feature map, "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0" . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels, +* must be greater than 0. +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps, must be within the range (0, 128). +*@li spatial_scale: A required float32, scaling factor for mapping the input +* coordinates to the ROI coordinates . \n +*@li input_size: A required listInt, mapping the grad input size: (H, W) + +*@par Outputs: +*y: An NC1HWC0 tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size.
+ +*@attention Constraints: +* NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 +*/ +REG_OP(PSROIPoolingGradV2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(output_dim, Int) + .REQUIRED_ATTR(group_size, Int) + .REQUIRED_ATTR(input_size, ListInt) + .OP_END_FACTORY_REG(PSROIPoolingGradV2D) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 7d38beb5..a0251d88 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -526,6 +526,31 @@ REG_OP(LayerNorm) .OP_END_FACTORY_REG(LayerNorm) /** +*@brief Returns a tensor where each sub-tensor of input along dimension +* dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Attributes: +* @li p: Specify L_p norm, the type is float. +* @li dim: The processed dim, the type is int. +* @li maxnorm: Threshold for comparison, the type is float. \n + +*@par Outputs: +*One output, including: +* @li y: shape and dtype of output, should be same shape and type as input.
+*/ +REG_OP(Renorm) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(p, Float) + .REQUIRED_ATTR(dim, Int) + .REQUIRED_ATTR(maxnorm, Float) + .OP_END_FACTORY_REG(Renorm) + +/** *@brief LayerNormGrad operator interface implementation * calculating: dy, x, variance, mean, gamma * pd_xl = data_dy*data_gamma @@ -683,7 +708,68 @@ REG_OP(DropOutDoMask) .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(DropOutDoMask) - + +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: +* scale_x = x *(1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Three inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must meet all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should meet the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 +* @li keep_prob: A mutable Tensor. Must meet all of the following rules: +* shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x" . \n + +*@par Output: +*y: A mutable Tensor. Has the same type as "x". +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DropOutDoMaskV3) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(DropOutDoMaskV3) + +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: +* scale_x = x *(1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Two inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must meet all of the following rules: +* shape of mask should be 1D.
+* dtype of mask should be uint8. +* value of shape should meet the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 +*@par Attributes: +* @li keep_prob: A mutable Tensor. Must meet all of the following rules: +* shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x" . \n + +*@par Output: +*y: A mutable Tensor. Has the same type as "x". +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DropOutDoMaskV3D) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(keep_prob, Float) + .OP_END_FACTORY_REG(DropOutDoMaskV3D) + /** *@brief Scales the input . \n @@ -1356,6 +1442,58 @@ REG_OP(PoissonNllLoss) .ATTR(eps, Float, 1e-8) .ATTR(reduction, String, "mean") .OP_END_FACTORY_REG(PoissonNllLoss) +/** + *@brief rnn_gen_mask + * @par Inputs: + * @li seq_length: A ND Tensor of type int32. Records the current length of each batch.\n + * + * @par Attributes: + * @li num_step: A required int.\n + * @li hidden_size: A required int. \n + * + * + * @par Output: + * y: A mutable Tensor of type int32, with the shape of [num_step, batch_size, hidden_size]. \n + * + */ +REG_OP(RnnGenMask) + .INPUT(seq_length, TensorType({DT_INT32})) + .OUTPUT(seq_mask, TensorType({DT_INT32})) + .REQUIRED_ATTR(num_step, Int) + .REQUIRED_ATTR(hidden_size, Int) + .OP_END_FACTORY_REG(RnnGenMask) + +/** +* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) +* between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n + +* @par Inputs: +* Two inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n +* +* @par Inputs: +* @li target: A tensor. Must be the following types: +* int32. \n + +* @par Attributes: +* @li reduction: An optional string.
Defaults to "mean" \n + +* @par Outputs: +* y: A Tensor has same element type as input x. \n +* is_target: A Tensor has same element type as input target. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator MultiLabelMarginLoss. \n +*/ +REG_OP(MultilabelMarginLoss) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(target, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(is_target, TensorType({DT_INT32})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MultilabelMarginLoss) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 32eb148b..743c28b7 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -397,8 +397,8 @@ No default value. specifying the stride of the sliding window for each dimension of the input tensor. No default value. *@li padding: A required string type of float16. -*@li pads: A list type of int32. Default value {0, 0, 0}. -*@li dilation: A list type of int32. Default value {1, 1, 1}. +*@li pads: A list type of int32. Default value {0,0,0,0,0,0}. +*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}. *@li ceil_mode: A ceil mode number of int32 . Default value 0. *@li data_format: An optional string. Defaults to "NDHWC" . 
\n @@ -421,8 +421,8 @@ REG_OP(MaxPool3D) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(padding, String) - .ATTR(pads, ListInt, {0,0,0}) - .ATTR(dilation, ListInt, {1,1,1}) + .ATTR(pads, ListInt, {0,0,0,0,0,0}) + .ATTR(dilation, ListInt, {1,1,1,1,1,1}) .ATTR(ceil_mode, Int, 0) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(MaxPool3D) @@ -1184,6 +1184,7 @@ REG_OP(MaxPool3DGrad) .OUTPUT(y, TensorType::RealNumberType()) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding, String, "SAME") .REQUIRED_ATTR(pads, ListInt) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(MaxPool3DGrad) @@ -1440,12 +1441,11 @@ REG_OP(MaxPoolV3Grad) .OP_END_FACTORY_REG(MaxPoolV3Grad) /** -*@brief Performs dilation2d on the input . \n +*@brief Performs Dilation2D on the input . \n *@par Inputs: *x: A tensor of shape is 4d, format is support NHWC. -*filter: A tensor of shape is 3d, the type is same with x, -and the c dimension is same with x. \n +*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n *@par Attributes: *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. @@ -1474,6 +1474,84 @@ REG_OP(Dilation2D) .OP_END_FACTORY_REG(Dilation2D) /** +*@brief Performs Dilation2DBackpropFilter on the input. \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*out_backprop: Has the same type and format as input x and the c dimension is same with x. \n + +*@par Attributes +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. +*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. 
+*@li pads: A optional list of 4 ints. +*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "filter" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2DBackpropFilter. +*/ + +REG_OP(Dilation2DBackpropFilter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(out_backprop, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(ceil_mode, Bool, false) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Dilation2DBackpropFilter) + +/** +*@brief Performs Dilation2DBackpropInput on the input. \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*out_backprop: Has the same type and format as input x and the c dimension is same with x. \n + +*@par Attributes +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. +*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. 
+*@li pads: A optional list of 4 ints. +*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2DBackpropInput. +*/ + +REG_OP(Dilation2DBackpropInput) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(out_backprop, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(ceil_mode, Bool, false) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Dilation2DBackpropInput) + +/** * @brief Applies a 2D adaptive average pooling over * an input signal composed of several input planes. 
\n @@ -1604,5 +1682,22 @@ REG_OP(MaxPoolWithArgmaxV1) .ATTR(ceil_mode, Bool, false) .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) +// SubSample +REG_OP(SubSample) + .INPUT(labels, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(batch_size_per_images, Int) + .REQUIRED_ATTR(positive_fraction, Float) + .OP_END_FACTORY_REG(SubSample) + +// SubSampleLabels +REG_OP(SubSampleLabels) + .INPUT(labels, TensorType({DT_INT32})) + .INPUT(shuffle_matrix, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(batch_size_per_images, Int) + .REQUIRED_ATTR(positive_fraction, Float) + .OP_END_FACTORY_REG(SubSampleLabels) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 0d0be241..4cbe4057 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -836,7 +836,7 @@ REG_OP(HardShrink) *backprops: A Tensor with the same type and shape of features's. \n * *@par Attributes: -*@li lambda: An optional float.Defaults to 0.5. \n +*@li lambd: An optional float.Defaults to 0.5. \n * *@par Third-party framework compatibility *Compatible with the Pytorch operator Hardshrink_backward. \n @@ -845,7 +845,7 @@ REG_OP(HardShrink) .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(lambda, Float, 0.5) + .ATTR(lambd, Float, 0.5) .OP_END_FACTORY_REG(HardShrinkGrad) /** @@ -920,7 +920,30 @@ REG_OP(SoftShrinkGrad) .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(lambd, Float, 0.5) .OP_END_FACTORY_REG(SoftShrinkGrad) - + +/** +*@brief Calculate the gradient of log simoid. \n + +*@par Inputs: +*Two inputs, including: +* @li grads: A tensor, gradient of previous layer. Must be one of the following types: +* float16, float32. 
\n +* @li features: A tensor, input of log sigmoid. Must be one of the following types: +* float16, float32. \n + +*@par Outputs: +*One output, including: +* @li backprops: A tensor with the same type and shape as grads. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator LogSigmoidBackward. \n +*/ +REG_OP(LogSigmoidGrad) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(LogSigmoidGrad) + /** *@brief Calculate -ln(1+e^(-x)). \n diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 50eb98c8..42da3828 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -418,7 +418,7 @@ REG_OP(EmbeddingRankId) */ REG_OP(FillV2) .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) .ATTR(value, Float, 0) .OP_END_FACTORY_REG(FillV2) @@ -437,7 +437,7 @@ REG_OP(FillV2D) * Compatible with the ONNX operator ConstantOfShape.
*/ REG_OP(FillV2D) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) .ATTR(value, Float, 0) .REQUIRED_ATTR(dims, ListInt) .OP_END_FACTORY_REG(FillV2D) diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index 9a5cf504..452961a9 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -51,6 +51,246 @@ REG_OP(StringToNumber) .ATTR(out_type, Type, DT_FLOAT) .OP_END_FACTORY_REG(StringToNumber) +/** +*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. +*@brief Parse an Example prototype. +*@par Input: +*serialized: A Tensor of type string. +*dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n + +*@par Attributes: +*num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes +*out_type: output type +*sparse_keys: ListString +*sparse_types: types of sparse_values +*dense_keys: ListString +*dense_shapes: output of dense_defaults shape +*dense_types: output of dense_defaults type \n + +*@par Outputs: +*sparse_indices: A Tensor of type string. +*sparse_values: Has the same type as sparse_types. +*sparse_shapes: A Tensor of type int64 +*dense_values: Has the same type as dense_defaults. + +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+**/ +REG_OP(ParseSingleExample) + .INPUT(serialized, TensorType({DT_STRING})) + .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) + .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) + .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(dense_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) + .ATTR(num_sparse, Int, 0) + .ATTR(sparse_keys, ListString, {}) + .ATTR(dense_keys, ListString, {}) + .ATTR(sparse_types, ListType, {}) + .ATTR(dense_types, ListType, {}) + .ATTR(dense_shapes, ListListInt, {}) + .OP_END_FACTORY_REG(ParseSingleExample) + +/** +*@brief Decodes raw file into tensor . \n +*@par Input: +*bytes: A Tensor of type string. + +*@par Attributes: +*little_endian: bool true +*out_type: output type + +*@par Outputs: +*Output: A Tensor +**/ +REG_OP(DecodeRaw) + .INPUT(bytes, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT, + DT_INT64,DT_INT32,DT_INT8,DT_UINT8,DT_INT16, + DT_UINT16,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(out_type, Type, DT_FLOAT) + .ATTR(little_endian, Bool, true) + .OP_END_FACTORY_REG(DecodeRaw) + +/** +*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. \n + +*@par Inputs: +*serialized: A Tensor of string type. Scalar string containing serialized +*TensorProto prototype. \n + +*@par Attributes: +*out_type: The type of the serialized tensor. The provided type must match the +*type of the serialized tensor and no implicit conversion will take place. \n + +*@par Outputs: +*output: A Tensor of type out_type. \n + +*@attention Constraints: +*The implementation for StringToNumber on Ascend uses AICPU, +*with bad performance. \n + +*@par Third-party framework compatibility +*@li compatible with tensorflow ParseTensor operator.
+*/ +REG_OP(ParseTensor) + .INPUT(serialized, TensorType({DT_STRING})) + .OUTPUT(output, TensorType(DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, + DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING, + DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(out_type, Type, DT_FLOAT) + .OP_END_FACTORY_REG(ParseTensor) + +/** +*@brief Converts each string in the input Tensor to the specified numeric +*type . \n + +*@par Inputs: +*Inputs include: +*records: Each string is a record/row in the csv and all records should have the +*same format. \n +*record_defaults: One tensor per column of the input record, with either a +*scalar default value for that column or an empty vector if the column is +*required. \n + +*@par Attributes: +*OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n +*field_delim: char delimiter to separate fields in a record. \n +*use_quote_delim: If false, treats double quotation marks as regular characters +*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n +*na_value: Additional string to recognize as NA/NaN. \n + +*@par Outputs: +*output: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for StringToNumber on Ascend uses AICPU, with bad +*performance. \n + +*@par Third-party framework compatibility +*@li compatible with tensorflow StringToNumber operator. +*/ +REG_OP(DecodeCSV) + .INPUT(records, TensorType({DT_STRING})) + .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_STRING, DT_RESOURCE})) + .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_STRING, DT_RESOURCE})) + .ATTR(OUT_TYPE, ListType, {}) + .ATTR(field_delim, String, ",") + .ATTR(use_quote_delim, Bool, true) + .ATTR(na_value, String, ",") + .ATTR(select_cols, ListInt, {}) + .OP_END_FACTORY_REG(DecodeCSV) + +/** +*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. 
+*@brief Parse an Example prototype. +*@par Input: +*serialized: A Tensor of type string. \n +*name:A Tensor of type string. \n +*sparse_keys: Dynamic input tensor of string. \n +*dense_keys: Dynamic input tensor of string \n +*dense_defaults: Dynamic input tensor type as string, float, int64. \n + +*@par Attributes: +*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n +*Ndense: Number of dense_keys \n +*sparse_types: types of sparse_values \n +*Tdense: Type of dense_defaults dense_defaults and dense_values \n +*dense_shapes: output of dense_defaults shape \n + +*@par Outputs: +*sparse_indices: A Tensor of type string. \n +*sparse_values: Has the same type as sparse_types. \n +*sparse_shapes: A Tensor of type int64 \n +*dense_values: Has the same type as dense_defaults. \n +*@par Third-party framework compatibility \n +*@li compatible with tensorflow StringToNumber operator. \n +*/ +REG_OP(ParseExample) + .INPUT(serialized, TensorType({DT_STRING})) + .INPUT(name, TensorType({DT_STRING})) + .DYNAMIC_INPUT(sparse_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(dense_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .ATTR(Nsparse, Int, 0) + .ATTR(Ndense, Int, 0) + .ATTR(sparse_types, ListType, {}) + .ATTR(Tdense, ListType, {}) + .ATTR(dense_shapes, ListListInt, {}) + .OP_END_FACTORY_REG(ParseExample) + +/** +*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed +*tensors. +*@par Input: +*serialized: A Tensor of type string. \n +*feature_list_dense_missing_assumed_empty:A Tensor of type string. \n +*context_sparse_keys: Dynamic input tensor of string. 
\n +*context_dense_keys: Dynamic input tensor of string \n +*feature_list_sparse_keys: Dynamic input tensor of string \n +*feature_list_dense_keys: Dynamic input tensor of string \n +*context_dense_defaults: Dynamic input tensor of string, float, int64 \n +*debug_name: A Tensor of type string. \n + +*@par Attributes: +*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n +*Ncontext_dense: Number of context_dense_keys \n +*Nfeature_list_sparse: Number of feature_list_sparse_keys \n +*Nfeature_list_dense: Number of feature_list_dense_keys \n +*context_sparse_types: Types of context_sparse_values \n +*Tcontext_dense: Number of dense_keys \n +*feature_list_dense_types: Types of feature_list_dense_values \n +*context_dense_shapes: Shape of context_dense \n +*feature_list_sparse_types: Type of feature_list_sparse_values \n +*feature_list_dense_shapes: Shape of feature_list_dense \n + +*@par Outputs: +*context_sparse_indices: Dynamic output tensor of type int64. \n +*context_sparse_values: Dynamic output tensor of type string, float, int64. \n +*context_sparse_shapes: Dynamic output tensor of type int64 \n +*context_dense_values: Dynamic output tensor of type string, float, int64. \n +*feature_list_sparse_indices: Dynamic output tensor of type int64. \n +*feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n +*feature_list_sparse_shapes: Dynamic output tensor of type int64 \n +*feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n +*@par Third-party framework compatibility \n +*@li compatible with tensorflow StringToNumber operator. 
\n +*/ +REG_OP(ParseSingleSequenceExample) + .INPUT(serialized, TensorType({DT_STRING})) + .INPUT(feature_list_dense_missing_assumed_empty, TensorType({DT_STRING})) + .DYNAMIC_INPUT(context_sparse_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(context_dense_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(feature_list_sparse_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(feature_list_dense_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(context_dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .INPUT(debug_name, TensorType({DT_STRING})) + .DYNAMIC_OUTPUT(context_sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(context_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(context_sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(context_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(feature_list_sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(feature_list_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(feature_list_sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(feature_list_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .ATTR(Ncontext_sparse, Int, 0) + .ATTR(Ncontext_dense, Int, 0) + .ATTR(Nfeature_list_sparse, Int, 0) + .ATTR(Nfeature_list_dense, Int, 0) + .REQUIRED_ATTR(context_sparse_types, ListType) + .REQUIRED_ATTR(Tcontext_dense, ListType) + .REQUIRED_ATTR(feature_list_dense_types, ListType) + .REQUIRED_ATTR(context_dense_shapes, ListListInt) + .REQUIRED_ATTR(feature_list_sparse_types, ListType) + .REQUIRED_ATTR(feature_list_dense_shapes, ListListInt) + .OP_END_FACTORY_REG(ParseSingleSequenceExample) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 806e28df..eafc8cc4 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -62,6 
+62,26 @@ REG_OP(Dequantize) /** *@brief Quantizes the input . \n +*@par Inputs: +*x: shape and dtype of input_x. \n +*scales: shape and dtype of input_scales. \n +*zero_points: shape and dtype of input_zero_points \n +*@par Attributes: +*@li axis: the processed dim. \n +*@par Outputs: +*y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n +*/ +REG_OP(Quantize) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scales, TensorType({DT_FLOAT})) + .INPUT(zero_points, TensorType({DT_INT8,DT_UINT8,DT_INT32})) + .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT32})) + .REQUIRED_ATTR(dtype, String) + .ATTR(axis, Int, 1) + .OP_END_FACTORY_REG(Quantize) + +/** +*@brief Quantizes the input . \n *@par Inputs: *x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index e2b00ce3..8104cb01 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -356,6 +356,39 @@ REG_OP(DropOutGenMask) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(DropOutGenMask) + +/** +*@brief Generate random uint8 mask for dropout v3 . \n + +*@par Inputs: +include: +*@li shape:The shape of the output tensor. +*@li prob:0-D. Prob of 1 . \n + +*@par Attributes: +*@li seed:If either seed or seed2 are set to be non-zero, the random number +*generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2:A second seed to avoid seed collision . \n + +*@par Outputs: +*y:Output (1-D) random number using uint8 data format . \n + +*@attention Constraints: +*The output is aligned with 16 + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+ +*@see DropOutGenMaskV3() +*/ +REG_OP(DropOutGenMaskV3) + .INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) + .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_UINT8 })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(DropOutGenMaskV3) + /** *@brief Generates values in an interval . \n diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 5b97d226..7d9e6df1 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -898,14 +898,14 @@ REG_OP(Reduction) *@brief Computes the euclidean norm of elements across dimensions of a tensor . \n *@par Inputs: -*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32. +*@li x: A Tensor. Must be one of the following types: float16, float32, int32. *@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n *@par Attributes: *keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n *@par Outputs: -*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n +*y: A Tensor. Must be one of the following types: float16, float32, int32 . \n *@attention Constraints: * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n @@ -1134,6 +1134,29 @@ REG_OP(GNTrainingUpdate) .OP_END_FACTORY_REG(GNTrainingUpdate) /** +*@brief Joins a string Tensor across the given dimensions. \n + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. +*@li reduction_indices:A Tensor of type int. The text to be processed. + +*@par Attributes: +*@li keep_dims:A bool, An optional bool. Defaults to False. If True, retain reduced dimensions with length 1.. +*@li separator:string. + +*@par output: +*@li output::A Tensor of type string.. 
+*/ +REG_OP(ReduceJoin) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(reduction_indices, TensorType({DT_INT32})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(keep_dims, Bool, true) + .ATTR(separator, String, "") + .OP_END_FACTORY_REG(ReduceJoin) + +/** * @brief Calculates the standard deviation and average value of Tensors. * @par Inputs: diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 12bb0ee8..d4b3b102 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -257,6 +257,83 @@ REG_OP(DynamicRNN) .OP_END_FACTORY_REG(DynamicRNN) /** +*@brief: DynamicRNNV3 calculation. +*@par Inputs: +*twelve inputs: +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n +*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li project:A 4D optional Tensor.
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported. +*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1. +*@li use_peephole:A bool identifying whether to use peephole in the op. Defaults to false. +*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. +*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. +*@li num_proj:An integer identifying the num projection in the op. Defaults to 0. +*@li time_major:A bool identifying the time major in the op. Defaults to true. +*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. +*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. +*@li is_training:A bool identifying whether training is enabled in the op. Defaults to true. \n + +*@par Outputs: +*eight outputs: +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhc:A 4D Tensor.
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Third-party framework compatibility: +* Compatible with the TF operator LSTM. +*/ +REG_OP(DynamicRNNV3) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(forget_bias, Float, 0.0) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicRNNV3) + +/** *@brief: DynamicLSTMV2 calculation. *@par Inputs: *ten inputs: @@ -960,6 +1037,47 @@ REG_OP(CommonGRU) .REQUIRED_ATTR(hidden_size, Int) .ATTR(linear_before_reset , Int, 0) .OP_END_FACTORY_REG(CommonGRU) +/** +* @brief Calculates the reversed outputs of the function "embedding". 
\n + +* @par Inputs: +* Four inputs, including: +* @li weight: A mutable Tensor of word grad. Must be one of the following types: +* float32. +* @li indices: A mutable word index Tensor of the int32 type.\n +* @li offsets: A mutable word index Tensor of the int32 type.\n +* @li per_sample_weights: to indicate all weights should be taken to be 1. +* If specified, per_sample_weights must have exactly the same shape as input +* and is treated as having the same offsets, if those are not None. +* Only supported for mode='sum'.\n + +* @par Attributes: +* @li mode: A string attr which uses "sum", "mean" or "max". Specifies the way to reduce the bag. \n + +* @li scale_grad_by_freq: An optional bool. Defaults to "False". +* If "True", "grad_weight" will be scaled by word_frequency. +* If "False", "grad_weight" will not be scaled by word_frequency. \n +* @li sparse: if True, gradient w.r.t. the attr weight matrix will be a sparse tensor. \n +* @li include_last_offset: if True, attr offsets has one additional element, where the last element +* is equivalent to the size of indices. This matches the CSR format. \n + +* @par Outputs: +* @li grad_weight: A mutable output Tensor of new word grad has the same type as "weight". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator EmbeddingBag.
+*/ +REG_OP(EmbeddingBag) + .INPUT(weight, TensorType({ DT_FLOAT32 })) + .INPUT(indices, TensorType({ DT_INT32 })) + .OPTIONAL_INPUT(offsets, TensorType({DT_INT32})) + .OPTIONAL_INPUT(per_sample_weights, TensorType({DT_FLOAT32})) + .OUTPUT(y, TensorType({ DT_FLOAT32 })) + .ATTR(mode, String, "mean") + .ATTR(scale_grad_by_freq, Bool, false) + .ATTR(sparse, Bool, false) + .ATTR(include_last_offset, Bool, false) + .OP_END_FACTORY_REG(EmbeddingBag) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 7fe05b1e..da5bdb6a 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1006,9 +1006,9 @@ REG_OP(TopK) *@par Inputs: *Inputs including: -* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li shape: A required list of int32, specifying the output shape. +* @li indices: A required index tensor. Must be one of the following types: int32 or int64. +* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8... +* @li shape: A required list of int32 or int64, specifying the output shape. *@par Outputs: *y:A output Tensor with same datatype as "updates" . \n @@ -1019,7 +1019,7 @@ REG_OP(TopK) * Compatible with the TensorFlow operator ScatterNd. */ REG_OP(ScatterNd) - .INPUT(indices, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) .INPUT(x, TensorType::BasicType()) .INPUT(shape, TensorType::IndexNumberType()) .OUTPUT(y, TensorType::BasicType()) @@ -1032,11 +1032,11 @@ REG_OP(ScatterNd) *@par Inputs: *Inputs including: * @li indices: A required index tensor. Must be one of the following types: - * float, float16, int32, int16. format:ND. + * int32 or int64. 
format:ND. * @li x: A required slice tensor. Must be one of the following types: - * float, float16, int32, int16. format:ND. + * float16, float, int32, int8, uint8. format:ND. *@par Attributes: -* @li shape: A required list of int32, specifying the output shape. +* @li shape: A required list of int32 or int64, specifying the output shape. *@par Outputs: *y: A Tensor. Has the same type as "x". format:ND . \n @@ -1051,8 +1051,8 @@ REG_OP(ScatterNd) */ REG_OP(ScatterNdD) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) .REQUIRED_ATTR(shape, ListInt) .OP_END_FACTORY_REG(ScatterNdD) @@ -1877,6 +1877,33 @@ REG_OP(Crop) .OP_END_FACTORY_REG(Crop) /** +*@brief Returns a namedtuple (values, indices) where values is the cumulative +* the cumulative minimum of elements of input in the dimension dim. +* And indices is the index location of each maximum value found in the dimension dim. \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor . Must be one of the following types: +* float16, float32, int32, uint32, int8, uint8. \n + +*@par Attributes: +* @li axis: Axis along which to cummin. \n + +*@par Outputs: +* y: A Tensor with the same type and shape of x's. \n +* indices: A Tensor with the int32 type and the same shape of x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Cummin. \n +*/ +REG_OP(Cummin) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OUTPUT(indices, TensorType::BasicType()) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(Cummin) + +/** *@brief Extends the input with copies of data along a specified dimension. 
For example: *(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); *(2) axis = 1; diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index 29aec302..d7233906 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -25,6 +25,233 @@ #include "graph/operator_reg.h" namespace ge { +/** +*@brief Creates ngrams from ragged string data . \n + +*@par Inputs: +include: +*@li data:1-D.The values tensor of the ragged string tensor to make ngrams out of. +*@li data_splits:The splits tensor of the ragged string tensor to make ngrams out of . \n + +*@par Attributes: +* separator:The string to append between elements of the token. Use "" for no separator. +* ngram_widths:The sizes of the ngrams to create. +* left_pad:The string to use to pad the left side of the ngram sequence. Only used if pad_width != 0. +* right_pad:The string to use to pad the right side of the ngram sequence. Only used if pad_width != 0. +* pad_width:The number of padding elements to add to each side of each sequence. +* preserve_short_sequences: Preserve short sequences. \n + +*@par Outputs: +*@li ngrams:The values tensor of the output ngrams ragged tensor. +*@li ngrams_splits:The splits tensor of the output ngrams ragged tensor. \n + +*@see StringNGrams() + +*@par Third-party framework compatibility +*compatible with StringNGrams op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(StringNGrams) + .INPUT(data, TensorType({DT_STRING})) + .INPUT(data_splits, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(ngrams, TensorType({DT_STRING})) + .OUTPUT(ngrams_splits, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(separator, String) + .ATTR(ngram_widths, ListInt, {}) + .REQUIRED_ATTR(left_pad, String) + .REQUIRED_ATTR(right_pad, String) + .REQUIRED_ATTR(pad_width, Int) + .REQUIRED_ATTR(preserve_short_sequences, Bool) + .OP_END_FACTORY_REG(StringNGrams) + +/** +*@brief Decodes each string in `input` into a sequence of Unicode code points . \n + +*@par Inputs: +include: +*@li input:The text to be decoded. Can have any shape. Note that the output is flattened +to a vector of char values. \n + +*@par Attributes: +* input_encoding:Text encoding of the input strings. This is any of the encodings supported +by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. +* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the +`replacement_char`. Default is false. \n + +*@par Outputs: +*@li row_splits:A 1D tensor containing the row splits. +*@li char_values:A 1D tensor containing the decoded codepoints. 
+*@li char_to_byte_starts:A 1D int32 Tensor containing the byte index in the input string where each +character in `char_values` starts. \n + +*@see UnicodeDecodeWithOffsets() + +*@par Third-party framework compatibility +*compatible with UnicodeDecodeWithOffsets op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeDecodeWithOffsets) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(row_splits, TensorType({DT_INT64})) + .OUTPUT(char_values, TensorType({DT_INT32})) + .OUTPUT(char_to_byte_starts, TensorType({DT_INT64})) + .REQUIRED_ATTR(input_encoding, String) + .ATTR(errors, String, "replace") + .ATTR(replacement_char, Int, 65533) + .ATTR(replace_control_characters, Bool, false) + .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets) + +/** +*@brief Decodes each string in `input` into a sequence of Unicode code points. \n + +*@par Inputs: +include: +*@li input:The text to be decoded. Can have any shape. Note that the output is flattened +to a vector of char values. \n + +*@par Attributes: +* input_encoding:Text encoding of the input strings. This is any of the encodings supported +by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. 
+* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the +`replacement_char`. Default is false. \n + +*@par Outputs: +*@li row_splits:A 1D tensor containing the row splits. +*@li char_values:A 1D tensor containing the decoded codepoints. \n + +*@see UnicodeDecode() + +*@par Third-party framework compatibility +*compatible with UnicodeDecode op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeDecode) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(row_splits, TensorType({DT_INT64})) + .OUTPUT(char_values, TensorType({DT_INT32})) + .REQUIRED_ATTR(input_encoding, String) + .ATTR(errors, String, "replace") + .ATTR(replacement_char, Int, 65533) + .ATTR(replace_control_characters, Bool, false) + .OP_END_FACTORY_REG(UnicodeDecode) + +/** +*@brief Transcode the input text from a source encoding to a destination encoding. \n + +*@par Inputs: +include: +*@li input:The text to be processed. Can have any shape. \n + +*@par Attributes: +* input_encoding:Text encoding of the input strings. This is any of the encodings supported +by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. +Multi-byte encodings will be big-endian. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. 
+* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. +* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the +`replacement_char`. Default is false. \n + +*@par Outputs: +*@li output:A string tensor containing unicode text encoded using `output_encoding`. \n + +*@see UnicodeTranscode() + +*@par Third-party framework compatibility +*compatible with UnicodeTranscode op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeTranscode) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .REQUIRED_ATTR(input_encoding, String) + .ATTR(output_encoding, String, "UTF-8") + .ATTR(errors, String, "replace") + .ATTR(replacement_char, Int, 65533) + .ATTR(replace_control_characters, Bool, false) + .OP_END_FACTORY_REG(UnicodeTranscode) + +/** +*@brief Encode a tensor of ints into unicode strings. \n + +*@par Inputs: +include: +*@li input_values:A 1D tensor containing the unicode codepoints that should be encoded. +*@li input_splits:A 1D tensor specifying how the unicode codepoints should be split into strings. \n + +*@par Attributes: +* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. +Multi-byte encodings will be big-endian. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. 
A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. \n + +*@par Outputs: +*@li output:The 1-D Tensor of strings encoded from the provided unicode codepoints. \n + +*@see UnicodeEncode() + +*@par Third-party framework compatibility +*compatible with UnicodeEncode op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeEncode) + .INPUT(input_values, TensorType({DT_INT32})) + .INPUT(input_splits, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(errors, String, "replace") + .ATTR(output_encoding, String, "UTF-8") + .ATTR(replacement_char, Int, 65533) + .OP_END_FACTORY_REG(UnicodeEncode) /** *@brief Split elements of input based on delimiter into a SparseTensor . \n @@ -62,6 +289,116 @@ REG_OP(StringSplit) .OP_END_FACTORY_REG(StringSplit) /** +*@brief Replaces the match of pattern in input with rewrite. \n + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. \n + +*@par Attributes: +*@li pattern:A string. The regular expression to match the input. +*@li rewrite:A string. The rewrite to be applied to the matched expression. +*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global, +otherwise the replacement is done only on the first match. + +*@par output: +*@li output::A Tensor of type string. 
+*/ +REG_OP(StaticRegexReplace) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(pattern, String, "") + .ATTR(rewrite, String, "") + .ATTR(replace_global, Bool, true) + .OP_END_FACTORY_REG(StaticRegexReplace) + +/** +*@brief The input is a string tensor of any shape. The pattern is the +*regular expression to be matched with every element of the input tensor. +*The boolean values (True or False) of the output tensor indicate +*if the input matches the regex pattern provided. + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. \n + +*@par Attributes: +*@li pattern:A string. The regular expression to match the input. + +*@par output: +*@li output::A bool tensor with the same shape as `input`. +*/ +REG_OP(StaticRegexFullMatch) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_BOOL})) + .ATTR(pattern, String, "") + .OP_END_FACTORY_REG(StaticRegexFullMatch) + +/** +*@brief A Tensor of type string. The input to be joined. \n + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. +*@li segment_ids:A Tensor. Must be one of the following types: int32, int64. +*A tensor whose shape is a prefix of data.shape. Negative segment ids are not supported. +*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar. + +*@par Attributes: +*@li separator:An optional string. Defaults to "". The separator to use when joining. + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(UnsortedSegmentJoin) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(segment_ids, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_segments, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(separator, String, "") + .OP_END_FACTORY_REG(UnsortedSegmentJoin) + +/** +*@brief Inputs to TensorFlow operations are outputs of another TensorFlow operation. 
+*This method is used to obtain a symbolic handle that represents the computation of the input. + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. + +*@par Attributes: +*@li encoding:An optional string. Defaults to "". + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(StringLower) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(encoding, String, "") + .OP_END_FACTORY_REG(StringLower) + +/** +*@brief Inputs to TensorFlow operations are outputs of another TensorFlow operation. +*This method is used to obtain a symbolic handle that represents the computation of the input. + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. + +*@par Attributes: +*@li encoding:An optional string. Defaults to "". + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(StringUpper) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(encoding, String, "") + .OP_END_FACTORY_REG(StringUpper) + +/** *@brief Split elements of source based on sep into a SparseTensor . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 32baf56c..48a094f7 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -418,12 +418,8 @@ REG_OP(BatchToSpace) * Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. 
*/ REG_OP(BatchToSpaceD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, - DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64, - DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, - DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64, - DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) .REQUIRED_ATTR(block_size, Int) .REQUIRED_ATTR(crops, ListInt) .OP_END_FACTORY_REG(BatchToSpaceD) diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 1b264843..7fc1cdea 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -156,6 +156,12 @@ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtSt /** * @ingroup profiling_base + * @brief ts send keypoint profiler log. + */ +RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream); + +/** + * @ingroup profiling_base * @brief ts set profiling reporter callback. 
*/ RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); @@ -200,7 +206,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal /** * @ingroup dvrt_base - * @brief register callback for fail task + * @brief register callback for fail task * @param [in] uniName unique register name, can't be null * @param [in] callback fail task callback function * @param [out] NA @@ -343,7 +349,7 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ /** * @ingroup dvrt_base - * @brief get current thread last stream id and task id + * @brief get current thread last stream id and task id * @param [out] stream id and task id * @param [in] null * @return RT_ERROR_NONE for ok diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index d4219559..9e555230 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -183,6 +183,18 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); /** * @ingroup dvrt_event + * @brief Wait for a notify with time out + * @param [in] notify_ notify to be wait + * @param [in] stream_ input stream + * @param [in] timeOut input timeOut + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx + */ +RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut); + +/** + * @ingroup dvrt_event * @brief Name a notify * @param [in] notify_ notify to be named * @param [in|out] name identification name diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 83cafa3c..fb6e2e20 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -27,5 +27,6 @@ #include "mem.h" #include "rt_model.h" #include "stream.h" +#include "rt_stars.h" #endif // __CCE_RUNTIME_RT_H__ 
diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index df2eddc9..8c93fbd5 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -51,6 +51,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_STREAM_LABEL_GOTO, RT_MODEL_TASK_MODEL_EXIT, RT_MODEL_TASK_ALL_KERNEL, + RT_MODEL_TASK_PROFILER_TRACE_EX, } rtModelTaskType_t; typedef enum tagModelStreamType { @@ -197,6 +198,13 @@ typedef struct tagProfilerTraceTaskInfo { uint32_t reserved[6]; } rtProfilerTrace_t; +typedef struct tagProfilerTraceExTaskInfo { + uint64_t profilerTraceId; + uint64_t modelId; + uint16_t tagId; + uint8_t reserved[22]; +} rtProfilerTraceEx_t; + typedef struct tagrtMemcpyAsyncTaskInfo { void *dst; uint64_t destMax; @@ -272,6 +280,7 @@ typedef struct tagTaskInfo { rtLabelSwitchTaskInfo_t labelSwitchTask; rtLabelGotoTaskInfo_t labelGotoTask; rtProfilerTrace_t profilertraceTask; + rtProfilerTraceEx_t profilertraceExTask; rtMemcpyAsyncTaskInfo_t memcpyAsyncTask; rtNotifyTaskInfo_t notifyTask; rtReduceAsyncTaskInfo_t reduceAsyncTask; diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h new file mode 100644 index 00000000..3b944065 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: + */ + +#ifndef __CCE_RUNTIME_STARS_H +#define __CCE_RUNTIME_STARS_H + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup rt_stars + * @brief launch stars task. + * used for send star sqe directly. 
+ * @param [in] taskSqe stars task sqe + * @param [in] sqeLen stars task sqe length + * @param [in] stream associated stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif // __CCE_RUNTIME_STARS_H diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index ff91351b..d5ed7569 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -41,42 +41,44 @@ namespace Engine { * the Reporter class .used to send data to profiling */ class MSVP_PROF_API Reporter { - public: - virtual ~Reporter() {} +public: + virtual ~Reporter() {} - public: - /** - * @ingroup reporter - * @name : Report - * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n - The data will be firstly appended to cache, if the cache is full, data will be ignored - * @param data [IN] const ReporterData * the data send to libmsporf - * @retval PROFILING_SUCCESS 0 (success) - * @retval PROFILING_FAILED -1 (failed) - * - * @par depend: - * @li libmsprof - * @li prof_reporter.h - * @since c60 - * @see Flush - */ - virtual int Report(const ReporterData *data) = 0; +public: + /** + * @ingroup reporter + * @name : Report + * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n + The data will be firstly appended to cache, if the cache is full, data will be ignored + * @param data [IN] const ReporterData * the data send to libmsporf + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see Flush + */ + virtual int Report(const ReporterData *data) = 0; - /** - * @ingroup reporter - * @name : Flush - * @brief : API of libmsprof, notify 
libmsprof send data over, it's a blocking function \n - The all datas of cache will be write to file or send to host - * @retval PROFILING_SUCCESS 0 (success) - * @retval PROFILING_FAILED -1 (failed) - * - * @par depend: - * @li libmsprof - * @li prof_reporter.h - * @since c60 - * @see ProfMgrStop - */ - virtual int Flush() = 0; + /** + * @ingroup reporter + * @name : Flush + * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n + The all datas of cache will be write to file or send to host + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see ProfMgrStop + */ + virtual int Flush() = 0; + + virtual uint32_t GetReportDataMaxLen() = 0; }; } // namespace Engine diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h index 3634b8a8..71226d87 100644 --- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h +++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h @@ -55,7 +55,40 @@ extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector &option); + +/** + * @ingroup aoe + * @par 描述: 调优去初始化 + * + * @attention 无 + * @param 无 + * @retval #AOE_SUCCESS 执行成功 + * @retval #AOE_FAILED 执行失败 + * @par 依赖: + * @li tune_api.cpp:该接口所属的开发包。 + * @li tune_api.h:该接口声明所在的头文件。 + * @see 无 + * @since + */ +extern "C" AoeStatus AoeOnlineFinalize(); + +/** + * @ingroup aoe + * @par 描述: 调优处理 * * @attention 无 * @param tuningGraph [IN] 调优图 @@ -71,5 +104,5 @@ extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector &dependGraph, - ge::Session *session, const std::map> &option); + ge::Session *session, const std::map &option); #endif