From f49a21d293f577dd3f57b9156930a850206d114f Mon Sep 17 00:00:00 2001 From: dingpeifei Date: Mon, 5 Jul 2021 16:53:45 +0800 Subject: [PATCH] code_sync_0705_inc --- inc/external/acl/acl.h | 4 +- inc/external/acl/ops/acl_dvpp.h | 97 +++++++++++++ third_party/fwkacllib/inc/ops/array_ops.h | 7 +- .../fwkacllib/inc/ops/elewise_calculation_ops.h | 62 ++++----- third_party/fwkacllib/inc/ops/image_ops.h | 152 +++++++++++++++++++++ .../fwkacllib/inc/ops/matrix_calculation_ops.h | 22 +-- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 33 ++--- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 30 ++-- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 32 ++--- third_party/fwkacllib/inc/ops/pad_ops.h | 6 +- third_party/fwkacllib/inc/ops/random_ops.h | 24 ++++ third_party/fwkacllib/inc/ops/transformation_ops.h | 18 +-- third_party/fwkacllib/inc/runtime/event.h | 20 ++- 13 files changed, 397 insertions(+), 110 deletions(-) diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h index 8d261201..a5194472 100644 --- a/inc/external/acl/acl.h +++ b/inc/external/acl/acl.h @@ -25,9 +25,9 @@ extern "C" { #endif -// Current version is 1.0.0 +// Current version is 1.1.0 #define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 +#define ACL_MINOR_VERSION 1 #define ACL_PATCH_VERSION 0 /** diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index dcaa3936..3c0723c5 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -158,6 +158,20 @@ enum acldvppJpegFormat { ACL_JPEG_CSS_UNKNOWN = 1000 }; +enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 }; + +enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 }; + +// Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType +enum acldvppCscMatrix { + ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0, + ACL_DVPP_CSC_MATRIX_BT601_NARROW, + ACL_DVPP_CSC_MATRIX_BT709_WIDE, + ACL_DVPP_CSC_MATRIX_BT709_NARROW, + ACL_DVPP_CSC_MATRIX_BT2020_WIDE, + 
ACL_DVPP_CSC_MATRIX_BT2020_NARROW +}; + /** * @ingroup AscendCL * @brief alloc device memory for dvpp. @@ -2560,7 +2574,90 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], acldvppResizeConfig *resizeConfig, aclrtStream stream); +/** + * @ingroup AscendCL + * @brief set param for dvpp channel desc + * + * @par Function + * set attribution in dvpp channelDesc for specified type + * + * @param channelDesc [OUT] the channel destruction + * @param paramType [IN] specified param type + * @param length [IN] mem length of param + * @param param [IN] pointer to param + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc, + acldvppChannelDescParamType paramType, size_t length, + const void *param); + +/** + * @ingroup AscendCL + * @brief get param of dvpp channel desc + * + * @par Function + * get attribution value in dvpp channelDesc for specified type + * + * @param channelDesc [IN] the channel destruction + * @param paramType [IN] specified param type + * @param length [IN] mem length allocated for output param + * @param paramRetSize [OUT] mem length of output param + * @param param [OUT] pointer to output param + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc, + acldvppChannelDescParamType paramType, size_t length, + size_t *paramRetSize, void *param); +/** + * @ingroup AscendCL + * @brief set param for vdec channel desc + * + * @par Function + * set attribution in channelDesc for specified type + * + * @param channelDesc [OUT] the vdec channel destruction + * @param paramType [IN] specified param type + * @param length [IN] mem length of param + * @param param [IN] pointer to param + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc, + aclvdecChannelDescParamType paramType, size_t length, + const void *param); +/** + * @ingroup AscendCL + * @brief get param of vdec channel desc + * + * @par Function + * get attribution value in channelDesc for specified type + * + * @param channelDesc [IN] the vdec channel destruction + * @param paramType [IN] specified param type + * @param length [IN] mem length allocated for output param + * @param paramRetSize [OUT] mem length of output param + * @param param [OUT] pointer to output param + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc, + aclvdecChannelDescParamType paramType, size_t length, + size_t *paramRetSize, void *param); #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index fd35b546..c203b737 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1154,18 +1154,17 @@ REG_OP(EditDistance) .OP_END_FACTORY_REG(EditDistance) /** -* @brief sort_v2. +* @brief sort the input tensor without returning the value of index. * @par Inputs: -* @li x: An ND tensor of type float16. +* x: An ND tensor of type float16. * @par Attributes: - * @li axis: An optional int. The dimension to sort along. This value defaults to -1. * @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. * @par Outputs: -* @li y: An ND tensor of type float16. +* y: An ND tensor of type float16. * @attention Constraints: * @li Axis should select the last dim. 
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index a20272f3..1f85c152 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -624,9 +624,9 @@ REG_OP(Log1p) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -2066,9 +2066,9 @@ REG_OP(FloorDiv) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -2200,9 +2200,9 @@ REG_OP(Tan) *@attention Constraints: *@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the *requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator +*@li Due to different architectures, the calculation results of this operator *on NPU and CPU may be inconsistent *@li If shape is expressed as (D1,D2... 
,Dn), then D1*D2... *DN<=1000000,n<=8 @@ -3395,7 +3395,7 @@ REG_OP(TensorRedirect) * multiply the result by the scalar value and add it to tensor x1 * @par Inputs: -* Three inputs, including: +* Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: * float16, float32. * @li x1: A mutable input Tensor of the same type as x1. @@ -3404,7 +3404,7 @@ REG_OP(TensorRedirect) * float16, float32, int32. \n * @par Outputs: -* @li y: A mutable Tensor. Has the same type as "x1". \n +* y: A mutable Tensor. Has the same type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcdiv. @@ -3418,12 +3418,12 @@ REG_OP(Addcdiv) .OP_END_FACTORY_REG(Addcdiv) /** -* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, -* multiply the result by the scalar value and add it to tensor input_data +* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor input_data * @par Inputs: -* Three inputs, including: +* Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: * float16, float32, int8, int32, uint8. * @li x1: A mutable input Tensor of the same type as x1. @@ -3431,7 +3431,7 @@ REG_OP(Addcdiv) * @li value: A tensor which includes only one element of the same type as x1. \n * @par Outputs: -* @li y: A mutable output Tensor. Has the same type as "x1". \n +* y: A mutable output Tensor. Has the same type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcmul. @@ -3453,7 +3453,7 @@ REG_OP(Addcmul) * @li alpha: A scalar tensor of type float16, float32. \n * @par Outputs: -* @li y: An ND tensor tensor with the same shape and type as "x1". \n +* y: An ND tensor tensor with the same shape and type as "x1". \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Axpy. 
@@ -3533,21 +3533,21 @@ REG_OP(TensorEqual) .OP_END_FACTORY_REG(TensorEqual) /** - * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). - * All inputs and outputs must have the same data type. This operator supports multidirectional + * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). + * All inputs and outputs must have the same data type. This operator supports multidirectional * (i.e., Numpy-style) broadcasting - * - * @par inputs + * + * @par Inputs: * one input including: - * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 - * - * @par output + * x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 + * + * @par Outputs: * one output including: - * @li y:A Tensor of the same type as x - * + * y:A Tensor of the same type as x + * */ REG_OP(MaxN) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OP_END_FACTORY_REG(MaxN) @@ -3632,16 +3632,16 @@ REG_OP(DataCompare) *which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the *corresponding input. 
* -*@par inputs +*@par Inputs: *one input including: -*@li x: input A Tensor.Must be one of the following types:float32,float16 +*x: input A Tensor.Must be one of the following types:float32,float16 * *@par Attributes: -*@li axis:A required int attribute that decides which dimension will be used to cal the hard_max +*axis:A required int attribute that decides which dimension will be used to cal the hard_max * -*@par output: +*@par Outputs: *one output including: -*@li y:A Tensor of the same type as x +*y:A Tensor of the same type as x * */ REG_OP(HardMax) @@ -3669,7 +3669,7 @@ REG_OP(Dot) .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) .OP_END_FACTORY_REG(Dot) - + /** *@brief Returns a new tensor with boolean elements representing \n *if each element of input is “close” to the corresponding element of other \n @@ -3717,7 +3717,7 @@ REG_OP(IsClose) * *@attention Constraints: *@li indices: only support int32,and shape same to "updates" -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". *@li y:A Tensor, the type and shape is same to "var" \n *@par Third-party framework compatibility @@ -3752,7 +3752,7 @@ REG_OP(ArgMaxGrad) *@attention Constraints: *@li indices: only support int32,and shape same to "updates" -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". 
*@li y:A Tensor, the type and shape is same to "var" \n *@par Third-party framework compatibility diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 28bf6228..0a796cd6 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1512,6 +1512,9 @@ REG_OP(IMGWarp) *@par Outputs: *map_img: A Tensor after resize. \n + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(Remap) .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) @@ -1848,6 +1851,9 @@ REG_OP(GridUnnormal) *@par Outputs: *y: Returns 4-D Tensor with the same dtype as `x`. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ImageUnfold) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1940,5 +1946,151 @@ REG_OP(GridSampler3DGrad) .ATTR(align_corners, Bool, false) .OP_END_FACTORY_REG(GridSampler3DGrad) +/** +*@brief Upsample the 3-D data with the nearest neighbor ​interpolation algorithm. \n + +*@par Inputs: +*One inputs, including: +* @li x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types: +* float32, float64. \n + +*@par Attributes: +*@li output_size: An optional listInt. Defaults to none. + contain 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' + should be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n +*@li scales: An optional listFloat. Defaults to none. + The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width. + The number of elements of 'scales' should be the same as the rank of input 'x'. One of 'scales' and + 'output_size' MUST be specified and it is an error if both are specified. \n + +*@par Outputs: +*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. 
\n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n +*/ + +REG_OP(UpsampleNearest3d) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(output_size, ListInt, {}) + .ATTR(scales, ListFloat, {}) + .OP_END_FACTORY_REG(UpsampleNearest3d) + +/** +*@brief Upsample the 3-D data with the trilinear ​interpolation algorithm. \n + +*@par Inputs: +*One inputs, including: +* @li x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types: +* float32, float64. \n + +*@par Attributes: +*@li output_size: An optional listInt. Defaults to none. + contain 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should + be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n +*@li scales: An optional listFloat. Defaults to none. + The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width. + The number of elements of 'scales' should be the same as the rank of input 'x'. + One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n +*@li align_corners: An optional bool. Defaults to false. + If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the + values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their + corner pixels, and the interpolation use edge value padding for out of boundary values. \n + +*@par Outputs: +*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
\n
+*/
+
+REG_OP(UpsampleTrilinear3d)
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(output_size, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(UpsampleTrilinear3d)
+
+/**
+*@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+* @li grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float32, float64. \n
+
+*@par Attributes:
+*@li input_size: A required listInt.
+    contain 5 elements: [min_batch, channels, depth, height, width]. Must:
+    input_size[0] == grad_output_tensor_size[0]
+    input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+    contain 3 elements: depth, height, width. The number of elements of 'output_size' should
+    be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+    grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+    grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+    grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+    The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
+    The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
+    One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A 5-D tensor. Has the same type as input grad_output, shape depends on Attributes:input_size. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(UpsampleNearest3dGrad)
+    .INPUT(grad_output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(input_size, ListInt)
+    .ATTR(output_size, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .OP_END_FACTORY_REG(UpsampleNearest3dGrad)
+
+/**
+*@brief Upsample the 3-D gradient data with the trilinear interpolation algorithm. \n
+
+*@par Inputs:
+*One input, including:
+* @li grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
+* float32, float64. \n
+
+*@par Attributes:
+*@li input_size: A required listInt.
+    contain 5 elements: [min_batch, channels, depth, height, width]. Must:
+    input_size[0] == grad_output_tensor_size[0]
+    input_size[1] == grad_output_tensor_size[1]. \n
+*@li output_size: An optional listInt. Defaults to none.
+    contain 3 elements: depth, height, width. The number of elements of 'output_size' should
+    be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
+    grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
+    grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
+    grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
+*@li scales: An optional listFloat. Defaults to none.
+    The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
+    The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
+    One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
+
+*@par Outputs:
+*y: A Tensor with shape depends on input_size and output_size/scales. Must be one of the following
+    types: float32, float64. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/ + +REG_OP(UpsampleTrilinear3dGrad) + .INPUT(grad_output, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(input_size, ListInt) + .ATTR(output_size, ListInt, {}) + .ATTR(scales, ListFloat, {}) + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(UpsampleTrilinear3dGrad) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 5341a95c..336d71a9 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1120,11 +1120,12 @@ REG_OP(IndexAdd) *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n *@par Inputs: -* Two inputs, including: -*@li x: A Tensor. Must be one of the following types: -* float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. -*@li diagonal:(int, optional) – the diagonal to consider。\n +*x: A Tensor. Must be one of the following types: +*float16, float32, double, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +*@par Attributes: +*diagonal: An optional attribute indicates the diagonal to consider. \n *@par Outputs: *y: A Tensor. Has the same type as "x" . \n @@ -1142,11 +1143,12 @@ REG_OP(Triu) *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n *@par Inputs: -* Two inputs, including: -*@li x: A Tensor. Must be one of the following types: -* float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. -*@li diagonal:(int, optional) – the diagonal to consider。\n +*x: A Tensor. 
Must be one of the following types:
+*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n
+
+*@par Attributes:
+*diagonal: An optional attribute indicates the diagonal to consider. \n
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 9f35e27a..ec50aa2e 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1445,16 +1445,16 @@ REG_OP(DecodeBboxV2)
     .OP_END_FACTORY_REG(DecodeBboxV2)
 
 /**
-*@brief Computes sort function.
+*@brief sort the input tensor and return the value of index.
 *
 *@par Inputs:
 *Inputs include:
-* x: A Tensor. Dtype support: flaot16, flaot, int16, int8,
+* x: A Tensor. Dtype support: float16, float, int16, int8,
     uint8, int32, int64.
 *
 *@par Attributes:
-* @li axis: optional, int.
-* @li descending: optional,bool.
+* @li axis: An optional attribute indicates the sorting axis.
+* @li descending: An optional attribute indicates descending sort or not.
 *
 *@par Outputs:
 * @li y1: A Tensor. Must have the same type as x.
@@ -1515,10 +1515,10 @@ whether boxes overlap too much with respect to IOU.
 deciding when to remove boxes based on score . \n
 
 *@par Attributes:
-*center_point_box:Integer indicate the format of the box data. 
-The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] 
-where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair 
-of box corners and the coordinates can be provided as normalized 
+*center_point_box:Integer indicate the format of the box data.
+The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
+where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
+of box corners and the coordinates can be provided as normalized
 (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height]. Mostly used for Pytorch models. \n @@ -1567,16 +1567,18 @@ deciding when to remove boxes based on score . \n the last dim representing (batch_id,class_id,index_id) . \n *@par Attributes: -*center_point_box:Integer indicate the format of the box data. -The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] -where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair -of box corners and the coordinates can be provided as normalized +*@li center_point_box:Integer indicate the format of the box data. +The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] +where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair +of box corners and the coordinates can be provided as normalized (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. 1 - the box data is supplied as [x_center, y_center, width, height]. - Mostly used for Pytorch models. \n + Mostly used for Pytorch models. +*@li max_boxes_size: An optional attribute integer representing the real maximum +*number of boxes to be selected by non max suppression . \n *@par Outputs: -*@li selected_indices: A 2-D integer tensor of shape [M] representing the +*selected_indices: A 2-D integer tensor of shape [M] representing the selected indices from the boxes tensor, where M <= max_output_size. \n *@attention Constraints: @@ -1602,7 +1604,7 @@ REG_OP(NonMaxSuppressionV7) *@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n *@par Inputs: -* Three inputs, including: +* Two inputs, including: *@li features: A 5HD Tensor list of type float32 or float16. *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, * the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". 
@@ -1818,4 +1820,3 @@ REG_OP(GridAssignPositive) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ - diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 10047d55..66d17bc8 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -568,7 +568,7 @@ REG_OP(LayerNorm) .OP_END_FACTORY_REG(LayerNorm) /** -*@brief Returns a tensor where each sub-tensor of input along dimension +*@brief Returns a tensor where each sub-tensor of input along dimension * dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n *@par Inputs: @@ -576,7 +576,7 @@ REG_OP(LayerNorm) * @li x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: -* @li p: Specify L_p norm, the type is float. +* @li p: Specify L_p norm, the type is float. * @li dim: The processed dim, the type is int. * @li maxnorm: Threshold for comparison, the type is float. \n @@ -1543,14 +1543,14 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2) .ATTR(reduction, String, "mean") .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) /** - * @brief Calculate the PoissonNllLoss function. + * @brief Calculate the PoissonNllLoss function. * target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n * @par Inputs: * Two inputs, including: * @li input_x: A tensor. Must be one of the following types: * float16, float32. \n - * + * * @par Inputs: * @li target: A tensor. Must be one of the following types: * float16, float32. \n @@ -1558,13 +1558,13 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2) * @par Attributes: * four Attributes, including: * @li log_input: An optional bool. Defaults to "True" \n - * + * * @par Attributes: * @li full: An optional bool. Defaults to "False" \n - * + * * @par Attributes: * @li eps: An optional float. Defaults to "1e-8" \n - * + * * @par Attributes: * @li reduction: An optional string. 
Defaults to "mean" \n @@ -1592,7 +1592,7 @@ REG_OP(PoissonNllLoss) * @li num_step: A required int.\n * @li hidden_size: A required int. \n * - * + * * @par Output: * y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n * @@ -1605,24 +1605,22 @@ REG_OP(RnnGenMask) .OP_END_FACTORY_REG(RnnGenMask) /** -* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) +* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) * between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n - + * @par Inputs: * Two inputs, including: * @li x: A tensor. Must be one of the following types: -* float16, float32. \n -* -* @par Inputs: +* float16, float32. * @li target: A tensor. Must be the following types: * int32. \n * @par Attributes: -* @li reduction: An optional string. Defaults to "mean" \n +* reduction: An optional string. Defaults to "mean" \n * @par Outputs: -* y: A Tensor has same element type as input x. \n -* is_target: A Tensor has same element type as input target. \n +* @li y: A Tensor has same element type as input x. \n +* @li is_target: A Tensor has same element type as input target. \n * @par Third-party framework compatibility * Compatible with the Pytorch operator MultiLabelMarginLoss. \n diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index b9df706b..bf850019 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -224,22 +224,22 @@ REG_OP(Relu6Grad) .OUTPUT(backprops, TensorType::RealNumberType()) .OP_END_FACTORY_REG(Relu6Grad) /** -*@brief Calculate the elu_grad_v2 function. +*@brief Calculate the elu_grad_v2 function. *Applies the element-wise function: * Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . 
*@par Inputs: *One inputs, including: * @li grads: A tensor. Must be one of the following types: -* float16, float32. +* float16, float32. * @li activations: A tensor. Must be one of the following types: -* float16, float32. +* float16, float32. * *@par Outputs: *y: A Tensor with the same type and shape of grads's. -* +* *@par Attributes: *@li alpha: scalar parameter, default value = 1.0 -*/ +*/ REG_OP(EluGradV2) .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -539,24 +539,20 @@ REG_OP(Elu) *x: A float16, float32, for the input data type . \n *@par Attributes: -*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n - -*@par Attributes: -*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n - -*@par Attributes: -*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n +*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . +*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . +*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n *@par Outputs: *y: A float16, float32, for the normalized result . \n *@attention Constraints: -*@li The input is of type float16 or float32 . \n +*The input is of type float16 or float32 . \n *@par Multiple batches supported or not *Supported *@par Third-party framework compatibility -*@li Compatible with ONNX's Celu operator +*Compatible with ONNX's Celu operator */ REG_OP(Celu) .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) @@ -808,15 +804,15 @@ REG_OP(SoftplusV2Grad) /** * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. 
- * + * * @par inputs * one input including: * @li x: input A Tensor. Must be one of the following types: float32, float16 - * + * * @par output * one output including: * @li y:A Tensor of the same type as x - * + * */ REG_OP(ThresholdedRelu) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -892,7 +888,7 @@ REG_OP(HardShrink) * @par Third-party framework compatibility * Compatible with the Pytorch operator Hardsigmoid. \n -*/ +*/ REG_OP(HardSigmoid) .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 6854c866..4efb2683 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -213,11 +213,11 @@ REG_OP(PadV2) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n -*constant_values: A Tensor. Must have the same type as input. +*@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n +*@li constant_values: A Tensor. Must have the same type as input. *@par Attributes: -*paddings: An optional "vector>". Defaults to "{}". +*paddings: A required Attribute. * For each dimension D of input, paddings[D, 0] indicates how many * values to add before the contents of tensor in that dimension, * and paddings[D, 1] indicates how many values to add after the diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index afa3bb45..f607315a 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -584,6 +584,30 @@ REG_OP(DropoutV2) .OUTPUT(seed, TensorType({ DT_FLOAT })) .REQUIRED_ATTR(p, Float) .OP_END_FACTORY_REG(DropoutV2) + +/** +* @brief The Bernoulli distribution with probability . \n + +* @par Inputs: +* @li x: A ND Tensor. 
Must be one of the following data types: + int8, uint8, int16, int32, int64, bool, float32, float64 . +* @li p: A ND Tensor. The probability of an element to be zeroed. + Must be one of the following data types: float32, float64. \n + +* @par Attributes: +* seed: An Integer, the seed of the random generator. Default value -1 + to use current timestamp, otherwise it should be a positive integer. + +* @par Outputs: +* y: A tensor with the same shape and type as "x". +*/ + +REG_OP(Bernoulli) + .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + .INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE })) + .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + .ATTR(seed, Int, -1) + .OP_END_FACTORY_REG(Bernoulli) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 40fcf911..e096fd3e 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -182,14 +182,14 @@ REG_OP(Permute) * int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32. *@par Outputs: -* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened +* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened * to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output. * Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . *@par Attributes: -* axis: A optional int32, default value is 1. Indicate up to which input dimensions (exclusive) should be flattened -* to the outer dimension of the output. 
The value for axis must be in the range [-r, r], where r is the rank of -* the input tensor. Negative value means counting dimensions from the back. When axis = 0, the shape of +* axis: A optional int32, default value is 1. Indicate up to which input dimensions (exclusive) should be flattened +* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of +* the input tensor. Negative value means counting dimensions from the back. When axis = 0, the shape of * the output tensor is (1, (d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n). *@par Third-party framework compatibility @@ -723,11 +723,13 @@ REG_OP(CompressFcOp) *@brief Performs Col2im for each batch entry. \n *@par Inputs: -*@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`. -where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n +*@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`. +where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1. +*@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n *@par Outputs: -*@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n +*y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n + *@par Attributes: *@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution. @@ -837,7 +839,7 @@ REG_OP(AffineGrid) *@par Inputs: *Four inputs, including: *@li x: The input tensor. -*@li size: The shape of output tensor. +*@li size: The shape of output tensor. *@li stride: The stride of output tensor. *@li storage_offset: The offset in the underlying storage of the output tensor. 
\n diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 9e555230..57948c47 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -23,12 +23,18 @@ extern "C" { #endif +typedef enum rtEventWaitStatus { + EVENT_STATUS_COMPLETE = 0, + EVENT_STATUS_NOT_READY = 1, + EVENT_STATUS_MAX = 2, +} rtEventWaitStatus_t; + /** * @ingroup event_flags * @brief event op bit flags */ -#define RT_EVENT_DEFAULT (0x00) -#define RT_EVENT_WITH_FLAG (0x01) +#define RT_EVENT_DEFAULT (0x0E) +#define RT_EVENT_WITH_FLAG (0x0B) #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U @@ -111,6 +117,16 @@ RTS_API rtError_t rtEventQuery(rtEvent_t event); /** * @ingroup dvrt_event + * @brief Queries an event's wait status + * @param [in] event event to query + * @param [out] status wait status of the event + * @return EVENT_STATUS_COMPLETE for complete + * @return EVENT_STATUS_NOT_READY for not complete + */ +RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t event, rtEventWaitStatus_t *status); + +/** + * @ingroup dvrt_event * @brief computes the elapsed time between events. * @param [in] time time between start and end in ms * @param [in] start starting event