Browse Source

!1939 code_sync_0705_inc

Merge pull request !1939 from mindspore_ding/code_sync_0705
tags/v1.3.0^0
i-robot Gitee 4 years ago
parent
commit
acc2472c41
13 changed files with 397 additions and 110 deletions
  1. +2
    -2
      inc/external/acl/acl.h
  2. +97
    -0
      inc/external/acl/ops/acl_dvpp.h
  3. +3
    -4
      third_party/fwkacllib/inc/ops/array_ops.h
  4. +31
    -31
      third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  5. +152
    -0
      third_party/fwkacllib/inc/ops/image_ops.h
  6. +12
    -10
      third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  7. +17
    -16
      third_party/fwkacllib/inc/ops/nn_detect_ops.h
  8. +14
    -16
      third_party/fwkacllib/inc/ops/nn_norm_ops.h
  9. +14
    -18
      third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
  10. +3
    -3
      third_party/fwkacllib/inc/ops/pad_ops.h
  11. +24
    -0
      third_party/fwkacllib/inc/ops/random_ops.h
  12. +10
    -8
      third_party/fwkacllib/inc/ops/transformation_ops.h
  13. +18
    -2
      third_party/fwkacllib/inc/runtime/event.h

+ 2
- 2
inc/external/acl/acl.h View File

@@ -25,9 +25,9 @@
extern "C" { extern "C" {
#endif #endif


// Current version is 1.0.0
// Current version is 1.1.0
#define ACL_MAJOR_VERSION 1 #define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_MINOR_VERSION 1
#define ACL_PATCH_VERSION 0 #define ACL_PATCH_VERSION 0


/** /**


+ 97
- 0
inc/external/acl/ops/acl_dvpp.h View File

@@ -158,6 +158,20 @@ enum acldvppJpegFormat {
ACL_JPEG_CSS_UNKNOWN = 1000 ACL_JPEG_CSS_UNKNOWN = 1000
}; };


enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 };

enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 };

// Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType
enum acldvppCscMatrix {
ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0,
ACL_DVPP_CSC_MATRIX_BT601_NARROW,
ACL_DVPP_CSC_MATRIX_BT709_WIDE,
ACL_DVPP_CSC_MATRIX_BT709_NARROW,
ACL_DVPP_CSC_MATRIX_BT2020_WIDE,
ACL_DVPP_CSC_MATRIX_BT2020_NARROW
};

/** /**
* @ingroup AscendCL * @ingroup AscendCL
* @brief alloc device memory for dvpp. * @brief alloc device memory for dvpp.
@@ -2560,7 +2574,90 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
acldvppResizeConfig *resizeConfig, aclrtStream stream); acldvppResizeConfig *resizeConfig, aclrtStream stream);
/**
* @ingroup AscendCL
* @brief set param for dvpp channel desc
*
* @par Function
* set attribution in dvpp channelDesc for specified type
*
* @param channelDesc [OUT] the channel descriptor
* @param paramType [IN] specified param type
* @param length [IN] mem length of param
* @param param [IN] pointer to param
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc,
acldvppChannelDescParamType paramType, size_t length,
const void *param);

/**
* @ingroup AscendCL
* @brief get param of dvpp channel desc
*
* @par Function
* get attribution value in dvpp channelDesc for specified type
*
* @param channelDesc [IN] the channel descriptor
* @param paramType [IN] specified param type
* @param length [IN] mem length allocated for output param
* @param paramRetSize [OUT] mem length of output param
* @param param [OUT] pointer to output param
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc,
acldvppChannelDescParamType paramType, size_t length,
size_t *paramRetSize, void *param);
/**
* @ingroup AscendCL
* @brief set param for vdec channel desc
*
* @par Function
* set attribution in channelDesc for specified type
*
* @param channelDesc [OUT] the vdec channel descriptor
* @param paramType [IN] specified param type
* @param length [IN] mem length of param
* @param param [IN] pointer to param
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc,
aclvdecChannelDescParamType paramType, size_t length,
const void *param);


/**
* @ingroup AscendCL
* @brief get param of vdec channel desc
*
* @par Function
* get attribution value in channelDesc for specified type
*
* @param channelDesc [IN] the vdec channel descriptor
* @param paramType [IN] specified param type
* @param length [IN] mem length allocated for output param
* @param paramRetSize [OUT] mem length of output param
* @param param [OUT] pointer to output param
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc,
aclvdecChannelDescParamType paramType, size_t length,
size_t *paramRetSize, void *param);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif


+ 3
- 4
third_party/fwkacllib/inc/ops/array_ops.h View File

@@ -1154,18 +1154,17 @@ REG_OP(EditDistance)
.OP_END_FACTORY_REG(EditDistance) .OP_END_FACTORY_REG(EditDistance)


/** /**
* @brief sort_v2.
* @brief sort the input tensor without returning the value of index.


* @par Inputs: * @par Inputs:
* @li x: An ND tensor of type float16.
* x: An ND tensor of type float16.


* @par Attributes: * @par Attributes:

* @li axis: An optional int. The dimension to sort along. This value defaults to -1. * @li axis: An optional int. The dimension to sort along. This value defaults to -1.
* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. * @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.


* @par Outputs: * @par Outputs:
* @li y: An ND tensor of type float16.
* y: An ND tensor of type float16.


* @attention Constraints: * @attention Constraints:
* @li Axis should select the last dim. * @li Axis should select the last dim.


+ 31
- 31
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -624,9 +624,9 @@ REG_OP(Log1p)


*@attention Constraints: *@attention Constraints:
*@li x2: The input data does not support 0 *@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form *requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent *on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8


@@ -2066,9 +2066,9 @@ REG_OP(FloorDiv)


*@attention Constraints: *@attention Constraints:
*@li x2: The input data does not support 0 *@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form *requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent *on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8


@@ -2200,9 +2200,9 @@ REG_OP(Tan)


*@attention Constraints: *@attention Constraints:
*@li x2: The input data does not support 0 *@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form *requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent *on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8


@@ -3395,7 +3395,7 @@ REG_OP(TensorRedirect)
* multiply the result by the scalar value and add it to tensor x1 * multiply the result by the scalar value and add it to tensor x1


* @par Inputs: * @par Inputs:
* Three inputs, including:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types: * @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32. * float16, float32.
* @li x1: A mutable input Tensor of the same type as x1. * @li x1: A mutable input Tensor of the same type as x1.
@@ -3404,7 +3404,7 @@ REG_OP(TensorRedirect)
* float16, float32, int32. \n * float16, float32, int32. \n


* @par Outputs: * @par Outputs:
* @li y: A mutable Tensor. Has the same type as "x1". \n
* y: A mutable Tensor. Has the same type as "x1". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcdiv. * Compatible with the Pytorch operator Addcdiv.
@@ -3418,12 +3418,12 @@ REG_OP(Addcdiv)
.OP_END_FACTORY_REG(Addcdiv) .OP_END_FACTORY_REG(Addcdiv)


/** /**
* @brief Performs the element-wise multiplication of tensor x2 by tensor x3,
* multiply the result by the scalar value and add it to tensor input_data
* @brief Performs the element-wise multiplication of tensor x2 by tensor x3,
* multiply the result by the scalar value and add it to tensor input_data




* @par Inputs: * @par Inputs:
* Three inputs, including:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types: * @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32, int8, int32, uint8. * float16, float32, int8, int32, uint8.
* @li x1: A mutable input Tensor of the same type as x1. * @li x1: A mutable input Tensor of the same type as x1.
@@ -3431,7 +3431,7 @@ REG_OP(Addcdiv)
* @li value: A tensor which includes only one element of the same type as x1. \n * @li value: A tensor which includes only one element of the same type as x1. \n


* @par Outputs: * @par Outputs:
* @li y: A mutable output Tensor. Has the same type as "x1". \n
* y: A mutable output Tensor. Has the same type as "x1". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcmul. * Compatible with the Pytorch operator Addcmul.
@@ -3453,7 +3453,7 @@ REG_OP(Addcmul)
* @li alpha: A scalar tensor of type float16, float32. \n * @li alpha: A scalar tensor of type float16, float32. \n


* @par Outputs: * @par Outputs:
* @li y: An ND tensor tensor with the same shape and type as "x1". \n
* y: An ND tensor tensor with the same shape and type as "x1". \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator Axpy. * Compatible with the Pytorch operator Axpy.
@@ -3533,21 +3533,21 @@ REG_OP(TensorEqual)
.OP_END_FACTORY_REG(TensorEqual) .OP_END_FACTORY_REG(TensorEqual)


/** /**
* @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support).
* All inputs and outputs must have the same data type. This operator supports multidirectional
* @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support).
* All inputs and outputs must have the same data type. This operator supports multidirectional
* (i.e., Numpy-style) broadcasting * (i.e., Numpy-style) broadcasting
*
* @par inputs
*
* @par Inputs:
* one input including: * one input including:
* @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64
*
* @par output
* x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64
*
* @par Outputs:
* one output including: * one output including:
* @li y:A Tensor of the same type as x
*
* y:A Tensor of the same type as x
*
*/ */
REG_OP(MaxN) REG_OP(MaxN)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(MaxN) .OP_END_FACTORY_REG(MaxN)


@@ -3632,16 +3632,16 @@ REG_OP(DataCompare)
*which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the *which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the
*corresponding input. *corresponding input.
* *
*@par inputs
*@par Inputs:
*one input including: *one input including:
*@li x: input A Tensor.Must be one of the following types:float32,float16
*x: input A Tensor.Must be one of the following types:float32,float16
* *
*@par Attributes: *@par Attributes:
*@li axis:A required int attribute that decides which dimension will be used to cal the hard_max
*axis:A required int attribute that decides which dimension will be used to cal the hard_max
* *
*@par output:
*@par Outputs:
*one output including: *one output including:
*@li y:A Tensor of the same type as x
*y:A Tensor of the same type as x
* *
*/ */
REG_OP(HardMax) REG_OP(HardMax)
@@ -3669,7 +3669,7 @@ REG_OP(Dot)
.INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.OP_END_FACTORY_REG(Dot) .OP_END_FACTORY_REG(Dot)
/** /**
*@brief Returns a new tensor with boolean elements representing \n *@brief Returns a new tensor with boolean elements representing \n
*if each element of input is “close” to the corresponding element of other \n *if each element of input is “close” to the corresponding element of other \n
@@ -3717,7 +3717,7 @@ REG_OP(IsClose)
* *
*@attention Constraints: *@attention Constraints:
*@li indices: only support int32,and shape same to "updates" *@li indices: only support int32,and shape same to "updates"
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y:A Tensor, the type and shape is same to "var" \n *@li y:A Tensor, the type and shape is same to "var" \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
@@ -3752,7 +3752,7 @@ REG_OP(ArgMaxGrad)


*@attention Constraints: *@attention Constraints:
*@li indices: only support int32,and shape same to "updates" *@li indices: only support int32,and shape same to "updates"
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x".
*@li y:A Tensor, the type and shape is same to "var" \n *@li y:A Tensor, the type and shape is same to "var" \n


*@par Third-party framework compatibility *@par Third-party framework compatibility


+ 152
- 0
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -1512,6 +1512,9 @@ REG_OP(IMGWarp)


*@par Outputs: *@par Outputs:
*map_img: A Tensor after resize. \n *map_img: A Tensor after resize. \n

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */
REG_OP(Remap) REG_OP(Remap)
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
@@ -1848,6 +1851,9 @@ REG_OP(GridUnnormal)


*@par Outputs: *@par Outputs:
*y: Returns 4-D Tensor with the same dtype as `x`. *y: Returns 4-D Tensor with the same dtype as `x`.

*@par Restrictions:
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */
REG_OP(ImageUnfold) REG_OP(ImageUnfold)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1940,5 +1946,151 @@ REG_OP(GridSampler3DGrad)
.ATTR(align_corners, Bool, false) .ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(GridSampler3DGrad) .OP_END_FACTORY_REG(GridSampler3DGrad)


/**
*@brief Upsample the 3-D data with the nearest neighbor interpolation algorithm. \n

*@par Inputs:
*One inputs, including:
* @li x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float32, float64. \n

*@par Attributes:
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size'
should be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'x'. One of 'scales' and
'output_size' MUST be specified and it is an error if both are specified. \n

*@par Outputs:
*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/

REG_OP(UpsampleNearest3d)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.OP_END_FACTORY_REG(UpsampleNearest3d)

/**
*@brief Upsample the 3-D data with the trilinear interpolation algorithm. \n

*@par Inputs:
*One inputs, including:
* @li x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float32, float64. \n

*@par Attributes:
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should
be the same as the rank of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'x'.
One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n
*@li align_corners: An optional bool. Defaults to false.
If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the
values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their
corner pixels, and the interpolation use edge value padding for out of boundary values. \n

*@par Outputs:
*y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/

REG_OP(UpsampleTrilinear3d)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(UpsampleTrilinear3d)

/**
*@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n

*@par Inputs:
*One inputs, including:
* @li grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float32, float64. \n

*@par Attributes:
*@li input_size: A required listInt.
contain 5 elements: [min_batch, channels, depth, height, width]. Must:
input_size[0] == grad_output_tensor_size[0]
input_size[1] == grad_output_tensor_size[1]. \n
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: depth, height, width. The number of elements of 'output_size' should
be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n

*@par Outputs:
*y: A 5-D tensor. Has the same type as input grad_output, shape depends on Attributes:input_size. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(UpsampleNearest3dGrad)
.INPUT(grad_output, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(input_size, ListInt)
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.OP_END_FACTORY_REG(UpsampleNearest3dGrad)

/**
*@brief Upsample the 3-D gradient data with the trilinear interpolation algorithm. \n

*@par Inputs:
*One inputs, including:
* @li grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types:
* float32, float64. \n

*@par Attributes:
*@li input_size: A required listInt.
contain 5 elements: [min_batch, channels, depth, height, width]. Must:
input_size[0] == grad_output_tensor_size[0]
input_size[1] == grad_output_tensor_size[1]. \n
*@li output_size: An optional listInt. Defaults to none.
contain 3 elements: depth, height, width. The number of elements of 'output_size' should
be the same as the rank of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must:
grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0]
grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1]
grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n
*@li scales: An optional listFloat. Defaults to none.
The scale array along each dimension, contain 3 elements: scale_depth, scale_height, scale_width.
The number of elements of 'scales' should be the same as the rank of input 'grad_output'.
One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n

*@par Outputs:
*y: A Tensor whose shape depends on input_size and output_size/scales. Must be one of the following
types: float32, float64. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(UpsampleTrilinear3dGrad)
.INPUT(grad_output, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(input_size, ListInt)
.ATTR(output_size, ListInt, {})
.ATTR(scales, ListFloat, {})
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(UpsampleTrilinear3dGrad)
} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_

+ 12
- 10
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -1120,11 +1120,12 @@ REG_OP(IndexAdd)
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n


*@par Inputs: *@par Inputs:
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
*@li diagonal:(int, optional) – the diagonal to consider。\n
*x: A Tensor. Must be one of the following types:
*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

*@par Attributes:
*diagonal: An optional attribute indicates the diagonal to consider. \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x" . \n *y: A Tensor. Has the same type as "x" . \n
@@ -1142,11 +1143,12 @@ REG_OP(Triu)
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n


*@par Inputs: *@par Inputs:
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
*@li diagonal:(int, optional) – the diagonal to consider。\n
*x: A Tensor. Must be one of the following types:
*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

*@par Attributes:
*diagonal: An optional attribute indicates the diagonal to consider. \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x" . \n *y: A Tensor. Has the same type as "x" . \n


+ 17
- 16
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -1445,16 +1445,16 @@ REG_OP(DecodeBboxV2)
.OP_END_FACTORY_REG(DecodeBboxV2) .OP_END_FACTORY_REG(DecodeBboxV2)


/** /**
*@brief Computes sort function.
*@brief sort the input tensor and return the value of index.
* *
*@par Inputs: *@par Inputs:
*Inputs include: *Inputs include:
* x: A Tensor. Dtype support: flaot16, flaot, int16, int8,
* x: A Tensor. Dtype support: float16, float, int16, int8,
uint8, int32, int64. uint8, int32, int64.
* *
*@par Attributes: *@par Attributes:
* @li axis: optional, int.
* @li descending: optional,bool.
* @li axis: An optional attribute indicates the sorting axis.
* @li descending: An optional attribute indicates descending sort or not.
* *
*@par Outputs: *@par Outputs:
* @li y1: A Tensor. Must have the same type as x. * @li y1: A Tensor. Must have the same type as x.
@@ -1515,10 +1515,10 @@ whether boxes overlap too much with respect to IOU.
deciding when to remove boxes based on score . \n deciding when to remove boxes based on score . \n


*@par Attributes: *@par Attributes:
*center_point_box:Integer indicate the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
*center_point_box:Integer indicate the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height]. 1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models. \n Mostly used for Pytorch models. \n
@@ -1567,16 +1567,18 @@ deciding when to remove boxes based on score . \n
the last dim representing (batch_id,class_id,index_id) . \n the last dim representing (batch_id,class_id,index_id) . \n


*@par Attributes: *@par Attributes:
*center_point_box:Integer indicate the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
*@li center_point_box:Integer indicate the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height]. 1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models. \n
Mostly used for Pytorch models.
*@li max_boxes_size: An optional attribute integer representing the real maximum
*number of boxes to be selected by non max suppression . \n


*@par Outputs: *@par Outputs:
*@li selected_indices: A 2-D integer tensor of shape [M] representing the
*selected_indices: A 2-D integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size. \n selected indices from the boxes tensor, where M <= max_output_size. \n


*@attention Constraints: *@attention Constraints:
@@ -1602,7 +1604,7 @@ REG_OP(NonMaxSuppressionV7)
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n *@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n


*@par Inputs: *@par Inputs:
* Three inputs, including:
* Two inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16. *@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". * the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
@@ -1818,4 +1820,3 @@ REG_OP(GridAssignPositive)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_


+ 14
- 16
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -568,7 +568,7 @@ REG_OP(LayerNorm)
.OP_END_FACTORY_REG(LayerNorm) .OP_END_FACTORY_REG(LayerNorm)


/** /**
*@brief Returns a tensor where each sub-tensor of input along dimension
*@brief Returns a tensor where each sub-tensor of input along dimension
* dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n * dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n


*@par Inputs: *@par Inputs:
@@ -576,7 +576,7 @@ REG_OP(LayerNorm)
* @li x: A Tensor. Must be one of the following types: float16, float32 . \n * @li x: A Tensor. Must be one of the following types: float16, float32 . \n


*@par Attributes: *@par Attributes:
* @li p: Specify L_p norm, the type is float.
* @li p: Specify L_p norm, the type is float.
* @li dim: The processed dim, the type is int. * @li dim: The processed dim, the type is int.
* @li maxnorm: Threshold for comparison, the type is float. \n * @li maxnorm: Threshold for comparison, the type is float. \n


@@ -1543,14 +1543,14 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
.ATTR(reduction, String, "mean") .ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2)
/** /**
* @brief Calculate the PoissonNllLoss function.
* @brief Calculate the PoissonNllLoss function.
* target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n * target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n


* @par Inputs: * @par Inputs:
* Two inputs, including: * Two inputs, including:
* @li input_x: A tensor. Must be one of the following types: * @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n
*
*
* @par Inputs: * @par Inputs:
* @li target: A tensor. Must be one of the following types: * @li target: A tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n
@@ -1558,13 +1558,13 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
* @par Attributes: * @par Attributes:
* four Attributes, including: * four Attributes, including:
* @li log_input: An optional bool. Defaults to "True" \n * @li log_input: An optional bool. Defaults to "True" \n
*
*
* @par Attributes: * @par Attributes:
* @li full: An optional bool. Defaults to "False" \n * @li full: An optional bool. Defaults to "False" \n
*
*
* @par Attributes: * @par Attributes:
* @li eps: An optional float. Defaults to "1e-8" \n * @li eps: An optional float. Defaults to "1e-8" \n
*
*
* @par Attributes: * @par Attributes:
* @li reduction: An optional string. Defaults to "mean" \n * @li reduction: An optional string. Defaults to "mean" \n


@@ -1592,7 +1592,7 @@ REG_OP(PoissonNllLoss)
* @li num_step: A required int.\n * @li num_step: A required int.\n
* @li hidden_size: A required int. \n * @li hidden_size: A required int. \n
* *
*
*
* @par Output: * @par Output:
* y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n * y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
* *
@@ -1605,24 +1605,22 @@ REG_OP(RnnGenMask)
.OP_END_FACTORY_REG(RnnGenMask) .OP_END_FACTORY_REG(RnnGenMask)


/** /**
* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss)
* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss)
* between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n * between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n
* @par Inputs: * @par Inputs:
* Two inputs, including: * Two inputs, including:
* @li x: A tensor. Must be one of the following types: * @li x: A tensor. Must be one of the following types:
* float16, float32. \n
*
* @par Inputs:
* float16, float32.
* @li target: A tensor. Must be the following types: * @li target: A tensor. Must be the following types:
* int32. \n * int32. \n


* @par Attributes: * @par Attributes:
* @li reduction: An optional string. Defaults to "mean" \n
* reduction: An optional string. Defaults to "mean" \n


* @par Outputs: * @par Outputs:
* y: A Tensor has same element type as input x. \n
* is_target: A Tensor has same element type as input target. \n
* @li y: A Tensor has same element type as input x. \n
* @li is_target: A Tensor has same element type as input target. \n


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator MultiLabelMarginLoss. \n * Compatible with the Pytorch operator MultiLabelMarginLoss. \n


+ 14
- 18
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -224,22 +224,22 @@ REG_OP(Relu6Grad)
.OUTPUT(backprops, TensorType::RealNumberType()) .OUTPUT(backprops, TensorType::RealNumberType())
.OP_END_FACTORY_REG(Relu6Grad) .OP_END_FACTORY_REG(Relu6Grad)
/** /**
*@brief Calculate the elu_grad_v2 function.
*@brief Calculate the elu_grad_v2 function.
*Applies the element-wise function: *Applies the element-wise function:
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . * Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
*@par Inputs: *@par Inputs:
*One inputs, including: *One inputs, including:
* @li grads: A tensor. Must be one of the following types: * @li grads: A tensor. Must be one of the following types:
* float16, float32.
* float16, float32.
* @li activations: A tensor. Must be one of the following types: * @li activations: A tensor. Must be one of the following types:
* float16, float32.
* float16, float32.
* *
*@par Outputs: *@par Outputs:
*y: A Tensor with the same type and shape of grads's. *y: A Tensor with the same type and shape of grads's.
*
*
*@par Attributes: *@par Attributes:
*@li alpha: scalar parameter, default value = 1.0 *@li alpha: scalar parameter, default value = 1.0
*/
*/
REG_OP(EluGradV2) REG_OP(EluGradV2)
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -539,24 +539,20 @@ REG_OP(Elu)
*x: A float16, float32, for the input data type . \n *x: A float16, float32, for the input data type . \n


*@par Attributes: *@par Attributes:
*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n

*@par Attributes:
*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n

*@par Attributes:
*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n


*@par Outputs: *@par Outputs:
*y: A float16, float32, for the normalized result . \n *y: A float16, float32, for the normalized result . \n


*@attention Constraints: *@attention Constraints:
*@li The input is of type float16 or float32 . \n
*The input is of type float16 or float32 . \n


*@par Multiple batches supported or not *@par Multiple batches supported or not
*Supported *Supported
*@par Third-party framework compatibility *@par Third-party framework compatibility
*@li Compatible with ONNX's Celu operator
*Compatible with ONNX's Celu operator
*/ */
REG_OP(Celu) REG_OP(Celu)
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
@@ -808,15 +804,15 @@ REG_OP(SoftplusV2Grad)
/** /**
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
*
*
* @par inputs * @par inputs
* one input including: * one input including:
* @li x: input A Tensor. Must be one of the following types: float32, float16 * @li x: input A Tensor. Must be one of the following types: float32, float16
*
*
* @par output * @par output
* one output including: * one output including:
* @li y:A Tensor of the same type as x * @li y:A Tensor of the same type as x
*
*
*/ */
REG_OP(ThresholdedRelu) REG_OP(ThresholdedRelu)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -892,7 +888,7 @@ REG_OP(HardShrink)


* @par Third-party framework compatibility * @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardsigmoid. \n * Compatible with the Pytorch operator Hardsigmoid. \n
*/
*/
REG_OP(HardSigmoid) REG_OP(HardSigmoid)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))


+ 3
- 3
third_party/fwkacllib/inc/ops/pad_ops.h View File

@@ -213,11 +213,11 @@ REG_OP(PadV2)
*@brief Pads a tensor . \n *@brief Pads a tensor . \n


*@par Inputs: *@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
*constant_values: A Tensor. Must have the same type as input.
*@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
*@li constant_values: A Tensor. Must have the same type as input.


*@par Attributes: *@par Attributes:
*paddings: An optional "vector<vector<int>>". Defaults to "{}".
*paddings: A required Attribute.
* For each dimension D of input, paddings[D, 0] indicates how many * For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension, * values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the * and paddings[D, 1] indicates how many values to add after the


+ 24
- 0
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -584,6 +584,30 @@ REG_OP(DropoutV2)
.OUTPUT(seed, TensorType({ DT_FLOAT })) .OUTPUT(seed, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(p, Float) .REQUIRED_ATTR(p, Float)
.OP_END_FACTORY_REG(DropoutV2) .OP_END_FACTORY_REG(DropoutV2)

/**
* @brief The Bernoulli distribution with probability . \n

* @par Inputs:
* @li x: A ND Tensor. Must be one of the following data types:
int8, uint8, int16, int32, int64, bool, float32, float64 .
* @li p: A ND Tensor. The probability of an element to be zeroed.
Must be one of the following data types: float32, float64. \n

* @par Attributes:
* seed: An Integer, the seed of the random generator. Default value -1
to use current timestamp, otherwise it should be a positive integer.

* @par Outputs:
* y: A tensor with the same shape and type as "x".
*/

REG_OP(Bernoulli)
.INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
.INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE }))
.OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
.ATTR(seed, Int, -1)
.OP_END_FACTORY_REG(Bernoulli)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_

+ 10
- 8
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -182,14 +182,14 @@ REG_OP(Permute)
* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32. * int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32.


*@par Outputs: *@par Outputs:
* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened
* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened
* to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output. * to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output.
* Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . * Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 .


*@par Attributes: *@par Attributes:
* axis: A optional int32, default value is 1. Indicate up to which input dimensions (exclusive) should be flattened
* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of
* the input tensor. Negative value means counting dimensions from the back. When axis = 0, the shape of
* axis: A optional int32, default value is 1. Indicate up to which input dimensions (exclusive) should be flattened
* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of
* the input tensor. Negative value means counting dimensions from the back. When axis = 0, the shape of
* the output tensor is (1, (d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n). * the output tensor is (1, (d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n).


*@par Third-party framework compatibility *@par Third-party framework compatibility
@@ -723,11 +723,13 @@ REG_OP(CompressFcOp)
*@brief Performs Col2im for each batch entry. \n *@brief Performs Col2im for each batch entry. \n


*@par Inputs: *@par Inputs:
*@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`.
where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n
*@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`.
where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1.
*@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n


*@par Outputs: *@par Outputs:
*@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n
*y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n



*@par Attributes: *@par Attributes:
*@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution. *@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution.
@@ -837,7 +839,7 @@ REG_OP(AffineGrid)
*@par Inputs: *@par Inputs:
*Four inputs, including: *Four inputs, including:
*@li x: The input tensor. *@li x: The input tensor.
*@li size: The shape of output tensor.
*@li size: The shape of output tensor.
*@li stride: The stride of output tensor. *@li stride: The stride of output tensor.
*@li storage_offset: The offset in the underlying storage of the output tensor. \n *@li storage_offset: The offset in the underlying storage of the output tensor. \n




+ 18
- 2
third_party/fwkacllib/inc/runtime/event.h View File

@@ -23,12 +23,18 @@
extern "C" { extern "C" {
#endif #endif


typedef enum rtEventWaitStatus {
EVENT_STATUS_COMPLETE = 0,
EVENT_STATUS_NOT_READY = 1,
EVENT_STATUS_MAX = 2,
} rtEventWaitStatus_t;

/** /**
* @ingroup event_flags * @ingroup event_flags
* @brief event op bit flags * @brief event op bit flags
*/ */
#define RT_EVENT_DEFAULT (0x00)
#define RT_EVENT_WITH_FLAG (0x01)
#define RT_EVENT_DEFAULT (0x0E)
#define RT_EVENT_WITH_FLAG (0x0B)


#define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_DDSYNC_NS 0x01U
#define RT_EVENT_STREAM_MARK 0x02U #define RT_EVENT_STREAM_MARK 0x02U
@@ -111,6 +117,16 @@ RTS_API rtError_t rtEventQuery(rtEvent_t event);


/** /**
* @ingroup dvrt_event * @ingroup dvrt_event
* @brief Queries an event's wait status
* @param [in] event event to query
* @param [in out] EVENT_WAIT_STATUS status
* @return EVENT_STATUS_COMPLETE for complete
* @return EVENT_STATUS_NOT_READY for not complete
*/
RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t event, rtEventWaitStatus_t *status);

/**
* @ingroup dvrt_event
* @brief computes the elapsed time between events. * @brief computes the elapsed time between events.
* @param [in] time time between start and end in ms * @param [in] time time between start and end in ms
* @param [in] start starting event * @param [in] start starting event


Loading…
Cancel
Save