@@ -60,6 +60,7 @@ static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resou | |||||
static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | ||||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | ||||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | ||||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | ||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | ||||
@@ -293,6 +293,7 @@ struct OpDescInfo { | |||||
std::string dev_func; | std::string dev_func; | ||||
std::string tvm_magic; | std::string tvm_magic; | ||||
uint32_t tiling_key = 0U; | uint32_t tiling_key = 0U; | ||||
uintptr_t args = 0U; | |||||
std::string tiling_data; | std::string tiling_data; | ||||
std::string node_info; | std::string node_info; | ||||
std::vector<int64_t> workspace_bytes; | std::vector<int64_t> workspace_bytes; | ||||
@@ -1 +1 @@ | |||||
Subproject commit 7d777404b3b7fe7daeaf00e566e431c6a05b040a | |||||
Subproject commit fe47d04d75170006fc0d28538dec49a2da426ceb |
@@ -58,6 +58,10 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | ||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | ||||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | ||||
static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | ||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | ||||
@@ -97,6 +101,10 @@ static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | ||||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | ||||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | ||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | ||||
@@ -105,5 +113,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc di | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -498,6 +498,25 @@ REG_OP(Constant) | |||||
.OP_END_FACTORY_REG(Constant) | .OP_END_FACTORY_REG(Constant) | ||||
/** | /** | ||||
*@brief Creates a file constant tensor, The operator is used to process the very large weight which is store in file. \n | |||||
*@par Attributes: | |||||
*file_id: A string, used to record file id. \n | |||||
*shape: data shape. \n | |||||
*dtype: data type. \n | |||||
*@par Outputs: | |||||
*y: The FileConstant tensor. \n | |||||
*/ | |||||
REG_OP(FileConstant) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ | |||||
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(file_id, String) | |||||
.REQUIRED_ATTR(shape, ListInt) | |||||
.REQUIRED_ATTR(dtype, Type) | |||||
.OP_END_FACTORY_REG(FileConstant) | |||||
/** | |||||
*@brief Returns a copy of the input tensor. \n | *@brief Returns a copy of the input tensor. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -1330,31 +1349,6 @@ REG_OP(ExpandD) | |||||
.OP_END_FACTORY_REG(ExpandD) | .OP_END_FACTORY_REG(ExpandD) | ||||
/** | /** | ||||
* @brief Calculate buckets limit and offset. \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n | |||||
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||||
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||||
* @par Attributes: | |||||
* total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||||
* @par Outputs: | |||||
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||||
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||||
*/ | |||||
REG_OP(CalcBucketsLimitAndOffset) | |||||
.INPUT(bucket_list, TensorType({DT_INT32})) | |||||
.INPUT(ivf_counts, TensorType({DT_INT32})) | |||||
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(total_limit, Int) | |||||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||||
/** | |||||
*@brief Get dim number in tensordesc. \n | *@brief Get dim number in tensordesc. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -1362,6 +1356,9 @@ REG_OP(CalcBucketsLimitAndOffset) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A 1D tensor. The data type must be int32. \n | *y: A 1D tensor. The data type must be int32. \n | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(GetShape) | REG_OP(GetShape) | ||||
.DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ | .DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ | ||||
@@ -1377,8 +1374,13 @@ REG_OP(GetShape) | |||||
*@par outputs: | *@par outputs: | ||||
* y: a tensor_desc, type is int.\n | * y: a tensor_desc, type is int.\n | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(UpdateTensorDesc) | REG_OP(UpdateTensorDesc) | ||||
.INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | |||||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | ||||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | ||||
.REQUIRED_ATTR(shape, ListInt) | .REQUIRED_ATTR(shape, ListInt) | ||||
@@ -586,6 +586,14 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||||
channels], The image tensor that was resized . \n | channels], The image tensor that was resized . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li size: An optional listint. Defaults to {}. | |||||
*@par Attributes: | |||||
*@li ori_image_size: An optional listint. Defaults to {}. | |||||
*@par Attributes: | |||||
*@li src_start_w: An optional int. Defaults to 0. | |||||
*@par Attributes: | |||||
*@li dst_start_w: An optional int. Defaults to 0. | |||||
*@par Attributes: | |||||
*@li align_corners: An optional bool. Defaults to False. If true, the centers of | *@li align_corners: An optional bool. Defaults to False. If true, the centers of | ||||
the 4 corner pixels of the input and grad tensors are aligned. Defaults to | the 4 corner pixels of the input and grad tensors are aligned. Defaults to | ||||
false . | false . | ||||
@@ -606,6 +614,10 @@ REG_OP(ResizeBilinearV2Grad) | |||||
.INPUT(grads, TensorType({DT_FLOAT})) | .INPUT(grads, TensorType({DT_FLOAT})) | ||||
.INPUT(original_image, TensorType::FloatingDataType()) | .INPUT(original_image, TensorType::FloatingDataType()) | ||||
.OUTPUT(y, TensorType({DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT})) | ||||
.ATTR(size, ListInt, {}) | |||||
.ATTR(ori_image_size, ListInt, {}) | |||||
.ATTR(src_start_w, Int, 0) | |||||
.ATTR(dst_start_w, Int, 0) | |||||
.ATTR(align_corners, Bool, false) | .ATTR(align_corners, Bool, false) | ||||
.ATTR(half_pixel_centers, Bool, false) | .ATTR(half_pixel_centers, Bool, false) | ||||
.OP_END_FACTORY_REG(ResizeBilinearV2Grad) | .OP_END_FACTORY_REG(ResizeBilinearV2Grad) | ||||
@@ -624,7 +636,10 @@ size for the images . \n | |||||
output tensors are aligned, preserving the values at the corner pixels. | output tensors are aligned, preserving the values at the corner pixels. | ||||
Defaults to false . | Defaults to false . | ||||
* @li half_pixel_centers: An optional bool. Defaults to False . \n | * @li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
*@li ori_image_size: An optional listint. Defaults to {}. | |||||
*@li split_size: An optional listint. Defaults to {}. | |||||
*@li src_start_w: An optional int. Defaults to 0. | |||||
*@li dst_start_w: An optional int. Defaults to 0. | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | *y: 4-D with shape [batch, new_height, new_width, channels] . \n | ||||
@@ -640,6 +655,10 @@ REG_OP(ResizeBilinearV2) | |||||
DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | ||||
.INPUT(size, TensorType({DT_INT32})) | .INPUT(size, TensorType({DT_INT32})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT})) | ||||
.ATTR(ori_image_size, ListInt, {}) | |||||
.ATTR(split_size, ListInt, {}) | |||||
.ATTR(src_start_w, Int, 0) | |||||
.ATTR(dst_start_w, Int, 0) | |||||
.ATTR(align_corners, Bool, false) | .ATTR(align_corners, Bool, false) | ||||
.ATTR(half_pixel_centers, Bool, false) | .ATTR(half_pixel_centers, Bool, false) | ||||
.OP_END_FACTORY_REG(ResizeBilinearV2) | .OP_END_FACTORY_REG(ResizeBilinearV2) | ||||
@@ -113,9 +113,7 @@ if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | ||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | ||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . | |||||
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. \n | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | ||||
@@ -137,7 +135,6 @@ REG_OP(BatchNorm) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | ||||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | ||||
.OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | .ATTR(epsilon, Float, 0.0001) | ||||
.ATTR(data_format, String, "NHWC") | .ATTR(data_format, String, "NHWC") | ||||
.ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
@@ -167,6 +164,33 @@ REG_OP(SyncBatchNormBackwardReduce) | |||||
.OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) | .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) | ||||
/** | /** | ||||
*@brief part of SyncBatchNormBackward . \n | |||||
*@par Inputs: | |||||
* Three inputs, including: | |||||
*@li grad_output: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li save_input: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li mean: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li invstd: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li weight: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li mean_dy: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li mean_dy_xmu: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
*@par Outputs: | |||||
*@li grad_input: A Tensor. Has the same type and format as input "grad_output" . \n | |||||
*/ | |||||
REG_OP(SyncBatchNormBackwardElemt) | |||||
.INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(save_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(invstd, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(mean_dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(mean_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(grad_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(SyncBatchNormBackwardElemt) | |||||
/** | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -285,8 +309,7 @@ REG_OP(BatchNormExt2) | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | ||||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | *@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | ||||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | *@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | ||||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . | |||||
*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n | |||||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | ||||
@@ -313,7 +336,6 @@ REG_OP(BatchNormGrad) | |||||
.INPUT(scale, TensorType({DT_FLOAT})) | .INPUT(scale, TensorType({DT_FLOAT})) | ||||
.INPUT(reserve_space_1, TensorType({DT_FLOAT})) | .INPUT(reserve_space_1, TensorType({DT_FLOAT})) | ||||
.INPUT(reserve_space_2, TensorType({DT_FLOAT})) | .INPUT(reserve_space_2, TensorType({DT_FLOAT})) | ||||
.OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT})) | |||||
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | ||||
.OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | ||||
@@ -128,7 +128,7 @@ REG_OP(OCRIdentifyPreHandle) | |||||
.INPUT(imgs_offset, TensorType({DT_INT32})) | .INPUT(imgs_offset, TensorType({DT_INT32})) | ||||
.INPUT(imgs_size, TensorType({DT_INT32})) | .INPUT(imgs_size, TensorType({DT_INT32})) | ||||
.OUTPUT(resized_imgs, TensorType({DT_UINT8})) | .OUTPUT(resized_imgs, TensorType({DT_UINT8})) | ||||
.ATTR(size, ListInt, {}) | |||||
.REQUIRED_ATTR(size, ListInt) | |||||
.ATTR(data_format, String, "NHWC") | .ATTR(data_format, String, "NHWC") | ||||
.OP_END_FACTORY_REG(OCRIdentifyPreHandle) | .OP_END_FACTORY_REG(OCRIdentifyPreHandle) | ||||
@@ -247,6 +247,7 @@ REG_OP(OCRDetectionPostHandle) | |||||
*@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n | *@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n | ||||
*@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n | *@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n | ||||
*@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n | *@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n | ||||
*@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n | |||||
*/ | */ | ||||
REG_OP(ResizeAndClipPolys) | REG_OP(ResizeAndClipPolys) | ||||
.INPUT(polys_data, TensorType({DT_INT32})) | .INPUT(polys_data, TensorType({DT_INT32})) | ||||
@@ -259,6 +260,7 @@ REG_OP(ResizeAndClipPolys) | |||||
.OUTPUT(clipped_polys_data, TensorType({DT_INT32})) | .OUTPUT(clipped_polys_data, TensorType({DT_INT32})) | ||||
.OUTPUT(clipped_polys_offset, TensorType({DT_INT32})) | .OUTPUT(clipped_polys_offset, TensorType({DT_INT32})) | ||||
.OUTPUT(clipped_polys_size, TensorType({DT_INT32})) | .OUTPUT(clipped_polys_size, TensorType({DT_INT32})) | ||||
.OUTPUT(clipped_polys_num, TensorType({DT_INT32})) | |||||
.OP_END_FACTORY_REG(ResizeAndClipPolys); | .OP_END_FACTORY_REG(ResizeAndClipPolys); | ||||
@@ -1305,6 +1305,27 @@ REG_OP(ReduceStdWithMean) | |||||
.ATTR(invert, Bool, false) | .ATTR(invert, Bool, false) | ||||
.ATTR(epsilon, Float, 0.001) | .ATTR(epsilon, Float, 0.001) | ||||
.OP_END_FACTORY_REG(ReduceStdWithMean) | .OP_END_FACTORY_REG(ReduceStdWithMean) | ||||
/** | |||||
*@brief Performs reduced batch normalization . \n | |||||
*@par Inputs: | |||||
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n | |||||
*@par Outputs: | |||||
*@li mean: A Tensor of type float32 for SUM reduced "x". | |||||
*@li variance: A Tensor of type float32 for square sum reduced "x" . \n | |||||
*@par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(ReduceMeanVariance) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.ATTR(axes, ListInt, {}) | |||||
.ATTR(keep_dims, Bool, true) | |||||
.OP_END_FACTORY_REG(ReduceMeanVariance) | |||||
} //namespace ge | } //namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -78,8 +78,8 @@ REG_OP(TopKPQDistance) | |||||
.OUTPUT(topk_ivf, TensorType({DT_INT32})) | .OUTPUT(topk_ivf, TensorType({DT_INT32})) | ||||
.OUTPUT(topk_index, TensorType({DT_INT32})) | .OUTPUT(topk_index, TensorType({DT_INT32})) | ||||
.ATTR(order, String, "ASC") | .ATTR(order, String, "ASC") | ||||
.ATTR(k, Int, 0) | |||||
.ATTR(group_size, Int, 0) | |||||
.REQUIRED_ATTR(k, Int) | |||||
.REQUIRED_ATTR(group_size, Int) | |||||
.OP_END_FACTORY_REG(TopKPQDistance) | .OP_END_FACTORY_REG(TopKPQDistance) | ||||
/** | /** | ||||
@@ -129,6 +129,68 @@ REG_OP(ScanPQCodes) | |||||
.ATTR(split_count, Int, 1) | .ATTR(split_count, Int, 1) | ||||
.ATTR(split_index, Int, 0) | .ATTR(split_index, Int, 0) | ||||
.OP_END_FACTORY_REG(ScanPQCodes) | .OP_END_FACTORY_REG(ScanPQCodes) | ||||
/** | |||||
* @brief Calculate buckets limit and offset. \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n | |||||
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||||
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||||
* @par Attributes: | |||||
* total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||||
* @par Outputs: | |||||
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||||
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||||
*/ | |||||
REG_OP(CalcBucketsLimitAndOffset) | |||||
.INPUT(bucket_list, TensorType({DT_INT32})) | |||||
.INPUT(ivf_counts, TensorType({DT_INT32})) | |||||
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(total_limit, Int) | |||||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||||
/** | |||||
* @brief Calculate ProdVirialSeA. \n | |||||
* | |||||
* @par Inputs: | |||||
* Five inputs, including: | |||||
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||||
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||||
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||||
* @li nlist: A Tensor. dtype is int32. | |||||
* @li natoms: A Tensor. dtype is int32. \n | |||||
* | |||||
* @par Outputs: | |||||
* Two outputs, including: | |||||
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||||
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||||
* | |||||
* @par Attributes: | |||||
* Two attributes, including: | |||||
* @li n_a_sel: A Scalar. | |||||
* @li n_r_sel: A Scalar. \n | |||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(ProdVirialSeA) | |||||
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(nlist, TensorType({DT_INT32})) | |||||
.INPUT(natoms, TensorType({DT_INT32})) | |||||
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(n_a_sel, Int) | |||||
.REQUIRED_ATTR(n_r_sel, Int) | |||||
.ATTR(nall, Int, 28328) | |||||
.OP_END_FACTORY_REG(ProdVirialSeA) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ |
@@ -14,8 +14,8 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef __CCE_RUNTIME_BASE_H__ | |||||
#define __CCE_RUNTIME_BASE_H__ | |||||
#ifndef CCE_RUNTIME_BASE_H | |||||
#define CCE_RUNTIME_BASE_H | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include "toolchain/prof_callback.h" | #include "toolchain/prof_callback.h" | ||||
@@ -443,4 +443,4 @@ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||||
} | } | ||||
#endif | #endif | ||||
#endif // __CCE_RUNTIME_BASE_H__ | |||||
#endif // CCE_RUNTIME_BASE_H |
@@ -239,8 +239,18 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); | |||||
*/ | */ | ||||
RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | ||||
/** | |||||
* @ingroup | |||||
* @brief get is Heterogenous. | |||||
* @param [out] heterogenous=1 Heterogenous Mode: read isHeterogenous=1 in ini file. | |||||
* @param [out] heterogenous=0 NOT Heterogenous Mode: | |||||
* 1:not found ini file, 2:error when reading ini, 3:Heterogenous value is not 1 | |||||
* @return RT_ERROR_NONE for ok | |||||
*/ | |||||
RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous); | |||||
#if defined(__cplusplus) | #if defined(__cplusplus) | ||||
} | } | ||||
#endif | #endif | ||||
#endif // CCE_RUNTIME_CONFIG_H | |||||
#endif // CCE_RUNTIME_CONFIG_H |
@@ -25,7 +25,7 @@ extern "C" { | |||||
#define RT_CAPABILITY_SUPPORT (0x1U) | #define RT_CAPABILITY_SUPPORT (0x1U) | ||||
#define RT_CAPABILITY_NOT_SUPPORT (0x0U) | #define RT_CAPABILITY_NOT_SUPPORT (0x0U) | ||||
#define MEMORY_INFO_TS_4G_LIMITED (0x0) // for compatibility | |||||
#define MEMORY_INFO_TS_4G_LIMITED (0x0U) // for compatibility | |||||
typedef struct tagRTDeviceInfo { | typedef struct tagRTDeviceInfo { | ||||
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | ||||
@@ -173,6 +173,15 @@ RTS_API rtError_t rtSetDeviceV2(int32_t device, rtDeviceMode deviceMode); | |||||
/** | /** | ||||
* @ingroup dvrt_dev | * @ingroup dvrt_dev | ||||
* @brief get deviceMode | |||||
* @param [out] deviceMode the device mode | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtGetDeviceMode(rtDeviceMode *deviceMode); | |||||
/** | |||||
* @ingroup dvrt_dev | |||||
* @brief set target die for current thread | * @brief set target die for current thread | ||||
* @param [int] die the die id | * @param [int] die the die id | ||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
@@ -133,8 +133,11 @@ typedef struct tagRtArgsWithTiling { | |||||
uint16_t tilingDataOffset; // tiling data offset | uint16_t tilingDataOffset; // tiling data offset | ||||
uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list | uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list | ||||
uint16_t hostInputDataOffset; // host_mem input data offset | uint16_t hostInputDataOffset; // host_mem input data offset | ||||
bool hasHostMemInput; // has host_memory input data in args or not: ture or false | |||||
uint8_t reserved[7]; | |||||
uint8_t hasHostMemInput; // has host_memory input data in args or not: 0 means no host_memory input data, | |||||
// others means has host_memory input data. | |||||
uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy, | |||||
// others means doesn't need H2D copy. | |||||
uint8_t reserved[6]; | |||||
} rtArgsWithTiling_t; | } rtArgsWithTiling_t; | ||||
/** | /** | ||||
@@ -299,8 +302,8 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); | |||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, const void *devFunc, | |||||
uint32_t funcMode); | |||||
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, | |||||
const void *devFunc, uint32_t funcMode); | |||||
/** | /** | ||||
* @ingroup rt_kernel | * @ingroup rt_kernel | ||||
@@ -371,8 +374,9 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, | |||||
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||||
const void *kernelInfo); | |||||
/** | /** | ||||
* @ingroup rt_kernel | * @ingroup rt_kernel | ||||
@@ -576,7 +576,7 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea | |||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
* @return RT_ERROR_DRV_ERR for driver error | * @return RT_ERROR_DRV_ERR for driver error | ||||
*/ | */ | ||||
RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int num); | |||||
RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num); | |||||
/** | /** | ||||
* @ingroup dvrt_mem | * @ingroup dvrt_mem | ||||
@@ -31,5 +31,6 @@ | |||||
#include "rt_ffts.h" | #include "rt_ffts.h" | ||||
#include "rt_ffts_plus.h" | #include "rt_ffts_plus.h" | ||||
#include "rt_dfx.h" | #include "rt_dfx.h" | ||||
#include "rt_mem_queue.h" | |||||
#endif // CCE_RUNTIME_RT_H | |||||
#endif // CCE_RUNTIME_RT_H |
@@ -23,6 +23,8 @@ | |||||
#define PROF_AICORE_METRICS 0x00000004 | #define PROF_AICORE_METRICS 0x00000004 | ||||
#define PROF_AICPU_TRACE 0x00000008 | #define PROF_AICPU_TRACE 0x00000008 | ||||
#define PROF_L2CACHE 0x00000010 | #define PROF_L2CACHE 0x00000010 | ||||
#define PROF_HCCL_TRACE 0x00000020 | |||||
#define PROF_TRAINING_TRACE 0x00000040 | |||||
// system profilinig switch | // system profilinig switch | ||||
#define PROF_CPU 0x00010000 | #define PROF_CPU 0x00010000 | ||||
@@ -41,10 +43,7 @@ | |||||
#define PROF_AIVECTORCORE_METRICS 0x0000020000000 | #define PROF_AIVECTORCORE_METRICS 0x0000020000000 | ||||
#define PROF_SUBTASK_TIME 0x0000040000000 | #define PROF_SUBTASK_TIME 0x0000040000000 | ||||
#define PROF_TRAINING_TRACE 0x0000080000000 | |||||
#define PROF_HCCL_TRACE 0x0000100000000 | |||||
#define PROF_TASK_TRACE 0x0000185000002 | |||||
#define PROF_TASK_TRACE 0x0000005000062 | |||||
#define PROF_MODEL_LOAD 0x8000000000000000 | #define PROF_MODEL_LOAD 0x8000000000000000 | ||||
@@ -54,6 +53,8 @@ | |||||
#define PROF_AICORE_METRICS_MASK 0x00000004 | #define PROF_AICORE_METRICS_MASK 0x00000004 | ||||
#define PROF_AICPU_TRACE_MASK 0x00000008 | #define PROF_AICPU_TRACE_MASK 0x00000008 | ||||
#define PROF_L2CACHE_MASK 0x00000010 | #define PROF_L2CACHE_MASK 0x00000010 | ||||
#define PROF_HCCL_TRACE_MASK 0x00000020 | |||||
#define PROF_TRAINING_TRACE_MASK 0x00000040 | |||||
// system profilinig mask | // system profilinig mask | ||||
#define PROF_CPU_MASK 0x00010000 | #define PROF_CPU_MASK 0x00010000 | ||||
@@ -72,9 +73,6 @@ | |||||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 | #define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 | ||||
#define PROF_SUBTASK_TIME_MASK 0x0000040000000 | #define PROF_SUBTASK_TIME_MASK 0x0000040000000 | ||||
#define PROF_TRAINING_TRACE_MASK 0x0000080000000 | |||||
#define PROF_HCCL_TRACE_MASK 0x0000100000000 | |||||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | #define PROF_MODEL_LOAD_MASK 0x8000000000000000 | ||||
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | #if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | ||||