@@ -60,6 +60,7 @@ static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resou | |||
static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
@@ -293,6 +293,7 @@ struct OpDescInfo { | |||
std::string dev_func; | |||
std::string tvm_magic; | |||
uint32_t tiling_key = 0U; | |||
uintptr_t args = 0U; | |||
std::string tiling_data; | |||
std::string node_info; | |||
std::vector<int64_t> workspace_bytes; | |||
@@ -1 +1 @@ | |||
Subproject commit 7d777404b3b7fe7daeaf00e566e431c6a05b040a | |||
Subproject commit fe47d04d75170006fc0d28538dec49a2da426ceb |
@@ -58,6 +58,10 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
@@ -97,6 +101,10 @@ static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
@@ -105,5 +113,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc di | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -498,6 +498,25 @@ REG_OP(Constant) | |||
.OP_END_FACTORY_REG(Constant) | |||
/** | |||
*@brief Creates a file constant tensor. The operator is used to process very large weights which are stored in a file. \n | |||
*@par Attributes: | |||
*file_id: A string, used to record file id. \n | |||
*shape: data shape. \n | |||
*dtype: data type. \n | |||
*@par Outputs: | |||
*y: The FileConstant tensor. \n | |||
*/ | |||
REG_OP(FileConstant) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ | |||
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) | |||
.REQUIRED_ATTR(file_id, String) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.OP_END_FACTORY_REG(FileConstant) | |||
/** | |||
*@brief Returns a copy of the input tensor. \n | |||
*@par Inputs: | |||
@@ -1330,31 +1349,6 @@ REG_OP(ExpandD) | |||
.OP_END_FACTORY_REG(ExpandD) | |||
/** | |||
* @brief Calculate buckets limit and offset. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n | |||
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||
* @par Attributes: | |||
* total_limit: An int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||
* @par Outputs: | |||
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||
*/ | |||
REG_OP(CalcBucketsLimitAndOffset) | |||
.INPUT(bucket_list, TensorType({DT_INT32})) | |||
.INPUT(ivf_counts, TensorType({DT_INT32})) | |||
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(total_limit, Int) | |||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
/** | |||
*@brief Get dim number in tensordesc. \n | |||
*@par Inputs: | |||
@@ -1362,6 +1356,9 @@ REG_OP(CalcBucketsLimitAndOffset) | |||
*@par Outputs: | |||
*y: A 1D tensor. The data type must be int32. \n | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GetShape) | |||
.DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ | |||
@@ -1377,8 +1374,13 @@ REG_OP(GetShape) | |||
*@par outputs: | |||
* y: a tensor_desc, type is int.\n | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(UpdateTensorDesc) | |||
.INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | |||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | |||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
@@ -586,6 +586,14 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||
channels], The image tensor that was resized . \n | |||
*@par Attributes: | |||
*@li size: An optional listint. Defaults to {}. | |||
*@par Attributes: | |||
*@li ori_image_size: An optional listint. Defaults to {}. | |||
*@par Attributes: | |||
*@li src_start_w: An optional int. Defaults to 0. | |||
*@par Attributes: | |||
*@li dst_start_w: An optional int. Defaults to 0. | |||
*@par Attributes: | |||
*@li align_corners: An optional bool. Defaults to False. If true, the centers of | |||
the 4 corner pixels of the input and grad tensors are aligned. Defaults to | |||
false . | |||
@@ -606,6 +614,10 @@ REG_OP(ResizeBilinearV2Grad) | |||
.INPUT(grads, TensorType({DT_FLOAT})) | |||
.INPUT(original_image, TensorType::FloatingDataType()) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.ATTR(size, ListInt, {}) | |||
.ATTR(ori_image_size, ListInt, {}) | |||
.ATTR(src_start_w, Int, 0) | |||
.ATTR(dst_start_w, Int, 0) | |||
.ATTR(align_corners, Bool, false) | |||
.ATTR(half_pixel_centers, Bool, false) | |||
.OP_END_FACTORY_REG(ResizeBilinearV2Grad) | |||
@@ -624,7 +636,10 @@ size for the images . \n | |||
output tensors are aligned, preserving the values at the corner pixels. | |||
Defaults to false . | |||
* @li half_pixel_centers: An optional bool. Defaults to False . \n | |||
*@li ori_image_size: An optional listint. Defaults to {}. | |||
*@li split_size: An optional listint. Defaults to {}. | |||
*@li src_start_w: An optional int. Defaults to 0. | |||
*@li dst_start_w: An optional int. Defaults to 0. | |||
*@par Outputs: | |||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
@@ -640,6 +655,10 @@ REG_OP(ResizeBilinearV2) | |||
DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(size, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.ATTR(ori_image_size, ListInt, {}) | |||
.ATTR(split_size, ListInt, {}) | |||
.ATTR(src_start_w, Int, 0) | |||
.ATTR(dst_start_w, Int, 0) | |||
.ATTR(align_corners, Bool, false) | |||
.ATTR(half_pixel_centers, Bool, false) | |||
.OP_END_FACTORY_REG(ResizeBilinearV2) | |||
@@ -113,9 +113,7 @@ if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . | |||
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. \n | |||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. \n | |||
*@attention Constraints: | |||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||
@@ -137,7 +135,6 @@ REG_OP(BatchNorm) | |||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) | |||
.ATTR(epsilon, Float, 0.0001) | |||
.ATTR(data_format, String, "NHWC") | |||
.ATTR(is_training, Bool, true) | |||
@@ -167,6 +164,33 @@ REG_OP(SyncBatchNormBackwardReduce) | |||
.OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) | |||
/** | |||
*@brief part of SyncBatchNormBackward . \n | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li grad_output: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li save_input: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li mean: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li invstd: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li weight: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li mean_dy: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li mean_dy_xmu: A Tensor. Must be one of the following types: float16, float32 . \n | |||
*@par Outputs: | |||
*@li grad_input: A Tensor. Has the same type and format as input "grad_output" . \n | |||
*/ | |||
REG_OP(SyncBatchNormBackwardElemt) | |||
.INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(save_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(invstd, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(mean_dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(mean_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(grad_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(SyncBatchNormBackwardElemt) | |||
/** | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
@@ -285,8 +309,7 @@ REG_OP(BatchNormExt2) | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | |||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | |||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | |||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . | |||
*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n | |||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||
*@par Attributes: | |||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||
@@ -313,7 +336,6 @@ REG_OP(BatchNormGrad) | |||
.INPUT(scale, TensorType({DT_FLOAT})) | |||
.INPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||
.INPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT})) | |||
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | |||
.OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | |||
@@ -128,7 +128,7 @@ REG_OP(OCRIdentifyPreHandle) | |||
.INPUT(imgs_offset, TensorType({DT_INT32})) | |||
.INPUT(imgs_size, TensorType({DT_INT32})) | |||
.OUTPUT(resized_imgs, TensorType({DT_UINT8})) | |||
.ATTR(size, ListInt, {}) | |||
.REQUIRED_ATTR(size, ListInt) | |||
.ATTR(data_format, String, "NHWC") | |||
.OP_END_FACTORY_REG(OCRIdentifyPreHandle) | |||
@@ -247,6 +247,7 @@ REG_OP(OCRDetectionPostHandle) | |||
*@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n | |||
*@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n | |||
*@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n | |||
*@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n | |||
*/ | |||
REG_OP(ResizeAndClipPolys) | |||
.INPUT(polys_data, TensorType({DT_INT32})) | |||
@@ -259,6 +260,7 @@ REG_OP(ResizeAndClipPolys) | |||
.OUTPUT(clipped_polys_data, TensorType({DT_INT32})) | |||
.OUTPUT(clipped_polys_offset, TensorType({DT_INT32})) | |||
.OUTPUT(clipped_polys_size, TensorType({DT_INT32})) | |||
.OUTPUT(clipped_polys_num, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(ResizeAndClipPolys); | |||
@@ -1305,6 +1305,27 @@ REG_OP(ReduceStdWithMean) | |||
.ATTR(invert, Bool, false) | |||
.ATTR(epsilon, Float, 0.001) | |||
.OP_END_FACTORY_REG(ReduceStdWithMean) | |||
/** | |||
*@brief Performs reduced batch normalization . \n | |||
*@par Inputs: | |||
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n | |||
*@par Outputs: | |||
*@li mean: A Tensor of type float32 for SUM reduced "x". | |||
*@li variance: A Tensor of type float32 for square sum reduced "x" . \n | |||
*@par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ReduceMeanVariance) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.ATTR(axes, ListInt, {}) | |||
.ATTR(keep_dims, Bool, true) | |||
.OP_END_FACTORY_REG(ReduceMeanVariance) | |||
} //namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -78,8 +78,8 @@ REG_OP(TopKPQDistance) | |||
.OUTPUT(topk_ivf, TensorType({DT_INT32})) | |||
.OUTPUT(topk_index, TensorType({DT_INT32})) | |||
.ATTR(order, String, "ASC") | |||
.ATTR(k, Int, 0) | |||
.ATTR(group_size, Int, 0) | |||
.REQUIRED_ATTR(k, Int) | |||
.REQUIRED_ATTR(group_size, Int) | |||
.OP_END_FACTORY_REG(TopKPQDistance) | |||
/** | |||
@@ -129,6 +129,68 @@ REG_OP(ScanPQCodes) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ScanPQCodes) | |||
/** | |||
* @brief Calculate buckets limit and offset. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n | |||
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||
* @par Attributes: | |||
* total_limit: An int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||
* @par Outputs: | |||
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||
*/ | |||
REG_OP(CalcBucketsLimitAndOffset) | |||
.INPUT(bucket_list, TensorType({DT_INT32})) | |||
.INPUT(ivf_counts, TensorType({DT_INT32})) | |||
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(total_limit, Int) | |||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
/** | |||
* @brief Calculate ProdVirialSeA. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li nlist: A Tensor. dtype is int32. | |||
* @li natoms: A Tensor. dtype is int32. \n | |||
* | |||
* @par Outputs: | |||
* Two outputs, including: | |||
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li n_a_sel: A Scalar. | |||
* @li n_r_sel: A Scalar. \n | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ProdVirialSeA) | |||
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(nlist, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(nall, Int, 28328) | |||
.OP_END_FACTORY_REG(ProdVirialSeA) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_BASE_H__ | |||
#define __CCE_RUNTIME_BASE_H__ | |||
#ifndef CCE_RUNTIME_BASE_H | |||
#define CCE_RUNTIME_BASE_H | |||
#include <stdint.h> | |||
#include "toolchain/prof_callback.h" | |||
@@ -443,4 +443,4 @@ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_BASE_H__ | |||
#endif // CCE_RUNTIME_BASE_H |
@@ -239,8 +239,18 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); | |||
*/ | |||
RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | |||
/** | |||
* @ingroup | |||
* @brief query whether running in heterogenous mode. | |||
* @param [out] heterogenous=1 Heterogenous Mode: read isHeterogenous=1 in ini file. | |||
* @param [out] heterogenous=0 NOT Heterogenous Mode: | |||
* 1:not found ini file, 2:error when reading ini, 3:Heterogenous value is not 1 | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
#endif // CCE_RUNTIME_CONFIG_H | |||
#endif // CCE_RUNTIME_CONFIG_H |
@@ -25,7 +25,7 @@ extern "C" { | |||
#define RT_CAPABILITY_SUPPORT (0x1U) | |||
#define RT_CAPABILITY_NOT_SUPPORT (0x0U) | |||
#define MEMORY_INFO_TS_4G_LIMITED (0x0) // for compatibility | |||
#define MEMORY_INFO_TS_4G_LIMITED (0x0U) // for compatibility | |||
typedef struct tagRTDeviceInfo { | |||
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | |||
@@ -173,6 +173,15 @@ RTS_API rtError_t rtSetDeviceV2(int32_t device, rtDeviceMode deviceMode); | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get deviceMode | |||
* @param [out] deviceMode the device mode | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetDeviceMode(rtDeviceMode *deviceMode); | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief set target die for current thread | |||
* @param [int] die the die id | |||
* @return RT_ERROR_NONE for ok | |||
@@ -133,8 +133,11 @@ typedef struct tagRtArgsWithTiling { | |||
uint16_t tilingDataOffset; // tiling data offset | |||
uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list | |||
uint16_t hostInputDataOffset; // host_mem input data offset | |||
bool hasHostMemInput; // has host_memory input data in args or not: ture or false | |||
uint8_t reserved[7]; | |||
uint8_t hasHostMemInput; // has host_memory input data in args or not: 0 means no host_memory input data, | |||
// others means has host_memory input data. | |||
uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy, | |||
// others means doesn't need H2D copy. | |||
uint8_t reserved[6]; | |||
} rtArgsWithTiling_t; | |||
/** | |||
@@ -299,8 +302,8 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, const void *devFunc, | |||
uint32_t funcMode); | |||
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, | |||
const void *devFunc, uint32_t funcMode); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -371,8 +374,9 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, | |||
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||
const void *kernelInfo); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -576,7 +576,7 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
* @return RT_ERROR_DRV_ERR for driver error | |||
*/ | |||
RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int num); | |||
RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num); | |||
/** | |||
* @ingroup dvrt_mem | |||
@@ -31,5 +31,6 @@ | |||
#include "rt_ffts.h" | |||
#include "rt_ffts_plus.h" | |||
#include "rt_dfx.h" | |||
#include "rt_mem_queue.h" | |||
#endif // CCE_RUNTIME_RT_H | |||
#endif // CCE_RUNTIME_RT_H |
@@ -23,6 +23,8 @@ | |||
#define PROF_AICORE_METRICS 0x00000004 | |||
#define PROF_AICPU_TRACE 0x00000008 | |||
#define PROF_L2CACHE 0x00000010 | |||
#define PROF_HCCL_TRACE 0x00000020 | |||
#define PROF_TRAINING_TRACE 0x00000040 | |||
// system profiling switch | |||
#define PROF_CPU 0x00010000 | |||
@@ -41,10 +43,7 @@ | |||
#define PROF_AIVECTORCORE_METRICS 0x0000020000000 | |||
#define PROF_SUBTASK_TIME 0x0000040000000 | |||
#define PROF_TRAINING_TRACE 0x0000080000000 | |||
#define PROF_HCCL_TRACE 0x0000100000000 | |||
#define PROF_TASK_TRACE 0x0000185000002 | |||
#define PROF_TASK_TRACE 0x0000005000062 | |||
#define PROF_MODEL_LOAD 0x8000000000000000 | |||
@@ -54,6 +53,8 @@ | |||
#define PROF_AICORE_METRICS_MASK 0x00000004 | |||
#define PROF_AICPU_TRACE_MASK 0x00000008 | |||
#define PROF_L2CACHE_MASK 0x00000010 | |||
#define PROF_HCCL_TRACE_MASK 0x00000020 | |||
#define PROF_TRAINING_TRACE_MASK 0x00000040 | |||
// system profiling mask | |||
#define PROF_CPU_MASK 0x00010000 | |||
@@ -72,9 +73,6 @@ | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 | |||
#define PROF_SUBTASK_TIME_MASK 0x0000040000000 | |||
#define PROF_TRAINING_TRACE_MASK 0x0000080000000 | |||
#define PROF_HCCL_TRACE_MASK 0x0000100000000 | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | |||