Browse Source

!2092 upgrade Ascend package 18 Nov 21

Merge pull request !2092 from yanghaoran/release
tags/v1.8.0^2
yanghaoran Gitee 3 years ago
parent
commit
b55217c85b
17 changed files with 217 additions and 58 deletions
  1. +1
    -0
      inc/external/acl/error_codes/rt_error_codes.h
  2. +1
    -0
      inc/framework/common/ge_types.h
  3. +1
    -1
      metadef
  4. +8
    -1
      third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
  5. +27
    -25
      third_party/fwkacllib/inc/ops/array_ops.h
  6. +20
    -1
      third_party/fwkacllib/inc/ops/image_ops.h
  7. +29
    -7
      third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
  8. +3
    -1
      third_party/fwkacllib/inc/ops/ocr_ops.h
  9. +21
    -0
      third_party/fwkacllib/inc/ops/reduce_ops.h
  10. +64
    -2
      third_party/fwkacllib/inc/ops/vector_search.h
  11. +3
    -3
      third_party/fwkacllib/inc/runtime/base.h
  12. +11
    -1
      third_party/fwkacllib/inc/runtime/config.h
  13. +10
    -1
      third_party/fwkacllib/inc/runtime/dev.h
  14. +10
    -6
      third_party/fwkacllib/inc/runtime/kernel.h
  15. +1
    -1
      third_party/fwkacllib/inc/runtime/mem.h
  16. +2
    -1
      third_party/fwkacllib/inc/runtime/rt.h
  17. +5
    -7
      third_party/fwkacllib/inc/toolchain/prof_acl_api.h

+ 1
- 0
inc/external/acl/error_codes/rt_error_codes.h View File

@@ -60,6 +60,7 @@ static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resou
static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error


+ 1
- 0
inc/framework/common/ge_types.h View File

@@ -293,6 +293,7 @@ struct OpDescInfo {
std::string dev_func;
std::string tvm_magic;
uint32_t tiling_key = 0U;
uintptr_t args = 0U;
std::string tiling_data;
std::string node_info;
std::vector<int64_t> workspace_bytes;


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit 7d777404b3b7fe7daeaf00e566e431c6a05b040a
Subproject commit fe47d04d75170006fc0d28538dec49a2da426ceb

+ 8
- 1
third_party/fwkacllib/inc/external/runtime/rt_error_codes.h View File

@@ -58,6 +58,10 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
@@ -97,6 +101,10 @@ static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
@@ -105,5 +113,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc di
#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 27
- 25
third_party/fwkacllib/inc/ops/array_ops.h View File

@@ -498,6 +498,25 @@ REG_OP(Constant)
.OP_END_FACTORY_REG(Constant)

/**
*@brief Creates a file constant tensor. The operator is used to process very large weights that are stored in a file. \n

*@par Attributes:
*@li file_id: A required string, used to record the file id.
*@li shape: A required list of ints. The data shape.
*@li dtype: A required type. The data type. \n

*@par Outputs:
*y: The FileConstant tensor. \n
*/
REG_OP(FileConstant)
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
.REQUIRED_ATTR(file_id, String)
.REQUIRED_ATTR(shape, ListInt)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(FileConstant)

/**
*@brief Returns a copy of the input tensor. \n

*@par Inputs:
@@ -1330,31 +1349,6 @@ REG_OP(ExpandD)
.OP_END_FACTORY_REG(ExpandD)

/**
* @brief Calculate buckets limit and offset. \n

* @par Inputs:
* Three inputs, including:
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n

* @par Attributes:
* total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n

* @par Outputs:
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n
*/
REG_OP(CalcBucketsLimitAndOffset)
.INPUT(bucket_list, TensorType({DT_INT32}))
.INPUT(ivf_counts, TensorType({DT_INT32}))
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(buckets_limit, TensorType({DT_INT32}))
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(total_limit, Int)
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)

/**
*@brief Get dim number in tensordesc. \n

*@par Inputs:
@@ -1362,6 +1356,9 @@ REG_OP(CalcBucketsLimitAndOffset)

*@par Outputs:
*y: A 1D tensor. The data type must be int32. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GetShape)
.DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
@@ -1377,8 +1374,13 @@ REG_OP(GetShape)

*@par outputs:
* y: a tensor_desc, type is int.\n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(UpdateTensorDesc)
.INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
.REQUIRED_ATTR(shape, ListInt)


+ 20
- 1
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -586,6 +586,14 @@ REG_OP(ResizeNearestNeighborV2GradD)
channels], The image tensor that was resized . \n

*@par Attributes:
*@li size: An optional listint. Defaults to {}.
*@li ori_image_size: An optional listint. Defaults to {}.
*@li src_start_w: An optional int. Defaults to 0.
*@li dst_start_w: An optional int. Defaults to 0.
*@li align_corners: An optional bool. Defaults to False. If true, the centers of
the 4 corner pixels of the input and grad tensors are aligned. Defaults to
false .
@@ -606,6 +614,10 @@ REG_OP(ResizeBilinearV2Grad)
.INPUT(grads, TensorType({DT_FLOAT}))
.INPUT(original_image, TensorType::FloatingDataType())
.OUTPUT(y, TensorType({DT_FLOAT}))
.ATTR(size, ListInt, {})
.ATTR(ori_image_size, ListInt, {})
.ATTR(src_start_w, Int, 0)
.ATTR(dst_start_w, Int, 0)
.ATTR(align_corners, Bool, false)
.ATTR(half_pixel_centers, Bool, false)
.OP_END_FACTORY_REG(ResizeBilinearV2Grad)
@@ -624,7 +636,10 @@ size for the images . \n
output tensors are aligned, preserving the values at the corner pixels.
Defaults to false .
* @li half_pixel_centers: An optional bool. Defaults to False . \n

*@li ori_image_size: An optional listint. Defaults to {}.
*@li split_size: An optional listint. Defaults to {}.
*@li src_start_w: An optional int. Defaults to 0.
*@li dst_start_w: An optional int. Defaults to 0.
*@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n

@@ -640,6 +655,10 @@ REG_OP(ResizeBilinearV2)
DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(size, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.ATTR(ori_image_size, ListInt, {})
.ATTR(split_size, ListInt, {})
.ATTR(src_start_w, Int, 0)
.ATTR(dst_start_w, Int, 0)
.ATTR(align_corners, Bool, false)
.ATTR(half_pixel_centers, Bool, false)
.OP_END_FACTORY_REG(ResizeBilinearV2)


+ 29
- 7
third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h View File

@@ -113,9 +113,7 @@ if input "x" is with format NC1HWC0. Specifies the mean of "x".
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output .
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. \n
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -137,7 +135,6 @@ REG_OP(BatchNorm)
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_3, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
@@ -167,6 +164,33 @@ REG_OP(SyncBatchNormBackwardReduce)
.OP_END_FACTORY_REG(SyncBatchNormBackwardReduce)

/**
*@brief Part of SyncBatchNormBackward . \n

*@par Inputs:
* Seven inputs, including:
*@li grad_output: A Tensor. Must be one of the following types: float16, float32 .
*@li save_input: A Tensor. Must be one of the following types: float16, float32 .
*@li mean: A Tensor. Must be one of the following types: float16, float32 .
*@li invstd: A Tensor. Must be one of the following types: float16, float32 .
*@li weight: A Tensor. Must be one of the following types: float16, float32 .
*@li mean_dy: A Tensor. Must be one of the following types: float16, float32 .
*@li mean_dy_xmu: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*@li grad_input: A Tensor. Has the same type and format as input "grad_output" . \n
*/
REG_OP(SyncBatchNormBackwardElemt)
.INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(save_input, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(invstd, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(mean_dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(mean_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(grad_input, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(SyncBatchNormBackwardElemt)
/**
*@brief Performs batch normalization . \n

*@par Inputs:
@@ -285,8 +309,7 @@ REG_OP(BatchNormExt2)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm .
*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
@@ -313,7 +336,6 @@ REG_OP(BatchNormGrad)
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(reserve_space_1, TensorType({DT_FLOAT}))
.INPUT(reserve_space_2, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT}))
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
.OUTPUT(offset_backprop, TensorType({DT_FLOAT}))


+ 3
- 1
third_party/fwkacllib/inc/ops/ocr_ops.h View File

@@ -128,7 +128,7 @@ REG_OP(OCRIdentifyPreHandle)
.INPUT(imgs_offset, TensorType({DT_INT32}))
.INPUT(imgs_size, TensorType({DT_INT32}))
.OUTPUT(resized_imgs, TensorType({DT_UINT8}))
.ATTR(size, ListInt, {})
.REQUIRED_ATTR(size, ListInt)
.ATTR(data_format, String, "NHWC")
.OP_END_FACTORY_REG(OCRIdentifyPreHandle)

@@ -247,6 +247,7 @@ REG_OP(OCRDetectionPostHandle)
*@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n
*@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n
*@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n
*@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n
*/
REG_OP(ResizeAndClipPolys)
.INPUT(polys_data, TensorType({DT_INT32}))
@@ -259,6 +260,7 @@ REG_OP(ResizeAndClipPolys)
.OUTPUT(clipped_polys_data, TensorType({DT_INT32}))
.OUTPUT(clipped_polys_offset, TensorType({DT_INT32}))
.OUTPUT(clipped_polys_size, TensorType({DT_INT32}))
.OUTPUT(clipped_polys_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(ResizeAndClipPolys);




+ 21
- 0
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -1305,6 +1305,27 @@ REG_OP(ReduceStdWithMean)
.ATTR(invert, Bool, false)
.ATTR(epsilon, Float, 0.001)
.OP_END_FACTORY_REG(ReduceStdWithMean)

/**
*@brief Performs reduced batch normalization . \n

*@par Inputs:
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n

*@par Attributes:
*@li axes: An optional list of ints. Defaults to {}.
* NOTE(review): presumably the dimensions of "x" to reduce over - confirm against the kernel implementation.
*@li keep_dims: An optional bool. Defaults to "true". \n

*@par Outputs:
*@li mean: A Tensor of type float16 or float32 (as registered) for SUM reduced "x".
*@li variance: A Tensor of type float16 or float32 (as registered) for square sum reduced "x" . \n

*@par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ReduceMeanVariance)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(axes, ListInt, {})
.ATTR(keep_dims, Bool, true)
.OP_END_FACTORY_REG(ReduceMeanVariance)
} //namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_

+ 64
- 2
third_party/fwkacllib/inc/ops/vector_search.h View File

@@ -78,8 +78,8 @@ REG_OP(TopKPQDistance)
.OUTPUT(topk_ivf, TensorType({DT_INT32}))
.OUTPUT(topk_index, TensorType({DT_INT32}))
.ATTR(order, String, "ASC")
.ATTR(k, Int, 0)
.ATTR(group_size, Int, 0)
.REQUIRED_ATTR(k, Int)
.REQUIRED_ATTR(group_size, Int)
.OP_END_FACTORY_REG(TopKPQDistance)

/**
@@ -129,6 +129,68 @@ REG_OP(ScanPQCodes)
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(ScanPQCodes)

/**
* @brief Calculate buckets limit and offset. \n

* @par Inputs:
* Three inputs, including:
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n

* @par Attributes:
* total_limit: A required int64. The maximum value of the sum of ivf_counts corresponding to bucket_list. \n

* @par Outputs:
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n
*/
REG_OP(CalcBucketsLimitAndOffset)
.INPUT(bucket_list, TensorType({DT_INT32}))
.INPUT(ivf_counts, TensorType({DT_INT32}))
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(buckets_limit, TensorType({DT_INT32}))
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(total_limit, Int)
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)

/**
* @brief Calculate ProdVirialSeA. \n
*
* @par Inputs:
* Five inputs, including:
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64.
* @li nlist: A Tensor. dtype is int32.
* @li natoms: A Tensor. dtype is int32. \n
*
* @par Outputs:
* Two outputs, including:
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64.
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Attributes:
* Three attributes, including:
* @li n_a_sel: A required int Scalar.
* @li n_r_sel: A required int Scalar.
* @li nall: An optional int. Defaults to 28328. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ProdVirialSeA)
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(nlist, TensorType({DT_INT32}))
.INPUT(natoms, TensorType({DT_INT32}))
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(n_a_sel, Int)
.REQUIRED_ATTR(n_r_sel, Int)
.ATTR(nall, Int, 28328)
.OP_END_FACTORY_REG(ProdVirialSeA)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_

+ 3
- 3
third_party/fwkacllib/inc/runtime/base.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_BASE_H__
#define __CCE_RUNTIME_BASE_H__
#ifndef CCE_RUNTIME_BASE_H
#define CCE_RUNTIME_BASE_H

#include <stdint.h>
#include "toolchain/prof_callback.h"
@@ -443,4 +443,4 @@ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId);
}
#endif

#endif // __CCE_RUNTIME_BASE_H__
#endif // CCE_RUNTIME_BASE_H

+ 11
- 1
third_party/fwkacllib/inc/runtime/config.h View File

@@ -239,8 +239,18 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);
*/
RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);

/**
* @ingroup
* @brief query whether Heterogenous mode is enabled.
* @param [out] heterogenous receives the mode flag:
* 1: Heterogenous Mode (isHeterogenous=1 was read from the ini file).
* 0: NOT Heterogenous Mode, for any of the following reasons:
* 1:not found ini file, 2:error when reading ini, 3:Heterogenous value is not 1
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous);

#if defined(__cplusplus)
}
#endif

#endif // CCE_RUNTIME_CONFIG_H
#endif // CCE_RUNTIME_CONFIG_H

+ 10
- 1
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -25,7 +25,7 @@ extern "C" {

#define RT_CAPABILITY_SUPPORT (0x1U)
#define RT_CAPABILITY_NOT_SUPPORT (0x0U)
#define MEMORY_INFO_TS_4G_LIMITED (0x0) // for compatibility
#define MEMORY_INFO_TS_4G_LIMITED (0x0U) // for compatibility

typedef struct tagRTDeviceInfo {
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL
@@ -173,6 +173,15 @@ RTS_API rtError_t rtSetDeviceV2(int32_t device, rtDeviceMode deviceMode);

/**
* @ingroup dvrt_dev
* @brief get deviceMode
* @param [out] deviceMode the device mode
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetDeviceMode(rtDeviceMode *deviceMode);

/**
* @ingroup dvrt_dev
* @brief set target die for current thread
* @param [in] die the die id
* @return RT_ERROR_NONE for ok


+ 10
- 6
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -133,8 +133,11 @@ typedef struct tagRtArgsWithTiling {
uint16_t tilingDataOffset; // tiling data offset
uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list
uint16_t hostInputDataOffset; // host_mem input data offset
bool hasHostMemInput; // has host_memory input data in args or not: ture or false
uint8_t reserved[7];
uint8_t hasHostMemInput; // has host_memory input data in args or not: 0 means no host_memory input data,
// others means has host_memory input data.
uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy,
// others means doesn't need H2D copy.
uint8_t reserved[6];
} rtArgsWithTiling_t;

/**
@@ -299,8 +302,8 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle);
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, const void *devFunc,
uint32_t funcMode);
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName,
const void *devFunc, uint32_t funcMode);

/**
* @ingroup rt_kernel
@@ -371,8 +374,9 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo);
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim,
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_,
const void *kernelInfo);

/**
* @ingroup rt_kernel


+ 1
- 1
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -576,7 +576,7 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea
* @return RT_ERROR_INVALID_VALUE for error input
* @return RT_ERROR_DRV_ERR for driver error
*/
RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int num);
RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num);

/**
* @ingroup dvrt_mem


+ 2
- 1
third_party/fwkacllib/inc/runtime/rt.h View File

@@ -31,5 +31,6 @@
#include "rt_ffts.h"
#include "rt_ffts_plus.h"
#include "rt_dfx.h"
#include "rt_mem_queue.h"

#endif // CCE_RUNTIME_RT_H
#endif // CCE_RUNTIME_RT_H

+ 5
- 7
third_party/fwkacllib/inc/toolchain/prof_acl_api.h View File

@@ -23,6 +23,8 @@
#define PROF_AICORE_METRICS 0x00000004
#define PROF_AICPU_TRACE 0x00000008
#define PROF_L2CACHE 0x00000010
#define PROF_HCCL_TRACE 0x00000020
#define PROF_TRAINING_TRACE 0x00000040

// system profiling switch
#define PROF_CPU 0x00010000
@@ -41,10 +43,7 @@
#define PROF_AIVECTORCORE_METRICS 0x0000020000000
#define PROF_SUBTASK_TIME 0x0000040000000

#define PROF_TRAINING_TRACE 0x0000080000000
#define PROF_HCCL_TRACE 0x0000100000000

#define PROF_TASK_TRACE 0x0000185000002
#define PROF_TASK_TRACE 0x0000005000062

#define PROF_MODEL_LOAD 0x8000000000000000

@@ -54,6 +53,8 @@
#define PROF_AICORE_METRICS_MASK 0x00000004
#define PROF_AICPU_TRACE_MASK 0x00000008
#define PROF_L2CACHE_MASK 0x00000010
#define PROF_HCCL_TRACE_MASK 0x00000020
#define PROF_TRAINING_TRACE_MASK 0x00000040

// system profiling mask
#define PROF_CPU_MASK 0x00010000
@@ -72,9 +73,6 @@
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000
#define PROF_SUBTASK_TIME_MASK 0x0000040000000

#define PROF_TRAINING_TRACE_MASK 0x0000080000000
#define PROF_HCCL_TRACE_MASK 0x0000100000000

#define PROF_MODEL_LOAD_MASK 0x8000000000000000

#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))


Loading…
Cancel
Save