Merge pull request !2115 from yanghaoran/r1.7
@@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout
 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
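For reviewers following the new code: a minimal sketch of how a caller might distinguish the new task timeout from the existing wait timeout. `LaunchKernel` is a hypothetical stand-in; only the two error constants come from this hunk.

```cpp
#include <cstdint>
#include <cstdio>

static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019;  // wait timeout (existing)
static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020;  // task timeout (added here)

// Hypothetical stand-in for a runtime call; not an API from this repo.
static int32_t LaunchKernel() { return ACL_ERROR_RT_TASK_TIMEOUT; }

int main() {
  const int32_t ret = LaunchKernel();
  if (ret == ACL_ERROR_RT_TASK_TIMEOUT) {
    // New code path: the task itself timed out on device.
    std::fprintf(stderr, "device task timed out (%d)\n", static_cast<int>(ret));
  } else if (ret == ACL_ERROR_RT_WAIT_TIMEOUT) {
    // Pre-existing code path: the host-side wait timed out.
    std::fprintf(stderr, "host-side wait timed out (%d)\n", static_cast<int>(ret));
  }
  return 0;
}
```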
@@ -61,8 +61,7 @@ typedef enum {
  * @brief handle to HCCL communicator
  */
 typedef void *HcclComm;
-typedef void *HcclMessage;
-typedef void *HcclRequest;
 /**
  * @brief HCCL Reduction operation
  */
@@ -88,14 +87,6 @@ typedef enum {
   HCCL_DATA_TYPE_RESERVED /**< reserved */
 } HcclDataType;
-typedef struct {
-  int srcRank;   // sender rank_id of the received/probed msg/envelope; defined by the MPI standard, readable by the caller
-  int tag;       // tag of the received/probed msg/envelope; defined by the MPI standard, readable by the caller
-  int error;     // receive/probe error code, 0: no error, others: transfer error; defined by the MPI standard, readable by the caller
-  int cancelled; // implementation-defined; callers are advised not to access it
-  int count;     // payload size of the received/probed msg; implementation-defined, callers are advised not to access it
-} HcclStatus;
 const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
 /**
  * @brief HCCL root info
@@ -104,7 +95,6 @@ typedef struct HcclRootInfoDef {
   char internal[HCCL_ROOT_INFO_BYTES];
 } HcclRootInfo;
-#define HCCL_REQUEST_NULL NULL
 #ifdef __cplusplus
 }
 #endif // __cplusplus
@@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout
 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
@@ -72,6 +72,14 @@ enum {
   kSelectBranch,
   kExecuteSubGraph,
   kInitSubGraphExecutor,
+  // fuzz compile
+  kSelectBin,
+  kFindCompileCache,
+  kAddCompileCache,
+  kFuzzCompileOp,
+  kCalcRuningParam,
+  kGenTask,
+  kRegisterBin,
   // Add new definitions here
   kProfilingIndexEnd
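The new enumerators slot in ahead of the `kProfilingIndexEnd` sentinel, so anything sized by that sentinel picks them up automatically. A sketch under that assumption; the recorder struct and clock choice are illustrative, not from this repo, and the enumerators before `kSelectBranch` are omitted (which shifts absolute values but not the sizing pattern).

```cpp
#include <array>
#include <chrono>
#include <cstdint>

// Enumerators copied from the hunk above; earlier values omitted for brevity.
enum {
  kSelectBranch,
  kExecuteSubGraph,
  kInitSubGraphExecutor,
  // fuzz compile
  kSelectBin,
  kFindCompileCache,
  kAddCompileCache,
  kFuzzCompileOp,
  kCalcRuningParam,
  kGenTask,
  kRegisterBin,
  kProfilingIndexEnd
};

// Assumed recorder: one timestamp slot per profiling index, sized by the sentinel.
struct ProfilingRecord {
  std::array<int64_t, kProfilingIndexEnd> ts_ns{};
  void Mark(int idx) {
    ts_ns[idx] = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now().time_since_epoch()).count();
  }
};

int main() {
  ProfilingRecord rec;
  rec.Mark(kSelectBin);  // e.g. stamp the start of fuzz-compile bin selection
  rec.Mark(kGenTask);
  return 0;
}
```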
@@ -1 +1 @@
-Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb
+Subproject commit 22309b14838a763d41dccd636fec567dae3720fd
@@ -197,6 +197,20 @@ typedef struct tagCommAttr {
   WorkMode mode; // probe working mode within the communication domain
   uint32_t deviceId = 0;
 } CommAttr;
+typedef void* HcclMessage;
+typedef void* HcclRequest;
+typedef struct {
+  int srcRank;   // sender rank_id of the received/probed msg/envelope; defined by the MPI standard, readable by the caller
+  int tag;       // tag of the received/probed msg/envelope; defined by the MPI standard, readable by the caller
+  int error;     // receive/probe error code, 0: no error, others: transfer error; defined by the MPI standard, readable by the caller
+  int cancelled; // implementation-defined; callers are advised not to access it
+  int count;     // payload size of the received/probed msg; implementation-defined, callers are advised not to access it
+} HcclStatus;
+#define HCCL_REQUEST_NULL NULL
 #ifdef __cplusplus
 }
 #endif // __cplusplus
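The struct mirrors `MPI_Status`: per the field comments, callers are expected to read only `srcRank`, `tag`, and `error`. A minimal standalone sketch of that convention, re-declaring the types from this hunk so it compiles on its own; the probe/receive call that would fill the status is deliberately omitted, since its name is not part of this diff.

```cpp
#include <cstdio>

typedef void* HcclMessage;
typedef void* HcclRequest;

typedef struct {
  int srcRank;    // sender rank of the probed message (MPI-style, caller-visible)
  int tag;        // message tag (MPI-style, caller-visible)
  int error;      // 0: no error; others: transfer error (MPI-style, caller-visible)
  int cancelled;  // implementation-defined; callers should not rely on it
  int count;      // probed payload size; implementation-defined
} HcclStatus;

#define HCCL_REQUEST_NULL NULL

// Caller-side check that touches only the caller-visible fields.
bool StatusOk(const HcclStatus& st) {
  if (st.error != 0) {
    std::fprintf(stderr, "transfer error %d from rank %d (tag %d)\n",
                 st.error, st.srcRank, st.tag);
    return false;
  }
  return true;
}

int main() {
  // A status as some probe-style call might have filled it in.
  HcclStatus st{/*srcRank=*/1, /*tag=*/7, /*error=*/0, /*cancelled=*/0, /*count=*/0};
  return StatusOk(st) ? 0 : 1;
}
```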
@@ -3488,7 +3488,7 @@ REG_OP(Addcmul)
 REG_OP(AxpyV2)
     .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
-    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .OP_END_FACTORY_REG(AxpyV2)
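This widens `alpha` so an all-int32 call is now legal. A reference sketch of what that enables, assuming the usual axpy definition y[i] = x1[i] + alpha * x2[i]:

```cpp
#include <cstdio>
#include <vector>

// Reference semantics, assuming the standard axpy definition:
//   y[i] = x1[i] + alpha * x2[i]
template <typename T>
std::vector<T> AxpyV2Ref(const std::vector<T>& x1, const std::vector<T>& x2, T alpha) {
  std::vector<T> y(x1.size());
  for (size_t i = 0; i < x1.size(); ++i) y[i] = x1[i] + alpha * x2[i];
  return y;
}

int main() {
  // int32 inputs with an int32 alpha: the combination this change permits.
  auto y = AxpyV2Ref<int>({1, 2, 3}, {10, 20, 30}, 2);
  std::printf("%d %d %d\n", y[0], y[1], y[2]);  // 21 42 63
  return 0;
}
```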
@@ -1737,17 +1737,17 @@ round_prefer_ceil, floor, ceil. Only used by nearest interpolation.
 */
 REG_OP(Resize)
-    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
-                          DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(scales, TensorType({DT_FLOAT}))
-    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64}))
-    .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
-                           DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+                          DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(roi, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(scales, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+                           DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
     .ATTR(coordinate_transformation_mode, String, "half_pixel")
     .ATTR(cubic_coeff_a, Float, -0.75)
     .ATTR(exclude_outside, Int, 0)
-    .ATTR(extrapolation_value, Float, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
     .ATTR(mode, String, "nearest")
     .ATTR(nearest_mode, String, "round_prefer_floor")
     .OP_END_FACTORY_REG(Resize)
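The attribute set matches ONNX Resize, so the default `half_pixel` mapping and `round_prefer_floor` rounding can be written out directly. A sketch of just that coordinate math (the op itself does much more):

```cpp
#include <cmath>
#include <cstdio>

// Source-coordinate mapping for the default coordinate_transformation_mode
// ("half_pixel"), per the ONNX Resize definition these attributes mirror:
//   x_orig = (x_resized + 0.5) / scale - 0.5
double HalfPixel(double x_resized, double scale) {
  return (x_resized + 0.5) / scale - 0.5;
}

// nearest_mode "round_prefer_floor": ties round toward the smaller index,
// which is ceil(x - 0.5).
int64_t RoundPreferFloor(double x) {
  return static_cast<int64_t>(std::ceil(x - 0.5));
}

int main() {
  const double scale = 2.0;  // 2x upscale
  for (int i = 0; i < 4; ++i) {
    const double src = HalfPixel(i, scale);
    std::printf("dst %d -> src %.2f -> nearest %lld\n", i, src,
                static_cast<long long>(RoundPreferFloor(src)));
  }
  return 0;
}
```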
@@ -1801,7 +1801,7 @@ REG_OP(SoftmaxCrossEntropyLoss)
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
-REG_OP(AxpyWithSoftmaxAndDropoutdomask)
+REG_OP(AxpyWithSoftmaxAndDropOutDoMask)
     .INPUT(x1, TensorType({DT_FLOAT16}))
     .INPUT(x2, TensorType({DT_FLOAT16}))
     .INPUT(mask, TensorType({DT_UINT8}))
@@ -1810,6 +1810,6 @@ REG_OP(AxpyWithSoftmaxAndDropoutdomask)
     .REQUIRED_ATTR(alpha, Float)
     .REQUIRED_ATTR(input_keep_prob, Float)
     .ATTR(axis, ListInt, {-1})
-    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask)
+    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
@@ -116,6 +116,7 @@ REG_OP(FusedBatchNormV2)
 * @par Outputs:
 * One output, including:
 * output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel.
+* @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(SegmentSort)
@@ -137,6 +138,7 @@ REG_OP(SegmentSort)
 * Two outputs, including:
 * output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel.
 * output_index: A Tensor. If include_index is true, output index.
+* @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(MultiMerge)
@@ -148,6 +150,27 @@ REG_OP(MultiMerge)
     .OP_END_FACTORY_REG(MultiMerge)
 /**
+* @brief Large amount of data sort. Third operator of TopK.
+* @par Inputs:
+* One input, including:
+* input_proposal: A Tensor. Proposal sorted for each channel. Supports float16.
+* @par Attributes:
+* k_num: Int. Number of elements to be sorted.
+* @par Outputs:
+* Two outputs, including:
+* @li output_data: A Tensor. Datatype and format are the same as input_data. Data sorted.
+* @li output_index: A Tensor. int32. Data index.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(SingleMerge)
+    .INPUT(input_proposal, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_data, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_index, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(k_num, Int)
+    .OP_END_FACTORY_REG(SingleMerge)
+/**
 * @brief MultiHeadAttention.
 * @par Inputs:
 * Thirteen inputs, including:
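The three-stage TopK split (SegmentSort, MultiMerge, SingleMerge) is only sketched in these doc comments. As a host-side reference for what the final stage is expected to produce, here is a plain top-k over scored data; `TopKRef` is illustrative only, since the real op consumes pre-sorted per-channel proposals from MultiMerge.

```cpp
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Host-side reference: emit the top k_num values (output_data) and their
// source indices (output_index), mirroring SingleMerge's two outputs.
void TopKRef(const std::vector<float>& data, int k_num,
             std::vector<float>* out_data, std::vector<int>* out_index) {
  std::vector<std::pair<float, int>> scored;
  for (int i = 0; i < static_cast<int>(data.size()); ++i) scored.push_back({data[i], i});
  std::partial_sort(scored.begin(), scored.begin() + k_num, scored.end(),
                    [](const auto& a, const auto& b) { return a.first > b.first; });
  for (int i = 0; i < k_num; ++i) {
    out_data->push_back(scored[i].first);
    out_index->push_back(scored[i].second);
  }
}

int main() {
  std::vector<float> out_data;
  std::vector<int> out_index;
  TopKRef({0.1f, 0.9f, 0.4f, 0.7f}, /*k_num=*/2, &out_data, &out_index);
  std::printf("%.1f@%d %.1f@%d\n", out_data[0], out_index[0],
              out_data[1], out_index[1]);  // 0.9@1 0.7@3
  return 0;
}
```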
@@ -663,6 +663,9 @@ REG_OP(ReduceProdD)
 *keep_dims: A bool or NoneType.
 * - If true, retains reduced dimensions with length 1.
 * - If false, the rank of the tensor is reduced by 1 for each entry in axis.
+*noop_with_empty_axes: A bool.
+* - If true, when axes = [], no reduction is performed.
+* - If false, when axes = [], all dimensions are reduced.
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -674,6 +677,7 @@ REG_OP(ReduceMean)
     .INPUT(axes, TensorType::IndexNumberType())
     .OUTPUT(y, TensorType::NumberType())
     .ATTR(keep_dims, Bool, false)
+    .ATTR(noop_with_empty_axes, Bool, true)
     .OP_END_FACTORY_REG(ReduceMean)
 /**
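`noop_with_empty_axes` only changes behavior when the `axes` input is empty: pass-through versus reduce-all, per the doc comment above. A one-dimensional sketch of the two branches (real per-axis reduction elided):

```cpp
#include <cstdio>
#include <numeric>
#include <vector>

// Semantics sketch for noop_with_empty_axes on a flat tensor.
std::vector<float> ReduceMeanRef(const std::vector<float>& x,
                                 const std::vector<int>& axes,
                                 bool noop_with_empty_axes) {
  if (axes.empty()) {
    if (noop_with_empty_axes) return x;  // true: identity, no reduction
    const float mean = std::accumulate(x.begin(), x.end(), 0.0f) / x.size();
    return {mean};  // false: reduce across all dimensions
  }
  // Per-axis reduction elided; only the empty-axes branch is at issue here.
  return x;
}

int main() {
  const std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};
  std::printf("noop=true  -> %zu elems\n", ReduceMeanRef(x, {}, true).size());  // 4
  std::printf("noop=false -> %.1f\n", ReduceMeanRef(x, {}, false)[0]);          // 2.5
  return 0;
}
```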