
Pre-merge pull request !2115 from yanghaoran/r1.7

pull/2115/MERGE
yanghaoran, 3 years ago
parent commit 1b7e55beec
11 changed files with 64 additions and 23 deletions
  1. +1  -0   inc/external/acl/error_codes/rt_error_codes.h
  2. +1  -11  inc/external/hccl/hccl_types.h
  3. +1  -0   inc/external/runtime/rt_error_codes.h
  4. +8  -0   inc/framework/common/profiling_definitions.h
  5. +1  -1   metadef
  6. +14 -0   third_party/fwkacllib/inc/hccl/base.h
  7. +1  -1   third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  8. +8  -8   third_party/fwkacllib/inc/ops/image_ops.h
  9. +2  -2   third_party/fwkacllib/inc/ops/nn_norm_ops.h
  10. +23 -0  third_party/fwkacllib/inc/ops/nn_ops.h
  11. +4  -0  third_party/fwkacllib/inc/ops/reduce_ops.h

inc/external/acl/error_codes/rt_error_codes.h (+1, -0)

@@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout

 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error


inc/external/hccl/hccl_types.h (+1, -11)

@@ -61,8 +61,7 @@ typedef enum {
  * @brief handle to HCCL communicator
  */
 typedef void *HcclComm;
-typedef void *HcclMessage;
-typedef void *HcclRequest;

 /**
  * @brief HCCL Reduction opperation
  */
@@ -88,14 +87,6 @@ typedef enum {
   HCCL_DATA_TYPE_RESERVED /**< reserved */
 } HcclDataType;

-typedef struct {
-  int srcRank;   // rank_id of the sender of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
-  int tag;       // tag of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
-  int error;     // error code of the receive/probe. 0: no error, others: transport error; defined by the MPI standard, accessible to callers
-  int cancelled; // implementation-defined, callers are advised not to access it
-  int count;     // payload size of the received/probed message; implementation-defined, callers are advised not to access it
-} HcclStatus;

 const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
 /**
  * @brief HCCL root info
@@ -104,7 +95,6 @@ typedef struct HcclRootInfoDef {
   char internal[HCCL_ROOT_INFO_BYTES];
 } HcclRootInfo;

-#define HCCL_REQUEST_NULL NULL
 #ifdef __cplusplus
 }
 #endif // __cplusplus


inc/external/runtime/rt_error_codes.h (+1, -0)

@@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout

 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
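Both copies of rt_error_codes.h above gain the same new constant, ACL_ERROR_RT_TASK_TIMEOUT (107020), alongside the existing ACL_ERROR_RT_WAIT_TIMEOUT. A minimal sketch of how calling code might tell the two timeouts apart when a runtime call fails; the CheckRtError helper is a hypothetical illustration, and real code would pull the constants from the header rather than redeclaring them:

#include <cstdint>
#include <cstdio>

// Mirrors the declarations in rt_error_codes.h so the sketch is self-contained.
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout (new)

// Hypothetical helper: interpret a runtime return code.
void CheckRtError(int32_t ret) {
  if (ret == ACL_ERROR_RT_WAIT_TIMEOUT) {
    std::printf("wait timed out (code %d)\n", ret);
  } else if (ret == ACL_ERROR_RT_TASK_TIMEOUT) {
    std::printf("a submitted task timed out (code %d)\n", ret);
  } else if (ret != 0) {
    std::printf("runtime error %d\n", ret);
  }
}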


inc/framework/common/profiling_definitions.h (+8, -0)

@@ -72,6 +72,14 @@ enum {
   kSelectBranch,
   kExecuteSubGraph,
   kInitSubGraphExecutor,
+  // fuzz compile
+  kSelectBin,
+  kFindCompileCache,
+  kAddCompileCache,
+  kFuzzCompileOp,
+  kCalcRuningParam,
+  kGenTask,
+  kRegisterBin,

   // Add new definitions here
   kProfilingIndexEnd
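The eight new entries extend the profiling-index enum with fuzz-compile stages. A small sketch of how trace post-processing might label those stages; the FuzzCompileIndexName helper and the local enum mirror are assumptions for illustration only, since the real enumerators (and their numeric values) come from profiling_definitions.h:

// Local mirror of the new enumerators so the sketch is self-contained;
// real code would use the enum from profiling_definitions.h instead.
enum FuzzCompileIndex {
  kSelectBin = 0, kFindCompileCache, kAddCompileCache, kFuzzCompileOp,
  kCalcRuningParam, kGenTask, kRegisterBin
};

// Hypothetical helper: map an index to a human-readable stage name.
const char *FuzzCompileIndexName(FuzzCompileIndex idx) {
  static const char *kNames[] = {"SelectBin", "FindCompileCache", "AddCompileCache",
                                 "FuzzCompileOp", "CalcRuningParam", "GenTask", "RegisterBin"};
  return kNames[idx];
}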


metadef (+1, -1)

@@ -1 +1 @@
-Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb
+Subproject commit 22309b14838a763d41dccd636fec567dae3720fd

third_party/fwkacllib/inc/hccl/base.h (+14, -0)

@@ -197,6 +197,20 @@ typedef struct tagCommAttr {
   WorkMode mode; // probe working mode within the communication domain
   uint32_t deviceId = 0;
 } CommAttr;
+
+typedef void* HcclMessage;
+typedef void* HcclRequest;
+
+typedef struct {
+  int srcRank;   // rank_id of the sender of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
+  int tag;       // tag of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
+  int error;     // error code of the receive/probe. 0: no error, others: transport error; defined by the MPI standard, accessible to callers
+  int cancelled; // implementation-defined, callers are advised not to access it
+  int count;     // payload size of the received/probed message; implementation-defined, callers are advised not to access it
+} HcclStatus;
+
+#define HCCL_REQUEST_NULL NULL

 #ifdef __cplusplus
 }
 #endif // __cplusplus
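This moves the HcclMessage/HcclRequest handles, the MPI-style HcclStatus structure and HCCL_REQUEST_NULL out of the public inc/external/hccl/hccl_types.h and into third_party/fwkacllib/inc/hccl/base.h. A sketch of the usage pattern the fields imply; the types are copied from the diff above so the snippet stands alone, while the probe/receive call that would actually fill in the status is deliberately not shown because it is not declared in these headers:

#include <cstdio>

// Copied from base.h (after this change) so the sketch is self-contained.
typedef void* HcclMessage;
typedef void* HcclRequest;
typedef struct {
  int srcRank;   // sender rank_id of the received/probed message (MPI-style, caller-visible)
  int tag;       // tag of the received/probed message (MPI-style, caller-visible)
  int error;     // 0: no error, others: transport error (MPI-style, caller-visible)
  int cancelled; // implementation-defined, callers should not rely on it
  int count;     // payload size of the received/probed message, implementation-defined
} HcclStatus;
#define HCCL_REQUEST_NULL NULL

// Requests start out null until some nonblocking call fills them in.
HcclRequest g_request = HCCL_REQUEST_NULL;

// Hypothetical helper: inspect a status that a probe/receive call has filled in.
bool MessageArrivedWithoutError(const HcclStatus &status) {
  if (status.error != 0) {
    std::printf("transfer from rank %d (tag %d) failed with code %d\n",
                status.srcRank, status.tag, status.error);
    return false;
  }
  return true;
}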


third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+1, -1)

@@ -3488,7 +3488,7 @@ REG_OP(Addcmul)
 REG_OP(AxpyV2)
     .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
-    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(AxpyV2)




third_party/fwkacllib/inc/ops/image_ops.h (+8, -8)

@@ -1737,17 +1737,17 @@ round_prefer_ceil, floor, ceil. Only used by nearest interpolation.
 */

 REG_OP(Resize)
-    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
-                          DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(scales, TensorType({DT_FLOAT}))
-    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64}))
-    .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
-                           DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+                          DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(roi, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(scales, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+                           DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
     .ATTR(coordinate_transformation_mode, String, "half_pixel")
     .ATTR(cubic_coeff_a, Float, -0.75)
     .ATTR(exclude_outside, Int, 0)
-    .ATTR(extrapolation_value, Float, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
     .ATTR(mode, String, "nearest")
     .ATTR(nearest_mode, String, "round_prefer_floor")
     .OP_END_FACTORY_REG(Resize)
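With roi, scales and sizes all optional now, a caller typically supplies either scales or sizes, in the style of ONNX Resize. A sketch of how an output shape can be derived from whichever one is present; this illustrates the convention the input names follow and is an assumption about usage, not the operator's actual shape-inference code:

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative: explicit target sizes win; otherwise round in_dim * scale.
std::vector<int64_t> ResizeOutputShape(const std::vector<int64_t> &input_shape,
                                       const std::vector<float> &scales,   // may be empty
                                       const std::vector<int64_t> &sizes)  // may be empty
{
  if (!sizes.empty()) {
    return sizes;
  }
  std::vector<int64_t> out;
  out.reserve(input_shape.size());
  for (std::size_t i = 0; i < input_shape.size(); ++i) {
    out.push_back(static_cast<int64_t>(std::floor(input_shape[i] * scales[i])));
  }
  return out;
}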


third_party/fwkacllib/inc/ops/nn_norm_ops.h (+2, -2)

@@ -1801,7 +1801,7 @@ REG_OP(SoftmaxCrossEntropyLoss)
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
-REG_OP(AxpyWithSoftmaxAndDropoutdomask)
+REG_OP(AxpyWithSoftmaxAndDropOutDoMask)
     .INPUT(x1, TensorType({DT_FLOAT16}))
     .INPUT(x2, TensorType({DT_FLOAT16}))
     .INPUT(mask, TensorType({DT_UINT8}))
@@ -1810,6 +1810,6 @@ REG_OP(AxpyWithSoftmaxAndDropoutdomask)
     .REQUIRED_ATTR(alpha, Float)
     .REQUIRED_ATTR(input_keep_prob, Float)
     .ATTR(axis, ListInt, {-1})
-    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask)
+    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

third_party/fwkacllib/inc/ops/nn_ops.h (+23, -0)

@@ -116,6 +116,7 @@ REG_OP(FusedBatchNormV2)
 * @par Outputs:
 * One output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+* @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(SegmentSort)
@@ -137,6 +138,7 @@ REG_OP(SegmentSort)
 * Two output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
 * output_index: A Tensor.If include_index is true, output index.
+* @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(MultiMerge)
@@ -148,6 +150,27 @@ REG_OP(MultiMerge)
     .OP_END_FACTORY_REG(MultiMerge)

 /**
+* @brief Large amount of data sort. Third operator of TopK.
+* @par Inputs:
+* One input, including:
+* input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+* @par Attributes:
+* k_num: Int. Number to be sorted.
+* @par Outputs:
+* Two outputs, including:
+* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted.
+* @li output_index: A Tensor. int32. Data index.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(SingleMerge)
+    .INPUT(input_proposal, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_data, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_index, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(k_num, Int)
+    .OP_END_FACTORY_REG(SingleMerge)
+
+/**
 * @brief MultiHeadAttention.
 * @par Inputs:
 * thirteen input, including:
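The new SingleMerge operator is documented above as the third stage of the TopK pipeline, after SegmentSort and MultiMerge: it takes the per-channel sorted proposals and emits the k_num best values (output_data) together with their int32 indices (output_index). A small host-side sketch of that merge-and-truncate idea, purely to illustrate the data flow; the real operator works on float16 proposal records on the device and this is not its implementation:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Illustrative only: merge descending-sorted runs of (value, index) proposals
// and keep the best k_num, mirroring output_data / output_index of SingleMerge.
void SingleMergeSketch(const std::vector<std::vector<std::pair<float, int32_t>>> &sorted_runs,
                       int64_t k_num,
                       std::vector<float> *output_data,
                       std::vector<int32_t> *output_index) {
  std::vector<std::pair<float, int32_t>> merged;
  for (const auto &run : sorted_runs) {
    merged.insert(merged.end(), run.begin(), run.end());
  }
  // A real implementation would do a k-way merge; a full sort is enough for a sketch.
  std::sort(merged.begin(), merged.end(),
            [](const std::pair<float, int32_t> &a, const std::pair<float, int32_t> &b) {
              return a.first > b.first;
            });
  const std::size_t k = std::min<std::size_t>(static_cast<std::size_t>(k_num), merged.size());
  output_data->clear();
  output_index->clear();
  for (std::size_t i = 0; i < k; ++i) {
    output_data->push_back(merged[i].first);
    output_index->push_back(merged[i].second);
  }
}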


third_party/fwkacllib/inc/ops/reduce_ops.h (+4, -0)

@@ -663,6 +663,9 @@ REG_OP(ReduceProdD)
 *keep_dims: A bool or NoneType.
 * - If true, retains reduced dimensions with length 1.
 * - If false, the rank of the tensor is reduced by 1 for each entry in axis.
+*noop_with_empty_axes: A bool.
+* - If true, when axes = [], do not reduce.
+* - If false, when axes = [], reduce all axes.
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n

@@ -674,6 +677,7 @@ REG_OP(ReduceMean)
     .INPUT(axes, TensorType::IndexNumberType())
     .OUTPUT(y, TensorType::NumberType())
     .ATTR(keep_dims, Bool, false)
+    .ATTR(noop_with_empty_axes, Bool, true)
     .OP_END_FACTORY_REG(ReduceMean)

 /**
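The new noop_with_empty_axes attribute only matters when the axes input is empty: true leaves the tensor untouched, false reduces over every axis. A small sketch of that decision for a 1-D input, illustrating the documented semantics rather than the operator implementation:

#include <cstdint>
#include <numeric>
#include <vector>

// Illustrative semantics of noop_with_empty_axes (1-D input, assumed non-empty).
std::vector<float> ReduceMeanSketch(const std::vector<float> &x,
                                    const std::vector<int64_t> &axes,
                                    bool noop_with_empty_axes) {
  if (axes.empty()) {
    if (noop_with_empty_axes) {
      return x;  // axes == [] means "do nothing"
    }
    float mean = std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
    return {mean};  // axes == [] means "reduce all axes"
  }
  // Reduction over explicit axes is unchanged by this attribute and omitted here.
  return x;
}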

