
Pre-merge pull request !2115 from yanghaoran/r1.7

pull/2115/MERGE
yanghaoran, 3 years ago
parent commit 1b7e55beec
11 changed files with 64 additions and 23 deletions
  1. +1  -0   inc/external/acl/error_codes/rt_error_codes.h
  2. +1  -11  inc/external/hccl/hccl_types.h
  3. +1  -0   inc/external/runtime/rt_error_codes.h
  4. +8  -0   inc/framework/common/profiling_definitions.h
  5. +1  -1   metadef
  6. +14 -0   third_party/fwkacllib/inc/hccl/base.h
  7. +1  -1   third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  8. +8  -8   third_party/fwkacllib/inc/ops/image_ops.h
  9. +2  -2   third_party/fwkacllib/inc/ops/nn_norm_ops.h
  10. +23 -0  third_party/fwkacllib/inc/ops/nn_ops.h
  11. +4  -0  third_party/fwkacllib/inc/ops/reduce_ops.h

inc/external/acl/error_codes/rt_error_codes.h (+1, -0)

@@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout

 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error


inc/external/hccl/hccl_types.h (+1, -11)

@@ -61,8 +61,7 @@ typedef enum {
  * @brief handle to HCCL communicator
  */
 typedef void *HcclComm;
-typedef void *HcclMessage;
-typedef void *HcclRequest;

 /**
  * @brief HCCL Reduction opperation
  */
@@ -88,14 +87,6 @@ typedef enum {
   HCCL_DATA_TYPE_RESERVED /**< reserved */
 } HcclDataType;

-typedef struct {
-  int srcRank;   // rank_id of the sender of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
-  int tag;       // tag of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
-  int error;     // error code of the receive/probe. 0: no error, others: transport error; defined by the MPI standard, accessible to callers
-  int cancelled; // implementation-defined, callers are advised not to access it
-  int count;     // payload size of the received/probed message; implementation-defined, callers are advised not to access it
-} HcclStatus;

 const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
 /**
  * @brief HCCL root info
@@ -104,7 +95,6 @@ typedef struct HcclRootInfoDef {
   char internal[HCCL_ROOT_INFO_BYTES];
 } HcclRootInfo;

-#define HCCL_REQUEST_NULL NULL
 #ifdef __cplusplus
 }
 #endif // __cplusplus


inc/external/runtime/rt_error_codes.h (+1, -0)

@@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout

 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
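Both copies of rt_error_codes.h above gain the same new constant, ACL_ERROR_RT_TASK_TIMEOUT (107020), alongside the existing ACL_ERROR_RT_WAIT_TIMEOUT. A minimal sketch of how calling code might tell the two timeouts apart when a runtime call fails; the CheckRtError helper is a hypothetical illustration, and real code would pull the constants from the header rather than redeclaring them:

#include <cstdint>
#include <cstdio>

// Mirrors the declarations in rt_error_codes.h so the sketch is self-contained.
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout (new)

// Hypothetical helper: interpret a runtime return code.
void CheckRtError(int32_t ret) {
  if (ret == ACL_ERROR_RT_WAIT_TIMEOUT) {
    std::printf("wait timed out (code %d)\n", ret);
  } else if (ret == ACL_ERROR_RT_TASK_TIMEOUT) {
    std::printf("a submitted task timed out (code %d)\n", ret);
  } else if (ret != 0) {
    std::printf("runtime error %d\n", ret);
  }
}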


inc/framework/common/profiling_definitions.h (+8, -0)

@@ -72,6 +72,14 @@ enum {
   kSelectBranch,
   kExecuteSubGraph,
   kInitSubGraphExecutor,
+  // fuzz compile
+  kSelectBin,
+  kFindCompileCache,
+  kAddCompileCache,
+  kFuzzCompileOp,
+  kCalcRuningParam,
+  kGenTask,
+  kRegisterBin,

   // Add new definitions here
   kProfilingIndexEnd
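The eight new entries extend the profiling-index enum with fuzz-compile stages. A small sketch of how trace post-processing might label those stages; the FuzzCompileIndexName helper and the local enum mirror are assumptions for illustration only, since the real enumerators (and their numeric values) come from profiling_definitions.h:

// Local mirror of the new enumerators so the sketch is self-contained;
// real code would use the enum from profiling_definitions.h instead.
enum FuzzCompileIndex {
  kSelectBin = 0, kFindCompileCache, kAddCompileCache, kFuzzCompileOp,
  kCalcRuningParam, kGenTask, kRegisterBin
};

// Hypothetical helper: map an index to a human-readable stage name.
const char *FuzzCompileIndexName(FuzzCompileIndex idx) {
  static const char *kNames[] = {"SelectBin", "FindCompileCache", "AddCompileCache",
                                 "FuzzCompileOp", "CalcRuningParam", "GenTask", "RegisterBin"};
  return kNames[idx];
}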


metadef (+1, -1)

@@ -1 +1 @@
-Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb
+Subproject commit 22309b14838a763d41dccd636fec567dae3720fd

third_party/fwkacllib/inc/hccl/base.h (+14, -0)

@@ -197,6 +197,20 @@ typedef struct tagCommAttr {
   WorkMode mode; // probe working mode within the communication domain
   uint32_t deviceId = 0;
 } CommAttr;
+
+typedef void* HcclMessage;
+typedef void* HcclRequest;
+
+typedef struct {
+  int srcRank;   // rank_id of the sender of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
+  int tag;       // tag of the received/probed msg/envelope; defined by the MPI standard, accessible to callers
+  int error;     // error code of the receive/probe. 0: no error, others: transport error; defined by the MPI standard, accessible to callers
+  int cancelled; // implementation-defined, callers are advised not to access it
+  int count;     // payload size of the received/probed message; implementation-defined, callers are advised not to access it
+} HcclStatus;
+
+#define HCCL_REQUEST_NULL NULL

 #ifdef __cplusplus
 }
 #endif // __cplusplus
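This moves the HcclMessage/HcclRequest handles, the MPI-style HcclStatus structure and HCCL_REQUEST_NULL out of the public inc/external/hccl/hccl_types.h and into third_party/fwkacllib/inc/hccl/base.h. A sketch of the usage pattern the fields imply; the types are copied from the diff above so the snippet stands alone, while the probe/receive call that would actually fill in the status is deliberately not shown because it is not declared in these headers:

#include <cstdio>

// Copied from base.h (after this change) so the sketch is self-contained.
typedef void* HcclMessage;
typedef void* HcclRequest;
typedef struct {
  int srcRank;   // sender rank_id of the received/probed message (MPI-style, caller-visible)
  int tag;       // tag of the received/probed message (MPI-style, caller-visible)
  int error;     // 0: no error, others: transport error (MPI-style, caller-visible)
  int cancelled; // implementation-defined, callers should not rely on it
  int count;     // payload size of the received/probed message, implementation-defined
} HcclStatus;
#define HCCL_REQUEST_NULL NULL

// Requests start out null until some nonblocking call fills them in.
HcclRequest g_request = HCCL_REQUEST_NULL;

// Hypothetical helper: inspect a status that a probe/receive call has filled in.
bool MessageArrivedWithoutError(const HcclStatus &status) {
  if (status.error != 0) {
    std::printf("transfer from rank %d (tag %d) failed with code %d\n",
                status.srcRank, status.tag, status.error);
    return false;
  }
  return true;
}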


third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+1, -1)

@@ -3488,7 +3488,7 @@ REG_OP(Addcmul)
 REG_OP(AxpyV2)
     .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
-    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(AxpyV2)




third_party/fwkacllib/inc/ops/image_ops.h (+8, -8)

@@ -1737,17 +1737,17 @@ round_prefer_ceil, floor, ceil. Only used by nearest interpolation.
 */

 REG_OP(Resize)
-    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
-                          DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(scales, TensorType({DT_FLOAT}))
-    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64}))
-    .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
-                           DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+                          DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(roi, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
+    .OPTIONAL_INPUT(scales, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,
+                           DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE}))
     .ATTR(coordinate_transformation_mode, String, "half_pixel")
     .ATTR(cubic_coeff_a, Float, -0.75)
     .ATTR(exclude_outside, Int, 0)
-    .ATTR(extrapolation_value, Float, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
     .ATTR(mode, String, "nearest")
     .ATTR(nearest_mode, String, "round_prefer_floor")
     .OP_END_FACTORY_REG(Resize)
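With roi, scales and sizes all optional now, a caller typically supplies either scales or sizes, in the style of ONNX Resize. A sketch of how an output shape can be derived from whichever one is present; this illustrates the convention the input names follow and is an assumption about usage, not the operator's actual shape-inference code:

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative: explicit target sizes win; otherwise round in_dim * scale.
std::vector<int64_t> ResizeOutputShape(const std::vector<int64_t> &input_shape,
                                       const std::vector<float> &scales,   // may be empty
                                       const std::vector<int64_t> &sizes)  // may be empty
{
  if (!sizes.empty()) {
    return sizes;
  }
  std::vector<int64_t> out;
  out.reserve(input_shape.size());
  for (std::size_t i = 0; i < input_shape.size(); ++i) {
    out.push_back(static_cast<int64_t>(std::floor(input_shape[i] * scales[i])));
  }
  return out;
}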


third_party/fwkacllib/inc/ops/nn_norm_ops.h (+2, -2)

@@ -1801,7 +1801,7 @@ REG_OP(SoftmaxCrossEntropyLoss)
 * @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
-REG_OP(AxpyWithSoftmaxAndDropoutdomask)
+REG_OP(AxpyWithSoftmaxAndDropOutDoMask)
     .INPUT(x1, TensorType({DT_FLOAT16}))
     .INPUT(x2, TensorType({DT_FLOAT16}))
     .INPUT(mask, TensorType({DT_UINT8}))
@@ -1810,6 +1810,6 @@ REG_OP(AxpyWithSoftmaxAndDropoutdomask)
     .REQUIRED_ATTR(alpha, Float)
     .REQUIRED_ATTR(input_keep_prob, Float)
     .ATTR(axis, ListInt, {-1})
-    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask)
+    .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask)
 } // namespace ge
 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

third_party/fwkacllib/inc/ops/nn_ops.h (+23, -0)

@@ -116,6 +116,7 @@ REG_OP(FusedBatchNormV2)
 * @par Outputs:
 * One output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+* @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(SegmentSort)
@@ -137,6 +138,7 @@ REG_OP(SegmentSort)
 * Two output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
 * output_index: A Tensor.If include_index is true, output index.
+* @par Restrictions:
 * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(MultiMerge)
@@ -148,6 +150,27 @@ REG_OP(MultiMerge)
     .OP_END_FACTORY_REG(MultiMerge)

 /**
+* @brief Large amount of data sort. Third operator of TopK.
+* @par Inputs:
+* One input, including:
+* input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+* @par Attributes:
+* k_num: Int. Number to be sorted.
+* @par Outputs:
+* Two outputs, including:
+* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted.
+* @li output_index: A Tensor. int32. Data index.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(SingleMerge)
+    .INPUT(input_proposal, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_data, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_index, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(k_num, Int)
+    .OP_END_FACTORY_REG(SingleMerge)
+
+/**
 * @brief MultiHeadAttention.
 * @par Inputs:
 * thirteen input, including:
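The new SingleMerge operator is documented above as the third stage of the TopK pipeline, after SegmentSort and MultiMerge: it takes the per-channel sorted proposals and emits the k_num best values (output_data) together with their int32 indices (output_index). A small host-side sketch of that merge-and-truncate idea, purely to illustrate the data flow; the real operator works on float16 proposal records on the device and this is not its implementation:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Illustrative only: merge descending-sorted runs of (value, index) proposals
// and keep the best k_num, mirroring output_data / output_index of SingleMerge.
void SingleMergeSketch(const std::vector<std::vector<std::pair<float, int32_t>>> &sorted_runs,
                       int64_t k_num,
                       std::vector<float> *output_data,
                       std::vector<int32_t> *output_index) {
  std::vector<std::pair<float, int32_t>> merged;
  for (const auto &run : sorted_runs) {
    merged.insert(merged.end(), run.begin(), run.end());
  }
  // A real implementation would do a k-way merge; a full sort is enough for a sketch.
  std::sort(merged.begin(), merged.end(),
            [](const std::pair<float, int32_t> &a, const std::pair<float, int32_t> &b) {
              return a.first > b.first;
            });
  const std::size_t k = std::min<std::size_t>(static_cast<std::size_t>(k_num), merged.size());
  output_data->clear();
  output_index->clear();
  for (std::size_t i = 0; i < k; ++i) {
    output_data->push_back(merged[i].first);
    output_index->push_back(merged[i].second);
  }
}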


third_party/fwkacllib/inc/ops/reduce_ops.h (+4, -0)

@@ -663,6 +663,9 @@ REG_OP(ReduceProdD)
 *keep_dims: A bool or NoneType.
 * - If true, retains reduced dimensions with length 1.
 * - If false, the rank of the tensor is reduced by 1 for each entry in axis.
+*noop_with_empty_axes: A bool.
+* - If true, when axes = [], do not reduce.
+* - If false, when axes = [], reduce all axes.
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n

@@ -674,6 +677,7 @@ REG_OP(ReduceMean)
     .INPUT(axes, TensorType::IndexNumberType())
     .OUTPUT(y, TensorType::NumberType())
     .ATTR(keep_dims, Bool, false)
+    .ATTR(noop_with_empty_axes, Bool, true)
     .OP_END_FACTORY_REG(ReduceMean)

 /**
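The new noop_with_empty_axes attribute only matters when the axes input is empty: true leaves the tensor untouched, false reduces over every axis. A small sketch of that decision for a 1-D input, illustrating the documented semantics rather than the operator implementation:

#include <cstdint>
#include <numeric>
#include <vector>

// Illustrative semantics of noop_with_empty_axes (1-D input, assumed non-empty).
std::vector<float> ReduceMeanSketch(const std::vector<float> &x,
                                    const std::vector<int64_t> &axes,
                                    bool noop_with_empty_axes) {
  if (axes.empty()) {
    if (noop_with_empty_axes) {
      return x;  // axes == [] means "do nothing"
    }
    float mean = std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
    return {mean};  // axes == [] means "reduce all axes"
  }
  // Reduction over explicit axes is unchanged by this attribute and omitted here.
  return x;
}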

