
!2112 upgrade Ascend package 11 Mar 22

Merge pull request !2112 from yanghaoran/r1.7
pull/2113/MERGE
yanghaoran · 3 years ago
parent commit de9ffe384c
43 changed files with 1178 additions and 616 deletions
  1. +9 -0 inc/external/ge/ge_api_types.h
  2. +11 -2 inc/external/hccl/hccl_types.h
  3. +1 -1 inc/framework/common/ge_types.h
  4. +6 -6 inc/framework/common/helper/model_helper.h
  5. +6 -1 inc/framework/common/profiling_definitions.h
  6. +1 -1 inc/framework/common/util.h
  7. +0 -1 inc/framework/executor/ge_executor.h
  8. +1 -1 metadef
  9. +21 -0 third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
  10. +36 -1 third_party/fwkacllib/inc/ops/array_ops.h
  11. +1 -1 third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h
  12. +19 -0 third_party/fwkacllib/inc/ops/data_flow_ops.h
  13. +51 -0 third_party/fwkacllib/inc/ops/deep_md.h
  14. +37 -12 third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  15. +29 -3 third_party/fwkacllib/inc/ops/image_ops.h
  16. +4 -4 third_party/fwkacllib/inc/ops/math_ops.h
  17. +21 -22 third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  18. +60 -58 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
  19. +15 -15 third_party/fwkacllib/inc/ops/nn_detect_ops.h
  20. +6 -7 third_party/fwkacllib/inc/ops/nn_norm_ops.h
  21. +178 -39 third_party/fwkacllib/inc/ops/nn_ops.h
  22. +72 -53 third_party/fwkacllib/inc/ops/nn_pooling_ops.h
  23. +45 -4 third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
  24. +3 -3 third_party/fwkacllib/inc/ops/quantize_ops.h
  25. +97 -93 third_party/fwkacllib/inc/ops/reduce_ops.h
  26. +241 -243 third_party/fwkacllib/inc/ops/rnn.h
  27. +4 -4 third_party/fwkacllib/inc/ops/split_combination_ops.h
  28. +1 -1 third_party/fwkacllib/inc/ops/target_crop_and_resize.h
  29. +17 -14 third_party/fwkacllib/inc/ops/transformation_ops.h
  30. +11 -0 third_party/fwkacllib/inc/runtime/base.h
  31. +114 -25 third_party/fwkacllib/inc/runtime/kernel.h
  32. +20 -0 third_party/fwkacllib/inc/runtime/mem.h
  33. +1 -1 third_party/fwkacllib/inc/runtime/rt_ffts_plus.h
  34. +11 -0 third_party/fwkacllib/inc/runtime/stream.h
  35. +2 -0 third_party/fwkacllib/inc/toolchain/prof_acl_api.h
  36. +1 -0 third_party/fwkacllib/inc/toolchain/prof_callback.h
  37. +25 -0 third_party/fwkacllib/inc/toolchain/prof_common.h
  38. BIN third_party/prebuild/aarch64/libalog.so
  39. BIN third_party/prebuild/aarch64/liberror_manager.so
  40. BIN third_party/prebuild/aarch64/libmmpa.a
  41. BIN third_party/prebuild/x86_64/libalog.so
  42. BIN third_party/prebuild/x86_64/liberror_manager.so
  43. BIN third_party/prebuild/x86_64/libmmpa.a

+9 -0 inc/external/ge/ge_api_types.h

@@ -268,6 +268,9 @@ const std::string ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel";
// Configure Compress Weight flag
const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight";

// Configure Sparse Matrix Weight flag
const std::string ENABLE_SPARSE_MATRIX_WEIGHT = "ge.enableSparseMatrixWeight";

// Configure fusion switch file path
const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile";

@@ -289,6 +292,10 @@ const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass";
// Its value should be file path, default value is "./"
const char_t *const DEBUG_DIR = "ge.debugDir";

// Configure switch for op status check such as overflow
// Its value should be true or false
const char_t *const STATUS_CHECK = "ge.status_check";

// Configure operator compiler cache path
// Its value should be file path, default value is "./"
const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir";
@@ -411,6 +418,7 @@ static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str()
static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str();
static const char_t *const SPARSITY = ge::ENABLE_SPARSE_MATRIX_WEIGHT.c_str();
static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
@@ -469,6 +477,7 @@ const std::set<std::string> global_options = {CORE_TYPE,
BUFFER_OPTIMIZE,
ENABLE_COMPRESS_WEIGHT,
COMPRESS_WEIGHT_CONF,
SPARSITY,
PRECISION_MODE,
TUNE_DEVICE_IDS,
EXEC_DISABLE_REUSED_MEMORY,
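These option keys are plain string key/value pairs handed to GE at initialization. A minimal sketch of how the two new switches would be set (illustrative only; the exact map type taken by the GE entry points varies by version):

#include <map>
#include <string>

std::map<std::string, std::string> BuildGlobalOptions() {
  std::map<std::string, std::string> options;
  options["ge.enableSparseMatrixWeight"] = "1";  // new SPARSITY switch
  options["ge.status_check"] = "true";           // new op status (overflow) check
  return options;
}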


+11 -2 inc/external/hccl/hccl_types.h

@@ -61,7 +61,8 @@ typedef enum {
* @brief handle to HCCL communicator
*/
typedef void *HcclComm;

typedef void *HcclMessage;
typedef void *HcclRequest;
/**
* @brief HCCL Reduction operation
*/
@@ -87,8 +88,15 @@ typedef enum {
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
typedef struct {
int srcRank; // rank id of the sender of the received/probed message/envelope; defined by the MPI standard; accessible to the caller
int tag; // tag of the received/probed message/envelope; defined by the MPI standard; accessible to the caller
int error; // receive/probe error code. 0: no error; other values: an error occurred during transfer; defined by the MPI standard; accessible to the caller
int cancelled; // implementation-specific; callers are advised not to access it
int count; // size of the received/probed payload; implementation-specific; callers are advised not to access it
} HcclStatus;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
/**
* @brief HCCL root info
*/
@@ -96,6 +104,7 @@ typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;

#define HCCL_REQUEST_NULL NULL
#ifdef __cplusplus
}
#endif // __cplusplus
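The new HcclStatus mirrors MPI's status object. A hedged sketch of how a caller would inspect one (the probe/receive call that fills it is not part of this header and is omitted):

#include <cstdio>

void PrintHcclStatus(const HcclStatus &status) {
  if (status.error != 0) {  // 0: no error; other values: transfer error
    std::printf("transfer failed, error=%d\n", status.error);
    return;
  }
  std::printf("message from rank %d, tag %d\n", status.srcRank, status.tag);
  // cancelled and count are implementation-specific; avoid relying on them.
}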


+1 -1 inc/framework/common/ge_types.h

@@ -92,7 +92,7 @@ constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U;
// dynamic execute mode
const char_t *const kLazyRecompile = "lazy_recompile";

constexpr size_t kMaxHostMemInputLen = 64U;
constexpr size_t kMaxHostMemInputLen = 128U; // 64 aligned

// Data cache, including data address and length
struct DataBuffer {


+6 -6 inc/framework/common/helper/model_helper.h

@@ -35,8 +35,8 @@ class GE_FUNC_VISIBILITY ModelHelper {
Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file,
ge::ModelBufferData &model) const;
Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param,
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape);
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file);
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape) const;
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file) const;
Status LoadModel(const ge::ModelData &model_data);
Status LoadRootModel(const ge::ModelData &model_data);
static void SetModelToGeModel(GeModelPtr &ge_model, Model &model);
@@ -68,13 +68,13 @@ class GE_FUNC_VISIBILITY ModelHelper {
Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper);
Status LoadModelData(OmFileLoadHelper &om_load_helper);
Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
Status LoadWeights(OmFileLoadHelper &om_load_helper);
Status LoadWeights(OmFileLoadHelper &om_load_helper) const;
Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
Status LoadTask(OmFileLoadHelper &om_load_helper);
Status LoadTask(OmFileLoadHelper &om_load_helper) const;
Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper);
Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) const;
Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper);
Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) const;
Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model,
const size_t mode_index) const;



+6 -1 inc/framework/common/profiling_definitions.h

@@ -49,7 +49,12 @@ enum {
kAtomic,
kKernelLaunchPrepare,
kRtKernelLaunch,
kRtEventCreateRecord,
kRtEventSync,
kRtEventDestroy,
kRtStreamSync,
kOpExecute,
kModelExecute,
kAllocMem,
kCopyH2D,
kPrepareNode,
@@ -88,7 +93,7 @@ class ProfilingContext {
* Therefore the string registration performed at compile time does not take effect. When profiling is then enabled dynamically at execution time, the registered strings cannot be obtained during execution.
*/
bool IsEnabled() const noexcept {
return enabled_ && profiler_ != nullptr;
return enabled_ && (profiler_ != nullptr);
}
void SetEnable() noexcept {
enabled_ = true;
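Given the comment above, the safe call pattern is to test IsEnabled() immediately before every record, so a profiler that is switched on dynamically (after compile-time string registration was skipped) is handled gracefully. A sketch, where RecordEvent is a hypothetical helper:

void RecordEvent(ProfilingContext &ctx) {
  if (!ctx.IsEnabled()) {  // checks both enabled_ and profiler_ != nullptr
    return;                // profiling off, or the profiler not constructed yet
  }
  // ... record e.g. kRtKernelLaunch through the profiler here ...
}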


+1 -1 inc/framework/common/util.h

@@ -231,7 +231,7 @@ constexpr int32_t OM_PROTO_VERSION = 2;
/// @return string
///
template <typename T>
GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) {
GE_FUNC_VISIBILITY std::string ToString(const std::vector<T> &v) {
std::stringstream ss;
ss << "[";
for (const T x : v) {
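The switch to a const reference matters at call sites: temporaries and const vectors now bind, which the old non-const signature rejected. Illustrative only, assuming ToString lives in namespace ge like the rest of this header:

#include <cstdint>
#include <string>
#include <vector>

std::string DescribeDims() {
  const std::vector<int32_t> dims = {1, 3, 224, 224};
  // Both calls compile only with the const-reference overload:
  return ge::ToString(dims) + ge::ToString(std::vector<int32_t>{8, 8});
}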


+0 -1 inc/framework/executor/ge_executor.h

@@ -27,7 +27,6 @@
#include "framework/common/types.h"
#include "graph/tensor.h"
#include "graph/ge_tensor.h"
#include "runtime/base.h"

namespace ge {
class SingleOp;


+1 -1 metadef

@@ -1 +1 @@
Subproject commit ab3207e99f94aabf036e1c8b068de0df15ff2d01
Subproject commit 569f685a2e6107daf613daf98d4ef8e29bde6e86

+21 -0 third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h

@@ -42,6 +42,10 @@ enum class AicpuErrMsgType {
ERR_MSG_TYPE_AICPU = 2,
};

enum class AicpuExtInfoMsgType {
EXT_MODEL_ID_MSG_TYPE = 0,
};

typedef struct tagAicpuConfigMsg {
uint8_t msgType;
uint8_t reserved1;
@@ -52,6 +56,23 @@ typedef struct tagAicpuConfigMsg {
uint32_t reserved2;
} AicpuConfigMsg;


typedef struct tagAicpuModelIdInfo {
uint32_t modelId;
uint32_t extendModelId;
uint32_t extendInfo[13];
} AicpuModelIdInfo;

// 64 bytes
typedef struct tagAicpuExtendInfo {
uint8_t msgType;
uint8_t version;
uint8_t reserved[2];
union {
AicpuModelIdInfo modelIdMap;
};
} AicpuExtendInfo;
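The "64 bytes" note can be pinned down at compile time: the four leading uint8_t fields occupy 4 bytes, and the union's AicpuModelIdInfo member occupies 60 bytes (2 + 13 = 15 uint32_t fields). A sketch, assuming no packing pragma in the surrounding header changes the default layout:

static_assert(sizeof(AicpuModelIdInfo) == 60U, "modelIdMap member must be 60 bytes");
static_assert(sizeof(AicpuExtendInfo) == 64U, "AicpuExtendInfo must stay 64 bytes");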

typedef struct tagAicoreErrMsgInfo {
uint8_t errType;
uint8_t version;


+36 -1 third_party/fwkacllib/inc/ops/array_ops.h

@@ -501,6 +501,7 @@ REG_OP(Constant)
*@brief Creates a file constant tensor. The operator is used to process very large weights, which are stored in a file. \n

*@par Attributes:
*file_path: A string, used to record file path. \n
*file_id: A string, used to record file id. \n
*shape: data shape. \n
*dtype: data type. \n
@@ -511,7 +512,8 @@ REG_OP(Constant)
REG_OP(FileConstant)
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
.REQUIRED_ATTR(file_id, String)
.ATTR(file_path, String, "")
.ATTR(file_id, String, "")
.REQUIRED_ATTR(shape, ListInt)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(FileConstant)
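With file_id demoted to an optional attribute, the weight can now be referenced either by an explicit path or by a file id. A hedged usage sketch, assuming the usual generated-operator API (ge::op::FileConstant with chainable set_attr_* setters; verify names against the generated header):

ge::op::FileConstant weight("w0");
weight.set_attr_file_path("/data/weights/w0.bin")  // or set_attr_file_id("w0")
    .set_attr_shape({4096, 4096})
    .set_attr_dtype(ge::DT_FLOAT16);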
@@ -1206,6 +1208,39 @@ REG_OP(Copy)
.OP_END_FACTORY_REG(Copy);

/**
*@brief Copies the src tensor to the dst tensor according to the specified parameters. \n

*@par Inputs:
*Eight inputs, including:
*dst: A tensor. Must be one of the following types:
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool
*dst_size: A tensor with type int32
*dst_stride: A tensor with type int32
*dst_storage_offset: A tensor with type int32
*src: A tensor. Must be one of the following types:
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool
*src_size: A tensor with type int32
*src_stride: A tensor with type int32
*src_storage_offset: the storage_offset of src tensor . \n

*@par Outputs:
*dst: A ref tensor. Must be one of the following types:
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool . \n
*/

REG_OP(ViewCopy)
.INPUT(dst, TensorType::BasicType())
.INPUT(dst_size, TensorType::IndexNumberType())
.INPUT(dst_stride, TensorType::IndexNumberType())
.INPUT(dst_storage_offset, TensorType::IndexNumberType())
.INPUT(src, TensorType::BasicType())
.INPUT(src_size, TensorType::IndexNumberType())
.INPUT(src_stride, TensorType::IndexNumberType())
.INPUT(src_storage_offset, TensorType::IndexNumberType())
.OUTPUT(dst, TensorType::BasicType())
.OP_END_FACTORY_REG(ViewCopy)
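For intuition, here is a scalar model of the copy rule above, reduced to one dimension (the real op takes per-dimension size/stride/offset tensors):

#include <cstdint>

void ViewCopy1D(float *dst, int32_t dst_size, int32_t dst_stride, int32_t dst_offset,
                const float *src, int32_t src_stride, int32_t src_offset) {
  // Element i of the dst view receives element i of the src view.
  for (int32_t i = 0; i < dst_size; ++i) {
    dst[dst_offset + i * dst_stride] = src[src_offset + i * src_stride];
  }
}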

/**
*@brief Generates fingerprint values. \n

*@par Inputs:


+1 -1 third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h

@@ -28,7 +28,7 @@ namespace ge {

*@par Inputs:
* @li x: A tensor. Must be one of the following types:uint8, int8,int16, int32,
int64, float16, float, double.The format must be NHWC NCHW NC1HWC0.
int64, float16, float, double. The format must be NHWC/NCHW.

*@par Attributes:
*@li ksize: Kernel size. Input type is int.


+19 -0 third_party/fwkacllib/inc/ops/data_flow_ops.h

@@ -2478,5 +2478,24 @@ REG_OP(GetNextFromQueue)
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {{}, {}})
.OP_END_FACTORY_REG(GetNextFromQueue)

/**
* @brief Returns the values stored in an optional variant tensor.
* @par Inputs:
* optional: A tensor of type variant
* @par Outputs:
* components: A list of Tensor objects of output_types
* @par Attributes:
* output_types: types of all outputs
* output_shapes: shapes of all outputs
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(OptionalGetValue)
.INPUT(optional, TensorType({DT_VARIANT}))
.DYNAMIC_OUTPUT(components, TensorType::BasicType())
.REQUIRED_ATTR(output_types, ListType)
.REQUIRED_ATTR(output_shapes, ListListInt)
.OP_END_FACTORY_REG(OptionalGetValue)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_

+51 -0 third_party/fwkacllib/inc/ops/deep_md.h

@@ -105,6 +105,54 @@ REG_OP(ProdEnvMatA)
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(ProdEnvMatA)
/**
* @brief Calculate ProdEnvMatACalcDescrpt. \n
*
* @par Inputs:
* @li distance: A Tensor. Must be one of the following types: float32, float64.
* @li rij_x: A Tensor. Must be one of the following types: float32, float64.
* @li rij_y: A Tensor. Must be one of the following types: float32, float64.
* @li rij_z: A Tensor. Must be one of the following types: float32, float64.
* @li type: A Tensor. Must be one of the following types: int32.
* @li natoms: A Tensor. Must be one of the following types: int32.
* @li mesh: A Tensor. Must be one of the following types: int32.
* @li davg: A Tensor. Must be one of the following types: float32, float64.
* @li dstd: A Tensor. Must be one of the following types: float32, float64. \n
*
* @par Outputs:
* @li descrpt: A Tensor. Must be one of the following types: float32, float64.
* @li descrpt_deriv: A Tensor. Must be one of the following types: float32, float64. \n
*
* @par Attributes:
* @li rcut_a: A Float.
* @li rcut_r: A Float.
* @li rcut_r_smth: A Float.
* @li sel_a: A ListInt.
* @li sel_r: A ListInt. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ProdEnvMatACalcDescrpt)
.INPUT(distance, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rij_x, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rij_y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rij_z, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(type, TensorType({DT_INT32}))
.INPUT(natoms, TensorType({DT_INT32}))
.INPUT(mesh, TensorType({DT_INT32}))
.INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(rcut_a, Float, 1.0)
.ATTR(rcut_r, Float, 1.0)
.ATTR(rcut_r_smth, Float, 1.0)
.ATTR(sel_a, ListInt, {})
.ATTR(sel_r, ListInt, {})
.OP_END_FACTORY_REG(ProdEnvMatACalcDescrpt)
/**
* @brief Calculate ProdForceSeA. \n
*
@@ -195,6 +243,9 @@ REG_OP(ProdVirialSeA)
* Two attributes, including:
* @li split_count: A Scalar.
* @li split_index: A Scalar. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TabulateFusionGrad)
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))


+37 -12 third_party/fwkacllib/inc/ops/elewise_calculation_ops.h

@@ -286,7 +286,7 @@ REG_OP(Minimum)
*@par Inputs:
*One inputs, include:
*x:A Tensor of type float16, float32, int32, int64, double,
* complex64, complex128.the format can be [NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND]
* complex64, complex128. The format can be [NCHW,NHWC,ND]

*@par Outputs:
*y:A Tensor with same type as "x". \n
@@ -418,7 +418,7 @@ REG_OP(SquaredDifference)

*@par Inputs:
*x: A Tensor of type float16, float32, double, complex64, complex128.
* the format can be [NCHW,NC1HWC0,NHWC,ND]
* the format can be [NCHW,NHWC,ND]

*@par Outputs:
*y: A Tensor of the same type as "x". \n
@@ -439,7 +439,7 @@ REG_OP(Cos)
* Two inputs, including:
*@li x1: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8, float64, int64, uint16, int16,
* complex64, complex128, the format can be [NCHW,NC1HWC0,NHWC,ND].
* complex64, complex128, the format can be [NCHW,NHWC,ND].
*@li x2: A Tensor. Has the same type and format as input "x1". \n

*@par Outputs:
@@ -468,7 +468,7 @@ REG_OP(Div)
*@li x1: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8, double, int16, int64, complex64,
* complex128, quint8, qint8, qint32, string, bool. the format can be
* [NCHW, NC1HWC0, NHWC, ND]
* [NCHW, NHWC, ND]
*@li x2: A Tensor of the same type and format as "x1". \n

*@par Outputs:
@@ -1177,6 +1177,31 @@ REG_OP(FusedMulAdd)
.OP_END_FACTORY_REG(FusedMulAdd)

/**
*@brief Fuse mul+add+add with broadcast. \n

*@par Inputs:
*Four inputs, including:
* @li x1: A Tensor. Must be one of the following types:int32, float16, float32.
* @li x2: A Tensor of the same type as "x1".
* @li x3: A Tensor of the same type as "x1".
* @li x4: A Tensor of the same type as "x1". \n

*@par Outputs:
* y: A Tensor. Has the same type as "x1". \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(FusedMulAddAdd)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x3, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x4, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OP_END_FACTORY_REG(FusedMulAddAdd)
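A scalar reference for the fused semantics, under the assumption (suggested by the op name and by FusedMulAdd above) that y = x1 * x2 + x3 + x4, ignoring broadcasting:

#include <cstddef>
#include <vector>

std::vector<float> FusedMulAddAddRef(const std::vector<float> &x1, const std::vector<float> &x2,
                                     const std::vector<float> &x3, const std::vector<float> &x4) {
  std::vector<float> y(x1.size());
  for (size_t i = 0; i < x1.size(); ++i) {
    y[i] = x1[i] * x2[i] + x3[i] + x4[i];  // mul, then the two adds, in one pass
  }
  return y;
}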
/**
*@brief Returns x1 + x2 element-wise. \n

*
@@ -1299,7 +1324,7 @@ REG_OP(AssignSub)

*@par Inputs:
* Two inputs, including:
*@li y: An NCHW, NC1HWC0, NHWC, ND Tensor. Must be one of the following types: \
*@li y: An NCHW, NHWC, ND Tensor. Must be one of the following types: \
* float, int32, int8, double, complex64, complex128, half.
*@li dy: A Tensor of the same type and format as "y". \n

@@ -1321,11 +1346,11 @@ REG_OP(RsqrtGrad)
*@brief Computes hyperbolic sine of "x" element-wise. \n

*@par Inputs:
*x: An NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64,
*x: An NCHW, NHWC,or ND Tensor of type float, double, complex64,
* complex128, half. \n

*@par Outputs:
*y: A NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64,
*y: A NCHW, NHWC,or ND Tensor of type float, double, complex64,
* complex128, half. \n

*@par Third-party framework compatibility
@@ -1365,7 +1390,7 @@ REG_OP(ClipByValue)

*@par Inputs:
*x: A Tensor of type float16, float32, double, complex64, complex128.
* the format can be [NCHW,NC1HWC0,NHWC,ND]. \n
* the format can be [NCHW,NHWC,ND]. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x". \n
@@ -1385,7 +1410,7 @@ REG_OP(Cosh)
*@par Inputs:
* Two inputs, including:
*@li x1: A Tensor. Must be one of the following types:float16, float32, int32,
* int8, uint8, double, the format can be [NCHW,NC1HWC0,NHWC,ND].
* int8, uint8, double, the format can be [NCHW,NHWC,ND].
*@li x2: A Tensor of the same type as "x1". \n

*@par Outputs:
@@ -1410,7 +1435,7 @@ REG_OP(DivNoNan)
* One input: \n
*x: A Tensor, Must be one of the following types:
* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64,
* and format can be [NCHW,NC1HWC0,NHWC,ND]
* and format can be [NCHW,NHWC,ND]

*@par Outputs:
*y: A Tensor. Has the same type and format as "x"
@@ -1978,7 +2003,7 @@ REG_OP(BitwiseOr)
*@par Inputs:
*Two inputs, including:
*@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64.
* The format is NC1HWC0 or ND. Broadcasting is supported.
* The format is ND. Broadcasting is supported.
*@li x2: A Tensor. Has the same type and format as "x1". \n

*@par Outputs:
@@ -3468,7 +3493,7 @@ REG_OP(AxpyV2)
.OP_END_FACTORY_REG(AxpyV2)

/**
* @brief Add the partial values of two tensors in format NC1HWC0.
* @brief Add the partial values of two tensors.

* @par Inputs:
* @li x1: A Tensor in 5HD, and must be one of the following types: float16,


+29 -3 third_party/fwkacllib/inc/ops/image_ops.h

@@ -1267,7 +1267,7 @@ REG_OP(DecodeAndCropJpeg)

*@par Inputs:
* One input:
*x: An NC1HWC0 Tensor.
*x: A Tensor.
* Must be one of the following types: float16, float32 . \n

*@par Attributes:
@@ -1304,7 +1304,7 @@ REG_OP(ResizeBilinearV2D)

*@par Inputs:
* One input:
*images: An NC1HWC0 Tensor.
*images: A Tensor.
* Must be one of the following types: float16, float32 . \n

*@par Attributes:
@@ -1338,7 +1338,7 @@ REG_OP(KeepRatioResizeBilinear)

*@par Inputs:
* One input:
*x: An NC1HWC0 Tensor.
*x: A Tensor.
* Must be one of the following types: float16, float32, int32, int8, uint8

*@par Attributes:
@@ -2310,6 +2310,32 @@ REG_OP(UpsampleNearest1dGrad)
.OP_END_FACTORY_REG(UpsampleNearest1dGrad)

/**
* @brief Decodes an image from a string into a numeric tensor. \n

* @par Inputs:
* contents: A Tensor of type string. 0-D. The JPEG, GIF, PNG, BMP-encoded image. \n

* @par Attributes:
* @li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
* @li dtype: type of image
* @li expand_animations: Controls the shape of the returned op's output. If 'true', the returned op will
produce a 4-D tensor for GIF files. If 'false', the returned op will produce a 3-D tensor for GIF files.

* @par Outputs:
* image: A Tensor dtype of uint8, uint16 or float.

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DecodeImage)
.INPUT(contents, TensorType({DT_STRING}))
.OUTPUT(image, TensorType({DT_UINT8, DT_UINT16, DT_FLOAT}))
.ATTR(channels, Int, 0)
.ATTR(dtype, Type, DT_UINT8)
.ATTR(expand_animations, Bool, true)
.OP_END_FACTORY_REG(DecodeImage)
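A hedged construction sketch, assuming the standard generated-operator pattern (ge::op::DecodeImage with chainable set_attr_* setters; verify the names against the generated header):

ge::op::DecodeImage decode("decode_image");
decode.set_attr_channels(3)              // force 3 color channels
    .set_attr_dtype(ge::DT_UINT8)        // decoded pixel type
    .set_attr_expand_animations(false);  // 3-D output even for GIF inputs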

/**
* @brief JPEG encode input image with provided compression quality. \n

* @par Inputs:


+4 -4 third_party/fwkacllib/inc/ops/math_ops.h

@@ -425,7 +425,7 @@ REG_OP(EndOfSequence)

*@par Inputs:
*x: A Tensor of type float16, float32 or double. the format can be
* [NCHW,NC1HWC0,NHWC,ND]
* [NCHW,NHWC,ND]

*@par Outputs:
*y: A Tensor. Has the same type and format as "x" . \n
@@ -462,15 +462,15 @@ REG_OP(Erfc)

*@par Inputs:
*Three inputs, including:
*@li x: A Tensor of type float32, float16, int32, int64.
*@li range: A Tensor of type float32,float16,int32, int64.
*@li x: A Tensor of type float32, int32, int64. float16 is currently not supported.
*@li range: A Tensor of type float32, int32, int64. float16 is currently not supported.
*@li nbins: A Tensor of type int32 . \n

*@par Attributes:
* dtype: An optional attribute. Defaults to "int32" . \n

*@par Outputs:
*y: A Tensor. A Tensor of type int32 or int64 . \n
*y: A Tensor of type int32. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator HistogramFixedWidth.
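A worked example of the binning rule: nbins equal-width bins span [range[0], range[1]], and out-of-range values are clamped into the first or last bin:

// x     = {-1.0, 0.0, 1.5, 2.0, 5.0, 15.0}
// range = {0.0, 10.0}, nbins = 5  ->  bin width = 2.0
// y     = {3, 1, 1, 0, 1}  // -1.0, 0.0, 1.5 -> bin 0; 2.0 -> bin 1; 5.0 -> bin 2; 15.0 -> bin 4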


+21 -22 third_party/fwkacllib/inc/ops/matrix_calculation_ops.h

@@ -31,9 +31,9 @@ namespace ge {
*@par Inputs:
*Three inputs, including:
* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
* float32, int32. Has format [ND, NHWC].
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ].
* float32, int32. Has format [ND, NHWC].
* @li bias: An optional 1D Tensor. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC] . \n

@@ -43,7 +43,7 @@ namespace ge {

*@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float16,
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n
* float32, int32. Has format [ND, NHWC] . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchMatmul.
@@ -63,9 +63,9 @@ REG_OP(MatMul)
*@par Inputs:
*Four inputs, including:
* @li x1: A matrix Tensor. 2D. Must be one of the following types: float32,
float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
float16, int32, int8. Has format [ND, NHWC].
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float32,
float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ].
float16, int32, int8. Has format [ND, NHWC].
* @li bias: A 1D Tensor. Must be one of the following types: float32,
float16, int32. Has format [ND, NHWC].
* @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8.
@@ -82,7 +82,7 @@ REG_OP(MatMul)

*@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float32,
float16, int32. Has format [ND, NHWC, FRACTAL_NZ]. \n
float16, int32. Has format [ND, NHWC]. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchMatmul.
@@ -147,24 +147,24 @@ REG_OP(MatMulV2Compress)
*@par Inputs:
*Five inputs, including:
*@li a: A matrix Tensor. Must be one of the following types: float16, int8.
* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ).
* Has format [ND].
*@li b: A matrix Tensor. Must be one of the following types: float16, int8.
* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z).
* Has format ND.
*@li c: A matrix Tensor. Must be one of the following types: float16, int32,
* float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ).
* float32. has format ND.
*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following
* types: float16, int32, float32. Has format [ND].
*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following
* types: float16, int32, float32. Has format [ND].
* The format of a, b, c has restriction:\n
* When type of a is int8 and type of c is int32, the format of a, b, c should
* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n
* all be ND.\n
* When type of a is int8 and type of c is float32, the format of a, b, c should
* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n
* all be ND.\n
* When type of a is float16 and type of c is float16, the format of a, b, c
* should all be ND or FRACTAL_NZ.\n
* should all be ND.\n
* When type of a is float16 and type of c is float32, the format of a, b, c
* should all be ND or FRACTAL_NZ . \n
* should all be ND. \n

*@par Attributes:
*Two attributes, including:
@@ -175,8 +175,7 @@ REG_OP(MatMulV2Compress)

*@par Outputs:
*y: The result matrix Tensor. Must be one of the following types: float16,
* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a.
* 2D(ND) or 4D(FRACTAL_NZ).
* float32, int32. Has format [ND], the format should be equal to a.
*/

REG_OP(GEMM)
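The alpha and beta inputs follow the classic BLAS GEMM contract. A scalar reference of that contract (a sketch; the transpose attributes are not applied here):

void GemmRef(int m_dim, int n_dim, int k_dim, float alpha, const float *a,
             const float *b, float beta, const float *c, float *y) {
  // y[m][n] = alpha * sum_k a[m][k] * b[k][n] + beta * c[m][n]
  for (int m = 0; m < m_dim; ++m) {
    for (int n = 0; n < n_dim; ++n) {
      float acc = 0.0F;
      for (int k = 0; k < k_dim; ++k) {
        acc += a[m * k_dim + k] * b[k * n_dim + n];
      }
      y[m * n_dim + n] = alpha * acc + beta * c[m * n_dim + n];
    }
  }
}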
@@ -196,9 +195,9 @@ REG_OP(GEMM)
*@par Inputs:
*Two inputs, including:
* @li x1: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
* float32, int32. 2D or higher. Has format [ND, NHWC].
* @li x2: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n
* float32, int32. 2D or higher. Has format [ND, NHWC] . \n

*@par Attributes:
*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
@@ -206,7 +205,7 @@ REG_OP(GEMM)

*@par Outputs:
*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n
* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchMatmul.
@@ -227,11 +226,11 @@ REG_OP(BatchMatMul)
* @par Inputs:
* Three inputs, including:
* @li x1: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
* float32, int32. 2D or higher. Has format [ND, NHWC].
* @li x2: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n
* float32, int32. 2D or higher. Has format [ND, NHWC] . \n
* @li bias: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n
* float32, int32. 2D or higher. Has format [ND, NHWC] . \n

* @par Attributes:
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
@@ -239,7 +238,7 @@ REG_OP(BatchMatMul)

* @par Outputs:
* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n
* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchMatmul.


+60 -58 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h

@@ -86,35 +86,37 @@ REG_OP(L2NormalizeGrad)
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
* Five inputs, including: (NHWC, NCHW)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean used for inference. Must be "None" if the
operation is used for training.
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the variance used for inference. Must be "None"
if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero.
Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
*@li is_training: An optional bool, specifying if the operation is used for training or inference.
Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean of "x".
* Five outputs, including: (NHWC, NCHW)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless emement. \n
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow,
only has one useless element. \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -264,17 +266,17 @@ REG_OP(SyncBatchNormBackwardElemt)
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NDC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
* Five inputs, including: (NHWC, NCHW)
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW.
Specifies the offset.
*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW.
Specifies the mean used for inference. Must be "None" if the
operation is used for training.
*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NHWC or NCHW. Must be
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NHWC or NCHW.
Specifies the variance used for inference. Must be "None"
if the operation is used for training . \n

*@par Attributes:
@@ -283,16 +285,16 @@ if the operation is used for training . \n
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D.
*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NDC1HWC0. Specifies the mean of "x".
* Five outputs, including: (NHWC, NCHW)
*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW.
*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW.
Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x".
Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -375,11 +377,11 @@ REG_OP(BatchNormExt2)

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm .
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, for the gradient.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW.
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW.
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW. It is an output of BatchNorm.
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW. It is an output of BatchNorm .
*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n

*@par Attributes:
@@ -388,11 +390,11 @@ REG_OP(BatchNormExt2)
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale".
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset".
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output.
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, for the offset of "scale".
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, for the offset of "offset".
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW. Pass "None" to skip this output.
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW. Pass "None" to skip this output . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm . \n
@@ -423,11 +425,11 @@ REG_OP(BatchNormGrad)

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient.
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm.
*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm . \n
*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, for the gradient.
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW.
*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW.
*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW. It is an output of BatchNorm.
*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW. It is an output of BatchNorm . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
@@ -435,11 +437,11 @@ REG_OP(BatchNormGrad)
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale".
*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset".
*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output.
*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, for the offset of "scale".
*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, for the offset of "offset".
*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW. Pass "None" to skip this output.
*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW. Pass "None" to skip this output . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm . \n
@@ -515,7 +517,7 @@ REG_OP(BatchNormGradExt2)
*@brief Performs batch normalization . \n

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
*@li mean: A Tensor of type float32 or float16. Must be 1D. Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16. Must be 1D. Specifies the variance used for inference.
*@li momentum: A Tensor, represents the mean and the variance's scale factor
@@ -545,7 +547,7 @@ REG_OP(BNInference)
*@brief Performs batch normalization . \n

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
*@li mean: A Tensor of type float32 or float16. Must be 1D. Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16. Must be 1D. Specifies the variance used for inference.
*@li scale: An optional tensor of type float16 or float32, unused


+15 -15 third_party/fwkacllib/inc/ops/nn_detect_ops.h

@@ -268,7 +268,7 @@ REG_OP(ROIAlign)

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
*@li x: An NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x" . \n

*@par Attributes:
@@ -316,12 +316,12 @@ REG_OP(PriorBox)

*@par Inputs:
* Six inputs, including:
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
*@li x: An NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x".
*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box.
*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n
*@li data_h: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
*@li data_w: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
*@li box_height: An NCHW tensor of type float32 or float16, specifying the height of each prior box.
*@li box_width: An NCHW tensor of type float32 or float16, specifying the width of each prior box . \n

*@par Attributes:
*@li min_size: A required float32, specifying the minimum edge length of a square prior box.
@@ -371,7 +371,7 @@ REG_OP(PriorBoxD)

*@par Inputs:
* Six inputs, including:
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
*@li x: An NCHW feature map of type float32 or float16.
*@li img: source image. Has the same type and format as "x".
*@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y

@@ -420,7 +420,7 @@ REG_OP(PriorBoxDV2)

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
*@li x: A tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size.
*@li rois: A tensor of type float16 or float32, with shape
@@ -438,7 +438,7 @@ REG_OP(PriorBoxDV2)
* coordinates to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
*y: A tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
@@ -1171,7 +1171,7 @@ REG_OP(SPP)

*@par Inputs:
* Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
*@li x: A tensor of type float16 or float32, describing the feature
* map. The data of x must be greater than or equal to "0.0".
*@li rois: A tensor of type float16 or float32, with 3D shape
* [batch, 5, roi_max_num], describing the ROIs. Each ROI consists of five
@@ -1195,7 +1195,7 @@ REG_OP(SPP)
* coordinates of width to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
*y: A tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
@@ -1860,7 +1860,7 @@ REG_OP(RoiExtractor)

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
*@li x: A tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size.
*@li rois: A tensor of type float16 or float32, with shape
@@ -1878,7 +1878,7 @@ REG_OP(RoiExtractor)
* coordinates to the ROI coordinates . \n

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
*y: A tensor of type float16 or float32, describing the result
* feature map . \n

*@attention Constraints:
@@ -1898,7 +1898,7 @@ REG_OP(PSROIPoolingV2)

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result
*@li x: A tensor of type float16 or float32, describing the result
* feature map . \n
*@li rois: A tensor of type float16 or float32, with shape
* [batch, 5, rois_num], describing the ROIs, each ROI consists of five
@@ -1916,7 +1916,7 @@ REG_OP(PSROIPoolingV2)
*@li input_size: A required listInt, mapping the gradinput size: (H, W)

*@par Outputs:
*y: An NC1HWC0 tensor of type float16 or float32, describing the feature
*y: A tensor of type float16 or float32, describing the feature
* map, dimension C1 must be equal to
* (int(output_dim+15)/C0))*group_size*group_size.



+6 -7 third_party/fwkacllib/inc/ops/nn_norm_ops.h

@@ -104,9 +104,8 @@ REG_OP(SoftmaxCrossEntropyWithLogits)
*@par Inputs:
* Two inputs, including:
* @li softmax: Output of the softmax operator. Must be one of the following
* types: float16, float31, int32, int8, uint8. The format is NC1HWC0 or DN.
* @li grad_softmax: A Tensor. Has the same shape and type as "softmax".
* The format is NC1HWC0 or DN . \n
* types: float16, float32, int32, int8, uint8.
* @li grad_softmax: A Tensor. Has the same shape and type as "softmax".\n

*@par Attributes:
* axes: An optional list of ints. Defaults to "{-1}" . \n
@@ -1101,8 +1100,8 @@ REG_OP(GroupNorm)
*@brief Performs instance normalization . \n

*@par Inputs:
* Five inputs, including: (NC1HWC0, supported)
*@li x: A 5D Tensor of type float16 or float32, NC1HWC0.
* Five inputs, including:
*@li x: A 5D Tensor of type float16 or float32.
*@li gamma: A Tensor of type float32.
A 5D Tensor for scaling factor, to scale the normalized x.
*@li beta: A Tensor of type float32.
@@ -1121,7 +1120,7 @@ the value used for the running_mean and running_var computation. Default: "0.1".
variance to avoid dividing by zero. Defaults to "0.00001" . \n

*@par Outputs:
* Three outputs, including: (NHWC, NCHW NC1HWC0 supported)
* Three outputs, including: (NHWC, NCHW supported)
*@li y: A 5D tensor of type float16 or float32 for the normalized "x",
*@li batch_mean: A Tensor of type float32.
Specifies the mean of "x".
@@ -1154,7 +1153,7 @@ REG_OP(InstanceNormV2)
*@brief Performs instance normalization for inference.

*@par Inputs:\n
* Five inputs, including: (NC1HWC0 supported)
* Five inputs, including:
*@li x: A Tensor of type float16 or float32.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.


+178 -39 third_party/fwkacllib/inc/ops/nn_ops.h

@@ -54,17 +54,17 @@ REG_OP(InTopKV2)
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
* Five inputs, including: (NHWC, NCHW supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean used for inference. Must be "None" if the
operation is used for training.
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the variance used for inference. Must be "None"
if the operation is used for training . \n

*@par Attributes:
@@ -73,16 +73,16 @@ if the operation is used for training . \n
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean of "x".
* Five outputs, including: (NHWC, NCHW supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -109,18 +109,19 @@ REG_OP(FusedBatchNormV2)
* @brief Sorts a large amount of data. First operator of TopK.
* @par Inputs:
* two input, including:
* @li input_data: A Tensor. Data to be sorted. Support float16
* @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data.
* @li input_data: A Tensor. Data to be sorted. Support float16 or float32.
* @li input_index: A Tensor. Range(0, 2048). Support float16 or int32.
* @par Attributes:
* k_num: Int. Number to be sorted.
* @par Outputs:
* One output, including:
* output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel.
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(SegmentSort)
.INPUT(input_data, TensorType({DT_FLOAT16}))
.INPUT(input_index, TensorType({DT_FLOAT16}))
.OUTPUT(output_proposal, TensorType({DT_FLOAT16}))
.INPUT(input_data, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input_index, TensorType({DT_FLOAT16,DT_INT32}))
.OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
.REQUIRED_ATTR(k_num, Int)
.OP_END_FACTORY_REG(SegmentSort)

@@ -128,36 +129,174 @@ REG_OP(SegmentSort)
* @brief Sorts a large amount of data. Second operator of TopK.
* @par Inputs:
* One input, including:
* input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* input_proposal: A Tensor. Proposal sorted for each channel. Support float16 or float32
* @par Attributes:
* k_num: Int. Number to be sorted.
* include_index: Bool. If false, outputs a proposal; if true, outputs data and index.
* @par Outputs:
* One output, including:
* Two outputs, including:
* output_proposal: A Tensor. Datatype and format are the same as the input. Proposal sorted for each channel.
* output_index: A Tensor. If include_index is true, outputs the index.
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MultiMerge)
.INPUT(input_proposal, TensorType({DT_FLOAT16}))
.OUTPUT(output_proposal, TensorType({DT_FLOAT16}))
.INPUT(input_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output_index, TensorType({DT_INT32}))
.REQUIRED_ATTR(k_num, Int)
.ATTR(include_index, Bool, false)
.OP_END_FACTORY_REG(MultiMerge)

/**
* @brief Large amount of data sort.Third operator of TopK.
* @brief MultiHeadAttention.
* @par Inputs:
* One input, including:
* input_proposal: A Tensor. Proposal sorted for each channel. Support float16
* Thirteen inputs, including:
* @li query: A Tensor. Query of Attention. Support float16
* @li key: A Tensor. Key of Attention. Support float16
* @li value: A Tensor. Value of Attention. Support float16
* @li query_weight: A Tensor. QueryWeight of Attention. Support float16
* @li key_weight: A Tensor. KeyWeight of Attention. Support float16
* @li value_weight: A Tensor. ValueWeight of Attention. Support float16
* @li attn_mask: A Tensor. AttentionMask of Attention. Support float16
* @li out_proj_weight: A Tensor. OutProjWeight of Attention. Support float16
* @li query_bias: Optional Tensor. QueryBias of Attention. Support float16
* @li key_bias: Optional Tensor. KeyBias of Attention. Support float16
* @li value_bias: Optional Tensor. ValueBias of Attention. Support float16
* @li out_proj_bias: Optional Tensor. OutProjBias of Attention. Support float16
* @li dropout_mask: Optional Tensor. DropOutMask of Attention. Support uint8 \n

* @par Attributes:
* k_num: Int.Number to be sorted.
* @li attn_head_num: Attention Head numbers, Support int
* @li attn_dim_per_head: Attention dim of a Head, Support int
* @li src_len: source length, Support int
* @li tgt_len: target length, Support int
* @li keep_prob: dropout keep probability, Support float
* @li softmax_use_float: SoftMax Use Float32 to keep precision, Support bool \n

* @par Outputs:
* Two output, including:
* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted.
* @li output_index: A Tensor. int32. Data index.
* Eight outputs, including:
* @li y: A Tensor. Result of Attention. Support float16
* @li dropout_mask: DropOutMask of Attention. Support uint8
* @li query_res: Query Result of Attention. Support float16
* @li key_res: Key Result of Attention. Support float16
* @li value_res: Value Result of Attention. Support float16
* @li attn_scores: Attention Scores of SoftMax. Support float16, float
* @li attn_res: Attention Result of SoftMax. Support float16
* @li context: Context of Attention. Support float16

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(SingleMerge)
.INPUT(input_proposal, TensorType({DT_FLOAT16}))
.OUTPUT(output_data, TensorType({DT_FLOAT16}))
.OUTPUT(output_index, TensorType({DT_INT32}))
.REQUIRED_ATTR(k_num, Int)
.OP_END_FACTORY_REG(SingleMerge)
REG_OP(MultiHeadAttention)
.INPUT(query, TensorType({DT_FLOAT16}))
.INPUT(key, TensorType({DT_FLOAT16}))
.INPUT(value, TensorType({DT_FLOAT16}))
.INPUT(query_weight, TensorType({DT_FLOAT16}))
.INPUT(key_weight, TensorType({DT_FLOAT16}))
.INPUT(value_weight, TensorType({DT_FLOAT16}))
.INPUT(attn_mask, TensorType({DT_FLOAT16}))
.INPUT(out_proj_weight, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(query_bias, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(key_bias, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(value_bias, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(out_proj_bias, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16}))
.OUTPUT(dropout_mask, TensorType({DT_UINT8}))
.OUTPUT(query_res, TensorType({DT_FLOAT16}))
.OUTPUT(key_res, TensorType({DT_FLOAT16}))
.OUTPUT(value_res, TensorType({DT_FLOAT16}))
.OUTPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(attn_res, TensorType({DT_FLOAT16}))
.OUTPUT(context, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(attn_head_num, Int)
.REQUIRED_ATTR(attn_dim_per_head, Int)
.REQUIRED_ATTR(src_len, Int)
.REQUIRED_ATTR(tgt_len, Int)
.REQUIRED_ATTR(keep_prob, Float)
.REQUIRED_ATTR(softmax_use_float, Bool)
.OP_END_FACTORY_REG(MultiHeadAttention)
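
The registration above fixes only the interface; the header does not show the kernel. As a rough reference for the math the op is named after, assuming the standard multi-head attention formulation, a single-head scaled dot-product sketch (attn_scores is the softmax input, attn_res its result, context the weighted sum):

// softmax(Q * K^T / sqrt(attn_dim_per_head)) * V, for one head (reference only).
#include <cmath>
#include <iostream>
#include <vector>

using Mat = std::vector<std::vector<float>>;

static Mat matmul(const Mat &a, const Mat &b, bool transpose_b = false) {
  std::size_t n = a.size(), k = a[0].size();
  std::size_t m = transpose_b ? b.size() : b[0].size();
  Mat out(n, std::vector<float>(m, 0.f));
  for (std::size_t i = 0; i < n; ++i)
    for (std::size_t j = 0; j < m; ++j)
      for (std::size_t t = 0; t < k; ++t)
        out[i][j] += a[i][t] * (transpose_b ? b[j][t] : b[t][j]);
  return out;
}

int main() {
  const std::size_t attn_dim_per_head = 2;
  Mat q = {{1, 0}, {0, 1}}, k = {{1, 1}, {0, 1}}, v = {{1, 2}, {3, 4}};

  Mat scores = matmul(q, k, /*transpose_b=*/true);    // pre-softmax attn_scores
  const float scale = 1.f / std::sqrt(float(attn_dim_per_head));
  for (auto &row : scores) {
    float sum = 0.f;
    for (float &s : row) { s = std::exp(s * scale); sum += s; }
    for (float &s : row) s /= sum;                    // attn_res (softmax result)
  }
  Mat context = matmul(scores, v);                    // context output

  for (const auto &row : context)
    std::cout << row[0] << ' ' << row[1] << '\n';
  return 0;
}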

/**
* @brief MultiHeadAttentionGrad.
* @par Inputs:
* Fifteen inputs, including:
* @li query: A Tensor. Query of Attention. Support float16
* @li key: A Tensor. Key of Attention. Support float16
* @li value: A Tensor. Value of Attention. Support float16
* @li query_weight: A Tensor. QueryWeight of Attention. Support float16
* @li key_weight: A Tensor. KeyWeight of Attention. Support float16
* @li value_weight: A Tensor. ValueWeight of Attention. Support float16
* @li out_proj_weight: A Tensor. OutProjWeight of Attention. Support float16
* @li query_res: A Tensor. Query Result of Attention. Support float16
* @li key_res: A Tensor. Key Result of Attention. Support float16
* @li value_res: A Tensor. Value Result of Attention. Support float16
* @li attn_scores: A Tensor. Attention Scores of Attention. Support float16, float
* @li attn_res: A Tensor. Attention Result of Attention. Support float16
* @li context: A Tensor. Context of Attention. Support float16
* @li y_grad: A Tensor. Grad of Attention. Support float16
* @li dropout_mask: A Tensor. DropOutMask of Attention. Support uint8 \n

* @par Attributes:
* @li attn_head_num: Attention Head numbers, Support int
* @li attn_dim_per_head: Attention dim of a Head, Support int
* @li src_len: source length, Support int
* @li tgt_len: target length, Support int
* @li keep_prob: dropout keep probability, Support float
* @li softmax_use_float: SoftMax Use Float32 to keep precision, Support bool
* @li bias_grad_mask: mask indicating whether attention has bias grad, Support list of bool \n

* @par Outputs:
* Eleven outputs, including:
* @li query_weight_grad: QueryWeight Grad of Attention. Support float16
* @li key_weight_grad: KeyWeight Grad of Attention. Support float16
* @li value_weight_grad: ValueWeight Grad of Attention. Support float16
* @li out_proj_weight_grad: OutProjWeight Grad of Attention. Support float16
* @li query_grad: Query Grad of Attention. Support float16
* @li key_grad: Key Grad of Attention. Support float16
* @li value_grad: Value Grad of Attention. Support float16
* @li query_bias_grad: QueryBias Grad of Attention. Support float16
* @li key_bias_grad: KeyBias Grad of Attention. Support float16
* @li value_bias_grad: ValueBias Grad of Attention. Support float16
* @li out_proj_bias_grad: OutProjBias Grad of Attention. Support float16

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MultiHeadAttentionGrad)
.INPUT(query, TensorType({DT_FLOAT16}))
.INPUT(key, TensorType({DT_FLOAT16}))
.INPUT(value, TensorType({DT_FLOAT16}))
.INPUT(query_weight, TensorType({DT_FLOAT16}))
.INPUT(key_weight, TensorType({DT_FLOAT16}))
.INPUT(value_weight, TensorType({DT_FLOAT16}))
.INPUT(out_proj_weight, TensorType({DT_FLOAT16}))
.INPUT(query_res, TensorType({DT_FLOAT16}))
.INPUT(key_res, TensorType({DT_FLOAT16}))
.INPUT(value_res, TensorType({DT_FLOAT16}))
.INPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(attn_res, TensorType({DT_FLOAT16}))
.INPUT(context, TensorType({DT_FLOAT16}))
.INPUT(y_grad, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8}))
.OUTPUT(query_weight_grad, TensorType({DT_FLOAT16}))
.OUTPUT(key_weight_grad, TensorType({DT_FLOAT16}))
.OUTPUT(value_weight_grad, TensorType({DT_FLOAT16}))
.OUTPUT(out_proj_weight_grad, TensorType({DT_FLOAT16}))
.OUTPUT(query_grad, TensorType({DT_FLOAT16}))
.OUTPUT(key_grad, TensorType({DT_FLOAT16}))
.OUTPUT(value_grad, TensorType({DT_FLOAT16}))
.OUTPUT(query_bias_grad, TensorType({DT_FLOAT16}))
.OUTPUT(key_bias_grad, TensorType({DT_FLOAT16}))
.OUTPUT(value_bias_grad, TensorType({DT_FLOAT16}))
.OUTPUT(out_proj_bias_grad, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(attn_head_num, Int)
.REQUIRED_ATTR(attn_dim_per_head, Int)
.REQUIRED_ATTR(src_len, Int)
.REQUIRED_ATTR(tgt_len, Int)
.REQUIRED_ATTR(keep_prob, Float)
.REQUIRED_ATTR(softmax_use_float, Bool)
.REQUIRED_ATTR(bias_grad_mask, ListBool)
.OP_END_FACTORY_REG(MultiHeadAttentionGrad)
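
The four integer attributes pin down the shapes the forward and backward ops agree on. A small sketch of that bookkeeping; the flattened [len * batch, embed_dim] layout is an assumption for illustration, since the header does not spell it out:

// Shape arithmetic implied by the attention attributes (assumed layout).
#include <cstdio>

int main() {
  const int attn_head_num = 8, attn_dim_per_head = 64;
  const int src_len = 128, tgt_len = 64, batch = 16;

  const int embed_dim = attn_head_num * attn_dim_per_head;  // 512
  // Assumed: query is [tgt_len * batch, embed_dim], key/value are
  // [src_len * batch, embed_dim], attn_scores is [batch * heads, tgt_len, src_len].
  std::printf("embed_dim = %d\n", embed_dim);
  std::printf("query elements = %d\n", tgt_len * batch * embed_dim);
  std::printf("attn_scores elements = %d\n",
              batch * attn_head_num * tgt_len * src_len);
  return 0;
}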
}// namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

+ 72
- 53
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -81,10 +81,16 @@ REG_OP(Pooling)
*x: A tensor of type float16, float32, double . \n

*@par Attributes:
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255].
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63].
*@li padding: A required string, specifying the padding algorithm, either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding.
*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default) . \n
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window,
* where N = C = 1, and H and W are positive integers within the range [1, 255].
*@li strides: A required list of 4 ints, specifying the stride of the sliding window.
* The strides of the N and C dimensions are 1.
* The strides of the H and W dimensions are positive integers within the range [1, 63].
*@li padding: A required string, specifying the padding algorithm,
* either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs.
* With "VALID" means no padding.
*@li data_format: An optional string, specifying the data format of "ksize" and "strides",
* either "NCHW", or "NHWC" (default) . \n

*@par Outputs:
*y: The average pooled output tensor. Has the same type and format as input "x" . \n
@@ -94,7 +100,8 @@ REG_OP(Pooling)
*@li Only single input and single output are supported.
*@li Global pooling is supported.
*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256
*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63].
*@li Due to instruction restrictions,
* the values of "strides_h" and "strides_w" are positive integers within the range [1, 63].
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool.
*/
@@ -114,11 +121,18 @@ REG_OP(AvgPool)
*x: A tensor of type float16, float32, double.

*@par Attributes:
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255].
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63].
*@li padding_mode: A required string, specifying the padding algorithm, either "VALID", "SAME" and "CALCULATED". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding.
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window,
* where N = C = 1, and H and W are positive integers within the range [1, 255].
*@li strides: A required list of 4 ints, specifying the stride of the sliding window.
* The strides of the N and C dimensions are 1.
* The strides of the H and W dimensions are positive integers within the range [1, 63].
*@li padding_mode: A required string, specifying the padding algorithm,
* either "VALID", "SAME" and "CALCULATED".
* With "SAME" means that the outputs will have the same spatial dimensions as its inputs.
* With "VALID" means no padding.
*@li pads: Pad value when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default).
*@li data_format: An optional string, specifying the data format of "ksize" and "strides",
* either "NCHW", or "NHWC" (default).
*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w]
*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li exclusive: Ignore padding area or not when calculating average.
@@ -130,7 +144,8 @@ REG_OP(AvgPool)
*@li Only single input and single output are supported.
*@li Global pooling is supported.
*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256
*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63].
*@li Due to instruction restrictions,
* the values of "strides_h" and "strides_w" are positive integers within the range [1, 63].
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPoolV2.
*/
@@ -310,21 +325,24 @@ REG_OP(AvgPool3DGradD)

*@par Inputs:
* One input:
*x: An NC1HWC0 Tensor of type float16.
*x: A Tensor of type float16.


*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li ksize: A required list of int8, int16, int32, or int64 values,
* specifying the size of the window for each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values,
* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li padding: A required string. No default value.
*@li data_format: An optional string. Defaults to "NC1HWC0" . \n
*@li data_format: An optional string . \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x" . \n

*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
*@li "padding" is either "SAME" or "VALID" . \n

*@par Third-party framework compatibility
@@ -348,7 +366,7 @@ REG_OP(MaxPoolExt2)

*@par Inputs:
* One input:
*x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16,
*x: A Tensor. Supported type:float16, float32, double, int8, int16,
* int32, int64, uint8, uint16, qint8

*@par Attributes:
@@ -391,7 +409,7 @@ REG_OP(MaxPool)
*@brief Performs max 3d pooling on the input . \n

*@par Inputs:
*x: An NC1HWC0 Tensor. Supported type float16, float32, double . \n
*x: A Tensor. Supported type float16, float32, double . \n

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values,
@@ -457,7 +475,6 @@ REG_OP(MaxPool3D)
* y: A 6D tensor. The maxpool3d output (max value), format as NDoC1HoWoC0.
* @par Outputs:
* argmax: A 5D uint16 tensor. The indices output.
* format as NC1HWC0, actually it represent N, Do, C1*ksize, Ho*Wo//16, 16.
*/
REG_OP(MaxPool3DWithArgmax)
.INPUT(x, TensorType::RealNumberType())
@@ -546,9 +563,9 @@ REG_OP(MaxPool3DGradGrad)
* @brief Computes gradients of the maxpooling function . \n

* @par Inputs:
* @li x1: A mutable NC1HWC0 tensor of type RealNumberType.
* @li x2: A mutable NC1HWC0 tensor of type RealNumberTypex.
* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n
* @li x1: A mutable tensor of type RealNumberType.
* @li x2: A mutable tensor of type RealNumberType.
* @li grad: A mutable tensor of type RealNumberType . \n

* @par Attributes:
* @li ksize: A required tuple or list, specifying the size of the window for
@@ -630,21 +647,24 @@ REG_OP(MaxPoolGradGrad)

*@par Inputs:
* Three inputs:
*@li x: An NC1HWC0 Tensor of type float16.
*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value.
*@li x: A Tensor of type float16.
*@li strides: A required type of int32 values,
* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li ksize: A required type of int32 values,
* specifying the size of the window for each dimension of the input tensor. No default value.


*@par Attributes:
*@li padding: A required string. No default value.
*@li data_format: An optional string. Defaults to "NC1HWC0" . \n
*@li data_format: An optional string. \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x" . \n

*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
* strides[2] <= 63, strides[2] >= 1.
*@li "padding" is either "SAME" or "VALID" . \n

*@par Third-party framework compatibility
@@ -713,7 +733,7 @@ REG_OP(MaxPoolWithArgmax)
*@li grad: An 4d tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64.
* Must set the format, supported format list ["NCHW, NHWC"]
*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n
*@li argmax: A tensor of type int32 or int64 . \n

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values,
@@ -753,8 +773,8 @@ REG_OP(MaxPoolGradWithArgmax)

*@par Inputs:
* Two inputs:
*@li x: An NC1HWC0 Tensor of type float16.
*@li mask: An NC1HWC0 Tensor of type uint16 . \n
*@li x: A Tensor of type float16.
*@li mask: A Tensor of type uint16 . \n

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
@@ -763,7 +783,7 @@ REG_OP(MaxPoolGradWithArgmax)
*@li originshape: A required list of int8, int16, int32, or int64 values. No default value. \n

*@par Outputs:
*argmax: An NC1HWC0 Tensor of type int32 . \n
*argmax: A Tensor of type int32 . \n

*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
@@ -1314,7 +1334,7 @@ REG_OP(AvgPool1DD)

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x".
*argmax: A Tensor. type:uint16, format:NC1HWC0.
*argmax: A Tensor. type:uint16.
*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
*@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
@@ -1388,7 +1408,7 @@ REG_OP(MaxPoolGradWithArgmaxV2)

* @par Inputs:
* One input:
* x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int32, int64,
* x: A Tensor. Supported type:float16, float32, double, int32, int64,
* uint8, int16, int8, uint16, qint8

* @par Attributes:
@@ -1400,9 +1420,8 @@ REG_OP(MaxPoolGradWithArgmaxV2)
* the input tensor. No default value.
* @li padding_mode: A required string. Defaults to "CALCULATED".
* @li pads:A required list of int8, int16, int32, or int64 values,
* a data to caculate when padding_mode is "CALCULATED".
* a data to calculate when padding_mode is "CALCULATED".
* @li data_format: An optional string. Defaults to "NHWC" .
* If data_format = "NC1HWC0", ori_format must be "NCHW".
* @li global_pooling: bool. Whether to use global pooling.
* If global_pooling = true, kernel size and paddings will be ignored.
* Default False
@@ -1418,7 +1437,7 @@ REG_OP(MaxPoolGradWithArgmaxV2)
* ksize[1] * ksize[2] <= 255.
* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1,
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
* @li "padding" is "SAME" "VALID" or "CACULATE" .
* @li "padding" is "SAME" "VALID" or "CALCULATE" .


* @par Third-party framework compatibility
@@ -1440,9 +1459,9 @@ REG_OP(MaxPoolV3)
* @brief Computes gradients of the maxpooling function . \n

* @par Inputs:
* @li orig_input: A mutable NC1HWC0 tensor of type RealNumberType.
* @li orig_output: A mutable NC1HWC0 tensor of type RealNumberTypex.
* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n
* @li orig_input: A mutable tensor of type RealNumberType.
* @li orig_output: A mutable tensor of type RealNumberType.
* @li grad: A mutable tensor of type RealNumberType . \n

* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values,
@@ -1650,9 +1669,9 @@ REG_OP(AdaptiveAvgPool2dGrad)

* @par Inputs:
* Three inputs, including:
* @li x: An NC1HWC0 tensor of type float16.
* @li grad: An NC1HWC0 tensor of type float16.
* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n
* @li x: A tensor of type float16.
* @li grad: A tensor of type float16.
* @li argmax: A tensor of type uint16 or int64. \n

* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
@@ -1665,11 +1684,11 @@ REG_OP(AdaptiveAvgPool2dGrad)
* y: A Tensor. Has the same type and format as input "x". \n

* @attention Constraints:
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
* @li "pads" is listint.
* @li "ceil_mode" defaults to False.
* @li "data_format" defaults to "NC1HWC0". \n
* @li ksize: a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li strides: a list that has length 4: strides[0] = 1 or strides[3] = 1.
* @li pads: listint.
* @li ceil_mode: defaults to False.
* @li data_format: An optional string. \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1.
@@ -1693,7 +1712,7 @@ REG_OP(MaxPoolGradWithArgmaxV1)

* @par Inputs:
* One input:
* x: An NC1HWC0 Tensor of type float16. \n
* x: A Tensor of type float16. \n

* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
@@ -1704,15 +1723,15 @@ REG_OP(MaxPoolGradWithArgmaxV1)

* @par Outputs:
* y: A Tensor. Has the same type and format as input "x".
* argmax: A Tensor. type:uint16, format:NC1HWC0. \n
* argmax: A Tensor. type:uint16. \n

* @attention Constraints:
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
* @li ksize: a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li stride: a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
* strides[2] <= 63, strides[2] >= 1.
* @li "pads" is listint.
* @li "ceil_mode" defaults to False.
* @li "data_format" defaults to "NC1HWC0". \n
* @li pads: listint.
* @li ceil_mode: defaults to False.
* @li data_format: An optional string. \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1.


+ 45
- 4
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -423,8 +423,8 @@ REG_OP(Softplus)

*@par Inputs:
*Two inputs:
* @li gradients: An NC1HWC0 or ND Tensor of type float16 or float32.
* @li features: An NC1HWC0 or ND Tensor of type float16 or float32.
* @li gradients: An ND Tensor of type float16 or float32.
* @li features: An ND Tensor of type float16 or float32.


*@par Outputs:
@@ -458,15 +458,34 @@ REG_OP(Softsign)
.OP_END_FACTORY_REG(Softsign)

/**
* @brief Computes softsignGrad: gradients / (1 + abs(features)) ** 2 .
*
* @par Inputs:
* Two inputs, including:
* @li gradients: A Tensor. Must be one of the following types: float16, float32.
* @li features: A Tensor of the same type and shape as "gradients".

* @par Outputs:
* output: A Tensor. Has the same type as "gradients".
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator SoftsignGrad.
*/
REG_OP(SoftsignGrad)
.INPUT(gradients, TensorType::FloatingDataType())
.INPUT(features, TensorType::FloatingDataType())
.OUTPUT(output, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(SoftsignGrad)
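
The backprop above is purely elementwise; a tiny reference of gradients / (1 + abs(features)) ** 2:

// SoftsignGrad reference: derivative of softsign(x) = x / (1 + |x|).
#include <cmath>
#include <cstdio>

float softsign_grad(float gradient, float feature) {
  const float denom = 1.0f + std::fabs(feature);
  return gradient / (denom * denom);
}

int main() {
  std::printf("%f\n", softsign_grad(1.0f, 3.0f));  // 1 / (1 + 3)^2 = 0.0625
  return 0;
}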

/**
*@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n

*@par Inputs:
* One input:
*x: A Tensor. Must be one of the following types: float16, float, double
* int32, int8. format:ND, NC1HWC0 . \n
* int32, int8. format:ND. \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0 . \n
*y: A Tensor. Has the same type and format as input "x". format:ND. \n

*@see Region()

@@ -481,6 +500,28 @@ REG_OP(Selu)
.OP_END_FACTORY_REG(Selu)

/**
*@brief Computes SeluGrad backprops: gradients * (outputs + scale * alpha)
* if outputs < 0, scale * gradients otherwise .

*@par Inputs:
* Two inputs, including:
*@li gradients: A Tensor. Must be one of the following types: float32, float16,
* int32, int8, uint8
*@li outputs: A Tensor. Must be one of the following types: float32, float16,
* int32, int8, uint8
*@par Outputs:
*y: A Tensor. Must have the same type as "gradients" .

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SeluGrad.
*/
REG_OP(SeluGrad)
.INPUT(gradients, TensorType::RealNumberType())
.INPUT(outputs, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.OP_END_FACTORY_REG(SeluGrad)
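
A scalar reference for Selu and the SeluGrad rule quoted above. The SELU constants are the standard published values; that is an assumption, since the header does not list them:

// selu(x) = scale * (x > 0 ? x : alpha * (exp(x) - 1)), plus the documented
// gradient rule: gradients * (outputs + scale * alpha) if outputs < 0,
// scale * gradients otherwise.
#include <cmath>
#include <cstdio>

const float kAlpha = 1.6732632423543772f;
const float kScale = 1.0507009873554805f;

float selu(float x) {
  return x > 0 ? kScale * x : kScale * kAlpha * (std::exp(x) - 1.0f);
}

float selu_grad(float gradient, float output) {
  return output < 0 ? gradient * (output + kScale * kAlpha) : kScale * gradient;
}

int main() {
  const float y = selu(-1.0f);
  std::printf("selu(-1) = %f, grad = %f\n", y, selu_grad(1.0f, y));
  return 0;
}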

/**
*@brief Computes rectified linear gradients for a ReLU operation . \n

*@par Inputs:


+ 3
- 3
third_party/fwkacllib/inc/ops/quantize_ops.h View File

@@ -246,14 +246,14 @@ REG_OP(AscendRequantS16)
* @brief Quantizes the input of int8 . \n

* @par Inputs:
* @li x: An FRACTAL_Z tensor of type int8, specifying the input.
* @li offset: An FRACTAL_Z tensor of type int8.
* @li x: A tensor of type int8, specifying the input.
* @li offset: A tensor of type int8.

* @par Attributes:
* @li dst_type: An optional int from: DT_INT8, DT_INT4. Defaults to DT_INT8.

* @par Outputs:
* @li y: output tensor of type int4 or int8 and with format FRACTAL_Z.
* @li y: output tensor of type int4 or int8.

* @par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe, ONNX, TensorFlow or PyTorch.


+ 97
- 93
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -28,7 +28,7 @@ namespace ge {
*@brief Performs reduced batch normalization . \n

*@par Inputs:
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n
*x: A tensor of type float16 or float32. \n

*@par Outputs:
*@li sum: A 1D Tensor of type float32 for SUM reduced "x".
@@ -49,11 +49,11 @@ REG_OP(BNTrainingReduce)
*@brief Performs reduced batch normalization . \n

*@par Inputs:
*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n
*x: A tensor of type float16 or float32. \n

*@par Outputs:
*@li sum: A 3D Tensor of type float32 for SUM reduced "x".
*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n
*@li sum: A tensor of type float32 for SUM reduced "x".
*@li square_sum: A tensor of type float32 for SUMSQ reduced "x" . \n

*@attention Constraints:
* This operator is a BatchNorm fusion operator for updating the moving
@@ -71,17 +71,17 @@ REG_OP(BN3DTrainingReduce)

*@par Inputs:
* Seven inputs, including:
*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for
*@li grads: A tensor of type float16 or float32, for
* the gradient.
*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0.
*@li diff_scale: A 5D Tensor of type float32, with format NC1HWC0,
*@li x: A tensor of type float16 or float32.
*@li diff_scale: A tensor of type float32,
* for the mean of "x".
*@li diff_offset: A 5D Tensor of type float32, with format NC1HWC0,
*@li diff_offset: A tensor of type float32,
* for the variance of "x".
*@li scale: A 5D Tensor of type float32, with format NC1HWC0.
*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0,
*@li scale: A tensor of type float32.
*@li batch_mean: A tensor of type float32,
* for the mean of "x".
*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0,
*@li batch_variance: A tensor of type float32,
* for the variance of "x" . \n

*@par Attributes:
@@ -89,7 +89,7 @@ REG_OP(BN3DTrainingReduce)
* added to the variance of "x" . \n

*@par Outputs:
*y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset
*y: A Tensor of type float16 or float32, for the offset
* of "x" . \n

*@attention Constraints:
@@ -114,17 +114,17 @@ REG_OP(BNTrainingReduceGrad)

*@par Inputs:
* Seven inputs, including:
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for
*@li grads: A tensor of type float16 or float32, for
* the gradient.
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0,
*@li x: A tensor of type float16 or float32.
*@li diff_scale: A tensor of type float32,
* for the mean of "x".
*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0,
*@li diff_offset: A tensor of type float32,
* for the variance of "x".
*@li scale: A 6D Tensor of type float32, with format NDC1HWC0.
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
*@li scale: A tensor of type float32.
*@li batch_mean: A tensor of type float32,
* for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
*@li batch_variance: A tensor of type float32,
* for the variance of "x" . \n

*@par Attributes:
@@ -132,7 +132,7 @@ REG_OP(BNTrainingReduceGrad)
* added to the variance of "x" . \n

*@par Outputs:
*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset
*y: A Tensor of type float16 or float32, for the offset
* of "x" . \n

*@attention Constraints:
@@ -156,8 +156,8 @@ REG_OP(BN3DTrainingReduceGrad)
*@brief Performs reduced batch normalization . \n

*@par Inputs:
* Seven inputs, including: (NC1HWC0 supported)
*@li x: A 5D Tensor of type float16 or float32.
* Seven inputs, including:
*@li x: A tensor of type float16 or float32.
*@li sum: A 1D Tensor of type float32 for the output of operator
* BNTrainingReduce.
*@li square_sum: A 1D Tensor of type float32 for the output of operator
@@ -174,10 +174,10 @@ REG_OP(BN3DTrainingReduceGrad)
* and variance . \n

*@par Outputs:
* Five outputs, including: (NC1HWC0 supported)
*@li y: A 5D Tensor of type float16 or float32, for normalized "x".
*@li mean: A 5D Tensor of type float32, for the updated mean.
*@li variance: A 5D Tensor of type float32, for the updated variance.
* Five outputs, including:
*@li y: A tensor of type float16 or float32, for normalized "x".
*@li mean: A tensor of type float32, for the updated mean.
*@li variance: A tensor of type float32, for the updated variance.
*@li batch_mean: A 1D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 1D Tensor of type float32, for the variance of "x" . \n

@@ -209,16 +209,16 @@ REG_OP(BNTrainingUpdate)
*@brief Performs reduced batch normalization . \n

*@par Inputs:
* Seven inputs, including: (NDC1HWC0 supported)
*@li x: A 6D Tensor of type float16 or float32.
*@li sum: A 6D Tensor of type float32 for the output of operator
* Seven inputs, including:
*@li x: A tensor of type float16 or float32.
*@li sum: A tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li square_sum: A 6D Tensor of type float32 for the output of operator
*@li square_sum: A tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li scale: A 6D Tensor of type float32, for the scaling factor.
*@li offset: A 6D Tensor of type float32, for the scaling offset.
*@li mean: A 6D Tensor of type float32, for the updated mean.
*@li variance: A 6D Tensor of type float32, for the updated variance . \n
*@li scale: A tensor of type float32, for the scaling factor.
*@li offset: A tensor of type float32, for the scaling offset.
*@li mean: A tensor of type float32, for the updated mean.
*@li variance: A tensor of type float32, for the updated variance . \n

*@par Attributes:
*@li epsilon: A required float32, specifying the small value added to variance
@@ -227,12 +227,12 @@ REG_OP(BNTrainingUpdate)
* and variance . \n

*@par Outputs:
* Five outputs, including: (NDC1HWC0 supported)
*@li y: A 6D Tensor of type float16 or float32, for normalized "x".
*@li mean: A 6D Tensor of type float32, for the updated mean.
*@li variance: A 6D Tensor of type float32, for the updated variance.
*@li batch_mean: A 6D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n
* Five outputs, including:
*@li y: A tensor of type float16 or float32, for normalized "x".
*@li mean: A tensor of type float32, for the updated mean.
*@li variance: A tensor of type float32, for the updated variance.
*@li batch_mean: A tensor of type float32, for the mean of "x".
*@li batch_variance: A tensor of type float32, for the variance of "x" . \n

*@attention Constraints:
*@li This operator is a BatchNorm fusion operator for updating the moving
@@ -262,19 +262,19 @@ REG_OP(BN3DTrainingUpdate)
*@brief Performs batch normalization for inference . \n

*@par Inputs:
* Five inputs, including: (NC1HWC0 supported)
*@li x: A 5D Tensor of type float16 or float32.
*@li scale: A 5D Tensor of type float32, for the scaling factor.
*@li offset: A 5D Tensor of type float32, for the scaling offset.
*@li mean: A 5D Tensor of type float32, for the mean.
*@li variance: A 5D Tensor of type float32, for the variance . \n
* Five inputs, including:
*@li x: A tensor of type float16 or float32.
*@li scale: A tensor of type float32, for the scaling factor.
*@li offset: A tensor of type float32, for the scaling offset.
*@li mean: A tensor of type float32, for the mean.
*@li variance: A tensor of type float32, for the variance . \n

*@par Attributes:
*epsilon: An optional float32, specifying the small value added to variance to
* avoid dividing by zero. Defaults to "0.0001" . \n

*@par Outputs:
*y: A 5D Tensor of type float16 or float32 for the normalized "x" . \n
*y: A tensor of type float16 or float32 for the normalized "x" . \n

*@attention Constraints:
*For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root
@@ -295,21 +295,21 @@ REG_OP(BNInfer)
assignmoving average . \n

*@par Inputs:
*Five inputs, including: (NC1HWC0 supported)
*@li x: A 5D Tensor of type float16 or float32.
*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
*@li scale: A 5D Tensor of type float32, for the scaling factor.
*@li offset: A 5D Tensor of type float32, for the scaling offset . \n
*Five inputs, including:
*@li x: A tensor of type float16 or float32.
*@li sum: A tensor of type float32 for the output of operator BNTrainingReduce.
*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce.
*@li scale: A tensor of type float32, for the scaling factor.
*@li offset: A tensor of type float32, for the scaling offset . \n

*@par Attributes:
*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n

*@par Outputs:
*Three outputs, including: (NC1HWC0 supported)
*@li y: A 5D Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A 5D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 5D Tensor of type float32, for the variance of "x" . \n
*Three outputs, including:
*@li y: A tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A tensor of type float32, for the mean of "x".
*@li batch_variance: A tensor of type float32, for the variance of "x" . \n

*@attention Constraints:
*This operator is used in conjunction with BNTrainingReduce.
@@ -332,22 +332,22 @@ REG_OP(BNTrainingUpdateV2)
assign moving average . \n

*@par Inputs:
* Five inputs, including: (NC1HWC0 supported)
*@li x: A 5D Tensor of type float16 or float32.
*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce.
*@li scale: A 5D Tensor of type float32, for the scaling factor.
*@li offset: A 5D Tensor of type float32, for the scaling offset . \n
* Five inputs, including:
*@li x: A tensor of type float16 or float32.
*@li sum: A tensor of type float32 for the output of operator BNTrainingReduce.
*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce.
*@li scale: A tensor of type float32, for the scaling factor.
*@li offset: A tensor of type float32, for the scaling offset . \n

*@par Attributes:
*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n

*@par Outputs:
*@li y: A 5D Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A 5D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 5D Tensor of type float32, for the variance of "x".
*@li reserve_1: A 5D Tensor of type float32, for the mean of batch "x". Has the same type as batch_mean.
*@li reserve_2: A 5D Tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n
*@li y: A tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A tensor of type float32, for the mean of "x".
*@li batch_variance: A tensor of type float32, for the variance of "x".
*@li reserve_1: A tensor of type float32, for the mean of batch "x". Has the same type as batch_mean.
*@li reserve_2: A tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n

*@attention Constraints:
*@li This operator is used in conjunction with BNTrainingReduce.
@@ -372,12 +372,12 @@ REG_OP(BNTrainingUpdateV3)

*@par Inputs:
* Four inputs, including:
*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0,
*@li grads: A tensor of type float16 or float32,
* for the gradient.
*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0.
*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0,
*@li x: A tensor of type float16 or float32.
*@li batch_mean: A tensor of type float32,
* for the mean of "x".
*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0,
*@li batch_variance: A tensor of type float32,
* for the variance of "x" . \n

*@par Attributes:
@@ -385,9 +385,9 @@ REG_OP(BNTrainingUpdateV3)
* added to the variance of "x" . \n

*@par Outputs:
*@li diff_scale: A Tensor of type float32, with format NC1HWC0,
*@li diff_scale: A Tensor of type float32,
* for the offset of "scale".
*@li diff_offset: A Tensor of type float32, with format NC1HWC0,
*@li diff_offset: A Tensor of type float32,
* for the offset of "offset" . \n

*/
@@ -406,12 +406,12 @@ REG_OP(BNTrainingUpdateGrad)

*@par Inputs:
* Four inputs, including:
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0,
*@li grads: A tensor of type float16 or float32,
* for the gradient.
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
*@li x: A tensor of type float16 or float32.
*@li batch_mean: A tensor of type float32,
* for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
*@li batch_variance: A tensor of type float32,
* for the variance of "x" . \n

*@par Attributes:
@@ -419,9 +419,9 @@ REG_OP(BNTrainingUpdateGrad)
* added to the variance of "x" . \n

*@par Outputs:
*@li diff_scale: A Tensor of type float32, with format NDC1HWC0,
*@li diff_scale: A Tensor of type float32,
* for the offset of "scale".
*@li diff_offset: A Tensor of type float32, with format NDC1HWC0,
*@li diff_offset: A Tensor of type float32,
* for the offset of "offset" . \n

*/
@@ -440,15 +440,15 @@ REG_OP(BN3DTrainingUpdateGrad)

*@par Inputs:
* Three inputs, including:
*@li grads: A 5D Tensor of type loat16 or float32, with format NC1HWC0, for the gradient.
*@li scale: A 5D Tensor of type float32, with format NC1HWC0.
*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0. It is an output of BatchNorm . \n
*@li grads: A tensor of type float16 or float32, for the gradient.
*@li scale: A tensor of type float32.
*@li batch_variance: A tensor of type float32. It is an output of BatchNorm . \n

*@par Attributes:
*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n

*@par Outputs:
*x_backprop: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x" . \n
*x_backprop: A Tensor of type float16 or float32, for the offset of "x" . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm.
@@ -690,6 +690,9 @@ REG_OP(ReduceMean)
*@li keep_dims: A bool or NoneType.
* - If true, retains reduced dimensions with length 1.
* - If false, the rank of the tensor is reduced by 1 for each entry in axis.
*@li noop_with_empty_axes: A bool, defaults to true.
* - If true, same as tf.
* - If false, when x's shape is [], reduce all dims, for onnx.
*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

@@ -704,6 +707,7 @@ REG_OP(ReduceMeanD)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(axes, ListInt)
.ATTR(keep_dims, Bool, false)
.ATTR(noop_with_empty_axes, Bool, true)
.OP_END_FACTORY_REG(ReduceMeanD)
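
keep_dims only changes the shape bookkeeping. A small sketch of the reduced output shape, following the rule quoted above (reference only):

// Output-shape computation for ReduceMeanD-style reductions.
#include <cstdio>
#include <set>
#include <vector>

std::vector<int> reduced_shape(const std::vector<int> &shape,
                               const std::vector<int> &axes, bool keep_dims) {
  const std::set<int> ax(axes.begin(), axes.end());
  std::vector<int> out;
  for (int i = 0; i < static_cast<int>(shape.size()); ++i) {
    if (ax.count(i)) {
      if (keep_dims) out.push_back(1);  // retain reduced dims with length 1
    } else {
      out.push_back(shape[i]);
    }
  }
  return out;
}

int main() {
  for (bool keep_dims : {true, false}) {
    const auto s = reduced_shape({2, 3, 4}, {1}, keep_dims);
    std::printf("keep_dims=%d ->", keep_dims ? 1 : 0);
    for (int d : s) std::printf(" %d", d);
    std::printf("\n");  // [2, 1, 4] with keep_dims, [2, 4] without
  }
  return 0;
}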

/**
@@ -983,7 +987,7 @@ REG_OP(EuclideanNormD)
*@brief Performs instance normalization for inference . \n

*@par Inputs:
* Five inputs, including: (NC1HWC0 supported)
* Five inputs, including:
*@li x: A Tensor of type float16 or float32.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
@@ -1184,22 +1188,22 @@ REG_OP(GNTrainingReduce)
*@par Inputs:
* Seven inputs, including: (NCHW NHWC supported)
*@li x: A Tensor of type float16 or float32.
*@li sum: A 5D Tensor of type float32,
*@li sum: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the output of operator GNTrainingReduce.
*@li square_sum: A 5D Tensor of type float32,
*@li square_sum: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the output of operator GNTrainingReduce.
*@li scale: A 5D Tensor of type float32,
*@li scale: A tensor of type float32,
shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC
is for the scaling gamma.
*@li offset: A 5D Tensor of type float32,
*@li offset: A tensor of type float32,
shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC
for the scaling beta.
*@li mean: A 5D Tensor of type float32,
*@li mean: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the updated mean.
*@li variance: A 5D Tensor of type float32,
*@li variance: A tensor of type float32,
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC
for the updated variance.

@@ -1209,7 +1213,7 @@ for the updated variance.
*@li num_groups: Int, specifying the number of groups. Required, same as in GNTrainingReduce.

*@par Outputs:
* Three outputs, including: (NC1HWC0 supported)
* Three outputs, including:
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance . \n
@@ -1338,7 +1342,7 @@ REG_OP(ReduceStdWithMean)
*@brief Performs reduced batch normalization . \n

*@par Inputs:
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n
*x: A tensor of type float16 or float32 . \n

*@par Outputs:
*@li mean: A Tensor of type float32 for SUM reduced "x".


+ 241
- 243
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -28,10 +28,10 @@ namespace ge {
*@brief: Basic LSTM Cell forward calculation.
*@par Inputs:
*six inputs:
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li x:A 4D Tensor. Must be one of the following types: float16.
*@li h:A 4D Tensor. Must be one of the following types: float16.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li w:A 4D Tensor. Must be one of the following types: float16.
*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n
*@li mask:A 1D Tensor. Must be one of the following types: uint8.

@@ -75,12 +75,12 @@ REG_OP(BasicLSTMCell)
*@brief: Dynamic LSTM forward calculation . \n

*@par Inputs:
*@li x:A 4D Tensor. Must be the type float32. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be the type float32. The format must be FRACTAL_Z.
*@li x:A 4D Tensor. Must be the type float32.
*@li w:A 4D Tensor. Must be the type float32.
*@li b:A 1D Tensor. Must be the type float32. The format must be ND . \n

*@par Outputs:
*output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z.
*output_h:A Tensor of output. Must be the type float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -96,27 +96,27 @@ REG_OP(DynamicLSTM)
*@brief: DynamicRNNGrad calculation.
*@par Inputs:
*twenty-one inputs: \n
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:A 1D Tensor. Must be one of the following types: int32. The format must be FRACTAL_NZ.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li x:A 4D Tensor. Must be one of the following types: float16, float32.
*@li w:A 4D Tensor. Must be one of the following types: float16, float32.
*@li b:A 1D Tensor. Must be one of the following types: float16, float32.
*@li y:A 1D Tensor. Must be one of the following types: int32.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
*@li seq_length:A 1D Tensor. Must be one of the following types: int32.
*@li mask:A 1D Tensor. Must be one of the following types: int8.
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32.
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32.
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Attributes:
*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
@@ -131,14 +131,14 @@ REG_OP(DynamicLSTM)

*@par Outputs:
*eight outputs: \n
*@li dw:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li db:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dw:A 4D Tensor. Must be one of the following types: float16, float32.
*@li db:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32.
*/
REG_OP(DynamicRNNGrad)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -185,15 +185,15 @@ REG_OP(DynamicRNNGrad)
*@brief: DynamicRNN calculation.
*@par Inputs:
*ten inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:A optional Tensor. Only Support float16 in FRACTAL_NZ and int32 in ND.
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li seq_length:An optional Tensor. Only support int32 in ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n

*@par Attributes:
@@ -212,14 +212,14 @@ REG_OP(DynamicRNNGrad)

*@par Outputs:
*eight outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
*@par Third-party framework compatibility:
* Compatible with the TF operator LSTM.
*/
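
The i, j, f, o and tanhct outputs are the per-step LSTM gate values. One scalar time step of the cell they describe; the textbook LSTM equations are assumed to match what DynamicRNN computes per step:

// One scalar LSTM step matching the documented outputs i, j, f, o, tanhct.
#include <cmath>
#include <cstdio>

float sigmoid(float v) { return 1.f / (1.f + std::exp(-v)); }

int main() {
  // Pre-activation gate values; in practice x*W + h_prev*R + b, split four ways.
  const float zi = 0.5f, zj = -0.2f, zf = 1.0f, zo = 0.3f;
  const float c_prev = 0.1f;

  const float i = sigmoid(zi);         // input gate
  const float j = std::tanh(zj);       // candidate value
  const float f = sigmoid(zf);         // forget gate
  const float o = sigmoid(zo);         // output gate

  const float c = f * c_prev + i * j;  // output_c for this step
  const float tanhct = std::tanh(c);   // the documented tanhct output
  const float h = o * tanhct;          // output_h for this step

  std::printf("c=%f h=%f\n", c, h);
  return 0;
}
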
@@ -260,18 +260,16 @@ REG_OP(DynamicRNN)
*@brief: DynamicRNNV2 calculation.
*@par Inputs:
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
*The format must be FRACTAL_Z.
*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
*The format must be FRACTAL_Z.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n

*@par Attributes:
@@ -298,16 +296,16 @@ REG_OP(DynamicRNN)

*@par Outputs:
*eight outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
*Return the last output_h.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
*Return the last output_c.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
*@par Third-party framework compatibility:
* Compatible with the TF operator LSTM or TF keras operator LSTM.
*/
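A minimal construction sketch, assuming the generated ge::op::DynamicRNNV2 wrapper follows the usual GE IR set_input_*/set_attr_* pattern (x, w_in, w_hid and bias are placeholder upstream operators):

// Hypothetical GE graph snippet wiring only the required inputs documented above.
auto rnn = ge::op::DynamicRNNV2("dynamic_rnn_v2");
rnn.set_input_x(x)                     // [num_step, batch_size, input_size]
   .set_input_weight_input(w_in)      // FRACTAL_Z weight for the input projection
   .set_input_weight_hidden(w_hid)    // FRACTAL_Z weight for the hidden projection
   .set_input_b(bias);                // 1D ND bias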
@@ -353,18 +351,18 @@ REG_OP(DynamicRNNV2)
*@brief: DynamicRNNV3 calculation.
*@par Inputs:
*twelve inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32.
*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32.

*@par Attributes:
*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
@@ -381,14 +379,14 @@ REG_OP(DynamicRNNV2)

*@par Outputs:
*eight outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
*@par Third-party framework compatibility:
* Compatible with the TF operator LSTM.
*/
@@ -430,16 +428,16 @@ REG_OP(DynamicRNNV3)
*@brief: DynamicLSTMV2 calculation.
*@par Inputs:
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND .

*@par Attributes:
@@ -450,11 +448,11 @@ REG_OP(DynamicRNNV3)

*@par Outputs:
*five outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@par Third-party framework compatibility:
* Compatible with the Caffe operator LSTM.
*@par Restrictions:
@@ -487,25 +485,25 @@ REG_OP(DynamicLSTMV2)
*@brief: LSTMInputGrad calculation.
*@par Inputs:
*eleven inputs: \n
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be one of the following types: float16, float32.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.


*@par Outputs:
*four outputs: \n
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dgate:A 4D Tensor. Must be one of the following types: float16.
*/
REG_OP(LSTMInputGrad)
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -531,18 +529,18 @@ REG_OP(LSTMInputGrad)
*@brief: Dynamic LSTM Cell grad calculation. Calculate the gradient of gates and cell state.
*@par Inputs:
*twelve inputs:
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li t_state:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32.
*@li i:A 4D Tensor. Must be one of the following types: float16, float32.
*@li j:A 4D Tensor. Must be one of the following types: float16, float32.
*@li f:A 4D Tensor. Must be one of the following types: float16, float32.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32.
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32.
*@li t_state:A 4D Tensor. Must be one of the following types: float16, float32. \n

*@par Attributes:
*@li forget_bias:An integer identifying the forget bias in the op. Defaults to 1.
@@ -584,8 +582,8 @@ REG_OP(DynamicLSTMGradCell)
*@brief: Basic LSTM Cell backward calculation. Calculate the gradient of input and hidden state.
*@par Inputs:
*three inputs:
*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li dgate:A 4D Tensor. Must be one of the following types: float16.
*@li w:A 4D Tensor. Must be one of the following types: float16.
*@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n

*@par Attributes:
@@ -612,9 +610,9 @@ REG_OP(BasicLSTMCellInputGrad)
*@brief: Basic LSTM Cell backward calculation. Calculate the gradient of weight and bias.
*@par Inputs:
*three inputs:
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li dgate:A 4D Tensor. Must be one of the following types: uint8. The format must be FRACTAL_NZ . \n
*@li x:A 4D Tensor. Must be one of the following types: float16.
*@li h:A 4D Tensor. Must be one of the following types: float16.
*@li dgate:A 4D Tensor. Must be one of the following types: uint8. \n

*@par Outputs:
*two outputs:
@@ -636,14 +634,14 @@ REG_OP(BasicLSTMCellWeightGrad)
*@brief: Basic LSTM Cell backward calculation. Calculate the gradient of gates and cell state.
*@par Inputs:
*eight inputs:
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dht:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li it:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li jt:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li ft:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li ot:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n
*@li c:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dht:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dct:A 4D Tensor. Must be one of the following types: float16, float32.
*@li it:A 4D Tensor. Must be one of the following types: float16, float32.
*@li jt:A 4D Tensor. Must be one of the following types: float16, float32.
*@li ft:A 4D Tensor. Must be one of the following types: float16, float32.
*@li ot:A 4D Tensor. Must be one of the following types: float16, float32.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. \n

*@par Attributes:
*@li forget_bias:An integer identifying the forget bias in the op. Defaults to 1.
@@ -676,14 +674,14 @@ REG_OP(BasicLSTMCellCStateGrad)
*@brief: RNN operator.
*@par Inputs:
*ten inputs:
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li x:A 4D Tensor. Must be one of the following types: float16.
*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
*@li x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li w_sh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li x_static:A 4D Tensor. Must be one of the following types: float16.
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32.
*@li w_xh:A 4D Tensor. Must be one of the following types: float16.
*@li w_sh:A 4D Tensor. Must be one of the following types: float16.
*@li w_hh:A 4D Tensor. Must be one of the following types: float16.
*@li w_ho:A 4D Tensor. Must be one of the following types: float16.
*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n

@@ -693,8 +691,8 @@ REG_OP(BasicLSTMCellCStateGrad)

*@par Outputs:
*two outputs:
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32.
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
@@ -719,13 +717,13 @@ REG_OP(RNN)
*@brief: BasicRNNCell operator.
*@par Inputs:
*nine inputs:
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li x:A 4D Tensor. Must be one of the following types: float16.
*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND.
*@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16.
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32.
*@li w_xh:A 4D Tensor. Must be one of the following types: float16.
*@li w_hh:A 4D Tensor. Must be one of the following types: float16.
*@li w_ho:A 4D Tensor. Must be one of the following types: float16.
*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n

@@ -735,8 +733,8 @@ REG_OP(RNN)

*@par Outputs:
*two outputs:
*@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o_t:A 4D Tensor. Must be one of the following types: float16, float32.
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -761,13 +759,13 @@ REG_OP(BasicRNNCell)
*@brief DynamicGRU calculation.
*@par Inputs:
*seven inputs:
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li x:Must be one of the following types: float16.
*@li w:Must be one of the following types: float16.
*@li b:Must be one of the following types: float16, float32. The format must be ND.
*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li cw:Must be one of the following types: float16.
*@li cb:Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:Must be one of the following types: int32. The format must be ND.
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:Must be one of the following types: float16, float32.

*@par Attributes:
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
@@ -781,11 +779,11 @@ REG_OP(BasicRNNCell)

*@par Outputs:
*five outputs:
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:Must be one of the following types: float16, float32.
*@li output_h:Must be one of the following types: float16, float32.
*@li r:Must be one of the following types: float16, float32.
*@li i:Must be one of the following types: float16, float32.
*@li n:Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -817,13 +815,13 @@ REG_OP(DynamicGRU)
*@brief DynamicGRUV2 calculation.
*@par Inputs:
*seven inputs:
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li x:Must be one of the following types: float16.
*@li weight_input:Must be one of the following types: float16.
*@li weight_hidden:Must be one of the following types: float16.
*@li bias_input:Must be one of the following types: float16, float32. The format must be ND.
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND.
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li seq_length:Must be one of the following types: int32. The format must be ND.
*@li init_h:Must be one of the following types: float16, float32.

*@par Attributes:
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
@@ -839,12 +837,12 @@ REG_OP(DynamicGRU)

*@par Outputs:
*six outputs:
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:Must be one of the following types: float16, float32.
*@li output_h:Must be one of the following types: float16, float32.
*@li update:Must be one of the following types: float16, float32.
*@li reset:Must be one of the following types: float16, float32.
*@li new:Must be one of the following types: float16, float32.
*@li hidden_new:Must be one of the following types: float16, float32.
*/
REG_OP(DynamicGRUV2)
.INPUT(x, TensorType({DT_FLOAT16}))
@@ -877,11 +875,11 @@ REG_OP(DynamicGRUV2)
*@brief DynamicGRUV2Hidden calculation.
*@par Inputs:
*five inputs:
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ.
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li x_weight_input:Must be one of the following types: float32.
*@li weight_hidden:Must be one of the following types: float16.
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND.
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li seq_length:Must be one of the following types: int32. The format must be ND.
*@li init_h:Must be one of the following types: float16, float32.

*@par Attributes:
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL".
@@ -899,12 +897,12 @@ Only tanh is currently supported.

*@par Outputs:
*six outputs:
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:Must be one of the following types: float16, float32.
*@li output_h:Must be one of the following types: float16, float32.
*@li update:Must be one of the following types: float16, float32.
*@li reset:Must be one of the following types: float16, float32.
*@li new:Must be one of the following types: float16, float32.
*@li hidden_new:Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -938,20 +936,20 @@ REG_OP(DynamicGRUV2Hidden)
*@brief: DynamicGRUV2Grad calculation.
*@par Inputs:
*fourteen inputs: \n
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li weight_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li x:A 4D Tensor. Must be one of the following types: float16, float32.
*@li weight_input:A 4D Tensor. Must be one of the following types: float16, float32.
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
*@li y:A 4D Tensor. Must be one of the following types: float16, float32.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32.
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Attributes:
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
@@ -965,12 +963,12 @@ REG_OP(DynamicGRUV2Hidden)

*@par Outputs:
*six outputs: \n
*@li dw_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li db_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dw_input:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
*@li db_input:A 4D Tensor. Must be one of the following types: float16, float32.
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -1010,15 +1008,15 @@ REG_OP(DynamicGRUV2Grad)
*@brief: GRUV2HiddenGrad calculation.
*@par Inputs:
*nine inputs: \n
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Attributes:
*@li t_state:An int identifying the current t state. Value range: [0, 4].
@@ -1026,9 +1024,9 @@ REG_OP(DynamicGRUV2Grad)

*@par Outputs:
*three outputs: \n
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -1053,15 +1051,15 @@ REG_OP(GRUV2HiddenGradCell)
*@brief: DynamicGRUCellGrad calculation.
*@par Inputs:
*ten inputs: \n
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND.

*@par Attributes:
@@ -1069,9 +1067,9 @@ REG_OP(GRUV2HiddenGradCell)

*@par Outputs:
*three outputs: \n
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32.
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -1130,13 +1128,13 @@ REG_OP(EmbeddingDenseGrad)
*@brief CommonLSTM calculation.
*@par Inputs:
*eight inputs: \n
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32.
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.

*@par Attributes:
@@ -1150,9 +1148,9 @@ REG_OP(EmbeddingDenseGrad)

*@par Outputs:
*three outputs: \n
*@li y:First dimension is time step, second dimension is direction, the others form a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y:First dimension is time step, second dimension is direction, the others form a 4D Tensor. Must be one of the following types: float16, float32.
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32.
*/

REG_OP(CommonLSTM)
@@ -1202,12 +1200,12 @@ REG_OP(RnnGenMaskV2)

* @par Inputs:
* Eight inputs, including:
* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16). The format must be FRACTAL_NZ
* @li w: The weight tensor for the gates is a 3D Tensor(float16). The format must be FRACTAL_Z
* @li r: The recurrence weight tensor is a 3D Tensor(float16). The format must be FRACTAL_Z
* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16).
* @li w: The weight tensor for the gates is a 3D Tensor(float16).
* @li r: The recurrence weight tensor is a 3D Tensor(float16).
* @li b: The bias tensor for the gates. The format must be ND
* @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND
* @li init_h: Optional initial value of the hidden(float16,float32). The format must be FRACTAL_NZ
* @li init_h: Optional initial value of the hidden(float16,float32).

* @par Attributes:
* @li activation_alpha: Optional scaling values used by some activation functions. \n
@@ -1219,8 +1217,8 @@ REG_OP(RnnGenMaskV2)
* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n

* @par Outputs:
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ
* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32).
* @li y_h: The last output value of the hidden(float16,float32).
*/
REG_OP(CommonGRU)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1251,17 +1249,17 @@ REG_OP(CommonGRU)
* @li per_sample_weights: an optional tensor of weights; if not specified, all weights are taken to be 1.
* If specified, per_sample_weights must have exactly the same shape as input
* and is treated as having the same offsets, if those are not None.
* Only supported for mode='sum'..\n
* Only supported for mode='sum'.\n

* @par Attributes:
* @li mode: A string attr which uses "sum", "mean" or "max". Specifies the way to reduce the bag.. \n
* @li mode: A string attr which uses "sum", "mean" or "max". Specifies the way to reduce the bag. \n

* @li scale_grad_by_freq: An optional bool. Defaults to "False".
* If "True", "grad_weight" will be scale by word_frequency.
* If "False", "grad_weight" will not be scale by word_frequency. \n
* @li sparse: if True, gradient w.r.t. attr weight matrix will be a sparse tensor. \n
* @li include_last_offset: if True, attr offsets has one additional element, where the last element
* is equivalent to the size of indices. This matches the CSR format.. \n
* is equivalent to the size of indices. This matches the CSR format. \n

* @par Outputs:
* y: A mutable output Tensor of new word grad has the same type as "grads". \n


+ 4
- 4
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -188,7 +188,7 @@ REG_OP(ParallelConcat)

*@par Inputs:
* One input:
*x: Dynamic input.An NC1HWC0 or ND Tensor.
*x: Dynamic input. An ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
@@ -219,7 +219,7 @@ REG_OP(ConcatV2D)

*@par Inputs:
* Two inputs, including:
*@li Dynamic input "x" is An NC1HWC0 or ND Tensor.
*@li Dynamic input "x" is A ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64
*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n

@@ -247,7 +247,7 @@ REG_OP(ConcatV2)

*@par Inputs:
* One input:
*x:Dynamic input. An NC1HWC0 or ND Tensor.
*x:Dynamic input. An ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
@@ -278,7 +278,7 @@ REG_OP(ConcatD)

*@par Inputs:
* Two inputs, including:
*@li x: Dynamic input.An NC1HWC0 or ND Tensor.
*@li x: Dynamic input. An ND Tensor.
*Must be one of the following types: float16, float32, double, int32,
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16,
* complex128, uint32, uint64, qint16, quint16.


+ 1
- 1
third_party/fwkacllib/inc/ops/target_crop_and_resize.h View File

@@ -39,7 +39,7 @@ namespace ge {
*input_format: A required string, specifying the input format. \n

*@par Outputs:
*y: The output tensor of type uint8, format only support NC1HWC0_C04.
*y: The output tensor of type uint8.
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*


+ 17
- 14
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -138,9 +138,11 @@ REG_OP(Transpose)
* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n

*@par Attributes:
*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Z" etc.
*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Z" etc.
*@li group: An optional int32, default value is 1. \n
*@li src_format: A string source data format, can be "NHWC", "NCHW" etc.
*@li dst_format: A string target data format, can be "NCHW" etc.
*@li src_subformat: An optional int32 for the source sub-format, default value is 0.
*@li dst_subformat: An optional int32 for the target sub-format, default value is 0.
*@li groups: An optional int32, default value is 1. \n

*@par Outputs:
*dst: A Tensor. Has the same type as "src".
@@ -150,6 +152,8 @@ REG_OP(TransData)
.OUTPUT(dst, TensorType::BasicType())
.REQUIRED_ATTR(src_format, String)
.REQUIRED_ATTR(dst_format, String)
.ATTR(src_subformat, Int, 0)
.ATTR(dst_subformat, Int, 0)
.ATTR(groups, Int, 1)
.OP_END_FACTORY_REG(TransData)
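For illustration, a hedged sketch of instantiating the operator registered above via its generated ge::op::TransData wrapper (the src_op producer and the formats shown are placeholders):

auto trans = ge::op::TransData("trans_data");
trans.set_input_src(src_op)            // tensor whose layout is reinterpreted
     .set_attr_src_format("NHWC")      // required attrs from the registration
     .set_attr_dst_format("NCHW")
     .set_attr_groups(1);              // optional attrs keep the defaults above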

@@ -236,13 +240,13 @@ REG_OP(Flatten)

*@par Inputs:
* Three inputs, including:
*@li x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0.
*@li x: A 5D Tensor of type float16 or int8 or uint8.
*@li block_shape: A 1D list or tuple of int32 or int64.
*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to
*crop from start and end dimensions after permutation . \n

*@par Outputs:
*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n
*y: A Tensor. Has the same type as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchToSpaceND.
@@ -259,7 +263,7 @@ REG_OP(BatchToSpaceND)

*@par Inputs:
* One input:
*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0 . \n
*x: A 5D Tensor of type float16 or int8 or uint8. \n

*@par Attributes:
*@li block_shape: A required 1D list or tuple of int32 or int64.
@@ -267,7 +271,7 @@ REG_OP(BatchToSpaceND)
* from the start and end dimensions after permutation . \n

*@par Outputs:
*y: A Tensor with format NC1HWC0. Has the same type as input "x".
*y: A Tensor. Has the same type as input "x".


*@par Third-party framework compatibility
@@ -288,12 +292,12 @@ REG_OP(BatchToSpaceNDD)

*@par Inputs:
* Three inputs, including:
*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0.
*@li x: A 5D Tensor of type float16 or float32.
*@li block_shape: A 1D list or tuple of int32 or int64.
*@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n

*@par Outputs:
*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n
*y: A Tensor. Has the same type as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SpaceToBatchND.
@@ -310,14 +314,14 @@ REG_OP(SpaceToBatchND)

*@par Inputs:
* One input:
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n
*x: A 5D Tensor of type float16 or float32. \n

*@par Attributes:
*@li block_shape: A required 1D list or tuple of int32 or int64.
*@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n

*@par Outputs:
*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n
*y: A Tensor. Has the same type as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SpaceToBatchND.
@@ -516,7 +520,7 @@ REG_OP(SpaceToBatchD)
* tensors . \n

* @par Inputs:
* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0 . \n
* x: A rank-R tensor (R > 0) of type BasicType. \n

* @par Attributes:
* @li num: A required int, specifying the number of tensors to be unpacked to.
@@ -529,8 +533,7 @@ REG_OP(SpaceToBatchD)

* @attention Constraints:
* @li If "num" is not specified, it is inferred from the shape of "x".
* @li For the ND format, "axis" is in the range [-R, R); For the NC1HWC0 format,
* "axis" must not be 2, 3, -2, or -3 . \n
* @li For the ND format, "axis" is in the range [-R, R). \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Unpack.


+ 11
- 0
third_party/fwkacllib/inc/runtime/base.h View File

@@ -140,6 +140,16 @@ typedef void *rtModel_t;

#define RT_PROF_MAX_DEV_NUM 64

#define PATH_LEN_MAX 1023
#define PARAM_LEN_MAX 4095
typedef struct rtCommandHandleParams {
uint32_t pathLen;
uint32_t storageLimit; // MB
uint32_t profDataLen;
char_t path[PATH_LEN_MAX + 1];
char_t profData[PARAM_LEN_MAX + 1];
} rtCommandHandleParams_t;
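A hedged sketch of filling this structure; field semantics follow the declaration above, char_t is assumed to alias char, and the path value is purely illustrative:

#include <cstring>

rtCommandHandleParams_t params = {};                          // zero-init keeps strings terminated
params.storageLimit = 512U;                                   // MB
(void)strncpy(params.path, "/var/log/prof", PATH_LEN_MAX);    // hypothetical dump path
params.pathLen = static_cast<uint32_t>(strlen(params.path));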

/**
* @ingroup profiling_base
* @brief profiling command info
@@ -151,6 +161,7 @@ typedef struct rtProfCommandHandle {
uint32_t devIdList[RT_PROF_MAX_DEV_NUM];
uint32_t modelId;
uint32_t type;
rtCommandHandleParams_t commandHandleParams;
} rtProfCommandHandle_t;

/**


+ 114
- 25
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -131,6 +131,32 @@ typedef struct tagRtArgsWithTiling {
} rtArgsWithTiling_t;

/**
* @ingroup rt_kernel
* @brief host memory input struct
*/
typedef struct rtHostInputInfo {
uint16_t addrOffset;
uint16_t dataOffset;
} rtHostInputInfo_t;

/**
* @ingroup rt_kernel
* @brief args struct
*/
typedef struct tagRtArgsEx {
void *args; // args host mem addr
rtHostInputInfo_t *hostInputInfoPtr; // nullptr means no host mem input
uint32_t argsSize; // input + output + tiling addr size + tiling data size + host mem
uint16_t tilingAddrOffset; // tiling addr offset
uint16_t tilingDataOffset; // tiling data offset
uint16_t hostInputInfoNum; // hostInputInfo num
uint8_t hasTiling; // if has tiling: 0 means no tiling
uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means H2D copy is needed,
// other values mean it is not needed.
uint8_t reserved[4];
} rtArgsEx_t;
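A minimal sketch of preparing this structure before a launch, assuming a flat host buffer (hostArgsBuf and hostArgsLen are placeholders) packed as the field comments describe:

rtArgsEx_t argsInfo = {};
argsInfo.args = hostArgsBuf;           // packed input/output (+ tiling) addresses
argsInfo.argsSize = hostArgsLen;
argsInfo.hostInputInfoPtr = nullptr;   // no host-memory inputs
argsInfo.hostInputInfoNum = 0U;
argsInfo.hasTiling = 0U;               // static shape: no tiling appended
argsInfo.isNoNeedH2DCopy = 0U;         // let the runtime do the H2D copy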

/**
* @ingroup rt_KernelConfigDump
* @brief device dump type
*/
@@ -375,39 +401,70 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
/**
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] hdl program
* @param [in] hdl program
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key,
* depending on static or dynamic shape.
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] kernelInfo kernel info
* @param [in] blockDim block dimensions
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] kernelInfo kernel info
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim,
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_,
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
const void *kernelInfo);

/**
* @ingroup rt_kernel
* @ingroup rtKernelLaunchWithHandleV2
* @brief launch kernel with handle to device
* @param [in] hdl program
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key,
* depending on static or dynamic shape.
* @param [in] blockDim block dimensions
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] kernelInfo kernel info
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandleV2(void *hdl, const void *kernelInfoExt, uint32_t blockDim,
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
const void *kernelInfo);

/**
* @ingroup rtKernelLaunchWithFlag
* @brief launch kernel to device
* @param [in] stubFunc stub function
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] flag dump flag
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] flags dump flag
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo,
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags);

/**
* @ingroup rtKernelLaunchWithFlagV2
* @brief launch kernel to device
* @param [in] stubFunc stub function
* @param [in] blockDim block dimensions
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] flags dump flag
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithFlagV2(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo,
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags);
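Putting the pieces together, a hedged call to the V2 flag variant declared above (stubFunc and stm are assumed to come from the usual kernel-registration and stream-creation paths):

rtError_t err = rtKernelLaunchWithFlagV2(stubFunc, 1U /*blockDim*/, &argsInfo,
                                         nullptr /*smDesc*/, stm, 0U /*flags*/);
if (err != RT_ERROR_NONE) {
    // bad argsInfo layout or launch failure
}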

/**
* @ingroup rt_kernel(abandoned)
* @brief launch kernel to device
* @param [in] args argments address for kernel function
@@ -465,38 +522,70 @@ RTS_API rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames,
uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm);

/**
* @ingroup rt_kernel(abandoned)
* @ingroup rtCpuKernelLaunchWithFlag(abandoned)
* @brief launch cpu kernel to device with dump identifier
* @param [in] soName so name
* @param [in] kernelName kernel name
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] stm associated stream
* @param [in] flag dump flag or other function flags
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim,
const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm,
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
uint32_t flags);

/**
* @ingroup rt_kernel(in use)
* @ingroup rtCpuKernelLaunchWithFlagV2
* @brief launch cpu kernel to device with dump identifier
* @param [in] soName so name
* @param [in] kernelName kernel name
* @param [in] blockDim block dimensions
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] flags dump flag or other function flags
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtCpuKernelLaunchWithFlagV2(const void *soName, const void *kernelName, uint32_t blockDim,
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
uint32_t flags);

/**
* @ingroup rtAicpuKernelLaunchWithFlag(in use)
* @brief launch cpu kernel to device with dump identifier
* @param [in] launchNames names for kernel launch
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] flag dump flag or other function flags
* @param [in] stm associated stream
* @param [in] flags dump flag or other function flags
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim,
const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags);
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
uint32_t flags);

/**
* @ingroup rtAicpuKernelLaunchWithFlagV2(in use)
* @brief launch cpu kernel to device with dump identifier
* @param [in] launchNames names for kernel launch
* @param [in] blockDim block dimensions
* @param [in] argsInfo arguments address for kernel function
* @param [in] smDesc shared memory description
* @param [in] stm associated stream
* @param [in] flags dump flag or other function flags
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtAicpuKernelLaunchWithFlagV2(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim,
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
uint32_t flags);

/**
* @ingroup rt_kernel


+ 20
- 0
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -57,6 +57,14 @@ extern "C" {
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U

/**
* @ingroup dvrt_mem
* @brief memory attribute
*/
#define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U)
// memory read-only attribute; currently only dvpp memory supports it.
#define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Malloc readonly, 1<<20.

#define MEM_ALLOC_TYPE_BIT (0x3FFU)  // mem type bits in [0, 9]

/**
@@ -232,6 +240,18 @@ RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);

/**
* @ingroup dvrt_mem
 * @brief alloc device memory for dvpp, with support for setting a flag
 * @param [in|out] devPtr memory pointer
 * @param [in] size memory size
 * @param [in] flag mem flag; a memory attribute such as RT_MEMORY_ATTRIBUTE_READONLY can be set
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 * @return others for error
*/
RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag);
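
A short usage sketch for the new flagged allocation, pairing it with rtDvppFree, which is documented immediately below this hunk:

/* Illustrative only: allocate a read-only DVPP buffer with the new flag,
 * then release it with rtDvppFree (declared right after this hunk). */
static rtError_t AllocReadonlyDvpp(uint64_t size) {
    void *devPtr = NULL;
    rtError_t ret = rtDvppMallocWithFlag(&devPtr, size, RT_MEMORY_ATTRIBUTE_READONLY);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    /* ... hand the read-only buffer to DVPP ... */
    return rtDvppFree(devPtr);
}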

/**
* @ingroup dvrt_mem
* @brief free device memory for dvpp
* @param [in|out] devPtr memory pointer
* @return RT_ERROR_NONE for ok


+ 1
- 1
third_party/fwkacllib/inc/runtime/rt_ffts_plus.h View File

@@ -36,4 +36,4 @@ RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTas
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif  // CCE_RUNTIME_RT_FFTS_PLUS_H

+ 11
- 0
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -82,6 +82,17 @@ RTS_API rtError_t rtStreamWaitEvent(rtStream_t stm, rtEvent_t evt);

/**
* @ingroup dvrt_stream
 * @brief wait a recorded event for stream, used for 1951 pg1
 * @param [in] stm the wait stream
 * @param [in] evt the event to wait for
* @param [in] timeout timeout value for 1951 pg1
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtStreamWaitEventWithTimeout(rtStream_t stm, rtEvent_t evt, uint32_t timeout);
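
A minimal sketch of the timed wait, assuming the usual rtEventRecord from runtime/event.h; it blocks the waiter stream for at most the given timeout on work recorded on the producer stream.

/* Sketch, assuming rtEventRecord(evt, stream) from runtime/event.h. */
static rtError_t WaitWithTimeoutExample(rtStream_t producer, rtStream_t waiter,
                                        rtEvent_t evt, uint32_t timeout) {
    rtError_t ret = rtEventRecord(evt, producer);  /* mark progress on producer */
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    return rtStreamWaitEventWithTimeout(waiter, evt, timeout);
}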

/**
* @ingroup dvrt_stream
* @brief wait stream to be complete
* @param [in] stm stream to wait
* @return RT_ERROR_NONE for ok


+ 2
- 0
third_party/fwkacllib/inc/toolchain/prof_acl_api.h View File

@@ -36,6 +36,7 @@
#define PROF_SUBTASK_TIME 0x0000040000000ULL
#define PROF_OP_DETAIL 0x0000080000000ULL

#define PROF_AICPU_MODEL 0x4000000000000000ULL
#define PROF_MODEL_LOAD 0x8000000000000000ULL

#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \
@@ -69,6 +70,7 @@
#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL
#define PROF_OP_DETAIL_MASK 0x0000080000000ULL

#define PROF_AICPU_MODEL_MASK 0x4000000000000000ULL
#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL

#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
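
The new switch follows the existing pattern: its *_MASK twin carries the same value, so callers compose and test bits uniformly. Illustrative lines:

// Illustrative only: compose the new AICPU-model switch with existing bits,
// then test it through its mask twin (same value by the definitions above).
const uint64_t profCfg = PROF_TASK_TRACE | PROF_AICPU_MODEL;
const bool aicpuModelOn = (profCfg & PROF_AICPU_MODEL_MASK) != 0ULL;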


+ 1
- 0
third_party/fwkacllib/inc/toolchain/prof_callback.h View File

@@ -114,6 +114,7 @@ enum MsprofCtrlCallbackType {
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options
MSPROF_CTRL_FINALIZE, // stop profiling
MSPROF_CTRL_INIT_HELPER, // start profiling in helper device
MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling
};
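
A control callback that recognizes the new type might look like the sketch below; the (type, data, len) shape is assumed from the MsprofCtrlCallback typedef earlier in this header.

// Sketch only; the callback signature is assumed, not confirmed by this hunk.
int32_t ExampleCtrlCallback(uint32_t type, void *data, uint32_t len) {
    (void)data;
    (void)len;
    if (type == MSPROF_CTRL_INIT_HELPER) {
        // begin profiling on the helper device
        return 0;
    }
    return 0;  // other control types handled elsewhere
}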



+ 25
- 0
third_party/fwkacllib/inc/toolchain/prof_common.h View File

@@ -28,6 +28,7 @@ enum MsprofDataTag {
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59
MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79
MSPROF_AICPU_MODEL_TAG = 61,
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139
@@ -52,6 +53,16 @@ struct MsprofMixData {
} data;
};

#define PATH_LEN_MAX 1023
#define PARAM_LEN_MAX 4095
struct MsprofCommandHandleParams {
uint32_t pathLen;
uint32_t storageLimit; // MB
uint32_t profDataLen;
char path[PATH_LEN_MAX + 1];
char profData[PARAM_LEN_MAX + 1];
};
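
Since each array is one byte larger than its length cap, a bounded copy keeps the strings NUL-terminated. A hypothetical filler (requires <string.h>):

// Hypothetical helper, not in the patch: bounded copies keep path/profData
// NUL-terminated because the arrays are PATH_LEN_MAX/PARAM_LEN_MAX plus one.
static void FillCommandHandleParams(struct MsprofCommandHandleParams *p,
                                    const char *path, const char *profData,
                                    uint32_t storageLimitMb) {
    memset(p, 0, sizeof(*p));
    p->storageLimit = storageLimitMb;
    strncpy(p->path, path, PATH_LEN_MAX);
    p->pathLen = (uint32_t)strlen(p->path);
    strncpy(p->profData, profData, PARAM_LEN_MAX);
    p->profDataLen = (uint32_t)strlen(p->profData);
}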

/**
* @brief profiling command info
*/
@@ -63,6 +74,7 @@ struct MsprofCommandHandle {
uint32_t devIdList[MSPROF_MAX_DEV_NUM];
uint32_t modelId;
uint32_t type;
struct MsprofCommandHandleParams params;
};

/**
@@ -305,6 +317,19 @@ struct MsprofAicpuProfData {
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
};

struct MsprofAicpuModelProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_AICPU_MODEL_TAG;
uint32_t rsv; // Ensure 8-byte alignment
uint64_t timeStamp;
uint64_t indexId;
uint32_t modelId;
uint16_t tagId;
uint16_t rsv1;
uint64_t eventId;
uint8_t reserve[24];
};
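
The field widths sum to 64 bytes with natural 8-byte alignment; an illustrative compile-time check:

// Illustrative, not in the patch: 2+2+4 + 8+8 + 4+2+2 + 8 + 24 = 64 bytes,
// so the record should carry no hidden padding.
static_assert(sizeof(MsprofAicpuModelProfData) == 64U, "unexpected padding in MsprofAicpuModelProfData");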

/**
* @brief struct of data reported by DP
*/


BIN
third_party/prebuild/aarch64/libalog.so View File


BIN
third_party/prebuild/aarch64/liberror_manager.so View File


BIN
third_party/prebuild/aarch64/libmmpa.a View File


BIN
third_party/prebuild/x86_64/libalog.so View File


BIN
third_party/prebuild/x86_64/liberror_manager.so View File


BIN
third_party/prebuild/x86_64/libmmpa.a View File

