!83 sync-from-trunk-to-blue-zone-1009

Merge pull request !83 from HW_KK/master
pull/83/MERGE
mindspore-ci-bot committed 4 years ago
commit 7a75f024d5
84 changed files with 928 additions and 854 deletions
  1. CMakeLists.txt (+2, -2)
  2. inc/framework/common/string_util.h (+2, -0)
  3. inc/framework/omg/omg_inner_types.h (+3, -0)
  4. inc/graph/debug/ge_attr_define.h (+4, -0)
  5. inc/graph/op_desc.h (+2, -1)
  6. inc/graph/range_vistor.h (+4, -0)
  7. inc/graph/utils/op_desc_utils.h (+1, -0)
  8. src/common/graph/detail/attributes_holder.cc (+1, -1)
  9. src/common/graph/ge_attr_define.cc (+4, -0)
  10. src/common/graph/node.cc (+4, -3)
  11. src/common/graph/op_desc.cc (+41, -1)
  12. src/common/graph/ref_relation.cc (+1, -1)
  13. src/common/graph/utils/op_desc_utils.cc (+47, -0)
  14. src/ge/CMakeLists.txt (+5, -1)
  15. src/ge/client/CMakeLists.txt (+2, -0)
  16. src/ge/client/ge_api.cc (+4, -0)
  17. src/ge/client/module.mk (+4, -2)
  18. src/ge/common/dump/dump_op.cc (+10, -10)
  19. src/ge/common/ge/datatype_util.cc (+1, -1)
  20. src/ge/common/profiling/profiling_manager.cc (+7, -25)
  21. src/ge/common/profiling/profiling_manager.h (+1, -1)
  22. src/ge/common/util.cc (+1, -1)
  23. src/ge/executor/CMakeLists.txt (+1, -0)
  24. src/ge/executor/module.mk (+5, -0)
  25. src/ge/ge_inference.mk (+2, -2)
  26. src/ge/ge_local_engine/CMakeLists.txt (+1, -1)
  27. src/ge/ge_local_engine/module.mk (+1, -1)
  28. src/ge/ge_runner.mk (+1, -0)
  29. src/ge/generator/ge_generator.cc (+9, -2)
  30. src/ge/graph/build/memory/graph_mem_assigner.cc (+144, -197)
  31. src/ge/graph/build/memory/graph_mem_assigner.h (+4, -7)
  32. src/ge/graph/load/new_model_manager/data_dumper.cc (+10, -7)
  33. src/ge/graph/load/new_model_manager/data_dumper.h (+1, -0)
  34. src/ge/graph/load/new_model_manager/davinci_model.cc (+2, -13)
  35. src/ge/graph/load/new_model_manager/model_manager.cc (+1, -1)
  36. src/ge/graph/load/new_model_manager/model_utils.cc (+6, -7)
  37. src/ge/graph/passes/attach_stream_label_pass.cc (+2, -19)
  38. src/ge/graph/passes/attach_stream_label_pass.h (+1, -5)
  39. src/ge/graph/passes/enter_pass.cc (+49, -11)
  40. src/ge/graph/passes/enter_pass.h (+3, -0)
  41. src/ge/graph/preprocess/multi_batch_copy_graph.cc (+13, -50)
  42. src/ge/graph/preprocess/multi_batch_copy_graph.h (+0, -2)
  43. src/ge/host_cpu_engine/module.mk (+1, -1)
  44. src/ge/init/gelib.cc (+5, -5)
  45. src/ge/init/gelib.h (+1, -1)
  46. src/ge/ir_build/atc_ir_common.cc (+1, -1)
  47. src/ge/ir_build/ge_ir_build.cc (+6, -0)
  48. src/ge/opskernel_manager/ops_kernel_manager.cc (+7, -7)
  49. src/ge/session/omg.cc (+15, -1)
  50. src/ge/single_op/single_op.cc (+1, -1)
  51. src/ge/single_op/task/build_task_utils.cc (+4, -1)
  52. src/ge/single_op/task/op_task.cc (+82, -81)
  53. src/ge/single_op/task/op_task.h (+16, -11)
  54. third_party/fwkacllib/inc/ops/aipp.h (+10, -5)
  55. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+105, -50)
  56. third_party/fwkacllib/inc/ops/functional_ops.h (+10, -10)
  57. third_party/fwkacllib/inc/ops/image_ops.h (+4, -3)
  58. third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+107, -53)
  59. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+60, -13)
  60. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+15, -10)
  61. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+14, -22)
  62. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+0, -3)
  63. third_party/fwkacllib/inc/ops/nn_training_ops.h (+0, -39)
  64. third_party/fwkacllib/inc/ops/pad_ops.h (+0, -12)
  65. third_party/fwkacllib/inc/ops/ragged_conversion_ops.h (+2, -2)
  66. third_party/fwkacllib/inc/ops/random_ops.h (+0, -3)
  67. third_party/fwkacllib/inc/ops/reduce_ops.h (+0, -24)
  68. third_party/fwkacllib/inc/ops/rnn.h (+5, -1)
  69. third_party/fwkacllib/inc/ops/save_ops.h (+1, -1)
  70. third_party/fwkacllib/inc/ops/sdca_ops.h (+6, -7)
  71. third_party/fwkacllib/inc/ops/selection_ops.h (+4, -51)
  72. third_party/fwkacllib/inc/ops/split_combination_ops.h (+4, -11)
  73. third_party/fwkacllib/inc/ops/transformation_ops.h (+0, -21)
  74. third_party/fwkacllib/inc/runtime/base.h (+3, -2)
  75. third_party/fwkacllib/inc/runtime/config.h (+2, -2)
  76. third_party/fwkacllib/inc/runtime/context.h (+2, -2)
  77. third_party/fwkacllib/inc/runtime/dev.h (+2, -2)
  78. third_party/fwkacllib/inc/runtime/dvfsprofile.h (+2, -2)
  79. third_party/fwkacllib/inc/runtime/event.h (+2, -2)
  80. third_party/fwkacllib/inc/runtime/kernel.h (+2, -2)
  81. third_party/fwkacllib/inc/runtime/mem.h (+2, -2)
  82. third_party/fwkacllib/inc/runtime/rt_model.h (+2, -2)
  83. third_party/fwkacllib/inc/runtime/stream.h (+2, -2)
  84. third_party/fwkacllib/inc/toolchain/adx_datadump_server.h (+14, -8)

CMakeLists.txt (+2, -2)

@@ -68,7 +68,7 @@ elseif(DEFINED ENV{D_LINK_PATH})
find_library(slog libslog.so ${GE_LIB_PATH})
find_library(mmpa libmmpa.so ${GE_LIB_PATH})
find_library(runtime libruntime.so ${GE_LIB_PATH})
find_library(msprof libmsprof.so ${GE_LIB_PATH})
find_library(msprofiler libmsprofiler.a ${GE_LIB_PATH})
find_library(register libregister.so ${GE_LIB_PATH})
find_library(hccl libhccl.so ${GE_LIB_PATH})
find_library(resource libresource.so ${GE_LIB_PATH})
@@ -85,7 +85,7 @@ else()
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
find_library(slog libslog.so ${ASCEND_DRIVER_DIR})
find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR})
find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR})
find_library(msprofiler libmsprofiler.a ${ASCEND_RUNTIME_DIR})

find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR})


inc/framework/common/string_util.h (+2, -0)

@@ -61,8 +61,10 @@ class StringUtils {
/// @param [in] delim separator
/// @return string array after segmentation
///
/*lint -e1077*/
static std::vector<std::string> Split(const std::string &str, char delim) {
std::vector<std::string> elems;
/*lint +e1077*/

if (str.empty()) {
elems.emplace_back("");
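
Note: a minimal usage sketch of the Split helper above (illustrative only; it assumes the ge namespace used by these framework headers):

#include <iostream>
#include "framework/common/string_util.h"

int main() {
  // Split "conv1:0" on ':' -> {"conv1", "0"}; an empty input yields {""}.
  for (const auto &part : ge::StringUtils::Split("conv1:0", ':')) {
    std::cout << part << std::endl;
  }
  return 0;
}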


inc/framework/omg/omg_inner_types.h (+3, -0)

@@ -92,6 +92,9 @@ struct OmgContext {
std::map<std::string, std::vector<int32_t>> out_nodes_map;
// user-designated out nodes (this is used for determining the order)
std::vector<std::pair<std::string, int32_t>> user_out_nodes;
// save the output node of the network, value = topName,
// topName indicates the output name of the operator.
std::vector<std::string> user_out_nodes_top_vec;
// net out nodes (either user_out_nodes or leaf nodes)
std::vector<std::string> net_out_nodes;
// net out nodes top names(only caffe has top)


inc/graph/debug/ge_attr_define.h (+4, -0)

@@ -1052,6 +1052,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE;

// op dynamic input
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_START;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_END;

// functional ops attr
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH;


inc/graph/op_desc.h (+2, -1)

@@ -235,7 +235,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {
vector<string> GetOpInferDepends() const;

string GetInputNameByIndex(uint32_t index) const;

string GetValidInputNameByIndex(uint32_t index) const;
int GetValidInputIndexByName(const string &name) const;
int GetInputIndexByName(const string &name) const;

string GetOutputNameByIndex(uint32_t index) const;


inc/graph/range_vistor.h (+4, -0)

@@ -22,8 +22,10 @@
template <class E, class O>
class RangeVistor {
public:
/*lint -e151*/
using Iterator = typename std::vector<E>::iterator;
using ConstIterator = typename std::vector<E>::const_iterator;
/*lint +e151*/

RangeVistor(O owner, const std::vector<E> &vs) : owner_(owner), elements_(vs) {}

@@ -41,7 +43,9 @@ class RangeVistor {

bool empty() const { return elements_.empty(); }

/*lint -e659*/
E &at(std::size_t index) { return elements_.at(index); }
/*lint +e659*/

const E &at(std::size_t index) const { return elements_.at(index); }



inc/graph/utils/op_desc_utils.h (+1, -0)

@@ -53,6 +53,7 @@ class OpDescUtils {
static vector<GeTensorPtr> MutableWeights(const ge::NodePtr node);
static graphStatus SetWeights(ge::Node& node, const vector<ge::GeTensorPtr>& weights);
static graphStatus SetWeights(ge::NodePtr node, const vector<ge::GeTensorPtr>& weights);
static graphStatus SetWeights(ge::Node& node, const map<int, ge::GeTensorPtr>& weights_map);
static graphStatus ClearWeights(ge::NodePtr node);

static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index);


src/common/graph/detail/attributes_holder.cc (+1, -1)

@@ -28,7 +28,7 @@ using std::unordered_set;
void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) { MutableAttrMap().CopyValueFrom(holder.GetAttrMap()); }
graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) {
if (value.IsEmpty()) {
GELOGE(GRAPH_FAILED, "value is empty, key %s", name.c_str());
GELOGE(GRAPH_FAILED, "value is empty, key of the attr is %s", name.c_str());
return GRAPH_FAILED;
}
auto proto_map = MutableAttrMap().GetProtoMsg();


src/common/graph/ge_attr_define.cc (+4, -0)

@@ -1060,6 +1060,10 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node";
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr";
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index";

// op dynamic input
const std::string ATTR_NAME_DYNAMIC_INPUT_START = "_dynamic_input_index_start";
const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end";

// atc user def dtype&format
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type";
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format";


src/common/graph/node.cc (+4, -3)

@@ -762,9 +762,10 @@ graphStatus Node::Verify() const {
if (!is_unknown_graph) {
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) {
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue);
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type ||
op_->GetType() == const_type || op_->GetType() == variable_type ||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0;
bool valid_anchor =
op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type ||
op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) ||
op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr || in_anchor_ptr->GetPeerAnchors().size() > 0;
if (!valid_anchor) {
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"},
{GetName(), std::to_string(in_anchor_ptr->GetIdx())});
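
Note: our reading of the added condition, not stated in the patch:

// valid_anchor is now also true when the input desc was never set:
//   op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr
// i.e. an unset (e.g. optional) input slot no longer triggers the E11019
// report even if the anchor has no peer.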


src/common/graph/op_desc.cc (+41, -1)

@@ -347,7 +347,10 @@ graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorD

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) {
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index);
if (index >= inputs_desc_.size()) {
GELOGW("The index is invalid. index[%u]", index);
return GRAPH_FAILED;
}

inputs_desc_[index] = ComGraphMakeShared<GeTensorDesc>(tensor_Desc);
if (inputs_desc_[index] == nullptr) {
@@ -949,6 +952,43 @@ int OpDesc::GetInputIndexByName(const string &name) const {
return static_cast<int>(it_find->second);
}

int OpDesc::GetValidInputIndexByName(const string &name) const {
map<string, uint32_t> valid_input_name_idx{};
uint32_t j = 0;
for (size_t i = 0; i < GetAllInputsSize(); i++) {
if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) {
auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i));
GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), -1);
valid_input_name_idx.insert({valid_name, j});
j++;
}
}
auto it_find = valid_input_name_idx.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != valid_input_name_idx.end(), -1);
return static_cast<int>(it_find->second);
}

string OpDesc::GetValidInputNameByIndex(uint32_t index) const {
map<string, uint32_t> valid_input_name_idx{};
uint32_t j = 0;
for (size_t i = 0; i < GetAllInputsSize(); i++) {
if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) {
auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i));
GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), "");
valid_input_name_idx.insert({valid_name, j});
j++;
}
}
auto it = valid_input_name_idx.begin();
for (; it != valid_input_name_idx.end(); ++it) {
if (it->second == index) {
break;
}
}
GE_CHK_BOOL_RET_STATUS_NOLOG(it != valid_input_name_idx.end(), "");
return it->first;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const {
auto it = output_name_idx_.begin();
for (; it != output_name_idx_.end(); ++it) {
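
Note: a worked reading of the new valid-index mapping (illustrative; the op and input names are made up):

// Raw input indices count every slot in inputs_desc_, null descs included;
// "valid" indices count only slots whose desc is set. For an op declaring
// inputs x, bias (optional, unset), y:
//   raw:   0 -> "x", 1 -> "bias" (desc == nullptr), 2 -> "y"
//   valid: 0 -> "x", 1 -> "y"
// op.GetInputIndexByName("y")      -> 2
// op.GetValidInputIndexByName("y") -> 1
// op.GetValidInputNameByIndex(1)   -> "y"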


src/common/graph/ref_relation.cc (+1, -1)

@@ -56,7 +56,7 @@ class RefRelations::Impl {
}
return GRAPH_SUCCESS;
}
GELOGW("can not find any relations! key value is %s", lookup_key.c_str());
GELOGW("can not find any relations! key value of dest relation is %s", lookup_key.c_str());
return GRAPH_SUCCESS;
};
graphStatus BuildRefRelations(ge::ComputeGraph &root_graph);


src/common/graph/utils/op_desc_utils.cc (+47, -0)

@@ -560,6 +560,53 @@ OpDescUtils::SetWeights(ge::Node &node, const vector<ge::GeTensorPtr> &weights)
return GRAPH_SUCCESS;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDescUtils::SetWeights(ge::Node &node, const map<int, ge::GeTensorPtr> &weights_map) {
GE_CHECK_NOTNULL(node.GetOpDesc());
// 1. node is const
if (node.GetOpDesc()->GetType() == CONSTANT) {
if (weights_map.size() == CONST_OP_NORMAL_WEIGHT_SIZE) {
return SetWeights(node.GetOpDesc(), weights_map.begin()->second);
}
GELOGE(GRAPH_PARAM_INVALID, "const op %s weight size %zu should be 1", node.GetName().c_str(), weights_map.size());
return GRAPH_PARAM_INVALID;
}
// 2. node is not const
for (const auto &pair : weights_map) {
auto in_data_anchor = node.GetInDataAnchor(pair.first);
if (in_data_anchor != nullptr && in_data_anchor->GetPeerOutAnchor() != nullptr) {
// a. update const input node
auto out_anchor = in_data_anchor->GetPeerOutAnchor();
auto peer_node = out_anchor->GetOwnerNode();
if (peer_node == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "op %s [%d]'s input node is null", node.GetName().c_str(), pair.first);
return GRAPH_PARAM_INVALID;
}
if (peer_node->GetType() != CONSTANT) {
GELOGE(GRAPH_PARAM_INVALID, " op %s [%d]'s input node should be const, but is %s type:%s ",
node.GetName().c_str(), pair.first, peer_node->GetName().c_str(), peer_node->GetType().c_str());
}
SetWeights(peer_node->GetOpDesc(), pair.second);
} else {
// b. create new const input node
auto const_opdesc = CreateConstOp(pair.second);
GE_CHECK_NOTNULL(const_opdesc);
auto owner_graph = node.GetOwnerComputeGraph();
if (owner_graph == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "node's graph is empty, name: %s", node.GetName().c_str());
return GRAPH_PARAM_INVALID;
}
auto const_node = owner_graph->AddNodeFront(const_opdesc);
if (node.AddLinkFrom(static_cast<uint32_t>(pair.first), const_node) != GRAPH_SUCCESS) {
GELOGE(GRAPH_FAILED, "op %s add const to input index[%d] failed", node.GetName().c_str(), pair.first);
return GRAPH_FAILED;
}
}
}
NodeUtils::UpdateIsInputConst(node);
return GRAPH_SUCCESS;
}

OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) {
GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!");
shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>();
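
Note: a hypothetical caller of the new overload (conv_node and weight_tensor are made-up names):

// Attach a weight to input index 1 of a non-const node. Per the code above,
// GE updates the const already linked to that input if one exists, otherwise
// it creates a new const node, links it, and refreshes is_input_const.
std::map<int, ge::GeTensorPtr> weights_map{{1, weight_tensor}};
if (ge::OpDescUtils::SetWeights(*conv_node, weights_map) != ge::GRAPH_SUCCESS) {
  GELOGE(ge::GRAPH_FAILED, "SetWeights by input index failed");
}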


src/ge/CMakeLists.txt (+5, -1)

@@ -229,6 +229,7 @@ target_link_libraries(ge_runner
${resouce}
${ascend_hal}
${adump_server}
${msprofiler}
rt
dl)

@@ -358,7 +359,10 @@ add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HD
target_compile_definitions(ge_compiler PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
REUSE_MEMORY=1
FMK_HOST_INFER)
FMK_HOST_INFER
FMK_SUPPORT_DUMP
COMPILE_OMG_PACKAGE
REUSE_MEMORY=1)
target_link_libraries(ge_compiler
graph
ge_common


src/ge/client/CMakeLists.txt (+2, -0)

@@ -68,5 +68,7 @@ target_link_libraries(ge_client
${mmpa}
${runtime}
${msprof}
${msprofiler}
${ascend_hal}
rt
dl)

src/ge/client/ge_api.cc (+4, -0)

@@ -16,6 +16,7 @@

#include "ge/ge_api.h"
#include <iostream>
#include <malloc.h>
#include "common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "common/ge/datatype_util.h"
@@ -163,6 +164,9 @@ Status GEFinalize() {
g_ge_initialized = false;
}

// to avoid memory fragmentation, use malloc_trim to return freed memory back to the system
malloc_trim(0);

GELOGT(TRACE_STOP, "GEFinalize finished");
return ret;
}
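
Note: for reference, malloc_trim is the glibc call declared in <malloc.h>; a standalone illustration (not GE code):

#include <malloc.h>

void ReleaseFreedHeapToOs() {
  // Ask glibc to return free heap pages to the OS; the argument is the
  // slack (in bytes) to leave at the top of the heap. Best-effort only:
  // returns 1 if memory was actually released, 0 otherwise.
  (void)malloc_trim(0);
}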


src/ge/client/module.mk (+4, -2)

@@ -70,9 +70,10 @@ LOCAL_SHARED_LIBRARIES := \
libregister \
libge_compiler \
libge_common \
libmsprof
libmsprof \
stub/libascend_hal

LOCAL_STATIC_LIBRARIES := libmsprofiler

LOCAL_LDFLAGS := -lrt -ldl

@@ -107,6 +108,7 @@ LOCAL_SHARED_LIBRARIES := \
libge_common \
libmsprof

LOCAL_STATIC_LIBRARIES := libmsprofiler

LOCAL_LDFLAGS := -lrt -ldl
LOCAL_CFLAGS += \


src/ge/common/dump/dump_op.cc (+10, -10)

@@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
return RT_FAILED;
}

constexpr int32_t ioAddrNum = 2;
constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t);
char args[argsSize] = {0};
auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args);
paramHead->length = argsSize;
paramHead->ioAddrNum = ioAddrNum;
auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead));
ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_);
ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_);
constexpr int32_t io_addr_num = 2;
constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t);
char args[args_size] = {0};
auto param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args);
param_head->length = args_size;
param_head->ioAddrNum = io_addr_num;
auto io_addr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead));
io_addr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_);
io_addr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_);
rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp,
1, // blockDim default 1
args, argsSize,
args, args_size,
nullptr, // no need smDesc
stream_);
if (rt_ret != RT_ERROR_NONE) {


src/ge/common/ge/datatype_util.cc (+1, -1)

@@ -34,7 +34,7 @@ std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_ty
{ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}},
{ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}};

static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = {
std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = {
// key:ge datatype,value:proto datatype
{ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED},
{ge::DT_FLOAT, ge::proto::DT_FLOAT},
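
Note: our reading of this one-line change: dropping `static const` gives the table external linkage, so another translation unit (presumably the dump code touched in this PR) can declare and reuse it:

// e.g. in a .cc file that needs the ge -> proto dtype table:
extern std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map;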


src/ge/common/profiling/profiling_manager.cc (+7, -25)

@@ -51,12 +51,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana
return profiling_manager;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options,
bool convert_2_phy_device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) {
#ifdef DAVINCI_SUPPORT_PROFILING
vector<int32_t>().swap(device_id_);
job_id_ = options.job_id;

GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str());

Status ret;
if (!recv_profiling_config_.empty()) {
GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str());
@@ -64,18 +65,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
} else {
ret = InitFromOptions(options);
if (ret == SUCCESS && is_load_profiling_) {
// profiling need phy device id
if (!convert_2_phy_device_id) {
device_id_.push_back(options.device_id);
} else {
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
return FAILED;
}
device_id_.push_back(phy_device_id);
}
device_id_.push_back(options.device_id);
}
}
if (ret != SUCCESS) {
@@ -557,25 +547,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
return;
}
GELOGI("current logic_device_id:%d", logic_device_id);

uint32_t phy_device_id = 0;
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return;
}
GELOGI("current phy_device_id:%d", phy_device_id);
if (!is_acl_api_mode_) {
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
return;
}
}
GELOGI("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(task_desc_info, phy_device_id);
ProfilingTaskDescInfo(task_desc_info, logic_device_id);
GELOGI("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id);
ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id);
GELOGI("Report profiling data for GE end.");
#endif
}


src/ge/common/profiling/profiling_manager.h (+1, -1)

@@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
ProfilingManager();
virtual ~ProfilingManager();
static ProfilingManager &Instance();
ge::Status Init(const Options &options, bool convert_2_phy_device_id = false);
ge::Status Init(const Options &options);
ge::Status InitFromOptions(const Options &options);
ge::Status InitFromAclCfg(const std::string &config);
ge::Status StartProfiling(int32_t iter, int32_t device_id);


src/ge/common/util.cc (+1, -1)

@@ -472,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str
return true;
}

ret = regexec(&reg, str.c_str(), 0, nullptr, 0);
ret = regexec(&reg, str.c_str(), 0, NULL, 0);
if (ret) {
regerror(ret, &reg, ebuff, kMaxBuffSize);
GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff);


src/ge/executor/CMakeLists.txt (+1, -0)

@@ -120,6 +120,7 @@ target_link_libraries(ge_executor
${mmpa}
${msprof}
${error_manager}
${ascend_hal}
rt
dl)


src/ge/executor/module.mk (+5, -0)

@@ -89,6 +89,7 @@ local_ge_executor_shared_library := \
libregister \
libmsprof \
liberror_manager \
libascend_hal

local_ge_executor_ldflags := -lrt -ldl \

@@ -104,6 +105,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library)
LOCAL_STATIC_LIBRARIES := libmsprofiler
ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
@@ -140,6 +142,9 @@ LOCAL_SHARED_LIBRARIES := \
libregister \
libmsprof \
liberror_manager \
stub/libascend_hal

LOCAL_STATIC_LIBRARIES := libmsprofiler

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)



src/ge/ge_inference.mk (+2, -2)

@@ -355,7 +355,7 @@ LOCAL_MODULE := libge_compiler

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
# from ome_inference.mk
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@@ -418,7 +418,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_compiler
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION
LOCAL_CFLAGS += -O2
LOCAL_MODULE_CLASS := SHARED_LIBRARIES


src/ge/ge_local_engine/CMakeLists.txt (+1, -1)

@@ -42,7 +42,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge)

######### libge_local_engine.so #############
add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_definitions(ge_local_engine PRIVATE Werror)
target_compile_definitions(ge_local_engine PRIVATE Werror COMPILE_OMG_PACKAGE)
target_link_libraries(ge_local_engine
graph
${PROTOBUF_LIBRARY}


src/ge/ge_local_engine/module.mk (+1, -1)

@@ -42,7 +42,7 @@ include ${BUILD_HOST_SHARED_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=


src/ge/ge_runner.mk (+1, -0)

@@ -356,6 +356,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \
libadump_server \
libmsprofiler \

LOCAL_SHARED_LIBRARIES := \
libc_sec \


src/ge/generator/ge_generator.cc (+9, -2)

@@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen
bool attr) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);

auto format = tensor.GetFormat();
auto data_type = tensor.GetDataType();
if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) {
return SUCCESS;
}

string op_type;
if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) {
op_type = DATA;
@@ -521,8 +528,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline) {
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) {
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize());
if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) {
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize());
return PARAM_INVALID;
}
if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) {
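
Note: why GetAllInputsSize() here, as we read it:

// GetInputsSize() counts only input slots whose tensor desc is set, while
// GetAllInputsSize() counts every declared slot, null descs included.
// For declared inputs x, bias (optional, unset), y:
//   GetInputsSize()    == 2
//   GetAllInputsSize() == 3
// Callers of BuildSingleOp pass one GeTensor per declared slot, so the
// check now compares against the full slot count.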


src/ge/graph/build/memory/graph_mem_assigner.cc (+144, -197)

@@ -322,11 +322,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
GELOGE(ge::FAILED,
"There is an atomic conflict between the current node and the peer out node, not supported!");
return ge::FAILED;
} else if (is_loop_graph) {
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start));
} else {
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}),
"SetAtomicCleanAttr failed.");
}

const auto &in_control_anchor = node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_control_anchor);
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size});
if (ret != SUCCESS) {
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
return ret;
}
}
}
}
}
@@ -840,68 +848,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt
}

Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
GE_CHECK_NOTNULL(compute_graph_);
// Atomic op memory start addr
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_);

vector<NodePtr> connect_netoutput_nodes;
for (auto &node : compute_graph_->GetAllNodes()) {
auto node_op_desc = node->GetOpDesc();
if (node_op_desc == nullptr) {
continue;
}

bool is_atomic = false;
// If GetBool fail, is_atomic is false.
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
if (!is_atomic) {
continue;
}

bool is_ref = false;
// If GetBool fail, is_ref is false.
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref);
if (is_ref) {
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.",
node_op_desc->GetName().c_str());
return ge::PARAM_INVALID;
}

vector<int> is_connect_netoutput;
// If GetBool fail, attr is_connect_netoutput is an empty vector.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
if (!is_connect_netoutput.empty()) {
connect_netoutput_nodes.emplace_back(node);
continue;
}
map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map;
vector<NodePtr> connecting_output_atomic_nodes;
Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
if (status != SUCCESS) {
GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
return status;
}

// Atomic op memory start addr of loop graph
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
vector<int64_t> mem_offset_end;
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
return FAILED;
}
for (auto &iter : normal_atomic_and_clean_nodes_map) {
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);

/// In networks with loop op, atomic op uses atomic_addr_clean op independently,
/// so we need to set the attr separately.
if (is_loop_graph) {
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start));
for (auto &atomic_node : iter.second) {
vector<int64_t> mem_offset_end;
status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
if (status != SUCCESS) {
GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
atomic_node->GetName().c_str());
return status;
}
}
}

// In networks without loop op, the same atomic addr clean op is used for atomic op
if (!is_loop_graph) {
// Set the address attr of atomic clean operator
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
if (atomic_mem_size != 0) {
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}),
"SetAtomicCleanAttr failed.");
int64_t atomic_mem_size = static_cast<int64_t>(memory_offset_[0].mem_offset_) - atomic_mem_start;
status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size});
if (status != SUCCESS) {
GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
return status;
}
}

if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) {
if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) {
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
return FAILED;
}
@@ -909,6 +886,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
return SUCCESS;
}

Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
vector<NodePtr> &connecting_output_atomic_nodes) {
GE_CHECK_NOTNULL(compute_graph_);
for (const auto &node : compute_graph_->GetAllNodes()) {
if (node->GetType() == ATOMICADDRCLEAN) {
vector<NodePtr> tmp_normal_atomic_nodes;
const auto &out_control_anchor = node->GetOutControlAnchor();
GE_CHECK_NOTNULL(out_control_anchor);
for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
if (peer_in_control_anchor != nullptr) {
auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
auto peer_in_node_desc = peer_in_node->GetOpDesc();
if (peer_in_node_desc != nullptr) {
bool is_atomic_node = false;
// If GetBool fail, is_atomic_node is false.
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
if (is_atomic_node) {
bool is_reference = false;
// If GetBool fail, is_reference is false.
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
if (is_reference) {
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.",
peer_in_node_desc->GetName().c_str());
return ge::PARAM_INVALID;
}

vector<int> is_connecting_output;
// If GetBool fail, attr is_connecting_output is an empty vector.
(void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
if (is_connecting_output.empty()) {
tmp_normal_atomic_nodes.emplace_back(peer_in_node);
continue;
}
connecting_output_atomic_nodes.emplace_back(peer_in_node);
tmp_normal_atomic_nodes.clear();
break;
}
}
}
}

if (!tmp_normal_atomic_nodes.empty()) {
normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes;
}
}
}
return SUCCESS;
}

Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
vector<int64_t> &mem_offset_end) {
auto node_op_desc = node->GetOpDesc();
@@ -1331,6 +1357,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
vector<int64_t> memory_type;
auto tmp_op_desc = node->GetOpDesc();
origin_input_list = tmp_op_desc->GetInputOffset();
int64_t valid_input_index = 0;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
for (const auto &anchor : node->GetAllInDataAnchors()) {
vector<int64_t> output_list;
@@ -1344,8 +1371,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
GE_CHECK_NOTNULL(last_peer_out_op_desc);
output_list = last_peer_out_op_desc->GetOutputOffset();
if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
auto input_index = anchor->GetIdx();
auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
if (output_list.size() > static_cast<size_t>(out_index)) {
int64_t input_offset = output_list.at(out_index);
if (has_mem_type_attr) {
auto input_size = tmp_op_desc->GetInputsSize();
auto ori_input_offset_list_size = origin_input_list.size();
@@ -1359,26 +1387,21 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
}
// not HBM: keep the original input offset
// HBM: input offset = original input offset + output offset
input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1
? origin_input_list[input_index]
: origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx()));
GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]",
tmp_op_desc->GetName().c_str(), input_index,
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
input_list.back());
} else {
int64_t output_offset = output_list.at(peer_out_anchor->GetIdx());
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
if (in_node->GetType() == CONSTANT) {
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index);
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset));
}

GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(),
input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
output_offset);
input_list.emplace_back(output_offset);
input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1
? origin_input_list[valid_input_index]
: origin_input_list[valid_input_index] + output_list.at(out_index));
}
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
if (in_node->GetType() == CONSTANT) {
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
}

GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]",
has_mem_type_attr == true ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index,
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset);
input_list.emplace_back(input_offset);
valid_input_index++;
}
}
return ge::SUCCESS;
@@ -1473,125 +1496,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in
return SUCCESS;
}

Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) {
// set the address attr of atomic clean operator for loop graph
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.",
atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_);
const auto &in_control_anchor = node->GetInControlAnchor();
if (atomic_mem_size != 0 && in_control_anchor != nullptr) {
for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
if (peer_out_control_anchor == nullptr) {
continue;
}
auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
auto peer_out_node_desc = peer_out_node->GetOpDesc();
if (peer_out_node_desc == nullptr) {
continue;
}

GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(),
peer_out_node_desc->GetType().c_str());

if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}),
GELOGE(FAILED, "SetAtomicCleanAttr failed.");
return FAILED);
}
}
}
return SUCCESS;
}

ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node,
bool &is_independent_atomic_clean_node) {
GE_CHECK_NOTNULL(node);
const auto &out_control_anchor = node->GetOutControlAnchor();
GE_CHECK_NOTNULL(out_control_anchor);
for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
if (peer_in_control_anchor != nullptr) {
auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
auto peer_in_node_desc = peer_in_node->GetOpDesc();
if (peer_in_node_desc != nullptr) {
bool is_atomic_node = false;
// If GetBool fail, is_atomic_node is false.
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
if (is_atomic_node) {
vector<int> is_connect_netoutput;
// If GetBool fail, attr is_connect_netoutput is an empty vector.
(void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
if (!is_connect_netoutput.empty()) {
GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str());
is_independent_atomic_clean_node = true;
break;
}
}
}
}
}

return SUCCESS;
}

ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start,
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
const vector<int64_t> &atomic_mem_size) {
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) {
auto node_op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);

bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName());

if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) {
bool is_independent_atomic_clean = false;
if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) {
GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node.");
return PARAM_INVALID;
}

is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean);
auto node_op_desc = node->GetOpDesc();
if (node_op_desc != nullptr) {
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
node_op_desc->SetWorkspace(workspace_vector);
node_op_desc->SetWorkspaceBytes(workspace_byte_vector);

std::vector<int64_t> mem_start_vector;
// If GetListInt fail, mem_start_vector is empty.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
GELOGE(FAILED, "SetListInt failed.");
return FAILED);

std::vector<int64_t> mem_size_vector;
// If GetListInt fail, mem_size_vector is empty.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
GELOGE(FAILED, "SetListInt failed.");
return FAILED);

std::stringstream ss;
for (auto iter : atomic_mem_start) {
ss << iter << " ";
}

if (is_valid_atomic_clean_node) {
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
node_op_desc->SetWorkspace(workspace_vector);
node_op_desc->SetWorkspaceBytes(workspace_byte_vector);

std::vector<int64_t> mem_start_vector;
// If GetListInt fail, mem_start_vector is empty.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
GELOGE(FAILED, "SetListInt failed.");
return FAILED);

std::vector<int64_t> mem_size_vector;
// If GetListInt fail, mem_size_vector is empty.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
GELOGE(FAILED, "SetListInt failed.");
return FAILED);

std::stringstream ss;
for (auto iter : atomic_mem_start) {
ss << iter << " ";
}
string atomic_mem_start_str = ss.str();
ss.clear();
ss.str("");
for (auto iter : atomic_mem_size) {
ss << iter << " ";
}
string atomic_mem_size_str = ss.str();

GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
string atomic_mem_start_str = ss.str();
ss.clear();
ss.str("");
for (auto iter : atomic_mem_size) {
ss << iter << " ";
}
string atomic_mem_size_str = ss.str();

GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
}
return SUCCESS;
}
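
Note: the restructured atomic-memory flow in this file, summarized (a sketch of our reading, not authoritative):

// FilterAtomicNodesForMemoryAssign walks every AtomicAddrClean node and,
// through its out-control edges, collects the atomic nodes it cleans:
//
//   AtomicAddrClean --ctrl--> atomic_a
//   AtomicAddrClean --ctrl--> atomic_b
//
//   normal_atomic_and_clean_nodes_map = { clean_node: [atomic_a, atomic_b] }
//   connecting_output_atomic_nodes    = atomic nodes wired to NetOutput
//
// ReAssignAtomicMemory then assigns each group's output/workspace memory and
// calls SetAtomicCleanAttr(clean_node, {start}, {size}) once per group,
// replacing both the old whole-graph scan in SetAtomicCleanAttr and the
// separate SetLoopGraphAtomicAttr path for loop graphs.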


src/ge/graph/build/memory/graph_mem_assigner.h (+4, -7)

@@ -135,6 +135,9 @@ class GraphMemoryAssigner {

ge::Status ReAssignAtomicMemory(bool is_loop_graph);

ge::Status FilterAtomicNodesForMemoryAssign(std::map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
std::vector<NodePtr> &connecting_output_atomic_nodes);

ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size);

@@ -165,14 +168,8 @@ class GraphMemoryAssigner {

ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
const std::vector<int64_t> &mem_offset_end);
///
/// @brief set loop graph atomic attr
/// @param node, atomic memory assignment start offset
/// @param atomic_mem_start: atomic op memory start address
///
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start);

ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start,
ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start,
const std::vector<int64_t> &atomic_mem_size);

ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node);


src/ge/graph/load/new_model_manager/data_dumper.cc (+10, -7)

@@ -695,11 +695,7 @@ Status DataDumper::LoadDumpInfo() {
}
if (dump_properties_.GetDumpMode() == kDumpInput) {
if (op_iter.is_task) {
Status ret = DumpInput(op_iter, task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump input failed");
return ret;
}
GE_CHK_STATUS_RET(DumpInput(op_iter, task), "Dump input failed");
}
op_mapping_info.mutable_task()->Add(std::move(task));
continue;
@@ -726,7 +722,7 @@ Status DataDumper::LoadDumpInfo() {

SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info);

if (!op_list_.empty() || is_op_debug_) {
if (!op_list_.empty() || is_op_debug_ || is_end_graph_) {
auto ret = ExecuteLoadDumpInfo(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(ret, "Execute load dump info failed");
@@ -740,7 +736,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
aicpu::dump::OpMappingInfo &op_mapping_info) {
if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput ||
dump_properties_.GetDumpMode() == kDumpAll) {
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
aicpu::dump::Task task;
task.set_end_graph(true);
task.set_task_id(end_graph_task_id_);
@@ -748,6 +743,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH);
task.mutable_op()->set_op_type(ENDGRAPH);
op_mapping_info.mutable_task()->Add(std::move(task));

is_end_graph_ = true;
if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) {
GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u",
op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_);
return;
}
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
}
}



src/ge/graph/load/new_model_manager/data_dumper.h (+1, -0)

@@ -116,6 +116,7 @@ class DataDumper {
std::vector<InnerDumpInfo> op_list_;
uint32_t end_graph_task_id_ = 0;
uint32_t end_graph_stream_id_ = 0;
bool is_end_graph_ = false;
std::multimap<std::string, InnerInputMapping> input_map_;
bool load_flag_;
uint32_t device_id_;


src/ge/graph/load/new_model_manager/davinci_model.cc (+2, -13)

@@ -1928,13 +1928,7 @@ Status DavinciModel::SinkModelProfile() {
name = name_;
}
size_t name_len = name.size();
// phy device id
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
return FAILED);
reporter_data.deviceId = phy_device_id;
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.",
@@ -2103,12 +2097,7 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");
// device id
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
return FAILED);
reporter_data.deviceId = phy_device_id;
reporter_data.deviceId = device_id_;

// Model Header
string name;


src/ge/graph/load/new_model_manager/model_manager.cc (+1, -1)

@@ -236,7 +236,6 @@ ModelManager::~ModelManager() {
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();

GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0)));
}
@@ -400,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) {
}
std::lock_guard<std::mutex> lock(exeception_infos_mutex_);
exception_infos_.clear();
cust_aicpu_so_.clear();
return SUCCESS;
}



src/ge/graph/load/new_model_manager/model_utils.cc (+6, -7)

@@ -328,15 +328,14 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
op_desc->GetName().c_str(), v_memory_type.size(), inputs_size);
return v_input_data_addr;
}
for (size_t i = 0; i < inputs_size; ++i) {
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (tensor_desc == nullptr) {
GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i);
continue;
}
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) {
// TBE: add weights address to input
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size));
if (tensor_size) {


src/ge/graph/passes/attach_stream_label_pass.cc (+2, -19)

@@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
nodes.push(node);

static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE};
bool merge_flag = false;
bool exit_flag = false;
bool net_output_flag = false;
while (!nodes.empty()) {
NodePtr cur_node = nodes.top();
nodes.pop();
if (visited.count(cur_node) > 0) {
continue;
}
if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) {
if (AttachFlag(cur_node, stream_label) != SUCCESS) {
GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str());
return FAILED;
}
@@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed.");
}

bool attach_flag = (merge_flag || exit_flag) && net_output_flag;
if (attach_flag) {
GELOGI("No need to keep on attaching label.");
return SUCCESS;
}

for (const NodePtr &tmp_node : branch_nodes) {
GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str());
GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed.");
@@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
/// @brief attach flag
/// @param [in] node
/// @param [out] stream_label
/// @param [out] merge_flag
/// @param [out] exit_flag
/// @param [out] net_output_flag
/// @return Status
///
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag,
bool &exit_flag, bool &net_output_flag) {
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) {
const std::string &type = node->GetType();
if (type == STREAMSWITCH) {
if (node->GetInDataNodes().empty()) {
@@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea
} else if (type == STREAMMERGE) {
stream_label = node->GetName();
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
merge_flag = true;
} else if ((type == EXIT) || (type == REFEXIT)) {
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
exit_flag = true;
} else if (type == NETOUTPUT) {
net_output_flag = true;
}

return SUCCESS;
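
Note: the net effect of this simplification, as we read it:

// AttachFlag no longer reports merge/exit/netoutput flags, so
// UpdateCondBranch loses its early exit: it previously stopped attaching
// labels once a Merge/Exit path had reached NetOutput, whereas now the
// stream label is attached to every node collected in branch_nodes.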


src/ge/graph/passes/attach_stream_label_pass.h (+1, -5)

@@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass {
/// @brief attach flag
/// @param [in] node
/// @param [out] stream_label
/// @param [out] merge_flag
/// @param [out] exit_flag
/// @param [out] net_output_flag
/// @return Status
///
static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag,
bool &net_output_flag);
static Status AttachFlag(const NodePtr &node, std::string &stream_label);

///
/// @brief Update stream_label for loop_branch


src/ge/graph/passes/enter_pass.cc (+49, -11)

@@ -20,13 +20,14 @@
#include "framework/common/debug/log.h"
#include "graph/utils/graph_utils.h"

namespace {
const size_t kOutNodesNum = 1;
}

namespace ge {
Status EnterPass::Run(NodePtr &node) {
GELOGD("EnterPass running");
if (node == nullptr) {
GELOGE(PARAM_INVALID, "param [node] must not be null.");
return PARAM_INVALID;
}
GE_CHECK_NOTNULL(node);

if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) {
return SUCCESS;
@@ -38,18 +39,17 @@ Status EnterPass::Run(NodePtr &node) {
return PARAM_INVALID;
}
NodePtr in_node = node->GetInDataNodes().at(0);
if (in_node == nullptr) {
GELOGE(PARAM_INVALID, "param [in_node] must not be null");
return PARAM_INVALID;
}
GE_CHECK_NOTNULL(in_node);

if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) {
return SUCCESS;
}

bool need_remove_flag =
in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty();
if (need_remove_flag) {
bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty();
if (!need_remove_flag) {
return SUCCESS;
}
if (node->GetOutDataNodes().empty()) {
for (auto &out_ctrl_node : node->GetOutControlNodes()) {
if (out_ctrl_node == nullptr) {
continue;
@@ -60,9 +60,47 @@ Status EnterPass::Run(NodePtr &node) {
return FAILED;
}
}
} else {
if (OptimizeEnter(node, in_node) != SUCCESS) {
GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str());
return FAILED;
}
}

GELOGD("EnterPass success");
return SUCCESS;
}

Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) {
auto out_nodes_of_in_node = in_node->GetOutAllNodes();
if (out_nodes_of_in_node.size() != kOutNodesNum) {
return SUCCESS;
}

if (!node->GetOutControlNodes().empty()) {
return SUCCESS;
}

for (const auto &out_node : node->GetOutDataNodes()) {
GE_CHECK_NOTNULL(out_node);
if (out_node->GetType() == MERGE) {
return SUCCESS;
}
}

GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0));
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0)));
auto out_data_anchor = node->GetOutDataAnchor(0);
GE_CHECK_NOTNULL(out_data_anchor);
for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor));
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor));
}

auto graph = node->GetOwnerComputeGraph();
GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node))
AddRePassNodesWithInOut(in_node);

return SUCCESS;
}
} // namespace ge
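
Note: the rewrite performed by the new OptimizeEnter, sketched:

// Before:  Const --data--> Enter --data--> consumer(s)
// After:   Const --data--> consumer(s)        (Enter removed)
//
// It only fires when safe: the Const feeds nothing but this Enter, neither
// node has control edges (checked via need_remove_flag and the out-control
// check above), and no consumer is a Merge, since Merge relies on Enter
// semantics inside loops. The surviving in_node is re-queued through
// AddRePassNodesWithInOut for the following passes.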

src/ge/graph/passes/enter_pass.h (+3, -0)

@@ -23,6 +23,9 @@ namespace ge {
class EnterPass : public BaseNodePass {
public:
Status Run(NodePtr &node) override;

private:
Status OptimizeEnter(NodePtr &node, NodePtr &in_node);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_ENTER_PASS_H_

+ 13
- 50
src/ge/graph/preprocess/multi_batch_copy_graph.cc

@@ -41,7 +41,6 @@
#include "inc/pass_manager.h"
#include "graph/common/local_context.h"

using std::map;
using std::set;
using std::string;
using std::vector;
@@ -266,24 +265,27 @@ Status MultiBatchGraphCopyer::Init() {
}

Status MultiBatchGraphCopyer::LabelStatus() {
map<string, vector<NodePtr>> frame_enters;
InitStatus(frame_enters);

for (const auto &data : origin_data_nodes_) {
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
if (!IsAllDimsPositive(data_shape.GetDims())) {
origin_nodes_status_[data.get()] = kNodeInBatchBranch;
}
}
bool changed = true;
// If any of the input nodes is kNodeInBatchBranch, this node is also kNodeInBatchBranch
while (changed) {
changed = false;
for (const auto &node : origin_all_nodes_) {
auto iter = origin_nodes_status_.find(node.get());
if (iter != origin_nodes_status_.end()) {
continue;
}
for (auto &in_node : node->GetInAllNodes()) {
bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() &&
origin_nodes_status_[in_node.get()] == kNodeInBatchBranch;
if (is_in_batch) {
if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() ||
origin_nodes_status_[node.get()] != kNodeInBatchBranch) {
origin_nodes_status_[node.get()] = kNodeInBatchBranch;
ResetEnterStatus(frame_enters, node);
changed = true;
}
origin_nodes_status_[node.get()] = kNodeInBatchBranch;
changed = true;
break;
}
}
@@ -314,45 +316,6 @@ Status MultiBatchGraphCopyer::LabelStatus() {
return SUCCESS;
}

void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) {
for (const auto &node : origin_all_nodes_) {
if (node->GetType() != ENTER && node->GetType() != REFENTER) {
continue;
}
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
string frame_name;
if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) {
frame_enters[frame_name].emplace_back(node);
}
}

for (const auto &data : origin_data_nodes_) {
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
if (!IsAllDimsPositive(data_shape.GetDims())) {
origin_nodes_status_[data.get()] = kNodeInBatchBranch;
}
}
}

void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) {
if (node->GetType() != ENTER && node->GetType() != REFENTER) {
return;
}

for (const auto &frame_enter : frame_enters) {
auto &enters = frame_enter.second;
if (std::find(enters.begin(), enters.end(), node) != enters.end()) {
for (const auto &enter : enters) {
origin_nodes_status_[enter.get()] = kNodeInBatchBranch;
}
break;
}
}
}
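The frame-keyed Enter bookkeeping removed above is redundant: the fixed-point loop in LabelStatus now reaches Enter/RefEnter nodes through their ordinary data edges. A worked micro-example on a hypothetical chain:

  // Data(dim -1) -> Cast -> Enter -> Add
  // sweep 1: Cast marked kNodeInBatchBranch (its input Data is pre-marked)
  // sweep 2: Enter marked (its input Cast is now marked)
  // sweep 3: Add marked; sweep 4 changes nothing and the loop exits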

Status MultiBatchGraphCopyer::CreateNewNodes() {
shape_data_ = InsertShapeDataNode();
if (shape_data_ == nullptr) {
@@ -1200,7 +1163,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s
}
}

// Connect NetOutput directly: DTS2020070612498
// Connect NetOutput directly
void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index,
vector<string> &dynamic_output_dims) {
GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str());


+ 0
- 2
src/ge/graph/preprocess/multi_batch_copy_graph.h

@@ -68,8 +68,6 @@ class MultiBatchGraphCopyer {

// label status for origin_all_nodes_
Status LabelStatus();
void InitStatus(std::map<string, vector<NodePtr>> &frame_enters);
void ResetEnterStatus(std::map<string, vector<NodePtr>> &frame_enters, const NodePtr &node);
// add nodes functions
Status CreateNewNodes();



+ 1
- 1
src/ge/host_cpu_engine/module.mk

@@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libhost_cpu_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=


+ 5
- 5
src/ge/init/gelib.cc

@@ -165,8 +165,10 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
}
}

// In train and infer, profiling is always needed.
InitOptions(options);

// In train and infer, profiling is always needed.
InitProfiling(this->options_);
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS,
@@ -176,21 +178,19 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
// 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer
// these two case with logical device id
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) {
InitProfiling(this->options_, true);
status = InitSystemWithOptions(this->options_);
} else {
InitProfiling(this->options_);
status = InitSystemWithoutOptions();
}
return status;
}

void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) {
void GELib::InitProfiling(Options &options) {
GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id);
std::lock_guard<std::mutex> lock(status_mutex_);
GetContext().Init();
// Profiling init
if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) {
if (ProfilingManager::Instance().Init(options) != SUCCESS) {
GELOGW("Profiling init failed.");
}
}
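Net effect: profiling is initialized exactly once, before the train/infer branch, and the physical-device-id conversion flag disappears from the interface. A condensed sketch of the resulting order, drawn from the hunk above:

  InitOptions(options);            // fill this->options_ first
  InitProfiling(this->options_);   // train and infer both go through here
  if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) {
    status = InitSystemWithOptions(this->options_);
  } else {
    status = InitSystemWithoutOptions();
  }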


+ 1
- 1
src/ge/init/gelib.h

@@ -68,7 +68,7 @@ class GELib {
// get incre build cache path
const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; }

void InitProfiling(Options &options, bool convert_2_phy_device_id = false);
void InitProfiling(Options &options);
void ShutDownProfiling();

Status InitSystemWithoutOptions();


+ 1
- 1
src/ge/ir_build/atc_ir_common.cc

@@ -522,7 +522,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip
for (auto iter = options.begin(); iter != options.end(); iter++) {
std::string key = iter->first;
std::string option_name = iter->second;
GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str());
GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str());
}
}



+ 6
- 0
src/ge/ir_build/ge_ir_build.cc

@@ -96,6 +96,12 @@ static graphStatus CheckGlobalOptions(std::map<std::string, std::string> &global
return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!");
global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode;

// set precision mode default value
std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == global_options.end()
? "force_fp16"
: global_options[ge::ir_option::PRECISION_MODE];
global_options[ge::ir_option::PRECISION_MODE] = precision_mode;

return GRAPH_SUCCESS;
}
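In effect, an options map that never mentions the precision key leaves CheckGlobalOptions with "force_fp16" filled in, while an explicit user value is kept. A hedged illustration ("allow_fp32_to_fp16" shown only as an example of a user-set mode):

  std::map<std::string, std::string> global_options;  // user supplied nothing
  (void)CheckGlobalOptions(global_options);
  // global_options[ge::ir_option::PRECISION_MODE] == "force_fp16"

  global_options[ge::ir_option::PRECISION_MODE] = "allow_fp32_to_fp16";
  (void)CheckGlobalOptions(global_options);            // user value preserved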



+ 7
- 7
src/ge/opskernel_manager/ops_kernel_manager.cc

@@ -175,25 +175,25 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options,
} else if (flag == 1) {
enable_flag = true;
} else {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(),
iter->second.c_str());
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.",
plugin_name.c_str(), iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
} catch (std::invalid_argument &) {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.",
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.",
iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
} catch (std::out_of_range &) {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
} catch (...) {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(),
iter->second.c_str());
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.",
plugin_name.c_str(), iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
} else {
GELOGI("Not find key %s, set to default value false.", plugin_name.c_str());
GELOGI("Not find option_key %s, set to default value false.", plugin_name.c_str());
enable_flag = false;
}



+ 15
- 1
src/ge/session/omg.cc

@@ -618,11 +618,16 @@ Status ParseOutNodes(const string &out_nodes) {
if (!out_nodes.empty()) {
domi::GetContext().out_nodes_map.clear();
domi::GetContext().user_out_nodes.clear();
domi::GetContext().user_out_nodes_top_vec.clear();

vector<string> nodes_v = StringUtils::Split(out_nodes, ';');
for (const string &node : nodes_v) {
vector<string> key_value_v = StringUtils::Split(node, ':');
if (key_value_v.size() != 2) { // The size must be 2.
if (key_value_v.size() == 1 && domi::GetContext().type == domi::CAFFE) {
domi::GetContext().user_out_nodes_top_vec.push_back(node);
continue;
}
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"},
{"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""});
@@ -632,7 +637,13 @@ Status ParseOutNodes(const string &out_nodes) {
node.c_str());
return PARAM_INVALID;
}
auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]);
if (!domi::GetContext().user_out_nodes_top_vec.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{"--out_nodes", out_nodes, "is not all index or top_name"});
GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s",
out_nodes.c_str());
return PARAM_INVALID;
}
// stoi: The method may throw an exception: invalid_argument/out_of_range
if (!CheckDigitStr(key_value_v[1])) {
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
@@ -640,7 +651,10 @@ Status ParseOutNodes(const string &out_nodes) {
GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str());
return PARAM_INVALID;
}

auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]);
int32_t index = stoi(StringUtils::Trim(key_value_v[1]));
GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index);
if (iter != domi::GetContext().out_nodes_map.end()) {
iter->second.emplace_back(index);
} else {
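Taken together, the parser now accepts two mutually exclusive spellings of --out_nodes, with the bare top_name form reserved for Caffe models; illustrative inputs (node names hypothetical):

  --out_nodes "conv1:0;conv1:1;fc1:0"   # index form, any framework
  --out_nodes "prob;fc1"                # top_name form, Caffe only
  --out_nodes "prob;fc1:0"              # rejected as E10001: mixes the two forms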


+ 1
- 1
src/ge/single_op/single_op.cc

@@ -279,7 +279,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, con
if (op_task_->GetOpTaskType() == OP_TASK_TBE) {
return ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
} else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) {
return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_);
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
} else {
GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u",
op_task_->GetOpTaskType());


+ 4
- 1
src/ge/single_op/task/build_task_utils.cc

@@ -75,8 +75,11 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) {
// Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]]
// OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]]
ss << op_type << " IN[";
for (uint32_t idx = 0; idx < op_desc->GetInputsSize(); idx++) {
for (uint32_t idx = 0; idx < op_desc->GetAllInputsSize(); idx++) {
const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx);
if (input == nullptr) {
continue;
}
ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " ";
ss << TypeUtils::FormatToSerialString(input->GetFormat());
ss << VectorToString(input->GetShape().GetDims());


+ 82
- 81
src/ge/single_op/task/op_task.cc

@@ -34,6 +34,11 @@ constexpr int kLaunchRetryTimes = 1000;
constexpr int kSleepTime = 10;
constexpr uint64_t kReleaseFlag = 1;
constexpr int kCopyNum = 2;
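// Frees a device-side (HBM) buffer if it was ever allocated; a null pointer is
// tolerated, which lets the destructor below drop its repeated null checks.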
void FreeHbm(void *var) {
if (var) {
(void)rtFree(var);
}
}
} // namespace

Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) {
@@ -336,49 +341,23 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor
}

AiCpuTask::~AiCpuTask() {
if (args_ != nullptr) {
(void)rtFree(args_);
}

if (io_addr_ != nullptr) {
(void)rtFree(io_addr_);
}

if (dynamic_flag_ && workspace_addr_ != nullptr) {
(void)rtFree(workspace_addr_);
}
if (copy_workspace_buf_ != nullptr) {
(void)rtFree(copy_workspace_buf_);
}

if (copy_ioaddr_dev_ != nullptr) {
(void)rtFree(copy_ioaddr_dev_);
}

if (copy_input_release_flag_dev_ != nullptr) {
(void)rtFree(copy_input_release_flag_dev_);
}

if (copy_input_data_size_dev_ != nullptr) {
(void)rtFree(copy_input_data_size_dev_);
}

if (copy_input_src_dev_ != nullptr) {
(void)rtFree(copy_input_src_dev_);
}

if (copy_input_dst_dev_ != nullptr) {
(void)rtFree(copy_input_dst_dev_);
}

if (copy_task_args_buf_ != nullptr) {
(void)rtFree(copy_task_args_buf_);
}

FreeHbm(args_);
FreeHbm(io_addr_);
if (dynamic_flag_) {
FreeHbm(workspace_addr_);
}
FreeHbm(copy_workspace_buf_);
FreeHbm(copy_ioaddr_dev_);
FreeHbm(copy_input_release_flag_dev_);
FreeHbm(copy_input_data_size_dev_);
FreeHbm(copy_input_src_dev_);
FreeHbm(copy_input_dst_dev_);
FreeHbm(copy_task_args_buf_);
for (auto summary : output_summary_) {
if (summary != nullptr) {
(void)rtFree(summary);
}
FreeHbm(summary);
}
for (auto out_shape : out_shape_hbm_) {
FreeHbm(out_shape);
}
}

@@ -405,7 +384,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) {
return SUCCESS;
}

Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm) {
Status AiCpuTask::PrepareCopyInputs(vector<DataBuffer> &outputs) {
std::vector<uint64_t> copy_input_release_flag;
std::vector<uint64_t> copy_input_data_size;
std::vector<uint64_t> copy_input_src;
@@ -417,11 +396,15 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v
summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size);
auto output = outputs[i];
copy_input_release_flag.emplace_back(kReleaseFlag);
copy_input_data_size.emplace_back(summary.raw_data_size);
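// Presumably caps the copy at the caller-provided output buffer length when real
// data exists; zero-sized outputs keep the summary's (zero) size.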
if (summary.raw_data_size > 0) {
copy_input_data_size.emplace_back(output.length);
} else {
copy_input_data_size.emplace_back(summary.raw_data_size);
}
copy_input_src.emplace_back(summary.raw_data_ptr);
copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output));
copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output.data));

const auto &shape_buffer = out_shape_hbm[i];
const auto &shape_buffer = out_shape_hbm_[i];
copy_input_release_flag.emplace_back(kReleaseFlag);
copy_input_data_size.emplace_back(summary.shape_data_size);
copy_input_src.emplace_back(summary.shape_data_ptr);
@@ -441,7 +424,7 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v
return SUCCESS;
}

Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm) {
Status AiCpuTask::ReadResultSummaryAndPrepareMemory() {
for (size_t i = 0; i < num_outputs_; ++i) {
auto &result_summary = output_summary_host_[i];

@@ -449,36 +432,39 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_sha
sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST));
auto shape_data_size = result_summary.shape_data_size;
void *shape_buffer = nullptr;
GE_MAKE_GUARD_RTMEM(shape_buffer);
GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM));
out_shape_hbm.emplace_back(shape_buffer);
if (shape_data_size > 0) {
GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM));
}
out_shape_hbm_.emplace_back(shape_buffer);
}
return SUCCESS;
}

Status AiCpuTask::CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream) {
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm));
Status AiCpuTask::CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) {
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs));

GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream));
GE_CHK_RT_RET(rtStreamSynchronize(stream));
return SUCCESS;
}

Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm) {
Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
for (size_t i = 0; i < num_outputs_; ++i) {
const auto &result_summary = output_summary_host_[i];
std::vector<int64_t> shape_dims;
const auto &shape_hbm = out_shape_hbm[i];

uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
GE_CHECK_NOTNULL(shape_addr);
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size,
RT_MEMCPY_DEVICE_TO_HOST));

for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
shape_dims.emplace_back(shape_addr[dim_idx]);
GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]);
if (result_summary.shape_data_size > 0) {
const auto &shape_hbm = out_shape_hbm_[i];

uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
GE_CHECK_NOTNULL(shape_addr);
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm,
result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));

for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
shape_dims.emplace_back(shape_addr[dim_idx]);
GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]);
}
}

GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
@@ -487,7 +473,7 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, cons
return SUCCESS;
}

Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs,
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs,
rtStream_t stream) {
if (num_outputs_ == 0) {
GELOGI("Output num is 0, there is no need to update the output and size.");
@@ -496,13 +482,20 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output

GELOGI("Update shape and data by result summary begin.");

std::vector<void *> out_shape_hbm;
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm),
"Read ResultSummary and update output shape failed.");
for (auto out_shape : out_shape_hbm_) {
FreeHbm(out_shape);
}
out_shape_hbm_.clear();
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and update output shape failed.");

GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed.");

GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed.");
GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed.");

GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed.");
for (auto out_shape : out_shape_hbm_) {
FreeHbm(out_shape);
}
out_shape_hbm_.clear();

GELOGI("Update shape and data by result summary end.");
return SUCCESS;
@@ -603,10 +596,18 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
return SUCCESS;
}

Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs,
rtStream_t stream) {
Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers, rtStream_t stream) {
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc));
std::vector<void *> inputs;
std::vector<void *> outputs;
for (auto &buffer : input_buffers) {
inputs.emplace_back(buffer.data);
}
for (auto &buffer : output_buffers) {
outputs.emplace_back(buffer.data);
}
GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs));
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));
GE_CHK_RT_RET(rtStreamSynchronize(stream));
@@ -614,7 +615,7 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, cons
if (unknown_type_ == DEPEND_SHAPE_RANGE) {
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc));
} else if (unknown_type_ == DEPEND_COMPUTE) {
GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream));
GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, output_buffers, stream));
}

return SUCCESS;
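A hedged sketch of driving the new DataBuffer-based entry point (buffer setup is illustrative; only the .data and .length fields used by this hunk are assumed):

  ge::DataBuffer input;
  input.data = dev_in_ptr;      // device address, allocated elsewhere
  input.length = input_size;    // byte length of that allocation
  ge::DataBuffer output;
  output.data = dev_out_ptr;
  output.length = output_size;
  std::vector<ge::DataBuffer> input_buffers{input};
  std::vector<ge::DataBuffer> output_buffers{output};
  GE_CHK_STATUS_RET_NOLOG(
      task.LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream));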
@@ -658,9 +659,9 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) {
return SUCCESS;
}

Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs,
rtStream_t stream) {
Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers, rtStream_t stream) {
GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED,
"AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_);

@@ -669,11 +670,11 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, co
size_t arg_index = 0;
auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_);
GE_CHECK_NOTNULL(task_io_addr);
for (auto &input : inputs) {
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input);
for (auto &input : input_buffers) {
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input.data);
}
for (auto &output : outputs) {
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output);
for (auto &output : output_buffers) {
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output.data);
}

GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));


+ 16
- 11
src/ge/single_op/task/op_task.h

@@ -57,8 +57,9 @@ class OpTask {
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
const OpDescPtr &GetOpdesc() const { return op_desc_; }
Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream);
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) {
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
rtStream_t stream) {
return UNSUPPORTED;
}

@@ -138,8 +139,9 @@ class AiCpuTask : public AiCpuBaseTask {
OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; }
const void *GetIOAddr() const override;

Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override;
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
rtStream_t stream) override;
Status SetMemCopyTask(const domi::KernelExDef &kernel_def);

private:
@@ -147,14 +149,14 @@ class AiCpuTask : public AiCpuBaseTask {

// for copy task.
Status InitForSummaryAndCopy();
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs,
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs,
rtStream_t stream);
Status ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm);
Status ReadResultSummaryAndPrepareMemory();

Status CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream);
Status PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm);
Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream);
Status PrepareCopyInputs(vector<DataBuffer> &outputs);

Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm);
Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);

friend class AiCpuTaskBuilder;
void *workspace_addr_ = nullptr;
@@ -178,6 +180,8 @@ class AiCpuTask : public AiCpuBaseTask {
void *copy_input_data_size_dev_;
void *copy_input_src_dev_;
void *copy_input_dst_dev_;

vector<void *> out_shape_hbm_;
};

class AiCpuCCTask : public AiCpuBaseTask {
@@ -197,8 +201,9 @@ class AiCpuCCTask : public AiCpuBaseTask {
void SetIoAddr(void *io_addr);
size_t GetArgSize() const;

Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override;
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
rtStream_t stream) override;

private:
friend class AiCpuCCTaskBuilder;


+ 10
- 5
third_party/fwkacllib/inc/ops/aipp.h

@@ -25,16 +25,21 @@

namespace ge {
/**
*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC),
image normalization (by subtracting the mean value or multiplying a factor), image cropping
(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n
*@brief Performs AI pre-processing (AIPP) on images including color space
conversion (CSC),
image normalization (by subtracting the mean value or multiplying a factor),
image cropping
(by specifying the crop start and cropping the image to the size required by
the neural network), and much more. \n

*@par Inputs:
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer.
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the
data layer.
*@li params: Dynamic AIPP configuration parameters of type uint8. \n

*@par Attributes:
*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n
*aipp_config_path: A required string, specifying the path of the AIPP
configuration file. \n

*@par Outputs:
*features: The AIPP-processed output tensor of type float16 or uint8.


+ 105
- 50
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h

@@ -28,9 +28,10 @@ namespace ge {

*@par Inputs:
*Dynamic inputs, including:
* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
* @li x: A list of Tensor objects, each with same shape and type. The supported
types are:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

*@par Outputs:
*y: A Tensor. Has the same shape and type as the elements of "x". \n
@@ -121,7 +122,8 @@ REG_OP(MinimumGrad)

*@par Inputs:
*One input:
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,
*x:A Tensor. Must be one of the following types: bool, float16, float, int8,
int32, uint32, uint8,
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n

*@par Attributes:
@@ -385,7 +387,8 @@ REG_OP(Sign)

*@par Inputs:
*Two inputs, including: \n
*@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128
*@li x1: A Tensor. Must be one of the following types: float16, float32,
float64, int32, int64, complex64,complex128
*@li x2: A Tensor. Has the same type as "x1". \n

*@par Outputs:
@@ -484,12 +487,16 @@ REG_OP(Equal)

*@par Inputs:
*One input:\n
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n
*x: A Tensor. Must be one of the following types: float16, float32, double,
complex64, complex128. \n

*@par Attributes:
*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0".
*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0".
*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n
*@li base: An optional attribute of type float32, specifying the base gamma.
Defaults to "-1.0".
*@li scale: An optional attribute of type float32, specifying the scale alpha.
Defaults to "1.0".
*@li shift: An optional attribute of type float32, specifying the shift beta.
Defaults to "0.0". \n

*@par Outputs:
*y: A Tensor of the same type as "x". \n
@@ -510,7 +517,8 @@ REG_OP(Exp)

*@par Inputs:
*One input:
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n
*x: A Tensor. Must be one of the following types: float16, float32, double,
complex64, complex128. \n

*@par Outputs:
*y: A Tensor of the same type as "x". \n
@@ -527,7 +535,9 @@ REG_OP(Expm1)
*@brief: Computes the reciprocal of "x". \n

*@par Inputs:\n
*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n
*x: A Tensor. Must be one of the following types: float16, float32,
int32, int64, double,
complex64, complex128. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x". \n
@@ -749,7 +759,8 @@ REG_OP(Xlogy)

*@par Inputs:
*One input: \n
*x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128
*x: A Tensor. Must be one of the following types: float16, float32, float64,
int32, int64, complex64, complex128

*@par Outputs:
*y: A Tensor. Has the same type as "x". \n
@@ -790,7 +801,8 @@ REG_OP(Rsqrt)

*
*@par Inputs:
* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128.
* x: A tensor. Must be one of the following types: float16, float32, float64,
int32, int64, complex64, complex128.
*
*@par Outputs:
* y: A tensor. Has the same type as "x".
@@ -811,7 +823,8 @@ REG_OP(Asin)

*
*@par Inputs:
*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128.
*@li y: A tensor of type float16, float32, float64,
int32, int64, complex64, complex128.
*@li dy: A tensor of the same type as "y".
*
*@attention Constraints:
@@ -838,7 +851,8 @@ REG_OP(AsinGrad)

*
*@par Inputs:
* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128.
* x: A tensor. Must be one of the following types: float16, float32, float64,
int32, int64, complex64, complex128.
*
*@par Outputs:
* y: A tensor. Has the same type as "x".
@@ -883,7 +897,8 @@ REG_OP(AcosGrad)

*
*@par Inputs:
* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128.
* x: A tensor. Must be one of the following types: float16, float32, float64,
complex64, complex128.
*
*@attention Constraints:
* x Given an input tensor, the function computes inverse hyperbolic cosine of every element.\n
@@ -1160,7 +1175,8 @@ REG_OP(FusedMulAdd)

*
*@par Inputs:
*@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128.
*@li x1: A tensor. Must be one of the following types: float16, float32, float64,
uint8, int8, int16, int32, int64, complex64, complex128.
*@li x2: A tensor of the same type as "x1".
*
*@attention Constraints:
@@ -1189,7 +1205,8 @@ REG_OP(AddV2)
*@brief Updates "ref" by adding "value" to it. \n

*@par Inputs:
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8,
int16, int32, int64, uint8, uint16, uint32, uint64.
*@li value: A Tensor of the same type as "ref". \n

*@par Attributes:
@@ -1218,12 +1235,14 @@ REG_OP(AssignAdd)
*@brief Updates "ref" by assigning "value" to it. \n

*@par Inputs:
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16,
int32, int64, uint8, uint16, uint32, uint64.
*@li value: A Tensor of the same type as "ref". \n

*@par Attributes:
*@li validate_shape: An optional bool. Defaults to "true".
If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to.
If "true", the operation will validate that the shape of "value"
matches the shape of the Tensor being assigned to.
* If "false", "ref" will take on the shape of "value".
* This attribute is reserved.
*@li use_locking: An optional bool. Defaults to True.
@@ -1252,7 +1271,8 @@ REG_OP(Assign)

*
*@par Inputs:
*@li var: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64
*@li var: A tensor. Must be one of the following types: float32, float64,
int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64
*@li value: A tensor of the same type as "var".
*
*@par Attributes:
@@ -1644,7 +1664,9 @@ REG_OP(Atan2)

*
*@par Inputs:
*@li x1: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64
*@li x1: A tensor. Must be one of the following types: float32, float64, int32,
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128,
float16, uint32, uint64
*@li x2: A tensor of the same type as "x1".
*
*@par Attributes:
@@ -1666,16 +1688,18 @@ REG_OP(ApproximateEqual)

/**
*@brief Returns the element-wise sum of a list of tensors.\n
* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs
to be ready before beginning to sum.\n This can save memory if inputs are ready at different times,
since minimum temporary storage is proportional to the output size rather than the inputs size.
Returns a Tensor of same shape and type as the elements of inputs. \n
* AccumulateNV2 performs the same operation as AddN, but does not wait for all
of its inputs to be ready before beginning to sum.\n This can save memory if
inputs are ready at different times, \n since minimum temporary storage is
proportional to the output size rather than the inputs size.\n Returns a Tensor
of same shape and type as the elements of inputs. \n

*
*@par Inputs:
*Dynamic inputs, including:
* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64,
qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n
* x: A tensor. Must be one of the following types: float32, float64, int32,
uint8, int16, int8, complex64, int64, \n qint8, quint8, qint32, uint16,
complex128, float16, uint32, uint64.
*
*@par Outputs:
* y: A tensor. Has the same type as "x".
@@ -1731,7 +1755,8 @@ REG_OP(FakeQuantWithMinMaxArgs)

*@par Inputs:
*Two inputs, including: \n
*@li gradients: A Tensor of type float32. Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
*@li gradients: A Tensor of type float32. Backpropagated gradients
above the FakeQuantWithMinMaxArgs operation.
*@li x: A Tensor of type float32. Has the same type and format as "gradients".\n
* This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n

@@ -2210,9 +2235,13 @@ REG_OP(BiasAdd)

*@par Inputs:
*Two inputs, including:
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64.
*@li x: A Tensor. Must be one of the following types: float32, float64, int32,
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16,
complex128, float16, uint32, uint64.
*format is ND.
*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across.
*@li dimension: A Tensor. Must be one of the following types: int32, int64.
Must be in the range [-rank(input x), rank(input x)]. Describes which dimension
of the input Tensor to reduce across.
* The format is ND.
*@par Attributes:
*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n
@@ -2286,6 +2315,7 @@ REG_OP(ArgMaxV2)
.ATTR(dtype, Type, DT_INT64)
.OP_END_FACTORY_REG(ArgMaxV2)


/**
*@brief Returns the index with the largest value across axes of a tensor. \n

@@ -2298,15 +2328,16 @@ REG_OP(ArgMaxV2)
*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n

*@par Outputs:
*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n
*y: A multi-dimensional Tensor of type int32, specifying the index with the
largest value. The dimension is one less than that of "x". \n

*@attention Constraints:
*@li x: If there are multiple maximum values, the index of the first maximum value is used.
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the
dimension length of "x". \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator ArgMax.
*
* @par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
@@ -2929,9 +2960,13 @@ REG_OP(FusedMulAddN)
*@li bias: An ND tensor of type float16 or float32. \n

*@par Attributes:
*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1".
*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1".
*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n
*@li axis: An optional int32 used to compute the shape of bias input from the
online bottoms. Defaults to "1".
*@li num_axes: An optional int32 used to compute the shape of bias input from a
Caffe model trained offline. Defaults to "1".
*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe
model trained offline. If "false", bias is input from online bottoms. Defaults
to "true". \n

*@par Outputs:
*y: An ND tensor of type float16 or float32. \n
@@ -2939,13 +2974,25 @@ REG_OP(FusedMulAddN)
*@attention Constraints:\n
* Assume that the shape length of "x" is "n" and that of "bias" is "m".
*@li "axis" is within the range [-n, n-1]. num_axes >= -1.
*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n
* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis).
*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1.
*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n
* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n
* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m).
*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis
of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i <
n-axis).\n
* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must
have the same size (0 <= i < -axis).
*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with
shape length 1 and dimension size 1.
*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis +
num_axes" must be less than or equal to "n" and the ith axis of "bias" and the
(i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n
* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and
the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same
size (0 <= i < num_axes).
*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis
+ m" must be less than or equal to "n" and the ith axis of "bias" and the (i
+"axis")th axis of "x" must have the same size (0 <= i < m).\n
* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith
axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <=
i < m).
*@par Third-party framework compatibility
* Compatible with the Caffe operator Bias.
*/
@@ -3023,10 +3070,12 @@ REG_OP(FusedMulAddNL2loss)
*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n

*@par Attributes:
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with
"threshold", outputs "1" for inputs above threshold; "0" otherwise. \n

*@par Outputs:
*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
*@li y: A Tensor with any format. Has the same type as the input. Must be one
of the following types: float16, float32.
*@par Third-party framework compatibility
* Compatible with the Caffe operator Threshold.
*/
@@ -3044,11 +3093,16 @@ REG_OP(FusedMulAddNL2loss)
*@li x: A tensor. Must be one of the following types: float16, float32. \n

*@par Attributes:
*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000
*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output.
*@li axis: An optional int. Specifies the axis of the input tensor to cut. If
this parameter is not provided, the top k is found for each batch. Defaults to 10000
*@li out_max_val: An optional bool. Whether to output the maximum value. If it
is True, the maximum value and index are output, otherwise only the index is
output.
* Defaults to False
*@li topk: An optional int. It means the number of top tok in each axis (the value is greater than or equal to 1), and the value range must be in [1,x.shape(axis)].
* Defaults to 1
*@li topk: An optional int. It means the number of top k values in each axis (the
value is greater than or equal to 1), and the value range must be in [1,x.shape
(axis)].
* Defaults to 1 \n

*@par Outputs:
*@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output.
@@ -3168,7 +3222,8 @@ REG_OP(Axpy)
.OP_END_FACTORY_REG(Axpy)
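Each REG_OP block in this header generates an IR operator class; a hedged sketch of composing the Axpy registered just above with the public graph-builder API (wiring illustrative; Axpy's usual x1/x2 inputs and alpha attribute are assumed, with setters following the set_input_/set_attr_ generation scheme):

  ge::op::Data x("x");
  ge::op::Axpy axpy("axpy");
  axpy.set_input_x1(x).set_input_x2(x).set_attr_alpha(1.0f);
  ge::Graph graph("axpy_graph");
  graph.SetInputs({x}).SetOutputs({axpy});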

/**
*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. \n
*@brief Creates a criterion that measures the loss given input tensors x1, x2
and a Tensor label y with values 1 or -1. \n

*@par Inputs:
*@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32.


+ 10
- 10
third_party/fwkacllib/inc/ops/functional_ops.h

@@ -36,7 +36,7 @@ namespace ge {
* if "cond" is a numerical scalar, non-zero means True and zero means False;
* if "cond" is a string scalar, non-empty means True and empty means False;
* if "cond" is not a scalar, non-empty means True and empty means False.
*@li input: The input tensors . It's a dynamic input. \n
*@li input: The input tensors . \n

*@par Graphs:
*@li then_branch: A subgraph takes 'input' and returns a list of tensors,
@@ -69,7 +69,7 @@ REG_OP(_If)
* if "cond" is a numerical scalar, non-zero means True and zero means False;
* if "cond" is a string scalar, non-empty means True and empty means False;
* if "cond" is not a scalar, non-empty means True and empty means False.
*@li input: The input tensors . It's a dynamic input. \n
*@li input: The input tensors . \n

*@par Graphs:
*@li then_branch: A subgraph takes 'input' and returns a list of tensors,
@@ -102,7 +102,7 @@ REG_OP(StatelessIf)
* if "cond" is a numerical scalar, non-zero means True and zero means False;
* if "cond" is a string scalar, non-empty means True and empty means False;
* if "cond" is not a scalar, non-empty means True and empty means False.
*@li input: The input tensors . It's a dynamic input. \n
*@li input: The input tensors . \n

*@par Graphs:
*@li then_branch: A subgraph takes 'input' and returns a list of tensors,
@@ -129,7 +129,7 @@ REG_OP(If)

*@par Inputs:
*@li branch_index: A int32 scalar which determines the selected subgraph.
*@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n
*@li input: The input tensors, which will be passed to the subgraph . \n

*@par Graphs:
*branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors,
@@ -152,7 +152,7 @@ REG_OP(Case)
*@brief Cyclically executes the "body" subgraph until the return tensor of the "cond" subgraph means False . \n

*@par Inputs:
*input: The input tensors . It's a dynamic input. \n
*input: The input tensors . \n

*@par Graphs:
*@li cond: A subgraph takes 'input' and returns a tensor.
@@ -183,7 +183,7 @@ REG_OP(_While)
*@brief Cyclically executes the "body" subgraph until the return tensor of the "cond" subgraph means False . \n

*@par Inputs:
*input: The input tensors . It's a dynamic input. \n
*input: The input tensors . \n

*@par Graphs:
*@li cond: A subgraph takes 'input' and returns a tensor.
@@ -215,7 +215,7 @@ REG_OP(While)
*@brief Cyclically executes the "body" subgraph until the return tensor of the "cond" subgraph means False . \n

*@par Inputs:
*input: The input tensors . It's a dynamic input. \n
*input: The input tensors . \n

*@par Graphs:
*@li cond: A subgraph takes 'input' and returns a tensor.
@@ -250,7 +250,7 @@ REG_OP(StatelessWhile)
*@li start: A int32 scalar. The lower bound.
*@li limit: A int32 scalar. The upper bound.
*@li delta: A int32 scalar. The step size.
*@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n
*@li input: The input tensors, which will be passed to "body" . \n

*@par Graphs:
*body: A subgraph takes 'input' and returns another list of tensors . \n
@@ -274,7 +274,7 @@ REG_OP(For)
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n

*@par Inputs:
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n
*args: The input tensors, which will be passed to "f" . \n

*@par Graphs:
*f: A subgraph takes 'args' and returns another list of tensors . \n
@@ -303,7 +303,7 @@ REG_OP(PartitionedCall)
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n

*@par Inputs:
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n
*args: The input tensors, which will be passed to "f" . \n

*@par Graphs:
*f: A subgraph takes 'args' and returns another list of tensors . \n
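The "cond" convention repeated throughout this header, restated as a hedged predicate sketch (all helper names hypothetical):

  bool CondMeansTrue(const Tensor &cond) {
    if (IsNumericalScalar(cond)) return ScalarValue(cond) != 0;      // non-zero => True
    if (IsStringScalar(cond))    return !StringValue(cond).empty();  // non-empty => True
    return !IsEmptyTensor(cond);                                     // non-scalar: non-empty => True
  }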


+ 4
- 3
third_party/fwkacllib/inc/ops/image_ops.h

@@ -160,8 +160,10 @@ REG_OP(CropAndResize)
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n

*@par Attributes:
*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size.
*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable.
*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches
are resized to this size.
*@li extrapolation_value: An optional float. Defaults to 0. Value used for
extrapolation, when applicable.
*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n

*@par Outputs:
@@ -172,7 +174,6 @@ REG_OP(CropAndResize)

*@par Third-party framework compatibility
*Compatible with tensorflow CropAndResize operator.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead.
*/


+ 107
- 53
third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h

@@ -87,39 +87,58 @@ REG_OP(L2NormalizeGrad)

*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW
for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format
NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format
NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean used for inference.
Must be "None" if the
operation is used for training.
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format
NHWC or NCHW. Must be
5D if input "x" is with format NC1HWC0. Specifies the variance used for
inference. Must be "None"
if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
*@li epsilon: An optional float32, specifying the small value added to variance
to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to
"NHWC".
*@li is_training: An optional bool, specifying if the operation is used for
training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x",
with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with
format NHWC or NCHW. Must be 5D
if input "x" is with format NC1HWC0. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with
format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input
"x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for
gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input
"x" is with format NHWC or NCHW.
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x"
for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
*@li If the operation is used for inference and outputs "reserve_space_1" and
"reserve_space_2" are available,
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has
the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
root instruction . \n

*@par Third-party framework compatibility
*@li Compatible with the TensorFlow operator fused_batch_norm.
@@ -166,13 +185,17 @@ is used for training or inference. Defaults to "True" . \n
*@li y: A 4D Tensor of type float16 or float32, for the normalized "x".
*@li batch_mean: A 1D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 1D Tensor of type float32, for the variance of "x".
*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation.
*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n
*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for
gradient computation.
*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x"
for gradient computation . \n

*@attention Constraints:
*@li If the operation is used for inference, then output "reserve_space_1"
has the same value as "mean" and output "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
has the same value as "mean" and output "reserve_space_2" has the same value as
"variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
root instruction . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator fused_batch_norm_v2.
@@ -198,23 +221,34 @@ REG_OP(BatchNormExt2)

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format
NHWC, NCHW, or NC1HWC0, for the gradient.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW,
or NC1HWC0.
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or
NC1HWC0.
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC,
NCHW, or NC1HWC0. It is an output of BatchNorm.
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC,
NCHW, or NC1HWC0. It is an output of BatchNorm . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number
added to the variance of "x".
*@li data_format: An optional string. Defaults to "NHWC".
*@li is_training: An optional bool. Defaults to "true". Specifies whether the operation is for training (default) or inference . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW,
or NC1HWC0, for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or
NC1HWC0, for the offset of "scale".
*@li offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or
NC1HWC0, for the offset of "offset".
*@li reserve_space_4: A Tensor of type float32, with format NHWC, NCHW, or
NC1HWC0. Pass "None" to skip this output.
*@li reserve_space_5: A Tensor of type float32, with format NHWC, NCHW, or
NC1HWC0. Pass "None" to skip this output . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm . \n
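When "is_training" is false, the gradients take a simple closed form; a hedged
scalar sketch under that assumption (our own illustration, with
"reserve_space_1" as the mean and "reserve_space_2" as the variance):

#include <cmath>

// x_backprop per element; scale_backprop and offset_backprop are reductions:
//   scale_backprop  = sum(y_backprop * (x - mean) * rsqrt(variance + epsilon))
//   offset_backprop = sum(y_backprop)
float XBackpropElem(float y_backprop, float scale, float variance,
                    float epsilon) {
  return y_backprop * scale / std::sqrt(variance + epsilon);
}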
@@ -244,21 +278,28 @@ REG_OP(BatchNormGrad)

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or
NCHW, for the gradient.
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW.
*@li scale: A 4D Tensor of type float32, with format NHWC or NCHW.
*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It
is an output of BatchNormExt2.
*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It
is an output of BatchNormExt2 . \n

*@par Attributes:
*@li epsilon: A required float32. A small float number added to the variance of "x".
*@li data_format: A required string for the format.
*@li is_training: A required bool, specifying whether the operation is for
training (true) or inference (false) . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW,
for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for
the offset of "scale".
*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for
the offset of "offset".
*@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW.
*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n

@@ -290,14 +331,18 @@ REG_OP(BatchNormGradExt2)
*@brief Performs batch normalization . \n

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW
for 4D or NC1HWC0 for 5D.
*@li mean: A Tensor of type float32 or float16. Must be 1D. Specifies the mean
used for inference.
*@li variance: A Tensor of type float32 or float16. Must be 1D. Specifies the
variance used for inference.
*@li momentum: A Tensor, representing the scale factor of the mean and variance.
*@li scale: An optional tensor of type float16 or float32. Not used.
*@li offset: An optional tensor of type float16 or float32. Not used.
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance
to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: An optional bool, specifying the inference mode. Can
only be "True".
*@li mode: An optional input. Not used.
*@par Outputs:
@@ -315,16 +360,20 @@ REG_OP(BNInference)
.ATTR(use_global_stats, Bool,true)
.ATTR(mode, Int,1)
.OP_END_FACTORY_REG(BNInference)

/**
*@brief aicpu batch normalization host . \n

*@par Inputs:

*@li mean: A Tensor of type float32 or float16. Must be 1D. Specifies the mean
used for inference.
*@li variance: A Tensor of type float32 or float16. Must be 1D. Specifies the
variance used for inference.
*@li momentum: An optional float, the scale factor of the mean and variance.
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance
to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: An optional bool, specifying the inference mode. Can
only be "True".
*@li mode: An optional attr. Not used.
*@par Outputs:
@@ -348,14 +397,19 @@ REG_OP(BnHost)
*@brief Performs batch normalization . \n

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW
for 4D or NC1HWC0 for 5D.
*@li mean: A Tensor of type float32 or float16. Must be 1D. Specifies the mean
used for inference.
*@li variance: A Tensor of type float32 or float16. Must be 1D. Specifies the
variance used for inference.
*@li scale: An optional tensor of type float16 or float32. Not used.
*@li offset: An optional tensor of type float16 or float32. Not used.
*@par Attributes:
*@li momentum: An optional float32, representing the scale factor of the mean
and variance.
*@li epsilon: An optional float32, specifying the small value added to variance
to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: An optional bool, specifying the inference mode. Can
only be "True".
*@li mode: An optional attr. Not used.
*@par Outputs:


+ 60
- 13
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -310,9 +310,6 @@ REG_OP(DepthwiseConv2DBackpropInputD)
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator DepthwiseConv2D.
* @li Compatible with the Caffe operator DepthwiseConv2D.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DepthwiseConv2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
@@ -460,9 +457,9 @@ REG_OP(Conv2DBackpropInputD)
*@par Attributes:
* Six attributes:
* @li strides: A tuple or list of 2 integers. The stride of the sliding window
* for H/W dimension, defaults to [1,1].
* @li pads: A tuple or list of 4 integers. The [top, bottom, left, right]
* padding on the feature map, defaults to [0,0,0,0].
* @li dilations: A tuple or list of 4 integers. The dilation factor for each
* dimension of input, defaults to [1,1,1,1].
* @li groups: Number of blocked connections from input channels to
@@ -482,8 +479,8 @@ REG_OP(Deconvolution)
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
.ATTR(strides, ListInt, {1, 1})
.ATTR(pads, ListInt, {0, 0, 0, 0})
.ATTR(dilations, ListInt, {1, 1, 1, 1})
.ATTR(groups, Int, 1)
.ATTR(data_format, String, "NCHW")
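The change above relaxes "strides" and "pads" from required attributes to
optional ones with defaults. Condensed, the two registration forms of the DSL
differ as follows (macros as used throughout these headers):

.REQUIRED_ATTR(strides, ListInt)   // caller must always supply a value
.ATTR(strides, ListInt, {1, 1})    // optional; {1, 1} is used when omitted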
@@ -593,7 +590,7 @@ REG_OP(Conv2DBackpropFilterD)

*@li bias: An optional 1D tensor. Shape is [out_channels].
*@li offset_w: An optional 1D tensor for quantized convolution. Shape is
* [out_channels]. Not supported.
*\n
*\n
* Note that there is a strict data type mapping between the input and output
@@ -622,7 +619,8 @@ REG_OP(Conv2DBackpropFilterD)
* and right padding.
* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate
* to use for dilated convolution. Has the same dimension order and value as
* "strides". Defaults to [1, 1, 1, 1].
* "strides". Dilation > 1 is not supported for quantized convolution. Defaults
* to [1, 1, 1, 1].
* @li groups: Optional. An integer of type int32, for the number of blocked
* connections from input channels to output channels. Input channels and output
* channels must both be divisible by "groups". "x" in_channels must be equal to
@@ -704,13 +702,62 @@ REG_OP(Conv2D)
.ATTR(offset_x, Int, 0)
.OP_END_FACTORY_REG(Conv2D)

/**
*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors.
*@par Inputs:
* @li x: A 4D tensor of input images.
* @li filter_compress: A 4D tensor of compressed filters.
* @li compress_index: A 1D Tensor dtype of int8.
* @li bias: An optional 1D tensor.
* @li offset_w: An optional 1D tensor for quantized convolution. Reserved.
*
* The input and output tensor attributes are listed as follows:
* @verbatim
|Tensor | x | filter_compress | bias | offset_w | y
-----------|---------|---------|---------|----------|--------
|Data Type | float16 | float16 | float16 | _ | float16
| |---------|---------|---------|----------|--------
| | float32 | float32 | float32 | _ | float32
| |---------|---------|---------|----------|--------
| | int8 | int8 | int32 | int8 | int32
-----------|---------|---------|---------|----------|--------
|Format | NCHW | NCHW | ND | ND | NCHW
| | NHWC | NHWC | | | NHWC
| | | HWCN | | |
@endverbatim
* It should be noted that the data types must correspond to each other row by
* row in the table above, but the formats do not need to match . \n

*@par Attributes:
* @li strides: A list of 4 integers. Specifying the strides of the
* convolution along the height and width. The dimension order is determined
* by the data format of "x". By default the N and C dimensions are set to 1.
* @li pads: A list of 4 integers. Specifying the top, bottom, left and right
* padding.
* @li dilations: A list of 4 integers. Specifying the dilation rate to use
* for dilated convolution. Has the same dimension order and value as "strides".
* @li groups: Number of blocked connections from input channels to output
* channels. Input channels and output channels must both be divisible by
* "groups".Type is int32.
* @li offset_x: An optional integer for quantized convolution. Type is int32.
* Defaults to "0".
* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the
* data format of the input and output images. Type is string.
* Defaults to "NHWC". Reserved . \n

*@par Outputs:
* @li y: A 4D Tensor of output images . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED.
*/
REG_OP(Conv2DCompress)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
.INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
.INPUT(compress_index, TensorType({DT_INT8}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(dilations, ListInt, {1, 1, 1, 1})


+ 15
- 10
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -158,18 +158,25 @@ REG_OP(Iou)
*@par Inputs:
* Three inputs, including:
*@li ydiff: A 5HD gradient input of type float32.
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N"
indicates the number of ROIs,
the value "5" indicates the indexes of images where the ROIs are located, "x0",
"x1", "y0", and "y1".
*@li rois_n: An optional input, specifying the number of valid ROIs. This
parameter is reserved . \n

*@par Attributes:
*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
*@li pooled_width: A required attribute of type int, specifying the W dimension.
*@li pooled_height: A required attribute of type int, specifying the H dimension.
*@li spatial_scale: A required attribute of type float, specifying the scaling
ratio of "features" to the original image.
*@li sample_num: An optional attribute of type int, specifying the horizontal
and vertical
sampling frequency of each output. If this attribute is set to "0", the
sampling frequency is
equal to the rounded up value of "rois", which is a floating point number.
Defaults to "2" . \n

*@par Outputs:
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
@@ -876,9 +883,7 @@ REG_OP(YoloV3DetectionOutputV2)
A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li imginfo: A float16, describing the image information including the required image height and width
and the actual image height and width.
*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(weight-1)]...[0,1,2...(weight-1)]], consisting of height groups of [0, 1, 2...(weight-1)], is formed for the three Yolo outputs, respectively (see the sketch below) . \n

*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
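A sketch of the index tensors described above (our own illustration; it assumes
"weight" denotes the width dimension):

#include <vector>

// windex: every row is [0, 1, ..., weight-1]; hindex: row h is filled with h.
void MakeYoloIndices(int height, int weight,
                     std::vector<std::vector<int>> &windex,
                     std::vector<std::vector<int>> &hindex) {
  windex.assign(height, std::vector<int>(weight));
  hindex.assign(height, std::vector<int>(weight));
  for (int h = 0; h < height; ++h) {
    for (int w = 0; w < weight; ++w) {
      windex[h][w] = w;
      hindex[h][w] = h;
    }
  }
}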



+ 14
- 22
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -896,29 +896,7 @@ REG_OP(InstanceNormV2)
.ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(InstanceNormV2)

/**
*@brief Performs instance normalization for inference.

*@par Inputs:\n
* Six inputs, including: (NC1HWC0 supported)
*@li x: A Tensor of type float16 or float32.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the mean.
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.

*@par Outputs:\n
*y: A Tensor of type float16 or float32 for the normalized "x".
*batch_mean: A Tensor of type float32 for the result mean.
*batch_variance: A Tensor of type float32 for the result variance.

*@attention Constraints:
*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use INInferV2 instead.
*/
REG_OP(INInferV2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT}))
@@ -931,6 +909,20 @@ REG_OP(INInferV2D)
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INInferV2D)

/**
*@brief Performs the InHost part of instance normalization for inference . \n

*@par Inputs:\n
* One input, including: (NC1HWC0 supported)
* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.

*@par Attributes:
* epsilon: An optional float32, specifying the small value added to
variance to avoid dividing by zero. Defaults to "0.00001" . \n

*@par Outputs:\n
* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
*/
REG_OP(InHost)
.INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
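Judging only from the input, attribute, and output names above, the host-side
step plausibly takes the element-wise square root of the stabilized variance; a
scalar sketch under that unconfirmed assumption:

#include <cmath>

// Assumed semantics: variance_sqrt = sqrt(variance + epsilon).
float InHostElem(float variance, float epsilon = 0.00001f) {
  return std::sqrt(variance + epsilon);
}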


+ 0
- 3
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -128,9 +128,6 @@ REG_OP(AvgPool)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3D.
*
* @par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(AvgPool3D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))


+ 0
- 39
third_party/fwkacllib/inc/ops/nn_training_ops.h View File

@@ -111,9 +111,6 @@ REG_OP(ApplyAdaMax)
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyAdaMax.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead.
*/
REG_OP(ApplyAdaMaxD)
.INPUT(var, TensorType::NumberType())
@@ -352,9 +349,6 @@ REG_OP(ApplyMomentum)
* accum: A mutable tensor. Has the same type as input "accum".
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyMomentum.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead.
*/

REG_OP(ApplyMomentumD)
@@ -681,9 +675,6 @@ REG_OP(ApplyPowerSign)
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyPowerSign.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead.
*/
REG_OP(ApplyPowerSignD)
.INPUT(var, TensorType::NumberType())
@@ -804,9 +795,6 @@ REG_OP(ApplyAddSign)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ApplyAddSign.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead.
*/
REG_OP(ApplyAddSignD)
.INPUT(var, TensorType::NumberType())
@@ -928,9 +916,6 @@ REG_OP(ApplyCenteredRMSProp)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyCenteredRMSPropD.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead.
*/
REG_OP(ApplyCenteredRMSPropD)
.INPUT(var, TensorType::NumberType())
@@ -1049,9 +1034,6 @@ REG_OP(ApplyAdagrad)
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyAdagrad.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead.
*/
REG_OP(ApplyAdagradD)
.INPUT(var, TensorType::NumberType())
@@ -1236,9 +1218,6 @@ REG_OP(ApplyAdagradDA)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyAdagradDA.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead.
*/
REG_OP(ApplyAdagradDAD)
.INPUT(var, TensorType::NumberType())
@@ -1496,9 +1475,6 @@ REG_OP(ApplyProximalAdagrad)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyProximalAdagradD.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead.
*/
REG_OP(ApplyProximalAdagradD)
.INPUT(var, TensorType::NumberType())
@@ -1592,9 +1568,6 @@ REG_OP(SparseApplyProximalAdagrad)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator SparseApplyProximalAdagrad.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead.
*/
REG_OP(SparseApplyProximalAdagradD)
.INPUT(var, TensorType::NumberType())
@@ -1681,9 +1654,6 @@ REG_OP(ApplyFtrl)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyFtrl.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead.
*/
REG_OP(ApplyFtrlD)
.INPUT(var, TensorType::NumberType())
@@ -1775,9 +1745,6 @@ REG_OP(ApplyFtrlV2)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyFtrlV2.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead.
*/
REG_OP(ApplyFtrlV2D)
.INPUT(var, TensorType::NumberType())
@@ -1890,9 +1857,6 @@ REG_OP(ApplyAdam)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ApplyAdam.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead.
*/
REG_OP(ApplyAdamD)
.INPUT(var, TensorType::NumberType())
@@ -1981,9 +1945,6 @@ REG_OP(ApplyAdadelta)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ApplyAdadelta.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead.
*/
REG_OP(ApplyAdadeltaD)
.INPUT(var, TensorType::NumberType())


+ 0
- 12
third_party/fwkacllib/inc/ops/pad_ops.h View File

@@ -65,9 +65,6 @@ REG_OP(Fill)
*
*@par Outputs:
* y: A tensor. Has the same type as "value".
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead.
*/
REG_OP(FillD)
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
@@ -125,9 +122,6 @@ REG_OP(BroadcastTo)
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator BroadcastTo.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead.
*/
REG_OP(BroadcastToD)
.INPUT(x, TensorType::BasicType())
@@ -175,9 +169,6 @@ REG_OP(Pad)

*@par Third-party framework compatibility:
* Compatible with TensorFlow operator Pad.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
*/
REG_OP(PadD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
@@ -269,9 +260,6 @@ REG_OP(PadV3D)
*@see Diag()
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Diag.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead.
*/
REG_OP(DiagD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))


+ 2
- 2
third_party/fwkacllib/inc/ops/ragged_conversion_ops.h View File

@@ -30,7 +30,7 @@ namespace ge {
*@par Inputs:
*Two inputs, including:
*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type
in: int32, int64. The row_splits for the RaggedTensor.
*@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor
Must be one of the following types: bool, int8, int16, uint16, int32,
int64, double, float, float16 . \n
@@ -66,7 +66,7 @@ REG_OP(RaggedTensorToSparse)
*@li values: A 1D tensor representing the values of the ragged tensor.
*@li default_value: A `Tensor`. Must have the same type as `values`.
*@li row_partition_tensors: A list of at least 1 `Tensor` objects with the same
type in: `int64`, `int32` .\n

*@par Attributes:
*@li num_row_partition_tensors: Number of row partition tensors.


+ 0
- 3
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -374,9 +374,6 @@ REG_OP(DropOutGenMask)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator lin_space.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead.
*/
REG_OP(LinSpaceD)
.INPUT(assist, TensorType({DT_FLOAT}))


+ 0
- 24
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -353,9 +353,6 @@ REG_OP(ReduceSum)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Sum.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead.
*/
REG_OP(ReduceSumD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -381,9 +378,6 @@ REG_OP(ReduceSumD)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ReduceAll.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead.
*/
REG_OP(ReduceAllD)
.INPUT(x, TensorType({DT_BOOL}))
@@ -459,9 +453,6 @@ REG_OP(ReduceProd)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ReduceProd.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead.
*/
REG_OP(ReduceProdD)
.INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16}))
@@ -516,9 +507,6 @@ REG_OP(ReduceMean)

*@par Third-party framework compatibility:
* Compatible with the TensorFlow operator ReduceMean.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead.
*/
REG_OP(ReduceMeanD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -573,9 +561,6 @@ REG_OP(ReduceMax)

*@par Third-party framework compatibility
* Compatible with TensorFlow operator Max.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead.
*/
REG_OP(ReduceMaxD)
.INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8,
@@ -630,9 +615,6 @@ REG_OP(ReduceMin)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator reduce_min.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead.
*/
REG_OP(ReduceMinD)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
@@ -699,9 +681,6 @@ REG_OP(ReduceAny)
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator reduce_any.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead.
*/
REG_OP(ReduceAnyD)
.INPUT(x, TensorType({DT_BOOL}))
@@ -787,9 +766,6 @@ REG_OP(EuclideanNorm)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator EuclideanNorm.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead.
*/
REG_OP(EuclideanNormD)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16}))


+ 5
- 1
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -92,6 +92,7 @@ REG_OP(DynamicLSTM)
.OUTPUT(output_h, TensorType({DT_FLOAT32}))
.OP_END_FACTORY_REG(DynamicLSTM)


/**
*@brief: DynamicRNNGrad calculation.
*@par Inputs:
@@ -126,7 +127,7 @@ REG_OP(DynamicLSTM)
*@li keep_prob: A float identifying the keep prob in the op. Default to 1.
*@li cell_clip: A float identifying the cell clip in the op. Default to -1.
*@li num_proj: An integer identifying the num projection in the op. Default to 0.
*@li time_major: A bool identifying the time major in the op. Default to true
(see the layout sketch below).
*@li activation: A string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
*@li forget_bias: A float identifying the forget bias in the op. Default to 0.
*@li is_training: A bool identifying whether the op is in training mode. Default to true.
@@ -138,6 +139,9 @@ REG_OP(DynamicLSTM)
*@li dx: A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev: A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dc_prev: A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dwci: A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dwcf: A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dwco: A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*/
REG_OP(DynamicRNNGrad)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
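The "time_major" attribute above selects between the two common RNN input
layouts; an illustrative indexing helper (dimension names are ours):

#include <cstddef>

// time_major = true : x is laid out as [num_steps, batch_size, input_size].
// time_major = false: x is laid out as [batch_size, num_steps, input_size].
size_t XOffset(bool time_major, size_t t, size_t b, size_t d,
               size_t num_steps, size_t batch_size, size_t input_size) {
  return time_major ? (t * batch_size + b) * input_size + d
                    : (b * num_steps + t) * input_size + d;
}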


+ 1
- 1
third_party/fwkacllib/inc/ops/save_ops.h View File

@@ -28,7 +28,7 @@ namespace ge {
/**
*@brief Mark which tensors need to be saved to the ckpt file.
*@par Inputs:
*tensors: A list of input tensors.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/


+ 6
- 7
third_party/fwkacllib/inc/ops/sdca_ops.h View File

@@ -35,16 +35,16 @@ namespace ge {
*rate . \n

*@par Inputs:
*@li sparse_example_indices: a list of vectors which contain example indices.
*@li sparse_feature_indices: a list of vectors which contain feature indices.
*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group.
*@li dense_features: a list of matrices which contains the dense feature values.
*@li example_weights: a vector which contains the weight associated with each example.
*@li example_labels: a vector which contains the label/target associated with each example.
*@li sparse_indices: a list of vectors where each value is the indices which has
*corresponding weights in sparse_weights. This field may be omitted for the dense approach.
*@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group.
*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.
*@li example_state_data: a list of vectors containing the example state data.
*@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
*@li l1: Symmetric l1 regularization strength.
@@ -61,7 +61,6 @@ namespace ge {
*@par Third-party framework compatibility
* Compatible with tensorflow SdcaOptimizerV2 operator.
*/

REG_OP(SdcaOptimizerV2)
.DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64}))
.DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64}))
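The list-valued inputs described above map onto the DYNAMIC_INPUT form of the
registration DSL, as the fragment shows; contrasted with a plain single-tensor
input (the .INPUT line is illustrative, not necessarily from this op):

.INPUT(example_weights, TensorType({DT_FLOAT}))                 // exactly one tensor
.DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64}))  // variable-length list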


+ 4
- 51
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -79,9 +79,6 @@ REG_OP(Range)

*@see Range()
*@since V100R001C33
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead.
*/
REG_OP(RangeD)
.INPUT(x, TensorType({DT_FLOAT,DT_INT32}))
@@ -186,7 +183,8 @@ REG_OP(GatherNd)
* uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16,
* uint16, complex128, float16, uint32, uint64, complex64, complex128.
* @li indices: A Tensor of type int32 or int64.
* @li axis: A Tensor of type int32 or int64.
* Must be in the range [-rank(input_tensor), rank(input_tensor)) (see the
* sketch below) . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n
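A negative "axis" counts from the end, which is why the documented range is
[-rank(input_tensor), rank(input_tensor)); a one-line normalization sketch
(the helper name is ours):

#include <cstdint>

// Maps an axis in [-rank, rank) onto [0, rank); out-of-range values remain the
// caller's responsibility to reject.
int64_t NormalizeAxis(int64_t axis, int64_t rank) {
  return axis < 0 ? axis + rank : axis;
}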
@@ -225,9 +223,6 @@ REG_OP(GatherV2)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator GatherV2.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead.
*/
REG_OP(GatherV2D)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8,
@@ -330,9 +325,6 @@ REG_OP(StridedSlice)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator StridedSlice.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead.
*/
REG_OP(StridedSliceD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8,
@@ -388,9 +380,6 @@ REG_OP(StridedSliceD)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator StridedSliceGradD.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead.
*/
REG_OP(StridedSliceGradD)
.INPUT(dy, TensorType::BasicType())
@@ -502,9 +491,6 @@ REG_OP(UnsortedSegmentSum)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator UnsortedSegmentSum.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead.
*/
REG_OP(UnsortedSegmentSumD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8}))
@@ -729,9 +715,6 @@ REG_OP(OneHot)

*@par Third-party framework compatibility:
* Compatible with the TensorFlow operator OneHot.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead.
*/
REG_OP(OneHotD)
.INPUT(x, TensorType({DT_UINT8, DT_INT32}))
@@ -807,7 +790,7 @@ REG_OP(SliceD)
* @li assist_seq: A 1D tensor of type float16.
* with size of 2N, which "N" is the last dimension.
* The first N numbers is indices, and the next N numbers is deviation of casting
* float16 to int32 . \n

* @par Attributes:
* @li k: A required int that is at least 0, specifying the number of top elements
@@ -816,7 +799,7 @@ REG_OP(SliceD)
* If true, the resulting "k" elements will be sorted by the values in descending
* order.
* @li dim: An optional int. Defaults to -1. For reserved use.
* @li largest: An optional bool. Defaults to true. For reserved use.

* @par Outputs:
* @li values: A Tensor, specifying the sorted data. Has the same type as "input".
@@ -1270,9 +1253,6 @@ REG_OP(InplaceUpdate)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator InplaceUpdate.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead.
*/
REG_OP(InplaceUpdateD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
@@ -1325,9 +1305,6 @@ REG_OP(InplaceAdd)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator InplaceAdd.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead.
*/
REG_OP(InplaceAddD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
@@ -1379,9 +1356,6 @@ REG_OP(InplaceSub)

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator InplaceSub.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead.
*/
REG_OP(InplaceSubD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
@@ -1433,9 +1407,6 @@ REG_OP(ScatterNonAliasingAdd)
* @par Outputs:
* y: A Tensor of type RealNumberType . \n

* @attention Constraints:
* @li segment_ids must be a non-negative tensor.

* @see UnsortedSegmentSum(), UnsortedSegmentProd(),

* @par Third-party framework compatibility
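For intuition about the "segment_ids" constraint repeated in these hunks: the
ids are used directly as output indices, hence they must be non-negative. A
minimal host-side sketch of the 1-D segment sum (our own illustration):

#include <vector>

// y[i] accumulates every x[j] whose segment_ids[j] == i; the ids must
// therefore be valid, non-negative indices into y.
std::vector<float> UnsortedSegmentSum(const std::vector<float> &x,
                                      const std::vector<int> &segment_ids,
                                      int num_segments) {
  std::vector<float> y(num_segments, 0.0f);
  for (size_t j = 0; j < x.size(); ++j) {
    y[segment_ids[j]] += x[j];
  }
  return y;
}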
@@ -1463,9 +1434,6 @@ REG_OP(UnsortedSegmentMin)
* @par Outputs:
* y: A Tensor.Must have the same type as input "x" . \n

* @attention Constraints:
* @li segment_ids must be a non-negative tensor.

* @see UnsortedSegmentProdD(), UnsortedSegmentSumD(),
*
* @par Restrictions:
@@ -1491,9 +1459,6 @@ REG_OP(UnsortedSegmentMinD)
* @par Outputs:
* y: A Tensor of type RealNumberType . \n

* @attention Constraints:
* @li segment_ids must be a non-negative tensor.

* @see UnsortedSegmentSum(), UnsortedSegmentProd(),

* @par Third-party framework compatibility
@@ -1521,9 +1486,6 @@ REG_OP(UnsortedSegmentMax)
* @par Outputs:
* y: A Tensor.Must have the same type as input "x" . \n

* @attention Constraints:
* @li segment_ids must be a non-negative tensor.

* @see UnsortedSegmentProdD(),
*
* @par Restrictions:
@@ -1548,9 +1510,6 @@ REG_OP(UnsortedSegmentMaxD)
* @par Outputs:
* y: A Tensor of type NumberType . \n

* @attention Constraints:
* @li segment_ids must be non-negative tensor.

* @see UnsortedSegmentSum(), UnsortedSegmentMin(),

* @par Third-party framework compatibility
@@ -1582,9 +1541,6 @@ REG_OP(UnsortedSegmentProd)
* @li segment_ids must be a non-negative tensor.

* @see UnsortedSegmentMinD()
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentProd instead.
*/
REG_OP(UnsortedSegmentProdD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16}))
@@ -1900,9 +1856,6 @@ REG_OP(CumulativeLogsumexp)
*y: A Tensor. Has the same type as "x".
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Cumsum.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use CumulativeLogsumexp instead.
*/
REG_OP(CumulativeLogsumexpD)
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))


+ 4
- 11
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -75,9 +75,6 @@ REG_OP(Split)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Split.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead.
*/
REG_OP(SplitD)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
@@ -144,9 +141,6 @@ Under the Caffe framework, slice_point is converted through the cut points into the number of sizes in "size_splits".
Under the Caffe framework, "size_splits" or "axis" is transformed into "split_dim". Only one of them can take effect.
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SplitV.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead.
*/
REG_OP(SplitVD)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
@@ -164,8 +158,7 @@ REG_OP(SplitVD)
* Two inputs, including:
* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64, float16, float32.
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape.
* @li shape: A Tensor of the same type as "x".
* The final shape of the result. Should be equal to the shapes of any input
* but with the number of input values in the first dimension . \n
@@ -314,7 +307,7 @@ REG_OP(Concat)

*@par Inputs:
* x: A list of N Tensors. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . \n

*@par Attributes:
*@li axis: An optional int. Default value is 0.
@@ -340,7 +333,7 @@ REG_OP(Pack)
*@par Inputs:
*Two inputs, including:
* @li concat_dim: A Tensor of type int32.
* @li x: A list of 1D Tensor objects of type int32 . \n

*@par Attributes:
*N: A required int . \n
@@ -364,7 +357,7 @@ REG_OP(ConcatOffset)
*@par Inputs:
*Two inputs, including:
* @li concat_dim: A Tensor of type int32.
* @li x: A list of 1D Tensor objects of type int32 . \n

*@par Attributes:
*@li concat_dim: A required int. Must be within the rank of input "x".


+ 0
- 21
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -235,12 +235,8 @@ REG_OP(BatchToSpaceND)
*@par Outputs:
*y: A Tensor with format NC1HWC0. Has the same type as input "x".


*@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchToSpaceND.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead.
*/
REG_OP(BatchToSpaceNDD)
.INPUT(x, TensorType::BasicType())
@@ -287,9 +283,6 @@ REG_OP(SpaceToBatchND)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SpaceToBatchND.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead.
*/
REG_OP(SpaceToBatchNDD)
.INPUT(x, TensorType::BasicType())
@@ -411,9 +404,6 @@ REG_OP(BatchToSpace)

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchToSpace.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead.
*/
REG_OP(BatchToSpaceD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
@@ -467,9 +457,6 @@ REG_OP(SpaceToBatch)
*y: A Tensor. Has the same type as input "x".
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SpaceToBatch.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead.
*/
REG_OP(SpaceToBatchD)
.INPUT(x, TensorType::BasicType())
@@ -598,9 +585,6 @@ REG_OP(ExtractVolumePatches)

*@par Outputs:
*y: A Tensor. Has the same type as "x".
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead.
*/
REG_OP(ConfusionTransposeD)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
@@ -664,11 +648,6 @@ REG_OP(FlattenV2)
.ATTR(end_axis, Int, -1)
.OP_END_FACTORY_REG(FlattenV2)

REG_OP(DeConvTrans)
.INPUT(x, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_INT8}))
.OP_END_FACTORY_REG(DeConvTrans)

/**
*@brief Compress large weight to small one. Usually inserted before Conv2d.
*


+ 3
- 2
third_party/fwkacllib/inc/runtime/base.h View File

@@ -19,7 +19,7 @@

#include <stdint.h>

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -580,7 +580,8 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
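Condensed, the guard pattern applied across these runtime headers reads as
follows (declarations elided); presumably builds defining COMPILE_OMG_PACKAGE
want the declarations with C++ linkage instead of C linkage:

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif
/* ... C-compatible runtime declarations ... */
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif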



+ 2
- 2
third_party/fwkacllib/inc/runtime/config.h View File

@@ -19,7 +19,7 @@

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -185,7 +185,7 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
*/
RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/context.h View File

@@ -19,7 +19,7 @@

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -149,7 +149,7 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint
*/
RTS_API rtError_t rtGetGroupCount(uint32_t *count);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -19,7 +19,7 @@

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -339,7 +339,7 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/dvfsprofile.h View File

@@ -19,7 +19,7 @@

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile();
*/
RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/event.h View File

@@ -19,7 +19,7 @@

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -229,7 +229,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs
*/
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -20,7 +20,7 @@
#include "base.h"
#include "stream.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -529,7 +529,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream);
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -24,7 +24,7 @@
#include "config.h"
#include "stream.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -491,7 +491,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
*/
RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -19,7 +19,7 @@

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -430,7 +430,7 @@ rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint
*/
RTS_API rtError_t rtDebugUnRegister(rtModel_t model);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 2
- 2
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -20,7 +20,7 @@
#include "base.h"
#include "event.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

@@ -188,7 +188,7 @@ RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream);
*/
RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr,
uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif



+ 14
- 8
third_party/fwkacllib/inc/toolchain/adx_datadump_server.h View File

@@ -1,12 +1,18 @@
/**
* @file adx_datadump_server.h
*
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ADX_DATADUMP_SERVER_H
#define ADX_DATADUMP_SERVER_H

