From bf316a3eeb362dd38287d53f4807f9d04134e2dc Mon Sep 17 00:00:00 2001
From: wuweikang <wuweikang@huawei.com>
Date: Mon, 28 Sep 2020 21:59:48 +0800
Subject: [PATCH 1/7] sync-from-trunk-to-blue-zone-0928

---
 CMakeLists.txt                                     |   1 +
 inc/external/ge/ge_prof.h                          |  61 +++++++--
 inc/framework/common/ge_types.h                    |   2 +
 inc/framework/executor/ge_executor.h               |   2 +
 inc/graph/debug/ge_attr_define.h                   |   4 +
 inc/graph/runtime_inference_context.h              |   3 +
 src/common/graph/ge_attr_define.cc                 |   4 +
 src/common/graph/ge_tensor.cc                      |   2 +-
 src/common/graph/graph.mk                          |   2 +-
 src/common/graph/runtime_inference_context.cc      |  33 +++++
 src/common/graph/utils/node_utils.cc               |  73 ++++++++--
 src/ge/CMakeLists.txt                              |   3 +
 src/ge/client/ge_prof.cc                           |  67 ++++------
 src/ge/common/dump/dump_server.cc                  |  21 +++
 src/ge/common/profiling/profiling_manager.cc       |  35 +++--
 src/ge/common/util.cc                              |   6 +-
 src/ge/executor/ge_executor.cc                     |  16 +++
 src/ge/ge_inference.mk                             |   3 +-
 src/ge/ge_runner.mk                                |   6 +-
 src/ge/graph/build/memory/block_mem_assigner.cc    |  17 ++-
 src/ge/graph/build/memory/block_mem_assigner.h     |   3 +-
 src/ge/graph/build/memory/graph_mem_assigner.cc    |  50 ++++++-
 src/ge/graph/execute/graph_execute.cc              |  10 ++
 src/ge/graph/execute/graph_execute.h               |   2 +
 .../graph/load/new_model_manager/davinci_model.cc  |  58 +++++++-
 .../graph/load/new_model_manager/davinci_model.h   |  10 +-
 .../graph/load/new_model_manager/model_manager.cc  |   8 ++
 .../graph/load/new_model_manager/model_manager.h   |   2 +
 src/ge/graph/manager/graph_manager.cc              |   7 +
 src/ge/graph/optimize/mem_rw_conflict_optimize.cc  |   2 +-
 src/ge/graph/passes/subgraph_pass.cc               |   3 +
 src/ge/graph/preprocess/insert_op/ge_aipp_op.cc    |  56 ++++++--
 src/ge/graph/preprocess/insert_op/ge_aipp_op.h     |   2 +
 .../preprocess/insert_op/util_insert_aipp_op.cc    |  25 ++--
 src/ge/graph/preprocess/multi_batch_copy_graph.cc  |  61 +++++++--
 src/ge/graph/preprocess/multi_batch_copy_graph.h   |   2 +
 .../node_executor/aicpu/aicpu_node_executor.cc     |  11 +-
 src/ge/session/inner_session.cc                    |  29 ++++
 src/ge/session/inner_session.h                     |   4 +
 src/ge/single_op/single_op.cc                      |   2 +-
 third_party/fwkacllib/inc/ops/aipp.h               |   2 +
 .../fwkacllib/inc/ops/elewise_calculation_ops.h    |  55 +++++++-
 third_party/fwkacllib/inc/ops/internal_ops.h       |   3 +
 third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 148 ++++++++++++---------
 third_party/fwkacllib/inc/ops/nn_detect_ops.h      |  18 +++
 third_party/fwkacllib/inc/ops/nn_norm_ops.h        |  17 +++
 third_party/fwkacllib/inc/ops/nn_training_ops.h    |   8 ++
 third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h |  18 +++
 third_party/fwkacllib/inc/ops/pad_ops.h            |  70 ++++++++++
 third_party/fwkacllib/inc/ops/random_ops.h         |   2 +-
 third_party/fwkacllib/inc/ops/reduce_ops.h         |   6 +-
 .../fwkacllib/inc/ops/resource_variable_ops.h      |  54 ++++++++
 third_party/fwkacllib/inc/ops/rnn.h                |  69 ++++++----
 third_party/fwkacllib/inc/ops/selection_ops.h      |   6 +
 third_party/fwkacllib/inc/ops/transformation_ops.h |   3 +
 .../fwkacllib/inc/ops/warp_perspective_ops.h       |   3 +
 third_party/fwkacllib/inc/runtime/mem.h            |   9 +-
 third_party/fwkacllib/inc/tdt/status.h             |   4 +
 .../fwkacllib/inc/toolchain/adx_datadump_server.h  |  36 +++++
 59 files changed, 981 insertions(+), 258 deletions(-)
 create mode 100644 src/ge/common/dump/dump_server.cc
 create mode 100644 third_party/fwkacllib/inc/toolchain/adx_datadump_server.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86f473e8..266ea024 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,6 +91,7 @@ else()
     find_library(register libregister.so ${ASCEND_RUNTIME_DIR})
     find_library(resource libresource.so ${ASCEND_RUNTIME_DIR})
     find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
+    find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
 endif()
 
 # add compile flags
diff --git a/inc/external/ge/ge_prof.h b/inc/external/ge/ge_prof.h
index dbd87966..658cea76 100644
--- a/inc/external/ge/ge_prof.h
+++ b/inc/external/ge/ge_prof.h
@@ -25,22 +25,11 @@
 
 namespace ge {
 enum ProfDataTypeConfig {
-  kProfAcl = 0x0001,
   kProfTaskTime = 0x0002,
   kProfAiCoreMetrics = 0x0004,
   kProfAicpuTrace = 0x0008,
-  kProfModelExecute = 0x0010,
-  kProfRuntimeApi = 0x0020,
-  kProfRuntimeTrace = 0x0040,
-  kProfScheduleTimeline = 0x0080,
-  kProfScheduleTrace = 0x0100,
-  kProfAiVectorCoreMetrics = 0x0200,
-  kProfSubtaskTime = 0x0400,
   kProfTrainingTrace = 0x0800,
-  kProfHcclTrace = 0x1000,
-  kProfDataProcess = 0x2000,
-  kProfTaskTrace = 0x3842,
-  kProfModelLoad = 0x8000000000000000
+  kProfHcclTrace = 0x1000
 };
 
 enum ProfilingAicoreMetrics {
@@ -49,20 +38,64 @@ enum ProfilingAicoreMetrics {
   kAicoreSynchronization = 2,
   kAicoreMemory = 3,
   kAicoreInternalMemory = 4,
-  kAicoreStall = 5,
-  kAicoreMetricsAll = 255  // only for op_trace
+  kAicoreStall = 5
 };
 
 typedef struct ProfAicoreEvents ProfAicoreEvents;
 typedef struct aclgrphProfConfig aclgrphProfConfig;
 
+///
+/// @ingroup AscendCL
+/// @brief Initialize the profiling and set profiling configuration path
+/// @param [in] profiler_path: configuration path of profiling
+/// @param [in] length: length of configuration path
+/// @return Status result of function
+///
 Status aclgrphProfInit(const char *profiler_path, uint32_t length);
+
+///
+/// @ingroup AscendCL
+/// @brief Finalize profiling
+/// @return Status result of function
+///
 Status aclgrphProfFinalize();
+
+///
+/// @ingroup AscendCL
+/// @brief Create data of type aclgrphProfConfig
+/// @param [in] deviceid_list: device id list
+/// @param [in] device_nums: device numbers
+/// @param [in] aicore_metrics: type of aicore metrics
+/// @param [in] aicore_events: pointer to aicore events (reserved; only NULL is supported now)
+/// @param [in] data_type_config: modules need profiling
+/// @return Pointer to the created aclgrphProfConfig
+///
 aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums,
                                            ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events,
                                            uint64_t data_type_config);
+
+///
+/// @ingroup AscendCL
+/// @brief  Destroy data of type aclgrphProfConfig
+/// @param [in] profiler_config: config of profiling
+/// @return Status result of function
+///
 Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config);
+
+///
+/// @ingroup AscendCL
+/// @brief Start profiling of the modules which are configured by profiler config
+/// @param [in] profiler_config: config of profiling
+/// @return Status result of function
+///
 Status aclgrphProfStart(aclgrphProfConfig *profiler_config);
+
+///
+/// @ingroup AscendCL
+/// @brief Stop profiling of the modules which are configured by profiler config
+/// @param [in] profiler_config: config of profiling
+/// @return Status result of function
+///
 Status aclgrphProfStop(aclgrphProfConfig *profiler_config);
 }  // namespace ge
 
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 9a4fd1f9..6033521c 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -48,6 +48,8 @@ enum OpEngineType {
   ENGINE_AIVECTOR = 4  // not support
 };
 
+enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE };
+
 const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
 const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";
 
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index 00846112..6e82bb96 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -163,6 +163,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
   ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
   ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
 
+  ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
+
   ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
                                          std::vector<ge::TensorDesc> &output_desc);
 
diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h
index a32907bb..7538ba6a 100644
--- a/inc/graph/debug/ge_attr_define.h
+++ b/inc/graph/debug/ge_attr_define.h
@@ -141,8 +141,12 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS;
 
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS;
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS;
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_RELATED_AIPP_MODE;
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_AIPP_DATA_NAME_MAP;
 
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED;
+
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME;
 
diff --git a/inc/graph/runtime_inference_context.h b/inc/graph/runtime_inference_context.h
index 6c6c82e7..f0b38546 100644
--- a/inc/graph/runtime_inference_context.h
+++ b/inc/graph/runtime_inference_context.h
@@ -23,6 +23,7 @@
 #include <vector>
 #include "external/graph/ge_error_codes.h"
 #include "external/graph/tensor.h"
+#include "ge_attr_value.h"
 
 namespace ge {
 class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext {
@@ -32,10 +33,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext {
   static void DestroyContext(const std::string &context_id);
 
   graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor);
+  graphStatus GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor);
   graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor);
 
  private:
   std::map<int64_t, std::vector<Tensor>> tensors_;
+  std::map<int64_t, std::vector<GeTensorPtr>> ge_tensors_;
   std::mutex mu_;
 
   static std::map<std::string, std::unique_ptr<RuntimeInferenceContext>> contexts_;
diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc
index 4834c73b..cd504812 100644
--- a/src/common/graph/ge_attr_define.cc
+++ b/src/common/graph/ge_attr_define.cc
@@ -122,8 +122,12 @@ const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs";
 const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs";
 
 const std::string ATTR_NAME_INPUT_DIMS = "input_dims";
+const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS = "_dynamic_aipp_input_dims";
+const std::string ATTR_DATA_RELATED_AIPP_MODE = "_data_related_aipp_mode";
+const std::string ATTR_DATA_AIPP_DATA_NAME_MAP = "_data_aipp_data_name_map";
 
 const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added";
+
 const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id";
 const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name";
 
diff --git a/src/common/graph/ge_tensor.cc b/src/common/graph/ge_tensor.cc
index 196b8569..65881435 100644
--- a/src/common/graph/ge_tensor.cc
+++ b/src/common/graph/ge_tensor.cc
@@ -431,7 +431,7 @@ graphStatus GeTensorDesc::GetShapeRange(std::vector<std::pair<int64_t, int64_t>>
       return GRAPH_FAILED;
     }
     std::pair<int64_t, int64_t> pair({ele[0], ele[1]});
-    range.push_back(pair);
+    range.emplace_back(pair);
   }
 
   return GRAPH_SUCCESS;
diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk
index 9e9ffa3a..4ea84919 100644
--- a/src/common/graph/graph.mk
+++ b/src/common/graph/graph.mk
@@ -33,7 +33,6 @@ COMMON_LOCAL_SRC_FILES := \
     ./utils/tuning_utils.cc \
     ./utils/graph_utils.cc \
     ./utils/ge_ir_utils.cc \
-    ./utils/node_utils.cc \
     ./utils/op_desc_utils.cc \
     ./utils/type_utils.cc \
     ./utils/tensor_utils.cc \
@@ -44,6 +43,7 @@ COMMON_LOCAL_SRC_FILES := \
     option/ge_context.cc \
     option/ge_local_context.cc \
     ./runtime_inference_context.cc \
+    ./utils/node_utils.cc \
 
 COMMON_LOCAL_C_INCLUDES := \
     proto/om.proto \
diff --git a/src/common/graph/runtime_inference_context.cc b/src/common/graph/runtime_inference_context.cc
index 95068481..361d893c 100644
--- a/src/common/graph/runtime_inference_context.cc
+++ b/src/common/graph/runtime_inference_context.cc
@@ -15,6 +15,7 @@
  */
 
 #include "graph/runtime_inference_context.h"
+#include "graph/utils/tensor_adapter.h"
 #include <cstdint>
 #include "framework/common/debug/ge_log.h"
 
@@ -67,6 +68,14 @@ graphStatus RuntimeInferenceContext::SetTensor(int64_t node_id, int output_id, T
 
   GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id);
   output_tensors[output_id] = std::move(tensor);
+
+  auto &output_ge_tensors = ge_tensors_[node_id];
+  if (static_cast<uint32_t>(output_id) >= output_ge_tensors.size()) {
+    output_ge_tensors.resize(output_id + 1);
+  }
+  // 'tensor' was handed to std::move above and may be empty now, so wrap the stored element instead.
+  GELOGD("Set ge tensor for node_id = %ld, output_id = %d", node_id, output_id);
+  output_ge_tensors[output_id] = TensorAdapter::AsGeTensorPtr(output_tensors[output_id]);
   return GRAPH_SUCCESS;
 }
 
@@ -93,4 +102,28 @@ graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, T
   tensor = output_tensors[output_id];
   return GRAPH_SUCCESS;
 }
+
+/// Fetch the GeTensor stored by SetTensor for output 'output_id' of node 'node_id'.
+graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor) {
+  if (output_id < 0) {
+    GELOGE(GRAPH_PARAM_INVALID, "Invalid output index: %d", output_id);
+    return GRAPH_PARAM_INVALID;
+  }
+
+  std::lock_guard<std::mutex> lk(mu_);
+  const auto node_entry = ge_tensors_.find(node_id);
+  if (node_entry == ge_tensors_.end()) {
+    GELOGE(INTERNAL_ERROR, "Node not register. Id = %ld", node_id);
+    return INTERNAL_ERROR;
+  }
+
+  const auto &cached_outputs = node_entry->second;
+  if (cached_outputs.size() <= static_cast<uint32_t>(output_id)) {
+    GELOGE(GRAPH_FAILED, "Node output is not registered. node_id = %ld, output index = %d", node_id, output_id);
+    return GRAPH_FAILED;
+  }
+  GELOGD("Get ge tensor for node_id = %ld, output_id = %d", node_id, output_id);
+  tensor = cached_outputs[output_id];
+  return GRAPH_SUCCESS;
+}
 }  // namespace ge
\ No newline at end of file
diff --git a/src/common/graph/utils/node_utils.cc b/src/common/graph/utils/node_utils.cc
index 72981d10..684e37ac 100644
--- a/src/common/graph/utils/node_utils.cc
+++ b/src/common/graph/utils/node_utils.cc
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "utils/node_utils.h"
-#include "utils/op_desc_utils.h"
+#include "graph/utils/node_utils.h"
+#include "graph/utils/op_desc_utils.h"
 #include "graph/utils/graph_utils.h"
 #include "debug/ge_op_types.h"
 #include "debug/ge_util.h"
@@ -23,8 +23,13 @@
 #include "graph/anchor.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/types.h"
-#include "utils/tensor_utils.h"
-#include "utils/type_utils.h"
+#include "external/graph/operator.h"
+#include "graph/ge_context.h"
+#include "graph/runtime_inference_context.h"
+#include "graph/utils/op_desc_utils.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/utils/tensor_adapter.h"
+#include "graph/utils/type_utils.h"
 
 namespace ge {
 std::map<NodePtr, std::vector<uint32_t>> NodeUtils::map_send_info_{};
@@ -575,6 +580,58 @@ graphStatus NodeUtils::GetNodeUnknownShapeStatus(const Node &node, bool &is_unkn
   return GRAPH_SUCCESS;
 }
 
+/// Pointer overload: validates node_ptr with GE_CHECK_NOTNULL, then forwards to the reference overload.
+graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) {
+  GE_CHECK_NOTNULL(node_ptr);
+  return NodeUtils::GetInputConstData(*node_ptr, dst_name, ge_tensor);
+}
+
+/// Get input dst_name's tensor: weights of a Const peer (or one found via GetParentInput), else the runtime context.
+graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) {
+  auto op_desc = node.GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  auto in_data_anchor = node.GetInDataAnchor(op_desc->GetInputIndexByName(dst_name));
+  GE_CHECK_NOTNULL(in_data_anchor);
+  auto out_data_anchor = in_data_anchor->GetPeerOutAnchor();
+  GE_CHECK_NOTNULL(out_data_anchor);
+  auto peer_node = out_data_anchor->GetOwnerNode();
+  GE_CHECK_NOTNULL(peer_node);
+  auto peer_op_desc = peer_node->GetOpDesc();
+  GE_CHECK_NOTNULL(peer_op_desc);
+  auto peer_op_type = peer_op_desc->GetType();
+  if (peer_op_type == CONSTANTOP || peer_op_type == CONSTANT) {
+    if (!AttrUtils::MutableTensor(peer_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) {
+      GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str());
+      return GRAPH_FAILED;
+    }
+    return GRAPH_SUCCESS;
+  } else if (peer_op_type == DATA) {
+    auto parent_node = NodeUtils::GetParentInput(peer_node);
+    while ((parent_node != nullptr) && (parent_node->GetType() == DATA)) {
+      parent_node = NodeUtils::GetParentInput(parent_node);
+    }
+    if ((parent_node != nullptr) && ((parent_node->GetType() == CONSTANT) || (parent_node->GetType() == CONSTANTOP))) {
+      if (!AttrUtils::MutableTensor(parent_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) {
+        GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str());
+        return GRAPH_FAILED;
+      }
+      return GRAPH_SUCCESS;
+    }
+  }
+  // Fall back to the runtime inference context; a null tensor from it is treated as "not found".
+  auto session_id = std::to_string(GetContext().SessionId());
+  RuntimeInferenceContext *runtime_infer_ctx = nullptr;
+  if (RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx) == GRAPH_SUCCESS) {
+    GELOGD("To get constant from runtime inference context. session_id = %s", session_id.c_str());
+    auto ret = runtime_infer_ctx->GetTensor(peer_op_desc->GetId(), out_data_anchor->GetIdx(), ge_tensor);
+    if ((ret == GRAPH_SUCCESS) && (ge_tensor != nullptr)) {
+      return GRAPH_SUCCESS;
+    }
+  }
+  GELOGW("node[%s]'s input[%s]'s peer node is not const", node.GetName().c_str(), dst_name.c_str());
+  return GRAPH_FAILED;
+}
+
 std::string NodeUtils::GetNodeType(const Node &node) {
   if (node.GetType() != FRAMEWORKOP) {
     return node.GetType();
@@ -587,14 +644,6 @@ std::string NodeUtils::GetNodeType(const Node &node) {
 
 std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? "" : GetNodeType(*node); }
 
-graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) {
-  return GRAPH_SUCCESS;
-}
-
-graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) {
-  return GRAPH_SUCCESS;
-}
-
 ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) {
   auto op_desc = node.GetOpDesc();
   if (op_desc == nullptr) {
diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt
index 8c20b336..db00d8a1 100755
--- a/src/ge/CMakeLists.txt
+++ b/src/ge/CMakeLists.txt
@@ -51,6 +51,7 @@ include_directories(${GE_SOURCE_DIR}/inc/graph)
 include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib)
 include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc)
 include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce)
+include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain)
 include_directories(${CMAKE_BINARY_DIR})
 include_directories(${CMAKE_BINARY_DIR}/proto/ge)
 
@@ -227,6 +228,7 @@ target_link_libraries(ge_runner
         ${runtime}
         ${resouce}
         ${ascend_hal}
+        ${adump_server}
         rt
         dl)
 
@@ -237,6 +239,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
         "common/dump/dump_properties.cc"
         "common/dump/dump_manager.cc"
         "common/dump/dump_op.cc"
+        "common/dump/dump_server.cc"
         "common/formats/format_transfers/*.cc"
         "common/formats/formats.cc"
         "common/formats/utils/formats_trans_utils.cc"
diff --git a/src/ge/client/ge_prof.cc b/src/ge/client/ge_prof.cc
index d4407852..ad9cc9eb 100644
--- a/src/ge/client/ge_prof.cc
+++ b/src/ge/client/ge_prof.cc
@@ -29,13 +29,14 @@ using std::vector;
 
 namespace {
 const uint32_t kMaxDeviceNum = 64;
-const std::string PROFILING_INIT = "prof_init";
-const std::string PROFILING_FINALIZE = "prof_finalize";
-const std::string PROFILING_START = "prof_start";
-const std::string PROFILING_STOP = "prof_stop";
-const std::string DEVICES_NUMS = "devNums";
-const std::string DEVICE_ID_LIST = "devIdList";
-const std::string AICORE_METRICS = "aicoreMetrics";
+const uint32_t kDeviceListIndex = 3;
+const std::string kProfilingInit = "prof_init";
+const std::string kProfilingFinalize = "prof_finalize";
+const std::string kProfilingStart = "prof_start";
+const std::string kProfilingStop = "prof_stop";
+const std::string kDeviceNums = "devNums";
+const std::string kDeviceIdList = "devIdList";
+const std::string kAicoreMetrics = "aicoreMetrics";
 
 const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToString = {
   {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"},
@@ -43,25 +44,7 @@ const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToStri
   {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"},
   {ge::kAicoreMemory, "AICORE_MEMORY"},
   {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"},
-  {ge::kAicoreStall, "AICORE_STALL"},
-  {ge::kAicoreMetricsAll, "AICORE_METRICS_ALL"}};
-
-const std::map<uint64_t, uint64_t> kDataTypeConfigMapping = {{ge::kProfAcl, PROF_ACL_API},
-                                                             {ge::kProfTaskTime, PROF_TASK_TIME},
-                                                             {ge::kProfAiCoreMetrics, PROF_AICORE_METRICS},
-                                                             {ge::kProfAicpuTrace, PROF_AICPU_TRACE},
-                                                             {ge::kProfModelExecute, PROF_MODEL_EXECUTE},
-                                                             {ge::kProfRuntimeApi, PROF_RUNTIME_API},
-                                                             {ge::kProfRuntimeTrace, PROF_RUNTIME_TRACE},
-                                                             {ge::kProfScheduleTimeline, PROF_SCHEDULE_TIMELINE},
-                                                             {ge::kProfScheduleTrace, PROF_SCHEDULE_TRACE},
-                                                             {ge::kProfAiVectorCoreMetrics, PROF_AIVECTORCORE_METRICS},
-                                                             {ge::kProfSubtaskTime, PROF_SUBTASK_TIME},
-                                                             {ge::kProfTrainingTrace, PROF_TRAINING_TRACE},
-                                                             {ge::kProfHcclTrace, PROF_HCCL_TRACE},
-                                                             {ge::kProfDataProcess, PROF_DATA_PROCESS},
-                                                             {ge::kProfTaskTrace, PROF_TASK_TRACE},
-                                                             {ge::kProfModelLoad, PROF_MODEL_LOAD}};
+  {ge::kAicoreStall, "AICORE_STALL"}};
 }  // namespace
 
 static bool g_graph_prof_init_ = false;
@@ -107,11 +90,11 @@ Status aclgrphProfInit(const char *profiler_path, uint32_t length) {
   GraphLoader graph_loader;
   Command command;
   command.cmd_params.clear();
-  command.cmd_type = PROFILING_INIT;
-  command.module_index = kProfModelLoad | kProfTrainingTrace;
+  command.cmd_type = kProfilingInit;
+  command.module_index = PROF_MODEL_LOAD;
   ret = graph_loader.CommandHandle(command);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Handle profiling command %s failed, config = %s", PROFILING_INIT.c_str(), profiler_path);
+    GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path);
     return ret;
   }
   if (!g_graph_prof_init_) {
@@ -143,10 +126,10 @@ Status aclgrphProfFinalize() {
   GraphLoader graph_loader;
   Command command;
   command.cmd_params.clear();
-  command.cmd_type = PROFILING_FINALIZE;
+  command.cmd_type = kProfilingFinalize;
   Status ret = graph_loader.CommandHandle(command);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Handle profiling command %s failed.", PROFILING_FINALIZE.c_str());
+    GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str());
     return ret;
   }
 
@@ -164,9 +147,9 @@ Status aclgrphProfFinalize() {
 
 bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<string> &prof_config_params) {
   prof_config_params.clear();
-  prof_config_params.emplace_back(DEVICES_NUMS);
+  prof_config_params.emplace_back(kDeviceNums);
   prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums));
-  prof_config_params.emplace_back(DEVICE_ID_LIST);
+  prof_config_params.emplace_back(kDeviceIdList);
   std::string devID = "";
   if (profiler_config->config.devNums == 0) {
     GELOGW("The device num is invalid.");
@@ -180,7 +163,7 @@ bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<str
   }
 
   prof_config_params.push_back(devID);
-  prof_config_params.push_back(AICORE_METRICS);
+  prof_config_params.push_back(kAicoreMetrics);
   auto iter =
     kProfAicoreMetricsToString.find(static_cast<ProfilingAicoreMetrics>(profiler_config->config.aicoreMetrics));
   if (iter == kProfAicoreMetricsToString.end()) {
@@ -250,13 +233,7 @@ aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t dev
   }
 
   config->config.aicoreMetrics = static_cast<ProfAicoreMetrics>(aicore_metrics);
-  uint64_t data_type = 0;
-  for (auto &iter : kDataTypeConfigMapping) {
-    if ((iter.first & data_type_config) == iter.first) {
-      data_type |= iter.second;
-    }
-  }
-  config->config.dataTypeConfig = data_type;
+  config->config.dataTypeConfig = data_type_config;
   GELOGI("Successfully create prof config.");
   return config;
 }
@@ -309,9 +286,11 @@ Status aclgrphProfStart(aclgrphProfConfig *profiler_config) {
   GraphLoader graph_loader;
   Command command;
   command.cmd_params.clear();
-  command.cmd_type = PROFILING_START;
+  command.cmd_type = kProfilingStart;
   command.cmd_params = prof_params;
   command.module_index = profiler_config->config.dataTypeConfig;
+  GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[1].c_str(),
+         prof_params[kDeviceListIndex].c_str(), command.module_index);
   ret = graph_loader.CommandHandle(command);
   if (ret != SUCCESS) {
     GELOGE(ret, "Handle profiling command failed");
@@ -360,9 +339,11 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) {
   GraphLoader graph_loader;
   Command command;
   command.cmd_params.clear();
-  command.cmd_type = PROFILING_STOP;
+  command.cmd_type = kProfilingStop;
   command.cmd_params = prof_params;
   command.module_index = profiler_config->config.dataTypeConfig;
+  GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[1].c_str(),
+         prof_params[kDeviceListIndex].c_str(), command.module_index);
   ret = graph_loader.CommandHandle(command);
   if (ret != SUCCESS) {
     GELOGE(ret, "Handle profiling command failed");
diff --git a/src/ge/common/dump/dump_server.cc b/src/ge/common/dump/dump_server.cc
new file mode 100644
index 00000000..1f95dc3a
--- /dev/null
+++ b/src/ge/common/dump/dump_server.cc
@@ -0,0 +1,21 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "adx_datadump_server.h"
+
+int AdxDataDumpServerUnInit() { return 0; }  // Stub: performs no work and always returns 0.
+
+int AdxDataDumpServerInit() { return 0; }  // Stub: performs no work and always returns 0.
diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc
index d301f647..d02f7e8f 100644
--- a/src/ge/common/profiling/profiling_manager.cc
+++ b/src/ge/common/profiling/profiling_manager.cc
@@ -55,19 +55,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
                                                                                    bool convert_2_phy_device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   vector<int32_t>().swap(device_id_);
-  // profiling need phy device id
-  if (!convert_2_phy_device_id) {
-    device_id_.push_back(options.device_id);
-  } else {
-    uint32_t phy_device_id = 0;
-    rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
-      return FAILED;
-    }
-    device_id_.push_back(phy_device_id);
-  }
-
   job_id_ = options.job_id;
 
   Status ret;
@@ -76,6 +63,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
     ret = InitFromAclCfg(recv_profiling_config_);
   } else {
     ret = InitFromOptions(options);
+    if (ret == SUCCESS && is_load_profiling_) {
+      // profiling need phy device id
+      if (!convert_2_phy_device_id) {
+        device_id_.push_back(options.device_id);
+      } else {
+        uint32_t phy_device_id = 0;
+        rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id);
+        if (rt_ret != RT_ERROR_NONE) {
+          GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
+          return FAILED;
+        }
+        device_id_.push_back(phy_device_id);
+      }
+    }
   }
   if (ret != SUCCESS) {
     GELOGE(ret, "Failed to init profiling.");
@@ -868,14 +869,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin
   }
   GELOGI("Current logic_device_id:%d", logic_device_id);
 
-  uint32_t phy_device_id = 0;
-  rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
-  }
-  GELOGI("Current phy_device_id:%d", phy_device_id);
   bool execute_model_prof_on = false;
-  auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
+  auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
   if (iter != device_id_.end()) {
     execute_model_prof_on = true;
   }
diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc
index cbd2ee71..ce5aa57e 100644
--- a/src/ge/common/util.cc
+++ b/src/ge/common/util.cc
@@ -58,7 +58,7 @@ const int kWarningThreshold = 536870912 * 2;  // 536870912 represent 512M
 const int kMaxFileSizeLimit = INT_MAX;
 const int kMaxBuffSize = 256;
 const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character";
-constexpr uint32_t MAX_CONFIG_FILE_BYTE = 10 * 1024 * 1024;
+constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024;
 }  // namespace
 
 namespace ge {
@@ -512,9 +512,9 @@ FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) {
            stat.st_mode);
     return false;
   }
-  if (stat.st_size > MAX_CONFIG_FILE_BYTE) {
+  if (stat.st_size > kMaxConfigFileByte) {
     GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]",
-           resolved_file_path.c_str(), stat.st_size, MAX_CONFIG_FILE_BYTE);
+           resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte);
     return false;
   }
   return true;
diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc
index bf1e250b..0a247142 100644
--- a/src/ge/executor/ge_executor.cc
+++ b/src/ge/executor/ge_executor.cc
@@ -745,6 +745,22 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo
   GELOGI("GetAIPPInfo succ.");
   return SUCCESS;
 }
+
+Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
+  GELOGI("Begin to get aipp type.");
+  if (!isInit_) {  // reject the query while the executor is not initialized
+    GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
+    return GE_EXEC_NOT_INIT;
+  }
+  Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index);  // forwards to GraphExecutor
+  if (ret != SUCCESS) {
+    GELOGW("Get aipp type is not success.");  // logged as a warning; ret is still returned unchanged
+    return ret;
+  }
+  GELOGI("Get aipp type success.");
+  return SUCCESS;
+}
+
 Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) {
   GELOGI("Begin to get dynamic batch output shape info");
   if (!isInit_) {
diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk
index f83e590a..232e79ec 100644
--- a/src/ge/ge_inference.mk
+++ b/src/ge/ge_inference.mk
@@ -29,6 +29,7 @@ COMMON_LOCAL_SRC_FILES := \
     common/dump/dump_properties.cc \
     common/dump/dump_manager.cc \
     common/dump/dump_op.cc \
+    common/dump/dump_server.cc \
     common/helper/model_cache_helper.cc \
     ge_local_engine/engine/host_cpu_engine.cc \
 
@@ -371,7 +372,6 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES)
 LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES)
 
 LOCAL_STATIC_LIBRARIES := libge_memory \
-                          libadump_server_stub \
 
 LOCAL_SHARED_LIBRARIES := \
     libc_sec \
@@ -436,7 +436,6 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)
 LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES)
 
 LOCAL_STATIC_LIBRARIES := libge_memory \
-                          libadump_server_stub \
 
 LOCAL_SHARED_LIBRARIES := \
     libc_sec \
diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk
index 7a65787c..04182070 100644
--- a/src/ge/ge_runner.mk
+++ b/src/ge/ge_runner.mk
@@ -1,5 +1,5 @@
 LOCAL_PATH := $(call my-dir)
-
+include $(LOCAL_PATH)/stub/Makefile
 LIBGE_LOCAL_SRC_FILES := \
     proto/fusion_model.proto \
     proto/optimizer_priority.proto \
@@ -392,8 +392,8 @@ endif
 
 LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)
 
-LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc
-LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_prof.cc
+LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \
+                   ../../out/ge/lib64/stub/ge_prof.cc \
 
 
 LOCAL_SHARED_LIBRARIES :=
diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc
index 746f73c2..773eac6a 100644
--- a/src/ge/graph/build/memory/block_mem_assigner.cc
+++ b/src/ge/graph/build/memory/block_mem_assigner.cc
@@ -413,7 +413,8 @@ BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<stri
       life_time_(0) {}
 
 BlockMemAssigner::~BlockMemAssigner() {
-  for (MemoryBlock *memory_block : memory_blocks_) {
+  GELOGD("blocks_store_ size : %zu", blocks_store_.size());  // size() is size_t, so %zu is the matching specifier
+  for (MemoryBlock *memory_block : blocks_store_) {  // ApplyMemory appends each new block to blocks_store_ too
     GE_DELETE_NEW_SINGLE(memory_block);
   }
 }
@@ -544,7 +545,7 @@ bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const Me
 }
 
 bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name,
-                                                   uint32_t &peer_input_index) {
+                                                   uint32_t &peer_input_index, bool &no_need_assign_memory) {
   if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) {
     return false;
   }
@@ -571,6 +572,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
 
       // If GetBool fail, is_input_continuous is false.
       (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
+
+      GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()),
+                      GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index);
+                      no_need_assign_memory = true; return false;);
+
       if (is_input_continuous) {
         if (n->GetOwnerComputeGraph() != nullptr) {
           string graph_name = n->GetOwnerComputeGraph()->GetName();
@@ -828,6 +834,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
     }
   }
   memory_blocks_.emplace_back(block);
+  blocks_store_.emplace_back(block);
   return block;
 }
 
@@ -1143,8 +1150,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
     bool out_node_set_continuous_input = false;
     bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType()));
     if (!no_need_assign_memory) {
-      out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index);
-      no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);
+      out_node_set_continuous_input =
+        IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory);
+      GE_IF_BOOL_EXEC(!no_need_assign_memory,
+                      no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input););
     }
     no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node));
     if (no_need_assign_memory) {
diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h
index 7e37fe8e..6137911c 100644
--- a/src/ge/graph/build/memory/block_mem_assigner.h
+++ b/src/ge/graph/build/memory/block_mem_assigner.h
@@ -259,6 +259,7 @@ class BlockMemAssigner : public MemAssigner {
   ge::ComputeGraphPtr compute_graph_;
 
   std::vector<MemoryBlock *> memory_blocks_;
+  std::vector<MemoryBlock *> blocks_store_;
 
   std::vector<NodeTypeIndex> zero_memory_list_;
 
@@ -357,7 +358,7 @@ class BlockMemAssigner : public MemAssigner {
   bool IsZeroCopyBlock(const NodePtr &node, bool continuous);
 
   bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name,
-                                   uint32_t &peer_input_index);
+                                   uint32_t &peer_input_index, bool &no_need_assign_memory);
 
   ///
   /// @ingroup GE
diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc
index 583f65d8..1518714f 100644
--- a/src/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/src/ge/graph/build/memory/graph_mem_assigner.cc
@@ -39,6 +39,33 @@ const size_t kVirtualInputNodeOutputSize = 1;
 const size_t kVirtualOutputNodeInputSize = 1;
 const size_t kVirtualNodeDataIndex = 0;
 const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";
+int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
+                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
+                              const ge::NodePtr &node, const uint32_t i) {  // symbol's offset or kInvalidOffset
+  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);  // lookup key for output i of node
+  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
+  if (iter1 == anchor_to_symbol.end()) {
+    return ge::kInvalidOffset;  // this output anchor has no symbol entry
+  }
+  auto out_symbol = iter1->second;
+  auto iter2 = symbol_to_anchors.find(out_symbol);
+  if (iter2 == symbol_to_anchors.end()) {
+    return ge::kInvalidOffset;  // the symbol has no anchor list
+  }
+  for (const auto &node_index_io : iter2->second) {
+    if (node_index_io.value_ == out_symbol) {  // pick the anchor whose value_ equals the symbol itself
+      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();  // used only by the debug log below
+      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
+      if (node_index_io.index_ >= symbol_output_list.size()) {
+        return ge::kInvalidOffset;  // anchor index lies outside its node's offset list
+      }
+      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
+             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
+      return symbol_output_list.at(node_index_io.index_);
+    }
+  }
+  return ge::kInvalidOffset;  // no anchor in the list matched the symbol
+}
 }  // namespace
 namespace ge {
 Status VariableMemoryAssigner::Assign() {
@@ -1191,6 +1218,12 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt
 }
 
 Status GraphMemoryAssigner::CheckOffset() {
+  std::map<std::string, std::string> anchor_to_symbol;
+  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
+  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
+    GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
+    return FAILED;
+  }
   for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
     GE_CHECK_NOTNULL(node->GetOpDesc());
     vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
@@ -1200,13 +1233,26 @@ Status GraphMemoryAssigner::CheckOffset() {
         return FAILED;
       }
     }
+
+    bool need_update_output = false;
     vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
-    for (auto output : output_list) {
-      if (output == ge::kInvalidOffset) {
+    for (uint32_t i = 0; i < output_list.size(); ++i) {
+      if (output_list[i] == ge::kInvalidOffset) {
         GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
         return FAILED;
       }
+      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
+        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
+        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
+          output_list[i] = symbol_offset;
+          need_update_output = true;
+        }
+      }
     }
+    if (need_update_output) {
+      node->GetOpDesc()->SetOutputOffset(output_list);
+    }
+
     vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
     for (auto workspace : workspace_list) {
       if (workspace == ge::kInvalidOffset) {
diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc
index 25208aa4..e1322180 100644
--- a/src/ge/graph/execute/graph_execute.cc
+++ b/src/ge/graph/execute/graph_execute.cc
@@ -592,7 +592,17 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI
     GELOGW("GetAIPPInfo is not success.");
     return ret;
   }
+  return SUCCESS;
+}
 
+Status GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
+  auto model_manager = ge::ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  Status ret = model_manager->GetAippType(model_id, index, type, aipp_index);  // forwards to ModelManager
+  if (ret != SUCCESS) {
+    GELOGW("Get aipp type is not success.");  // logged as a warning; ret is still returned unchanged
+    return ret;
+  }
   return SUCCESS;
 }
 
diff --git a/src/ge/graph/execute/graph_execute.h b/src/ge/graph/execute/graph_execute.h
index 5cf39bae..242103f8 100644
--- a/src/ge/graph/execute/graph_execute.h
+++ b/src/ge/graph/execute/graph_execute.h
@@ -75,6 +75,8 @@ class GraphExecutor {
 
   static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
+  static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
+
   ///
   /// @ingroup ge
   /// @brief Get dynamic batch_info
diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc
index 3c2aaffa..81eb4bc9 100644
--- a/src/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/src/ge/graph/load/new_model_manager/davinci_model.cc
@@ -125,7 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
       rt_model_stream_(nullptr),
       is_inner_model_stream_(false),
       is_async_mode_(false),
-      last_execute_mode_(false),
+      last_execute_mode_(INITIALIZATION),
       session_id_(0),
       device_id_(0),
       maxDumpOpNum_(0),
@@ -1573,6 +1573,48 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
   return SUCCESS;
 }
 
+Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) {
+  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
+  const size_t kInvalidAippIndex = 0xFFFFFFFF;  // sentinel reported when no aipp data node is linked
+  type = DATA_WITHOUT_AIPP;  // defaults; kept as-is when the data op carries no aipp mode attr
+  aipp_index = kInvalidAippIndex;
+  OpDescPtr data_op = data_op_list_[index];
+  GE_CHECK_NOTNULL(data_op);
+  if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
+    GELOGW("There is no aipp releated info with index %u.", index);
+    return SUCCESS;
+  }
+  std::string data_mode;
+  (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
+  if (data_mode == "static_aipp") {
+    type = DATA_WITH_STATIC_AIPP;
+  } else if (data_mode == "dynamic_aipp") {
+    type = DATA_WITH_DYNAMIC_AIPP;
+  } else if (data_mode == "dynamic_aipp_conf") {
+    type = DYNAMIC_AIPP_NODE;
+  } else {
+    GELOGE(INTERNAL_ERROR, "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index);
+    return INTERNAL_ERROR;
+  }
+  if (type == DATA_WITH_DYNAMIC_AIPP) {  // also report which data_op_list_ entry is the linked aipp data op
+    string releated_name;
+    (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
+    for (size_t i = 0; i < data_op_list_.size(); ++i) {
+      GE_CHECK_NOTNULL(data_op_list_[i]);
+      if (data_op_list_[i]->GetName() == releated_name) {
+        GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index);
+        aipp_index = i;
+        break;  // first match is the answer; stop scanning the remaining data ops
+      }
+    }
+    if (aipp_index == kInvalidAippIndex) {
+      GELOGE(INTERNAL_ERROR, "Can not find aipp data node from index %u", index);
+      return INTERNAL_ERROR;
+    }
+  }
+  return SUCCESS;
+}
+
 void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type) {
   batch_size_.clear();
   if (batch_num.empty()) {
@@ -1666,9 +1708,9 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format,
     return;
   }
   // judge if this data is linked dynamic aipp first, multiply batch has been considered
-  if (op_desc->HasAttr("_dynamic_aipp_input_dims")) {
+  if (op_desc->HasAttr(ATTR_DYNAMIC_AIPP_INPUT_DIMS)) {
     vector<int64_t> dynamic_aipp_input_dims;
-    (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims);
+    (void)AttrUtils::GetListInt(op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_input_dims);
     SetInputDimsInfo(dynamic_aipp_input_dims, format, input);
     return;
   } else {
@@ -3371,11 +3413,15 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) {
 /// @return Status
 ///
 Status DavinciModel::InitModelStream(rtStream_t stream) {
+  ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION;
+  GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR,
+                         "NnExecute not support mix execute.");
+  last_execute_mode_ = curr_mode;
+
   // asynchronize mode, use user input stream.
   if (is_async_mode_) {
     rt_model_stream_ = stream;
     is_inner_model_stream_ = false;
-    last_execute_mode_ = true;
     return SUCCESS;
   }
 
@@ -3387,14 +3433,12 @@ Status DavinciModel::InitModelStream(rtStream_t stream) {
 
     rt_model_stream_ = stream;
     is_inner_model_stream_ = false;
-    last_execute_mode_ = false;
     return SUCCESS;
   }
 
-  if (last_execute_mode_ || (rt_model_stream_ == nullptr)) {
+  if (rt_model_stream_ == nullptr) {
     GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT));
     is_inner_model_stream_ = true;
-    last_execute_mode_ = false;
   }
 
   return SUCCESS;
diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h
index 15f4539f..438fe639 100644
--- a/src/ge/graph/load/new_model_manager/davinci_model.h
+++ b/src/ge/graph/load/new_model_manager/davinci_model.h
@@ -75,6 +75,12 @@ struct timeInfo {
   int64_t dumpEndTime;
 };
 
+enum ExecuteMode {
+  INITIALIZATION,    // value the DavinciModel constructor gives last_execute_mode_
+  SYNCHRONIZATION,   // recorded by InitModelStream when is_async_mode_ is false
+  ASYNCHRONIZATION,  // recorded by InitModelStream when is_async_mode_ is true
+};
+
 // comments
 class DavinciModel {
  public:
@@ -314,6 +320,8 @@ class DavinciModel {
   ///
   Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);
 
+  Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index);
+
   ///
   /// @ingroup ge
   /// @brief Get model_id.
@@ -884,7 +892,7 @@ class DavinciModel {
   bool is_inner_model_stream_;
 
   bool is_async_mode_;  // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_.
-  bool last_execute_mode_;
+  ExecuteMode last_execute_mode_;
 
   bool is_stream_list_bind_{false};
   bool is_pure_head_stream_{false};
diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc
index 320bfb16..f6995052 100644
--- a/src/ge/graph/load/new_model_manager/model_manager.cc
+++ b/src/ge/graph/load/new_model_manager/model_manager.cc
@@ -876,6 +876,14 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo
   return davinci_model->GetAIPPInfo(index, aipp_info);
 }
 
+Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
+  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);  // look up the model registered for this id
+  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAippType failed, invalid model_id is %u.",
+                         model_id);
+
+  return davinci_model->GetAippType(index, type, aipp_index);  // the model fills type and aipp_index
+}
+
 Status ModelManager::GenSessionId(uint64_t &session_id) {
   std::lock_guard<std::mutex> lock(session_id_create_mutex_);
 
diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h
index e89bfc36..3dce3807 100644
--- a/src/ge/graph/load/new_model_manager/model_manager.h
+++ b/src/ge/graph/load/new_model_manager/model_manager.h
@@ -224,6 +224,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   ///
   ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
+  ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
+
   ///
   /// @ingroup domi_ome
   /// @brief set model input and output size zero copy
diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc
index 82108653..39bdee36 100644
--- a/src/ge/graph/manager/graph_manager.cc
+++ b/src/ge/graph/manager/graph_manager.cc
@@ -2795,11 +2795,18 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vector<std::st
         GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str());
         return FAILED;
       } else {
+        auto var_tensor = var_results[var_name].GetTensorDesc();
+        var_tensor.SetName(var_name);
+        var_results[var_name].SetTensorDesc(var_tensor);
         var_values.emplace_back(var_results[var_name]);
       }
     }
   } else {
     for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) {
+      string var_name = iter->first;
+      auto var_tensor = iter->second.GetTensorDesc();
+      var_tensor.SetName(var_name);
+      iter->second.SetTensorDesc(var_tensor);
       var_values.emplace_back(iter->second);
     }
   }
diff --git a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc
index 9c166f4d..3c3419ae 100644
--- a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc
+++ b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc
@@ -491,7 +491,7 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I
   if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) {
     auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx());
     GE_CHECK_NOTNULL(new_identity);
-    if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS &&
+    if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS ||
         GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) {
       GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s",
              pre_out_data_anchor->GetOwnerNode()->GetName().c_str(),
diff --git a/src/ge/graph/passes/subgraph_pass.cc b/src/ge/graph/passes/subgraph_pass.cc
index fbf444fb..fd71e65b 100644
--- a/src/ge/graph/passes/subgraph_pass.cc
+++ b/src/ge/graph/passes/subgraph_pass.cc
@@ -176,6 +176,9 @@ Status SubgraphPass::WhileInputNodes(const ComputeGraphPtr &graph, const NodePtr
     GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
     NodePtr in_node = peer_out_anchor->GetOwnerNode();
     GE_CHECK_NOTNULL(in_node);
+    if (in_node->GetType() == VARIABLE || in_node->GetType() == VARHANDLEOP || in_node->GetType() == VARIABLEV2) {
+      continue;
+    }
     // Input->While and Input link to other nodes need insert memcpy
     if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) {
       GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str());
diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc
index eb936282..545fe66f 100644
--- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc
+++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc
@@ -124,7 +124,14 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat
         return PARAM_INVALID;
     }
   }
-  GELOGE(PARAM_INVALID, "when dynamic aipp, shape must be in range [3, 4], but is %zu", shape.size());
+  string errormsg =
+    "its shape size must be in range[3,4] which dynamic aipp is linked, "
+    "maybe this input is not suitable for dynamic aipp";
+  ErrorManager::GetInstance().ATCReportErrMessage(
+    "E10001", {"parameter", "value", "reason"},
+    {data_node->GetName() + " shape size", to_string(shape.size()), errormsg});
+  GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu",
+         data_node->GetName().c_str(), shape.size());
   return PARAM_INVALID;
 }
 
@@ -272,7 +279,6 @@ Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aip
 
   GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR,
                          "Set kCurrentAippIndex attr for aipp node failed");
-
   // add input/output desc
   GeTensorDesc tensor;
   GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node");
@@ -318,6 +324,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr
     GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank);
     return PARAM_INVALID;
   }
+  data_node_linked_aipp = data_node;
   auto data_opdesc = data_node->GetOpDesc();
   GE_CHECK_NOTNULL(data_opdesc);
   string set_dt_str;
@@ -330,10 +337,17 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr
     return PARAM_INVALID;
   }
 
+  // add dynamic or static attr message to data
+  if (GetAippMode() == domi::AippOpParams::static_) {
+    (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp");
+  } else if (GetAippMode() == domi::AippOpParams::dynamic) {
+    (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp");
+  }
+
   // In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize
   GeAttrValue::NAMED_ATTRS aipp_attr;
   ConvertParamToAttr(aipp_attr);
-  if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) {
+  if (!AttrUtils::SetNamedAttrs(data_opdesc, ATTR_NAME_AIPP, aipp_attr)) {
     GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank);
     return INTERNAL_ERROR;
   }
@@ -737,7 +751,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) {
     data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0);
   }
   vector<int64_t> dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim};
-  (void)AttrUtils::SetListInt(data_op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_linked_data_shape);
+  (void)AttrUtils::SetListInt(data_op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_linked_data_shape);
 
   int64_t batch_count = -1;
   if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) {
@@ -759,7 +773,24 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) {
   return AddNodeToGraph(aipp_node, max_dynamic_aipp_size);
 }
 
+Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) {
+  // Store this op's aipp parameters on the aipp_data op and mark its mode as "dynamic_aipp_conf".
+  GeAttrValue::NAMED_ATTRS aipp_attr;
+  ConvertParamToAttr(aipp_attr);
+  (void)AttrUtils::SetNamedAttrs(aipp_data_op_desc, ATTR_NAME_AIPP, aipp_attr);  // return value is discarded
+  (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf");
+
+  // Cross-link the Data op and the aipp_data op by name in both directions; the names can be queried by acl.
+  GE_CHECK_NOTNULL(data_node_linked_aipp);  // assigned in GetAndCheckTarget
+  auto data_op_desc = data_node_linked_aipp->GetOpDesc();
+  GE_CHECK_NOTNULL(data_op_desc);
+  (void)AttrUtils::SetStr(data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, aipp_data_op_desc->GetName());
+  (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, data_op_desc->GetName());
+  return SUCCESS;
+}
+
 Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) {
+  static int index = 0;
   std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size);
   GeShape input_shape(input_shape_dim);
   // construct input tensor
@@ -767,18 +798,21 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp
   TensorUtils::SetReuseInput(input_tensor, false);
   TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size);
 
-  // Only flush subgraph name
   const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph();
-  string node_name = (graph->GetParentGraph() == nullptr) ? kDynamicAippData : (graph->GetName() + "_" + node_name);
-
+  string node_name;
+  if (index == 0) {
+    node_name = kDynamicAippData;
+  } else {
+    node_name = string(kDynamicAippData) + "_" + to_string(index);
+  }
+  ++index;
   // new add aipp_data ops for dynamic aipp param input
   OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA);
   GE_CHECK_NOTNULL(op_desc_ptr_data);
 
-  // Add dynamic aipp config to aipp_data
-  GeAttrValue::NAMED_ATTRS aipp_attr;
-  ConvertParamToAttr(aipp_attr);
-  (void)AttrUtils::SetNamedAttrs(op_desc_ptr_data, ATTR_NAME_AIPP, aipp_attr);
+  if (AddAttrToAippData(op_desc_ptr_data) != SUCCESS) {
+    return INTERNAL_ERROR;
+  }
 
   auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor);
 
diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h
index c98935ee..64c89b62 100644
--- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h
+++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h
@@ -78,9 +78,11 @@ class AippOp : public InsertOpBase {
   Status CreateAippData(const NodePtr &aipp);
   Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size);
   Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index);
+  Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc);
 
   domi::AippOpParams *aipp_params_ = nullptr;
   ge::NodePtr aipp_node_ = nullptr;
+  ge::NodePtr data_node_linked_aipp = nullptr;
 };
 }  // namespace ge
 
diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
index c55be013..83a16e75 100644
--- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
+++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
@@ -22,6 +22,7 @@
 #include "common/ge/ge_util.h"
 #include "common/op/ge_op_utils.h"
 #include "common/util.h"
+#include "common/util/error_manager/error_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/debug/log.h"
 #include "framework/common/ge_inner_error_codes.h"
@@ -120,15 +121,15 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() {
 
     for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) {
       const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j);
-
-      GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;);
-
-      GE_IF_BOOL_EXEC(
-        item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 ||
-          item->input_edge_idx(0) == another_item->input_edge_idx(0),
-        GELOGE(PARAM_INVALID,
-               "Can not insert aipp op to the same postion! please check related_input_rank and input_edge_idx.");
-        return PARAM_INVALID;);
+      GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(),
+                      string errormsg =
+                        "Can not insert aipp to the same postion! Please ensure related_input_rank"
+                        " param is different in different aipp config.";
+                      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg});
+                      GELOGE(PARAM_INVALID,
+                             "Can not insert aipp op to the same postion! Please ensure related_input_rank param "
+                             "is different in different aipp config.");
+                      return PARAM_INVALID;);
     }
   }
 
@@ -162,18 +163,12 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
     std::unique_ptr<domi::AippOpParams> aippParams(new (std::nothrow) domi::AippOpParams());
     GE_CHECK_NOTNULL(aippParams);
 
-    GE_IF_BOOL_EXEC(aippNodes.size() > 0, GE_CHK_STATUS(GetAippParams(aippParams, aippNodes[0]));
-                    aippMode = (aippMode == domi::AippOpParams::undefined) ? aippParams->aipp_mode() : aippMode;
-                    GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID,
-                                           "The aipp_mode of all aipp_op must be the same"););
     GE_IF_BOOL_EXEC(
       aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) {
         std::unique_ptr<domi::AippOpParams> currAippParam(new (std::nothrow) domi::AippOpParams());
         GE_CHECK_NOTNULL(currAippParam);
         GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i]));
 
-        GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID,
-                               "The aipp_mode of all aipp_op must be the same");
         if (aippMode == domi::AippOpParams::static_) {
           GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID,
                                  "The input_format of all aipp_ops after one Data should be the same");
diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc
index 298e7749..331d9c31 100644
--- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -41,6 +41,7 @@
 #include "inc/pass_manager.h"
 #include "graph/common/local_context.h"
 
+using std::map;
 using std::set;
 using std::string;
 using std::vector;
@@ -265,27 +266,24 @@ Status MultiBatchGraphCopyer::Init() {
 }
 
 Status MultiBatchGraphCopyer::LabelStatus() {
-  for (const auto &data : origin_data_nodes_) {
-    auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
-    if (!IsAllDimsPositive(data_shape.GetDims())) {
-      origin_nodes_status_[data.get()] = kNodeInBatchBranch;
-    }
-  }
+  map<string, vector<NodePtr>> frame_enters;
+  InitStatus(frame_enters);
+
   bool changed = true;
   // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch
   while (changed) {
     changed = false;
     for (const auto &node : origin_all_nodes_) {
-      auto iter = origin_nodes_status_.find(node.get());
-      if (iter != origin_nodes_status_.end()) {
-        continue;
-      }
       for (auto &in_node : node->GetInAllNodes()) {
         bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() &&
                            origin_nodes_status_[in_node.get()] == kNodeInBatchBranch;
         if (is_in_batch) {
-          origin_nodes_status_[node.get()] = kNodeInBatchBranch;
-          changed = true;
+          if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() ||
+              origin_nodes_status_[node.get()] != kNodeInBatchBranch) {
+            origin_nodes_status_[node.get()] = kNodeInBatchBranch;
+            ResetEnterStatus(frame_enters, node);
+            changed = true;
+          }
           break;
         }
       }
@@ -316,6 +314,45 @@ Status MultiBatchGraphCopyer::LabelStatus() {
   return SUCCESS;
 }
 
+void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) {
+  for (const auto &node : origin_all_nodes_) {
+    if (node->GetType() != ENTER && node->GetType() != REFENTER) {
+      continue;
+    }
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      continue;
+    }
+    string frame_name;
+    if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) {
+      frame_enters[frame_name].emplace_back(node);
+    }
+  }
+
+  for (const auto &data : origin_data_nodes_) {
+    auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
+    if (!IsAllDimsPositive(data_shape.GetDims())) {
+      origin_nodes_status_[data.get()] = kNodeInBatchBranch;
+    }
+  }
+}
+
+void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) {
+  if (node->GetType() != ENTER && node->GetType() != REFENTER) {
+    return;
+  }
+
+  for (const auto &frame_enter : frame_enters) {
+    auto &enters = frame_enter.second;
+    if (std::find(enters.begin(), enters.end(), node) != enters.end()) {
+      for (const auto &enter : enters) {
+        origin_nodes_status_[enter.get()] = kNodeInBatchBranch;
+      }
+      break;
+    }
+  }
+}
+
 Status MultiBatchGraphCopyer::CreateNewNodes() {
   shape_data_ = InsertShapeDataNode();
   if (shape_data_ == nullptr) {
diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h
index 062b98d2..f665b65e 100644
--- a/src/ge/graph/preprocess/multi_batch_copy_graph.h
+++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h
@@ -68,6 +68,8 @@ class MultiBatchGraphCopyer {
 
   // label status for origin_all_nodes_
   Status LabelStatus();
+  void InitStatus(std::map<string, vector<NodePtr>> &frame_enters);
+  void ResetEnterStatus(std::map<string, vector<NodePtr>> &frame_enters, const NodePtr &node);
   // add nodes functions
   Status CreateNewNodes();
 
diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 44fe377a..871f1db4 100644
--- a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -722,8 +722,15 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
   GE_CHECK_NOTNULL(node_item);
   auto task_defs = model.GetTaskDefs(node);
   GE_CHECK_NOTNULL(task_defs);
-  GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1",
-                         node->GetName().c_str(), (*task_defs).size());
+  if (node_item->shape_inference_type != DEPEND_COMPUTE) {
+    GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1",
+                           node->GetName().c_str(), (*task_defs).size());
+  } else {
+    // A DEPEND_COMPUTE ("fourth type") operator may have either 1 or 2 task defs
+    GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID,
+                           "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", node->GetName().c_str(),
+                           (*task_defs).size());
+  }
   const auto &task_def = (*task_defs)[0];
   std::shared_ptr<AicpuNodeTaskBase> aicpu_task;
   if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) {
diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc
index 3d3adfd8..44c29460 100644
--- a/src/ge/session/inner_session.cc
+++ b/src/ge/session/inner_session.cc
@@ -18,6 +18,7 @@
 #include <map>
 #include <memory>
 #include <vector>
+#include "adx_datadump_server.h"
 #include "common/dump/dump_properties.h"
 #include "common/util.h"
 #include "framework/common/debug/ge_log.h"
@@ -76,10 +77,12 @@ Status InnerSession::Initialize() {
 
   DumpProperties dump_properties;
   dump_properties.InitByOptions();
+  GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed");
 
   ret = graph_manager_.Initialize(options_);
   if (ret != SUCCESS) {
     GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_);
+    GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed");
     return ret;
   }
 
@@ -87,6 +90,7 @@ Status InnerSession::Initialize() {
   if (ret != SUCCESS) {
     GELOGE(ret, "failed to set malloc size");
     (void)graph_manager_.Finalize();
+    GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed");
     GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
     return ret;
   }
@@ -97,6 +101,7 @@ Status InnerSession::Initialize() {
   ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID);
   if (ret != SUCCESS) {
     GELOGE(ret, "failed to init session instance");
+    GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed");
   }
   init_flag_ = true;
   return SUCCESS;
@@ -122,6 +127,7 @@ Status InnerSession::Finalize() {
   (void)VarManager::Instance(session_id_)->FreeVarMemory();
 
   GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
+  GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed");
 
   return ret;
 }
@@ -297,4 +303,27 @@ Status InnerSession::SaveVariables(const Graph &graph, const std::vector<std::st
   return graph_manager_.SaveVariables(graph, var_names, outputs, var_values);
 }
 
+Status InnerSession::AddDumpProperties(const DumpProperties &dump_properties) {
+  if (!is_dump_server_inited_) {
+    if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
+      GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server init failed");
+                      return PARAM_INVALID)
+      GELOGI("Init adx data dump server success");
+      is_dump_server_inited_ = true;
+    }
+  }
+  PropertiesManager::Instance().AddDumpProperties(session_id_, dump_properties);
+  return SUCCESS;
+}
+
+Status InnerSession::RemoveDumpProperties() {
+  PropertiesManager::Instance().RemoveDumpProperties(session_id_);
+  if (is_dump_server_inited_ && PropertiesManager::Instance().GetDumpPropertiesMap().empty()) {
+    GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server uninit failed");
+                    return PARAM_INVALID)
+    GELOGI("UnInit adx data dump server success");
+    is_dump_server_inited_ = false;
+  }
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/src/ge/session/inner_session.h b/src/ge/session/inner_session.h
index 6d57af61..94d1ac12 100644
--- a/src/ge/session/inner_session.h
+++ b/src/ge/session/inner_session.h
@@ -63,6 +63,10 @@ class InnerSession {
 
   bool IsGraphNeedRebuild(uint32_t graph_id);
 
+  Status AddDumpProperties(const DumpProperties &dump_properties);
+
+  Status RemoveDumpProperties();
+
  private:
   bool init_flag_;
   uint64_t session_id_;
diff --git a/src/ge/single_op/single_op.cc b/src/ge/single_op/single_op.cc
index a74be1f3..8e68208d 100644
--- a/src/ge/single_op/single_op.cc
+++ b/src/ge/single_op/single_op.cc
@@ -30,7 +30,7 @@ namespace ge {
 namespace {
 const size_t kDataMemAlignSize = 32;
 
-size_t GetAlignedSize(uint32_t size) {
+size_t GetAlignedSize(size_t size) {
   size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
   return aligned_size;
 }
diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h
index 07d25fc7..0c1d5112 100644
--- a/third_party/fwkacllib/inc/ops/aipp.h
+++ b/third_party/fwkacllib/inc/ops/aipp.h
@@ -40,6 +40,8 @@ image normalization (by subtracting the mean value or multiplying a factor), ima
 *features: The AIPP-processed output tensor of type float16 or uint8.
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
+*@par Restrictions:
+*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly.
 */
 REG_OP(Aipp)
     .INPUT(images, TensorType{DT_UINT8})
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 2313b4a0..5d68b977 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -1143,6 +1143,9 @@ REG_OP(Add)
 
 *@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator LRN.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 
 REG_OP(FusedMulAdd)
@@ -2464,6 +2467,8 @@ REG_OP(PopulationCount)
 * @li y3: A Tensor. Must be one of the following types: float16, float32.
 * @li y4: A Tensor. Must be one of the following types: float16, float32. \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LambNextMVWithDecay)
     .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2510,6 +2515,9 @@ REG_OP(LambNextMVWithDecay)
 *@li y2: A Tensor. Has the same type as "input_mul3".
 *@li y3: A Tensor. Has the same type as "input_mul3".
 *@li y4: A Tensor. Has the same type as "input_mul3".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LambNextMV)
     .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2548,6 +2556,8 @@ REG_OP(LambNextMV)
 * @li y1: A Tensor of the same type as "input_square".
 * @li y2: A Tensor of the same type as "input_square". \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LambNextRight)
     .INPUT(input_square, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2578,6 +2588,8 @@ REG_OP(LambNextRight)
 *@par Outputs:
 *y: A Tensor of the same type as "input_greater1". \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LambUpdateWithLr)
     .INPUT(input_greater1, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2608,6 +2620,8 @@ REG_OP(LambUpdateWithLr)
 *@par Outputs:
 *y: A Tensor of the same type as input. \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LambUpdateWithLrV2)
     .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2643,6 +2657,8 @@ REG_OP(LambUpdateWithLrV2)
 * @li output1: A Tensor. Must be one of the following types: float16, float32.
 * @li output2: A Tensor. Must be one of the following types: float16, float32. \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(AdamApplyOneWithDecay)
     .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2683,6 +2699,8 @@ REG_OP(AdamApplyOneWithDecay)
 * @li output1: A Tensor. Must be one of the following types: float16, float32.
 * @li output2: A Tensor. Must be one of the following types: float16, float32. \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(AdamApplyOne)
     .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2723,6 +2741,8 @@ REG_OP(AdamApplyOne)
 * @li output1: A Tensor. Must be one of the following types: float16, float32.
 * @li output2: A Tensor. Must be one of the following types: float16, float32. \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(AdamApplyOneWithDecayAssign)
     .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2763,6 +2783,8 @@ REG_OP(AdamApplyOneWithDecayAssign)
 * @li output1: A Tensor. Must be one of the following types: float16, float32.
 * @li output2: A Tensor. Must be one of the following types: float16, float32. \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(AdamApplyOneAssign)
     .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2793,6 +2815,8 @@ REG_OP(AdamApplyOneAssign)
 *@par Outputs:
 *y: A Tensor of the same type as "x". \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(ClipByNormNoDivSum)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2817,6 +2841,9 @@ REG_OP(ClipByNormNoDivSum)
 *Two outputs, including: \n
 *@li y1: A Tensor. Has the same type as "x".
 *@li y2: A Tensor. Has the same type as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(SquareSumV2)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2839,6 +2866,9 @@ REG_OP(SquareSumV2)
 
 *@par Outputs:
 y: A Tensor. Has the same type as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(SquareSumV1)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2857,6 +2887,9 @@ REG_OP(SquareSumV1)
 *@par Outputs:
 y1: A Tensor. Has the same type as "x1".The result of "x1".
 y2: A Tensor. Has the same type as "x2".The result of "x2".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(SquareSumAll)
     .INPUT(x1, TensorType({DT_FLOAT}))
@@ -2876,6 +2909,9 @@ REG_OP(SquareSumAll)
 
 *@par Outputs:
 * y: A Tensor. Has the same type as "x1".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(FusedMulAddN)
     .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16}))
@@ -2942,6 +2978,9 @@ If false, don’t keep these dimensions. Default:False. \n
 *@par Outputs:
 *@li output0: A Tensor result of which input0 dot multily input1.
 *@li output1: A Tensor result of which input0 dot multily input1, then reducesum it.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(ConfusionMulGrad)
     .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2965,6 +3004,9 @@ REG_OP(ConfusionMulGrad)
 *@li y1: A Tensor of shape and dtype of first output, which should have \n
 shape (1,) and dtype as input.
 *@li y2: A Tensor of shape and dtype of second output, should be same shape and type as input.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(FusedMulAddNL2loss)
     .INPUT(x1, TensorType::NumberType())
@@ -3186,6 +3228,9 @@ REG_OP(KLDiv)
 *y: A Tensor. Has the same type as "x". \n
 
 *@par Third-party framework compatibility
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(TensorMove)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
@@ -3197,20 +3242,18 @@ REG_OP(TensorMove)
 
 *@par Inputs:
 *One inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. \n
+* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n
 
 *@par Outputs:
-*x: A Tensor. Has the same type as "x". \n
+*output_x: A Tensor. Has the same type as "x". \n
 
 *@par Third-party framework compatibility
 */
 REG_OP(TensorRedirect)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8,
-                           DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE,
-                           DT_COMPLEX64}))
+                           DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32}))
     .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8,
-                           DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE,
-                           DT_COMPLEX64}))
+                           DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32}))
     .OP_END_FACTORY_REG(TensorRedirect)
 }  // namespace ge
 
diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h
index 2f9906fc..7e9fd4a4 100644
--- a/third_party/fwkacllib/inc/ops/internal_ops.h
+++ b/third_party/fwkacllib/inc/ops/internal_ops.h
@@ -68,6 +68,9 @@ REG_OP(CacheUpdate)
 
 *@par Outputs:
 *The output is dynamic for attribute func_name.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(InternalDataMove)
     .INPUT(x, TensorType::ALL())
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 5a02c1ca..12412516 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -585,13 +585,20 @@ REG_OP(Conv2DBackpropFilterD)
 /**
 *@brief Computes a 2D convolution given 4D "x" and "filter" tensors.
 *@par Inputs:
-* @li x: A 4D tensor of input images.
-* @li filter: A 4D tensor of filters.
-* @li bias: An optional 1D tensor.
-* @li offset_w: An optional 1D tensor for quantized convolution. Reserved.
-*
-* The input and output tensor attributes are listed as follows:
-* @verbatim
+*@li x: A 4D tensor of input images. With "NHWC" format, the shape is
+* [batch, in_height, in_width, in_channels].
+*@li filter: A 4D tensor of filters. Has the same type as "x". With "HWCN"
+* format, the shape is [filter_height, filter_width, in_channels,
+* out_channels].
+*
+*@li bias: An optional 1D tensor. Shape is [out_channels].
+*@li offset_w: An optional 1D tensor for quantized convolution. Shape is
+* [out_channels]. Reserved.
+*\n
+*\n
+* Note that there is a strict data type mapping between the input and output
+* tensors:
+*@verbatim
     |Tensor    | x       | filter  | bias    | offset_w | y
     -----------|---------|---------|---------|----------|--------
     |Data Type | float16 | float16 | float16 | _        | float16
@@ -601,69 +608,84 @@ REG_OP(Conv2DBackpropFilterD)
     |          | int8    | int8    | int32   | int8     | int32
     -----------|---------|---------|---------|----------|--------
     |Format    | NCHW    | NCHW    | ND      | ND       | NCHW
-    |          | NHWC    | NHWC    |         |          | NHWC
-    |          |         | HWCN    |         |          |
+    |          | NHWC    | HWCN    |         |          | NHWC
 @endverbatim
-* It should be noted that the data types must correspond to each other, but the
-* format does not need to . \n
-
+* Type float32 is allowed only in mixed precision (float32->float16) scenarios.
+* Mixed precision is enabled by default.
+* \n
+*
 *@par Attributes:
-* @li strides: A list of 4 integers. Specifying the strides of the
+*@li strides: Required. A list of 4 integers. Specifying the strides of the
 * convolution along the height and width. The dimension order is determined
 * by the data format of "x". By default the N and C dimensions are set to 1.
-* @li pads: A list of 4 integers. Specifying the top, bottom, left and right
-* padding.
-* @li dilations: A list of 4 integers. Specifying the dilation rate to use
-* for dilated convolution. Has the same dimension order and value as "strides".
-* @li groups: Number of blocked connections from input channels to output
-* channels. Input channels and output channels must both be divisible by
-* "groups".Type is int32.
-* @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0".
-* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the
-* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved . \n
-
-*@par Outputs:
-* @li y: A 4D Tensor of output images . \n
-
-*@attention
-* @li The parameter scope is listed as follows:
-* @verbatim
-    |Name             | Field        | Scope
-    ------------------|--------------|----------
-    |Input Image Size | H dimension  | [1, 4096]
-    |                 | W dimension  | [1, 4096]
-    ------------------|--------------|----------
-    |Filter Size      | H dimension  | [1, 255]
-    |                 | W dimension  | [1, 255]
-    ------------------|--------------|----------
-    |Stride Size      | H dimension  | [1, 63]
-    |                 | W dimension  | [1, 63]
-    ------------------|--------------|----------
-    |Padding Size     | top side     | [0, 255]
-    |                 | bottom side  | [0, 255]
-    |                 | left side    | [0, 255]
-    |                 | right side   | [0, 255]
-    ------------------|--------------|----------
-    |Dilation Size    | H dimension  | [1, 255]
-                      | W dimension  | [1, 255]
+*@li pads: Required. A list of 4 integers. Specifying the top, bottom, left
+* and right padding.
+* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate
+* to use for dilated convolution. Has the same dimension order and value as
+* "strides". Defaults to [1, 1, 1, 1].
+* @li groups: Optional. An integer of type int32, for the number of blocked
+* connections from input channels to output channels. Input channels and output
+* channels must both be divisible by "groups". "x" in_channels must be equal to
+* "filter" in_channels * groups. Defaults to 1.
+* @li offset_x: Optional. An integer of type int32, for quantized convolution.
+* Defaults to 0.
+* @li data_format: Reserved and optional. A string from: "NHWC" and "NCHW".
+* Specifying the data format of the input and output images. Defaults to
+* "NHWC".
+*\n
+*\n
+* The following value range restrictions must be met:
+*@verbatim
+    |Name             | Field    | Scope
+    ------------------|----------|----------
+    |Input Image Size | H        | [1, 4096]
+    |                 | W        | [1, 4096]
+    ------------------|----------|----------
+    |Filter Size      | H        | [1, 255]
+    |                 | W        | [1, 255]
+    ------------------|----------|----------
+    |Stride           | H        | [1, 63]
+    |                 | W        | [1, 63]
+    ------------------|----------|----------
+    |Padding          | top      | [0, 255]
+    |                 | bottom   | [0, 255]
+    |                 | left     | [0, 255]
+    |                 | right    | [0, 255]
+    ------------------|----------|----------
+    |Dilation         | H        | [1, 255]
+    |                 | W        | [1, 255]
 @endverbatim
-
-* @li There are restrictions for certain scenarios:
-* @verbatim
-     Output           | Restrictions
-    ------------------|----------------------------------------------
-     W dimension == 1 | HxW(input) == HxW(filter)
-     H dimension == 1 |
-    ------------------|----------------------------------------------
-     W dimension == 1 | Not supported
-     H dimension != 1 |
+*
+*@par Outputs:
+*@li y: A 4D Tensor of output images. Has the same format as "x"; its data type
+* follows the mapping table above. With "NHWC" format, the shape is [batch, out_height, out_width, out_channels].
+*\n
+*     out_height = (in_height + top_pad + bottom_pad -
+*                   dilation_h * (filter_height - 1) - 1)
+*                  / stride_h + 1
+*\n
+*     out_width = (in_width + left_pad + right_pad -
+*                   dilation_w * (filter_width - 1) - 1)
+*                   / stride_w + 1
+*
+*@attention Constraints:
+*@li The following restrictions on the output must be met:
+*@verbatim
+    | Output           | Restrictions
+    -------------------|---------------------------
+    | W dimension == 1 | H*W(input) == H*W(filter)
+    | H dimension == 1 |
+    -------------------|---------------------------
+    | W dimension == 1 | Not supported
+    | H dimension != 1 |
 @endverbatim
-* As shown above, "HxW(input)" indicates the image size after padding and
-* "HxW(filter)" indicates the filter size after dilation . \n
-
+* "H*W(input)" indicates the image size after padding and "H*W(filter)"
+* indicates the filter size after dilation.
+*\n
+*
 *@par Quantization supported or not
-* Yes
-
+*@li Yes
+*
 *@par Third-party framework compatibility
 *@li Compatible with the TensorFlow operator "conv2d".
 *@li Compatible with the Caffe operator 2D "Convolution".
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index d9c28087..415cc4ef 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1035,6 +1035,9 @@ REG_OP(ROIPooling)
 *@par Outputs:
 * @ decoded_boxes: A Tensor. Must have the same type as box_predictions.
 *                    N-D with shape [N, 4].
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(DecodeBbox)
     .INPUT(box_predictions, TensorType{DT_FLOAT16})
@@ -1052,6 +1055,9 @@ REG_OP(DecodeBbox)
 
 *@par Outputs:
 *boxes_output: A Tensor. Must have the same type as boxes_output. N-D with shape [N, 4].
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(ClipBoxes)
     .INPUT(boxes_input, TensorType({DT_FLOAT16}))
@@ -1270,6 +1276,9 @@ REG_OP(RpnProposalPostProcessing)
 *
 *@par Outputs:
 * @ boundary_encoded: A Tensor. Must be float16.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(DecodeBoundariesTarget)
     .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
@@ -1287,6 +1296,9 @@ REG_OP(DecodeBoundariesTarget)
 *
 *@par Outputs:
 * @ keypoints_decoded: A Tensor. Must be float16.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(DecodeCornerpointsTargetBG)
     .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
@@ -1304,6 +1316,9 @@ REG_OP(DecodeCornerpointsTargetBG)
 *
 *@par Outputs:
 * @ keypoints_decoded: A Tensor. Must be float16.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(DecodeCornerpointsTargetWrtCenterV1)
     .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
@@ -1321,6 +1336,9 @@ REG_OP(DecodeCornerpointsTargetWrtCenterV1)
 *
 *@par Outputs:
 * @ boundary_encoded: A Tensor. Must be float16.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(DecodeWheelsTarget)
     .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 6d4f6f9d..14949c54 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -335,6 +335,8 @@ REG_OP(LogSoftmaxV2)
 *@par Outputs:
 * y: A Tensor of the same type as "grad" . \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(ConfusionSoftmaxGrad)
   .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -499,6 +501,9 @@ REG_OP(LayerNorm)
 * @li pd_x: A Tensor. Must be one of the following types: float16, float32.
 * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
 * @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LayerNormGrad)
     .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -540,6 +545,9 @@ REG_OP(LayerNormGrad)
 *@par Outputs:
 *Three outputs, including:
 * @li pd_x: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LayerNormXBackprop)
     .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -579,6 +587,9 @@ REG_OP(LayerNormXBackprop)
 *Three outputs, including:
 * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
 * @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LayerNormBetaGammaBackprop)
     .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -811,6 +822,9 @@ instruction . \n
 
 *@par Third-party framework compatibility
 *@li Compatible with the PyTorch operator GroupNorm.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(GroupNorm)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -862,6 +876,9 @@ Specifies the variance of "x" . \n
 
 *@par Third-party framework compatibility
 *@li Compatible with the PyTorch operator InstanceNorm.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(InstanceNormV2)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h
index 4f51a82e..65fb462e 100644
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -2031,6 +2031,9 @@ REG_OP(ApplyAdadeltaD)
 *   Two outputs, including:
 *  @li var: A mutable Tensor has the same type as "var".
 *  @li accum: A mutable Tensor has the same type as "var".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(FusedMulApplyMomentum)
     .INPUT(var, TensorType::NumberType())
@@ -2079,6 +2082,9 @@ REG_OP(FusedMulApplyMomentum)
 *  @li var: A Tensor has the type float32.
 *  @li var_copy: A Tensor has the type float16.
 *  @li accum: A Tensor has the same type as input "accum".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(FusedMulApplyMomentumExtern)
     .INPUT(var, TensorType(DT_FLOAT))
@@ -2581,6 +2587,8 @@ REG_OP(SparseApplyAdadeltaD)
 *@par Attributes:
 * @li automic_add_mem_size: sizes of workspaces . \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(AtomicAddrClean)
     .ATTR(automic_add_mem_size, ListInt, {})
diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
index 90628af6..e94dafa7 100644
--- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
+++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
@@ -30,6 +30,9 @@ namespace ge {
 
 *@par Outputs:
 *data: A Tensor of data value. Must be float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(NPUAllocFloatStatusOperator)
     .OUTPUT(data, TensorType({DT_FLOAT}))
@@ -43,6 +46,9 @@ REG_OP(NPUAllocFloatStatusOperator)
 
 *@par Outputs:
 *data: A Tensor of data value. Must be float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(NPUClearFloatStatusOperator)
     .INPUT(addr, TensorType{DT_FLOAT})
@@ -57,6 +63,9 @@ REG_OP(NPUClearFloatStatusOperator)
 
 *@par Outputs:
 *data: A Tensor of data value. Must be float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(NPUGetFloatStatusOperator)
     .INPUT(addr, TensorType{DT_FLOAT})
@@ -68,6 +77,9 @@ REG_OP(NPUGetFloatStatusOperator)
 
 *@par Outputs:
 *y: A Tensor of type int32, output eight numbers with a value of zero.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(NPUAllocFloatStatus)
     .OUTPUT(data, TensorType({DT_FLOAT}))
@@ -81,6 +93,9 @@ REG_OP(NPUAllocFloatStatus)
 
 *@par Outputs:
 *data: A Tensor of type float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(NPUClearFloatStatus)
     .INPUT(addr, TensorType{DT_FLOAT})
@@ -95,6 +110,9 @@ REG_OP(NPUClearFloatStatus)
 
 *@par Outputs:
 *data: A Tensor of type float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(NPUGetFloatStatus)
     .INPUT(addr, TensorType{DT_FLOAT})
diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h
index 5938941a..4f42008e 100644
--- a/third_party/fwkacllib/inc/ops/pad_ops.h
+++ b/third_party/fwkacllib/inc/ops/pad_ops.h
@@ -186,6 +186,73 @@ REG_OP(PadD)
     .OP_END_FACTORY_REG(PadD)
 
 /**
+*@brief Pads a tensor.
+
+*@par Inputs:
+*Three inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32,
+*     uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
+*     complex128, uint32, uint64.
+* @li paddings: A Tensor of type int32 or int64.
+* @li constant_values: An optional Tensor of int32 or int64.
+
+*@par Attributes:
+* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
+*     support "constant", "reflect", "edge"
+* @li paddings_contiguous: An optional bool value, Defaults to true.
+*     If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
+*     If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+
+*@par Third-party framework compatibility:
+* Compatible with ONNX operator Pad.
+*/
+REG_OP(PadV3)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(paddings, TensorType::IndexNumberType())
+    .OPTIONAL_INPUT(constant_values, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(mode, String, "constant")
+    .ATTR(paddings_contiguous, Bool, true)
+    .OP_END_FACTORY_REG(PadV3)
+
+/**
+*@brief Pads a tensor.
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8.
+
+*@par Attributes:
+* @li paddings: A required "vector<vector<int>>".
+*     For each dimension D of input, paddings[D, 0] indicates how many
+*     values to add before the contents of tensor in that dimension,
+*     and paddings[D, 1] indicates how many values to add after the
+*     contents of tensor in that dimension.
+* @li constant_values: An optional int value for pad.
+* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
+*     support "constant", "reflect", "edge"
+* @li paddings_contiguous: An optional bool value, Defaults to true.
+*     If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
+*     If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+
+*@par Third-party framework compatibility:
+* Compatible with ONNX operator Pad.
+*/
+REG_OP(PadV3D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
+    .REQUIRED_ATTR(paddings, ListListInt)
+    .ATTR(constant_values, Int, 0)
+    .ATTR(mode, String, "constant")
+    .ATTR(paddings_contiguous, Bool, true)
+    .OP_END_FACTORY_REG(PadV3D)
+
+/**
 *@brief Create a diagonal tensor
 
 *@par Inputs:
@@ -258,6 +325,9 @@ REG_OP(AscendPadding)
 /**
 *@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please do not use. \n
+
 *@par Inputs:
 *One input, include:
 *addr_table: Tensor which last dimension must be 3. For example: [8, 3].
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index edec232d..b97d824f 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -32,7 +32,7 @@ namespace ge {
 
 *@par Inputs:
 *Inputs include:
-* @li logits: A Tensor. Must be one of the following types: float32, float64，double.
+* @li logits: A Tensor. Must be one of the following types: float16, float, double.
 2-D Tensor with shape [batch_size, num_classes].
 * @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n
 
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 7a239732..626dda59 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -502,7 +502,7 @@ REG_OP(ReduceMean)
 
 *@par Inputs:
 *One input:
-* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n
+* @li x: A Tensor. Must be one of the following types: float16, float32 . \n
 
 *@par Attributes:
 *@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
@@ -521,8 +521,8 @@ REG_OP(ReduceMean)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead.
 */
 REG_OP(ReduceMeanD)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(axes, ListInt)
     .ATTR(keep_dims, Bool, false)
     .OP_END_FACTORY_REG(ReduceMeanD)
diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h
index a4d54088..fdc76391 100644
--- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h
+++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h
@@ -26,6 +26,21 @@
 
 namespace ge {
 
+/**
+*@brief Creates a handle to a Variable resource. \n
+
+*@par Outputs:
+*y:A Tensor of type resource. \n
+
+*@par Attributes:
+* @li container: optional, string.
+* @li shared_name: optional, string.
+* @li dtype: required, type.
+* @li shape: optional, ListInt. \n
+
+*@see VarHandleOp.
+*/
+
 REG_OP(VarHandleOp)
     .ATTR(container, String, "")
     .ATTR(shared_name, String, "")
@@ -34,6 +49,19 @@ REG_OP(VarHandleOp)
     .OUTPUT(y, TensorType({DT_RESOURCE}))
     .OP_END_FACTORY_REG(VarHandleOp)
 
+/**
+*@brief Assigns a new value to a variable. \n
+
+*@par Inputs:
+*resource:Handle to the resource in which to store the variable.
+*value:The value to set the new tensor to use. \n
+
+*@par Attributes:
+* @li dtype: required, type. \n
+
+*@see AssignVariableOp.
+*/
+
 REG_OP(AssignVariableOp)
     .INPUT(resource, TensorType({DT_RESOURCE}))
     .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \
@@ -41,6 +69,19 @@ REG_OP(AssignVariableOp)
     .REQUIRED_ATTR(dtype, Type)
     .OP_END_FACTORY_REG(AssignVariableOp)
 
+/**
+*@brief Adds a value to the current value of a variable. \n
+
+*@par Inputs:
+*resource:Handle to the resource in which to store the variable.
+*value:The value by which the variable will be incremented. \n
+
+*@par Attributes:
+* @li dtype: required, type. \n
+
+*@see AssignAddVariableOp.
+*/
+
 REG_OP(AssignAddVariableOp)
     .INPUT(resource, TensorType({DT_RESOURCE}))
     .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \
@@ -48,6 +89,19 @@ REG_OP(AssignAddVariableOp)
     .REQUIRED_ATTR(dtype, Type)
     .OP_END_FACTORY_REG(AssignAddVariableOp)
 
+/**
+*@brief Subtracts a value from the current value of a variable. \n
+
+*@par Inputs:
+*resource:Handle to the resource in which to store the variable.
+*value:The value by which the variable will be decremented. \n
+
+*@par Attributes:
+* @li dtype: required, type. \n
+
+*@see AssignSubVariableOp.
+*/
+
 REG_OP(AssignSubVariableOp)
     .INPUT(resource, TensorType({DT_RESOURCE}))
     .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 77437aba..e1a83f43 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -81,6 +81,9 @@ REG_OP(BasicLSTMCell)
 
 *@par Outputs:
 *output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicLSTM)
     .INPUT(x, TensorType({DT_FLOAT32}))
@@ -306,6 +309,9 @@ REG_OP(LSTMInputGrad)
 *two outputs:
 *@li dxt:A 4D Tensor. Must be one of the following types: float16, float32.
 *@li dht:A 4D Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(BasicLSTMCellInputGrad)
     .INPUT(dgate, TensorType({DT_FLOAT16}))
@@ -328,6 +334,9 @@ REG_OP(BasicLSTMCellInputGrad)
 *two outputs:
 *@li dw:A 4D Tensor. Must be one of the following types: float16.
 *@li db:A 4D Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(BasicLSTMCellWeightGrad)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -358,6 +367,9 @@ REG_OP(BasicLSTMCellWeightGrad)
 *two outputs:
 *@li dgate:A 4D Tensor. Must be one of the following types: float16.
 *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(BasicLSTMCellCStateGrad)
     .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -439,6 +451,9 @@ REG_OP(RNN)
 *two outputs:
 *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(BasicRNNCell)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -460,13 +475,13 @@ REG_OP(BasicRNNCell)
 *@brief: DynamicGRU calculation.
 *@par Inputs:
 *seven inputs: \n
-*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM.
-*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li cw:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM.
-*@li cb:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
-*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
+*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z.
+*@li b:Must be one of the following types: float16, float32. The format must be ND.
+*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z.
+*@li cb:Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:Must be one of the following types: int32. The format must be ND.
+*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 
 *@par Attributes:
 *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
@@ -480,11 +495,11 @@ REG_OP(BasicRNNCell)
 
 *@par Outputs:
 *five outputs: \n
-*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li r:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li n:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 
 *@par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -495,7 +510,7 @@ REG_OP(DynamicGRU)
     .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(cw, TensorType({DT_FLOAT16}))
     .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
     .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -516,13 +531,13 @@ REG_OP(DynamicGRU)
 *@brief: DynamicGRUV2 calculation.
 *@par Inputs:
 *seven inputs: \n
-*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ.
-*@li weight_input:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM.
-*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM.
-*@li bias_input:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li bias_hidden:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
-*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
+*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z.
+*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
+*@li bias_input:Must be one of the following types: float16, float32. The format must be ND.
+*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:Must be one of the following types: int32. The format must be ND.
+*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 
 *@par Attributes:
 *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
@@ -538,12 +553,12 @@ REG_OP(DynamicGRU)
 
 *@par Outputs:
 *six outputs: \n
-*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 
 *@par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -554,7 +569,7 @@ REG_OP(DynamicGRUV2)
     .INPUT(weight_hidden, TensorType({DT_FLOAT16}))
     .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
     .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index d17e8e94..613ce358 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -1787,6 +1787,9 @@ REG_OP(TileWithAxis)
 
 *@par Outputs:
 *y: A Tensor of the same type as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(ReadSelect)
     .INPUT(x, TensorType::ALL())
@@ -1802,6 +1805,9 @@ REG_OP(ReadSelect)
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(WriteSelect)
     .INPUT(x, TensorType::ALL())
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index 5414f122..edc55820 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -625,6 +625,9 @@ REG_OP(ConfusionTransposeD)
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(ConfusionTranspose)
     .INPUT(x, TensorType::BasicType())
diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h
index 2f014937..c96b96be 100644
--- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h
+++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h
@@ -28,6 +28,9 @@ namespace ge {
 /**
 *@brief Applies a perspective transformation to an image . \n
 
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please do not use. \n
+
 *@par Inputs:
 *@li x: input tensor, format NCHW, type must be float.
 *@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . \n
diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h
index 8e159dd7..8c1a4326 100644
--- a/third_party/fwkacllib/inc/runtime/mem.h
+++ b/third_party/fwkacllib/inc/runtime/mem.h
@@ -159,7 +159,12 @@ typedef struct rtAiCoreMemorySize {
  * @ingroup dvrt_mem
  * @brief memory type
  */
-typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t;
+typedef enum tagRtMemoryType { 
+    RT_MEMORY_TYPE_HOST = 1, 
+    RT_MEMORY_TYPE_DEVICE = 2 , 
+    RT_MEMORY_TYPE_SVM = 3,
+    RT_MEMORY_TYPE_DVPP = 4
+} rtMemoryType_t;
 
 /**
  * @ingroup dvrt_mem
@@ -167,8 +172,8 @@ typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE =
  */
 typedef struct tagRtPointerAttributes {
   rtMemoryType_t memoryType;  // host memory or device memory
+  rtMemoryType_t locationType;
   uint32_t deviceID;          // device ID
-  uint32_t isManaged;
   uint32_t pageSize;
 } rtPointerAttributes_t;
 
diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h
index 87ae8f75..185d2b9c 100644
--- a/third_party/fwkacllib/inc/tdt/status.h
+++ b/third_party/fwkacllib/inc/tdt/status.h
@@ -100,6 +100,8 @@ enum {
   TDT_TSD_SEND_HEARTBEAT_FAILED_CODE,
   TDT_TSD_CLEAN_RESOURCE_FAILED_CODE,
   TDT_TSD_SEND_MSG_FAILED_CODE,
+  TDT_TSD_AICPU_SD_PROCESS_ABNORMAL_CODE,
+  TDT_TSD_CUSTOM_PROCESS_ABNORMAL_CODE,
   TDT_PPC_DRIVER_INIT_FAIL_CODE,
   TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE,
   TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE,
@@ -510,6 +512,8 @@ TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, "
 TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail");
 TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail");
 TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail");
+TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_AICPU_SD_PROCESS_ABNORMAL, "aicpu_sd process abnormal");
+TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_INFO, TDT_TSD_CUSTOM_PROCESS_ABNORMAL, "custom_aicpu_sd process abnormal");
 
 /********************* PPC ****************************/
 // create PPC error level error
diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
new file mode 100644
index 00000000..a1c39a51
--- /dev/null
+++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
@@ -0,0 +1,36 @@
+/**
+* @file adx_datadump_server.h
+*
+* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef ADX_DATADUMP_SERVER_H
+#define ADX_DATADUMP_SERVER_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * @brief initialize server for normal datadump function.
+ * @return
+ *      IDE_DAEMON_OK:    datadump server init success
+ *      IDE_DAEMON_ERROR: datadump server init failed
+ */
+int AdxDataDumpServerInit();
+
+/**
+ * @brief uninitialize server for normal datadump function.
+ * @return
+ *      IDE_DAEMON_OK:    datadump server uninit success
+ *      IDE_DAEMON_ERROR: datadump server uninit failed
+ */
+int AdxDataDumpServerUnInit();
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+

From dcc1768c68ea7b107d2c9a20eb57d431c5b073ca Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Wed, 30 Sep 2020 15:44:56 +0800
Subject: [PATCH 2/7] fix securec download links due to mistakes made by
 openeuler community

---
 cmake/external_libs/securec.cmake          | 4 ++--
 third_party/patch/securec/securec.patch001 | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake
index 83a4409d..2fbf8b80 100644
--- a/cmake/external_libs/securec.cmake
+++ b/cmake/external_libs/securec.cmake
@@ -1,7 +1,7 @@
 graphengine_add_pkg(securec
         VER 1.1.10
-        URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz
-        MD5 0782dd2351fde6920d31a599b23d8c91
+        URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
+        MD5 193f0ca5246c1dd84920db34d2d8249f
         LIBS c_sec
         PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001
         CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release"
diff --git a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001
index 666f28ce..01c2d769 100644
--- a/third_party/patch/securec/securec.patch001
+++ b/third_party/patch/securec/securec.patch001
@@ -1,5 +1,5 @@
-diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt
---- bounds_checking_function/CMakeLists.txt	1970-01-01 08:00:00.000000000 +0800
+diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt
+--- libboundscheck/CMakeLists.txt	1970-01-01 08:00:00.000000000 +0800
 +++ securec/CMakeLists.txt	2020-09-19 16:53:48.689460700 +0800
 @@ -0,0 +1,18 @@
 +cmake_minimum_required(VERSION 3.14)

From fe038d0ae551931b861f7ad27ff579ef378889b0 Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Wed, 30 Sep 2020 16:30:32 +0800
Subject: [PATCH 3/7] add libadump_server support in ge_lib_path mode

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 266ea024..0ae9c88f 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,6 +73,7 @@ elseif(DEFINED ENV{D_LINK_PATH})
     find_library(hccl libhccl.so ${GE_LIB_PATH})
     find_library(resource libresource.so ${GE_LIB_PATH})
     find_library(error_manager liberror_manager.so ${GE_LIB_PATH})
+    find_library(adump_server libadump_server.a ${GE_LIB_PATH})
 else()
     # Ascend mode
     if(DEFINED ENV{ASCEND_CUSTOM_PATH})

From 250465a71f8520b792cceaabc816318b7fcfa552 Mon Sep 17 00:00:00 2001
From: wuweikang <wuweikang@huawei.com>
Date: Fri, 9 Oct 2020 12:33:07 +0800
Subject: [PATCH 4/7] sync-from-trunk-to-blue-zone-1009

---
 CMakeLists.txt                                     |   5 +-
 cmake/external_libs/securec.cmake                  |   4 +-
 inc/framework/common/string_util.h                 |   2 +
 inc/framework/omg/omg_inner_types.h                |   3 +
 inc/graph/debug/ge_attr_define.h                   |   4 +
 inc/graph/op_desc.h                                |   3 +-
 inc/graph/range_vistor.h                           |   4 +
 inc/graph/utils/op_desc_utils.h                    |   1 +
 src/common/graph/detail/attributes_holder.cc       |   2 +-
 src/common/graph/ge_attr_define.cc                 |   4 +
 src/common/graph/node.cc                           |   7 +-
 src/common/graph/op_desc.cc                        |  42 ++-
 src/common/graph/ref_relation.cc                   |   2 +-
 src/common/graph/utils/op_desc_utils.cc            |  47 +++
 src/ge/CMakeLists.txt                              |   6 +-
 src/ge/client/CMakeLists.txt                       |   2 +
 src/ge/client/ge_api.cc                            |   4 +
 src/ge/client/module.mk                            |   6 +-
 src/ge/common/dump/dump_op.cc                      |  20 +-
 src/ge/common/ge/datatype_util.cc                  |   2 +-
 src/ge/common/profiling/profiling_manager.cc       |  32 +-
 src/ge/common/profiling/profiling_manager.h        |   2 +-
 src/ge/common/util.cc                              |   2 +-
 src/ge/executor/CMakeLists.txt                     |   1 +
 src/ge/executor/module.mk                          |   5 +
 src/ge/ge_inference.mk                             |   4 +-
 src/ge/ge_local_engine/CMakeLists.txt              |   2 +-
 src/ge/ge_local_engine/module.mk                   |   2 +-
 src/ge/ge_runner.mk                                |   1 +
 src/ge/generator/ge_generator.cc                   |  11 +-
 src/ge/graph/build/memory/graph_mem_assigner.cc    | 341 +++++++++------------
 src/ge/graph/build/memory/graph_mem_assigner.h     |  11 +-
 src/ge/graph/load/new_model_manager/data_dumper.cc |  17 +-
 src/ge/graph/load/new_model_manager/data_dumper.h  |   1 +
 .../graph/load/new_model_manager/davinci_model.cc  |  15 +-
 .../graph/load/new_model_manager/model_manager.cc  |   2 +-
 src/ge/graph/load/new_model_manager/model_utils.cc |  13 +-
 src/ge/graph/passes/attach_stream_label_pass.cc    |  21 +-
 src/ge/graph/passes/attach_stream_label_pass.h     |   6 +-
 src/ge/graph/passes/enter_pass.cc                  |  60 +++-
 src/ge/graph/passes/enter_pass.h                   |   3 +
 src/ge/graph/preprocess/multi_batch_copy_graph.cc  |  63 +---
 src/ge/graph/preprocess/multi_batch_copy_graph.h   |   2 -
 src/ge/host_cpu_engine/module.mk                   |   2 +-
 src/ge/init/gelib.cc                               |  10 +-
 src/ge/init/gelib.h                                |   2 +-
 src/ge/ir_build/atc_ir_common.cc                   |   2 +-
 src/ge/ir_build/ge_ir_build.cc                     |   6 +
 src/ge/opskernel_manager/ops_kernel_manager.cc     |  14 +-
 src/ge/session/omg.cc                              |  16 +-
 src/ge/single_op/single_op.cc                      |   2 +-
 src/ge/single_op/task/build_task_utils.cc          |   5 +-
 src/ge/single_op/task/op_task.cc                   | 163 +++++-----
 src/ge/single_op/task/op_task.h                    |  27 +-
 third_party/fwkacllib/inc/ops/aipp.h               |  15 +-
 .../fwkacllib/inc/ops/elewise_calculation_ops.h    | 155 +++++++---
 third_party/fwkacllib/inc/ops/functional_ops.h     |  20 +-
 third_party/fwkacllib/inc/ops/image_ops.h          |   7 +-
 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h  | 160 ++++++----
 third_party/fwkacllib/inc/ops/nn_calculation_ops.h |  73 ++++-
 third_party/fwkacllib/inc/ops/nn_detect_ops.h      |  25 +-
 third_party/fwkacllib/inc/ops/nn_norm_ops.h        |  36 +--
 third_party/fwkacllib/inc/ops/nn_pooling_ops.h     |   3 -
 third_party/fwkacllib/inc/ops/nn_training_ops.h    |  39 ---
 third_party/fwkacllib/inc/ops/pad_ops.h            |  12 -
 .../fwkacllib/inc/ops/ragged_conversion_ops.h      |   4 +-
 third_party/fwkacllib/inc/ops/random_ops.h         |   3 -
 third_party/fwkacllib/inc/ops/reduce_ops.h         |  24 --
 third_party/fwkacllib/inc/ops/rnn.h                |   6 +-
 third_party/fwkacllib/inc/ops/save_ops.h           |   2 +-
 third_party/fwkacllib/inc/ops/sdca_ops.h           |  13 +-
 third_party/fwkacllib/inc/ops/selection_ops.h      |  55 +---
 .../fwkacllib/inc/ops/split_combination_ops.h      |  15 +-
 third_party/fwkacllib/inc/ops/transformation_ops.h |  21 --
 third_party/fwkacllib/inc/runtime/base.h           |   5 +-
 third_party/fwkacllib/inc/runtime/config.h         |   4 +-
 third_party/fwkacllib/inc/runtime/context.h        |   4 +-
 third_party/fwkacllib/inc/runtime/dev.h            |   4 +-
 third_party/fwkacllib/inc/runtime/dvfsprofile.h    |   4 +-
 third_party/fwkacllib/inc/runtime/event.h          |   4 +-
 third_party/fwkacllib/inc/runtime/kernel.h         |   4 +-
 third_party/fwkacllib/inc/runtime/mem.h            |   4 +-
 third_party/fwkacllib/inc/runtime/rt_model.h       |   4 +-
 third_party/fwkacllib/inc/runtime/stream.h         |   4 +-
 .../fwkacllib/inc/toolchain/adx_datadump_server.h  |  22 +-
 third_party/patch/securec/securec.patch001         |   4 +-
 86 files changed, 933 insertions(+), 858 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 266ea024..457fa086 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,11 +68,12 @@ elseif(DEFINED ENV{D_LINK_PATH})
     find_library(slog libslog.so ${GE_LIB_PATH})
     find_library(mmpa libmmpa.so ${GE_LIB_PATH})
     find_library(runtime libruntime.so ${GE_LIB_PATH})
-    find_library(msprof libmsprof.so ${GE_LIB_PATH})
+    find_library(msprof libmsprofiler.a ${GE_LIB_PATH})
     find_library(register libregister.so ${GE_LIB_PATH})
     find_library(hccl libhccl.so ${GE_LIB_PATH})
     find_library(resource libresource.so ${GE_LIB_PATH})
     find_library(error_manager liberror_manager.so ${GE_LIB_PATH})
+    find_library(adump_server libadump_server.a ${GE_LIB_PATH})
 else()
     # Ascend mode
     if(DEFINED ENV{ASCEND_CUSTOM_PATH})
@@ -84,7 +85,7 @@ else()
     set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
     find_library(slog libslog.so ${ASCEND_DRIVER_DIR})
     find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR})
-    find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR})
+    find_library(msprof libmsprofiler.a ${ASCEND_RUNTIME_DIR})
 
     find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
     find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake
index 83a4409d..2fbf8b80 100644
--- a/cmake/external_libs/securec.cmake
+++ b/cmake/external_libs/securec.cmake
@@ -1,7 +1,7 @@
 graphengine_add_pkg(securec
         VER 1.1.10
-        URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz
-        MD5 0782dd2351fde6920d31a599b23d8c91
+        URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
+        MD5 193f0ca5246c1dd84920db34d2d8249f
         LIBS c_sec
         PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001
         CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release"
diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h
index 918a3950..3e4bf093 100644
--- a/inc/framework/common/string_util.h
+++ b/inc/framework/common/string_util.h
@@ -61,8 +61,10 @@ class StringUtils {
   ///  @param [in] delim  separator
   ///  @return string array after segmentation
   ///
+  /*lint -e1077*/
   static std::vector<std::string> Split(const std::string &str, char delim) {
     std::vector<std::string> elems;
+    /*lint +e1077*/
 
     if (str.empty()) {
       elems.emplace_back("");
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index 2f91d7aa..e1a7da0b 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -92,6 +92,9 @@ struct OmgContext {
   std::map<std::string, std::vector<int32_t>> out_nodes_map;
   // user-designate out nodes (this is used for determing the orders)
   std::vector<std::pair<std::string, int32_t>> user_out_nodes;
+  // save the output node of the network, value = topName,
+  // topName indicates the output name of the operator.
+  std::vector<std::string> user_out_nodes_top_vec;
   // net out nodes (where user_out_nodes or leaf nodes)
   std::vector<std::string> net_out_nodes;
   // net out nodes top names(only caffe has top)
diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h
index 7538ba6a..47b11ba8 100644
--- a/inc/graph/debug/ge_attr_define.h
+++ b/inc/graph/debug/ge_attr_define.h
@@ -1052,6 +1052,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE;
 
+// op dynamic input
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_START;
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_END;
+
 // functional ops attr
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH;
diff --git a/inc/graph/op_desc.h b/inc/graph/op_desc.h
index c7da30b7..4d724c42 100644
--- a/inc/graph/op_desc.h
+++ b/inc/graph/op_desc.h
@@ -235,7 +235,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {
   vector<string> GetOpInferDepends() const;
 
   string GetInputNameByIndex(uint32_t index) const;
-
+  string GetValidInputNameByIndex(uint32_t index) const;
+  int GetValidInputIndexByName(const string &name) const;
   int GetInputIndexByName(const string &name) const;
 
   string GetOutputNameByIndex(uint32_t index) const;
diff --git a/inc/graph/range_vistor.h b/inc/graph/range_vistor.h
index 20905bd9..8635d413 100644
--- a/inc/graph/range_vistor.h
+++ b/inc/graph/range_vistor.h
@@ -22,8 +22,10 @@
 template <class E, class O>
 class RangeVistor {
  public:
+  /*lint -e151*/
   using Iterator = typename std::vector<E>::iterator;
   using ConstIterator = typename std::vector<E>::const_iterator;
+  /*lint +e151*/
 
   RangeVistor(O owner, const std::vector<E> &vs) : owner_(owner), elements_(vs) {}
 
@@ -41,7 +43,9 @@ class RangeVistor {
 
   bool empty() const { return elements_.empty(); }
 
+  /*lint -e659*/
   E &at(std::size_t index) { return elements_.at(index); }
+  /*lint +e659*/
 
   const E &at(std::size_t index) const { return elements_.at(index); }
 
diff --git a/inc/graph/utils/op_desc_utils.h b/inc/graph/utils/op_desc_utils.h
index 6a9a4695..daa95ebe 100644
--- a/inc/graph/utils/op_desc_utils.h
+++ b/inc/graph/utils/op_desc_utils.h
@@ -53,6 +53,7 @@ class OpDescUtils {
   static vector<GeTensorPtr> MutableWeights(const ge::NodePtr node);
   static graphStatus SetWeights(ge::Node& node, const vector<ge::GeTensorPtr>& weights);
   static graphStatus SetWeights(ge::NodePtr node, const vector<ge::GeTensorPtr>& weights);
+  static graphStatus SetWeights(ge::Node& node, const map<int, ge::GeTensorPtr>& weights_map);
   static graphStatus ClearWeights(ge::NodePtr node);
 
   static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index);
diff --git a/src/common/graph/detail/attributes_holder.cc b/src/common/graph/detail/attributes_holder.cc
index 113f4b6f..7e3b6de9 100644
--- a/src/common/graph/detail/attributes_holder.cc
+++ b/src/common/graph/detail/attributes_holder.cc
@@ -28,7 +28,7 @@ using std::unordered_set;
 void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) { MutableAttrMap().CopyValueFrom(holder.GetAttrMap()); }
 graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) {
   if (value.IsEmpty()) {
-    GELOGE(GRAPH_FAILED, "value is empty, key %s", name.c_str());
+    GELOGE(GRAPH_FAILED, "value is empty, key of the attr is %s", name.c_str());
     return GRAPH_FAILED;
   }
   auto proto_map = MutableAttrMap().GetProtoMsg();
diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc
index cd504812..9b723bb3 100644
--- a/src/common/graph/ge_attr_define.cc
+++ b/src/common/graph/ge_attr_define.cc
@@ -1060,6 +1060,10 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node";
 const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr";
 const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index";
 
+// op dynamic input
+const std::string ATTR_NAME_DYNAMIC_INPUT_START = "_dynamic_input_index_start";
+const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end";
+
 // atc user def dtype&format
 const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type";
 const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format";
diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc
index 10d6b3ed..d33c6008 100644
--- a/src/common/graph/node.cc
+++ b/src/common/graph/node.cc
@@ -762,9 +762,10 @@ graphStatus Node::Verify() const {
   if (!is_unknown_graph) {
     for (const auto &in_anchor_ptr : GetAllInDataAnchors()) {
       GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue);
-      bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type ||
-                          op_->GetType() == const_type || op_->GetType() == variable_type ||
-                          op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0;
+      bool valid_anchor =
+        op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type ||
+        op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) ||
+        op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr || in_anchor_ptr->GetPeerAnchors().size() > 0;
       if (!valid_anchor) {
         ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"},
                                                         {GetName(), std::to_string(in_anchor_ptr->GetIdx())});
diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc
index fdd1acb7..dee0aece 100644
--- a/src/common/graph/op_desc.cc
+++ b/src/common/graph/op_desc.cc
@@ -347,7 +347,10 @@ graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorD
 
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
 OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) {
-  GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index);
+  if (index >= inputs_desc_.size()) {
+    GELOGW("The index is invalid. index[%u]", index);
+    return GRAPH_FAILED;
+  }
 
   inputs_desc_[index] = ComGraphMakeShared<GeTensorDesc>(tensor_Desc);
   if (inputs_desc_[index] == nullptr) {
@@ -949,6 +952,43 @@ int OpDesc::GetInputIndexByName(const string &name) const {
   return static_cast<int>(it_find->second);
 }
 
+int OpDesc::GetValidInputIndexByName(const string &name) const {  // position of `name` among non-null input descs
+  map<string, uint32_t> valid_input_name_idx{};  // input name -> position counted over non-null input descs only
+  uint32_t j = 0;  // number of non-null input descs seen so far
+  for (size_t i = 0; i < GetAllInputsSize(); i++) {
+    if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) {  // inputs whose desc is null are skipped
+      auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i));
+      GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), -1);  // presumably returns -1 on an empty name - confirm macro
+      valid_input_name_idx.insert({valid_name, j});  // assuming std::map: insert() keeps the first entry on repeats
+      j++;
+    }
+  }
+  auto it_find = valid_input_name_idx.find(name);
+  GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != valid_input_name_idx.end(), -1);  // presumably -1 when name is not found
+  return static_cast<int>(it_find->second);
+}
+
+string OpDesc::GetValidInputNameByIndex(uint32_t index) const {  // name of the index-th input with a non-null desc
+  map<string, uint32_t> valid_input_name_idx{};  // input name -> position counted over non-null input descs only
+  uint32_t j = 0;  // number of non-null input descs seen so far
+  for (size_t i = 0; i < GetAllInputsSize(); i++) {
+    if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) {  // inputs whose desc is null are skipped
+      auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i));
+      GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), "");  // presumably returns "" on an empty name - confirm macro
+      valid_input_name_idx.insert({valid_name, j});  // assuming std::map: insert() keeps the first entry on repeats
+      j++;
+    }
+  }
+  auto it = valid_input_name_idx.begin();
+  for (; it != valid_input_name_idx.end(); ++it) {  // reverse lookup: linear scan for the entry whose value is index
+    if (it->second == index) {
+      break;
+    }
+  }
+  GE_CHK_BOOL_RET_STATUS_NOLOG(it != valid_input_name_idx.end(), "");  // presumably "" when no entry matches index
+  return it->first;
+}
+
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const {
   auto it = output_name_idx_.begin();
   for (; it != output_name_idx_.end(); ++it) {
diff --git a/src/common/graph/ref_relation.cc b/src/common/graph/ref_relation.cc
index 9a9f66ba..48e136fb 100644
--- a/src/common/graph/ref_relation.cc
+++ b/src/common/graph/ref_relation.cc
@@ -56,7 +56,7 @@ class RefRelations::Impl {
       }
       return GRAPH_SUCCESS;
     }
-    GELOGW("can not find any relations! key value is %s", lookup_key.c_str());
+    GELOGW("can not find any relations! key value of dest relation is %s", lookup_key.c_str());
     return GRAPH_SUCCESS;
   };
   graphStatus BuildRefRelations(ge::ComputeGraph &root_graph);
diff --git a/src/common/graph/utils/op_desc_utils.cc b/src/common/graph/utils/op_desc_utils.cc
index 63fff177..17c80b2c 100644
--- a/src/common/graph/utils/op_desc_utils.cc
+++ b/src/common/graph/utils/op_desc_utils.cc
@@ -560,6 +560,53 @@ OpDescUtils::SetWeights(ge::Node &node, const vector<ge::GeTensorPtr> &weights)
   return GRAPH_SUCCESS;
 }
 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
+OpDescUtils::SetWeights(ge::Node &node, const map<int, ge::GeTensorPtr> &weights_map) {
+  GE_CHECK_NOTNULL(node.GetOpDesc());  // weights_map: input index of node -> weight tensor for that input
+  // 1. node is const: the map must hold exactly CONST_OP_NORMAL_WEIGHT_SIZE entries; its first weight goes on the node
+  if (node.GetOpDesc()->GetType() == CONSTANT) {
+    if (weights_map.size() == CONST_OP_NORMAL_WEIGHT_SIZE) {
+      return SetWeights(node.GetOpDesc(), weights_map.begin()->second);  // the map key is not used in this case
+    }
+    GELOGE(GRAPH_PARAM_INVALID, "const op %s weight size %zu should be 1", node.GetName().c_str(), weights_map.size());
+    return GRAPH_PARAM_INVALID;
+  }
+  // 2. node is not const: handle every (input index, weight) pair against the matching in-data anchor
+  for (const auto &pair : weights_map) {
+    auto in_data_anchor = node.GetInDataAnchor(pair.first);
+    if (in_data_anchor != nullptr && in_data_anchor->GetPeerOutAnchor() != nullptr) {
+      // a. the input is already linked: write the weight into the peer (expected const) node
+      auto out_anchor = in_data_anchor->GetPeerOutAnchor();
+      auto peer_node = out_anchor->GetOwnerNode();
+      if (peer_node == nullptr) {
+        GELOGE(GRAPH_PARAM_INVALID, "op %s [%d]'s input node is null", node.GetName().c_str(), pair.first);
+        return GRAPH_PARAM_INVALID;
+      }
+      if (peer_node->GetType() != CONSTANT) {
+        GELOGE(GRAPH_PARAM_INVALID, " op %s [%d]'s input node should be const, but is %s type:%s ",
+               node.GetName().c_str(), pair.first, peer_node->GetName().c_str(), peer_node->GetType().c_str());
+      }  // NOTE(review): the error above is only logged, there is no return - confirm this is intended
+      SetWeights(peer_node->GetOpDesc(), pair.second);  // NOTE(review): returned status is ignored - confirm
+    } else {
+      // b. no anchor or no peer on this input: create a new const node and link it to the input
+      auto const_opdesc = CreateConstOp(pair.second);
+      GE_CHECK_NOTNULL(const_opdesc);
+      auto owner_graph = node.GetOwnerComputeGraph();
+      if (owner_graph == nullptr) {
+        GELOGE(GRAPH_PARAM_INVALID, "node's graph is empty, name: %s", node.GetName().c_str());
+        return GRAPH_PARAM_INVALID;
+      }
+      auto const_node = owner_graph->AddNodeFront(const_opdesc);  // not null-checked here; passed on to AddLinkFrom
+      if (node.AddLinkFrom(static_cast<uint32_t>(pair.first), const_node) != GRAPH_SUCCESS) {
+        GELOGE(GRAPH_FAILED, "op %s add const to input index[%d] failed", node.GetName().c_str(), pair.first);
+        return GRAPH_FAILED;
+      }
+    }
+  }
+  NodeUtils::UpdateIsInputConst(node);  // called once, after all pairs have been processed
+  return GRAPH_SUCCESS;
+}
+
 OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) {
   GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!");
   shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>();
diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt
index db00d8a1..3f4f1a8b 100755
--- a/src/ge/CMakeLists.txt
+++ b/src/ge/CMakeLists.txt
@@ -229,6 +229,7 @@ target_link_libraries(ge_runner
         ${resouce}
         ${ascend_hal}
         ${adump_server}
+        ${msprofiler}
         rt
         dl)
 
@@ -358,7 +359,10 @@ add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HD
 target_compile_definitions(ge_compiler PRIVATE
         PROTOBUF_INLINE_NOT_IN_HEADERS=0
         REUSE_MEMORY=1
-        FMK_HOST_INFER)
+        FMK_HOST_INFER
+        FMK_SUPPORT_DUMP
+        COMPILE_OMG_PACKAGE
+        REUSE_MEMORY=1)
 target_link_libraries(ge_compiler
         graph
         ge_common
diff --git a/src/ge/client/CMakeLists.txt b/src/ge/client/CMakeLists.txt
index a87beb77..b568e3f6 100755
--- a/src/ge/client/CMakeLists.txt
+++ b/src/ge/client/CMakeLists.txt
@@ -68,5 +68,7 @@ target_link_libraries(ge_client
         ${mmpa}
         ${runtime}
         ${msprof}
+        ${msprofiler}
+        ${ascend_hal}
         rt
         dl)
diff --git a/src/ge/client/ge_api.cc b/src/ge/client/ge_api.cc
index ad01e48f..7c4cf9c8 100644
--- a/src/ge/client/ge_api.cc
+++ b/src/ge/client/ge_api.cc
@@ -16,6 +16,7 @@
 
 #include "ge/ge_api.h"
 #include <iostream>
+#include <malloc.h>
 #include "common/debug/log.h"
 #include "framework/common/debug/ge_log.h"
 #include "common/ge/datatype_util.h"
@@ -163,6 +164,9 @@ Status GEFinalize() {
     g_ge_initialized = false;
   }
 
+  // to reduce memory fragmentation, use malloc_trim to return free heap memory to the system
+  malloc_trim(0);
+
   GELOGT(TRACE_STOP, "GEFinalize finished");
   return ret;
 }
diff --git a/src/ge/client/module.mk b/src/ge/client/module.mk
index 1a304cbf..476841c9 100644
--- a/src/ge/client/module.mk
+++ b/src/ge/client/module.mk
@@ -70,9 +70,10 @@ LOCAL_SHARED_LIBRARIES := \
     libregister \
     libge_compiler \
     libge_common \
-    libmsprof
-
+    libmsprof \
+    stub/libascend_hal
 
+LOCAL_STATIC_LIBRARIES := libmsprofiler
 
 LOCAL_LDFLAGS := -lrt -ldl
 
@@ -107,6 +108,7 @@ LOCAL_SHARED_LIBRARIES := \
     libge_common \
     libmsprof
 
+LOCAL_STATIC_LIBRARIES := libmsprofiler
 
 LOCAL_LDFLAGS := -lrt -ldl
 LOCAL_CFLAGS += \
diff --git a/src/ge/common/dump/dump_op.cc b/src/ge/common/dump/dump_op.cc
index 31a88023..8c4ff330 100644
--- a/src/ge/common/dump/dump_op.cc
+++ b/src/ge/common/dump/dump_op.cc
@@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
     return RT_FAILED;
   }
 
-  constexpr int32_t ioAddrNum = 2;
-  constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t);
-  char args[argsSize] = {0};
-  auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args);
-  paramHead->length = argsSize;
-  paramHead->ioAddrNum = ioAddrNum;
-  auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead));
-  ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_);
-  ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_);
+  constexpr int32_t io_addr_num = 2;
+  constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t);
+  char args[args_size] = {0};
+  auto param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args);
+  param_head->length = args_size;
+  param_head->ioAddrNum = io_addr_num;
+  auto io_addr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead));
+  io_addr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_);
+  io_addr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_);
   rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp,
                              1,  // blockDim default 1
-                             args, argsSize,
+                             args, args_size,
                              nullptr,  // no need smDesc
                              stream_);
   if (rt_ret != RT_ERROR_NONE) {
diff --git a/src/ge/common/ge/datatype_util.cc b/src/ge/common/ge/datatype_util.cc
index f2ff12cb..79a473fe 100644
--- a/src/ge/common/ge/datatype_util.cc
+++ b/src/ge/common/ge/datatype_util.cc
@@ -34,7 +34,7 @@ std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_ty
   {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}},
   {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}};
 
-static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = {
+std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = {
   // key:ge datatype,value:proto datatype
   {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED},
   {ge::DT_FLOAT, ge::proto::DT_FLOAT},
diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc
index d02f7e8f..9492045c 100644
--- a/src/ge/common/profiling/profiling_manager.cc
+++ b/src/ge/common/profiling/profiling_manager.cc
@@ -51,12 +51,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana
   return profiling_manager;
 }
 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options,
-                                                                                   bool convert_2_phy_device_id) {
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   vector<int32_t>().swap(device_id_);
   job_id_ = options.job_id;
 
+  GELOGI("ProfilingManager::Init  job_id:%s", job_id_.c_str());
+
   Status ret;
   if (!recv_profiling_config_.empty()) {
     GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str());
@@ -64,18 +65,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
   } else {
     ret = InitFromOptions(options);
     if (ret == SUCCESS && is_load_profiling_) {
-      // profiling need phy device id
-      if (!convert_2_phy_device_id) {
-        device_id_.push_back(options.device_id);
-      } else {
-        uint32_t phy_device_id = 0;
-        rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id);
-        if (rt_ret != RT_ERROR_NONE) {
-          GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
-          return FAILED;
-        }
-        device_id_.push_back(phy_device_id);
-      }
+      device_id_.push_back(options.device_id);
     }
   }
   if (ret != SUCCESS) {
@@ -557,25 +547,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
     return;
   }
   GELOGI("current logic_device_id:%d", logic_device_id);
-
-  uint32_t phy_device_id = 0;
-  rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
-    return;
-  }
-  GELOGI("current phy_device_id:%d", phy_device_id);
   if (!is_acl_api_mode_) {
-    auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
+    auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
     if (ret == device_id_.end()) {
       GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
       return;
     }
   }
   GELOGI("start ProfilingTaskDescInfo.");
-  ProfilingTaskDescInfo(task_desc_info, phy_device_id);
+  ProfilingTaskDescInfo(task_desc_info, logic_device_id);
   GELOGI("start ProfilingGraphDescInfo.");
-  ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id);
+  ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id);
   GELOGI("Report profiling data for GE end.");
 #endif
 }
diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h
index f4249451..a030efd3 100644
--- a/src/ge/common/profiling/profiling_manager.h
+++ b/src/ge/common/profiling/profiling_manager.h
@@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   ProfilingManager();
   virtual ~ProfilingManager();
   static ProfilingManager &Instance();
-  ge::Status Init(const Options &options, bool convert_2_phy_device_id = false);
+  ge::Status Init(const Options &options);
   ge::Status InitFromOptions(const Options &options);
   ge::Status InitFromAclCfg(const std::string &config);
   ge::Status StartProfiling(int32_t iter, int32_t device_id);
diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc
index ce5aa57e..4adf3ebd 100644
--- a/src/ge/common/util.cc
+++ b/src/ge/common/util.cc
@@ -472,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str
     return true;
   }
 
-  ret = regexec(&reg, str.c_str(), 0, nullptr, 0);
+  ret = regexec(&reg, str.c_str(), 0, NULL, 0);
   if (ret) {
     regerror(ret, &reg, ebuff, kMaxBuffSize);
     GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff);
diff --git a/src/ge/executor/CMakeLists.txt b/src/ge/executor/CMakeLists.txt
index 7358585a..b68507bd 100755
--- a/src/ge/executor/CMakeLists.txt
+++ b/src/ge/executor/CMakeLists.txt
@@ -120,6 +120,7 @@ target_link_libraries(ge_executor
         ${mmpa}
         ${msprof}
         ${error_manager}
+        ${ascend_hal}
         rt
         dl)
 
diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk
index bb642da9..1c3efe4c 100644
--- a/src/ge/executor/module.mk
+++ b/src/ge/executor/module.mk
@@ -89,6 +89,7 @@ local_ge_executor_shared_library :=        \
     libregister                            \
     libmsprof                              \
     liberror_manager                       \
+    libascend_hal
 
 local_ge_executor_ldflags := -lrt -ldl     \
 
@@ -104,6 +105,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files)
 LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
 
 LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library)
+LOCAL_STATIC_LIBRARIES := libmsprofiler
 ifeq ($(device_os),android)
 LOCAL_LDFLAGS += -ldl
 LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
@@ -140,6 +142,9 @@ LOCAL_SHARED_LIBRARIES :=                  \
     libregister                            \
     libmsprof                              \
     liberror_manager                       \
+    stub/libascend_hal
+
+LOCAL_STATIC_LIBRARIES := libmsprofiler
 
 LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
 
diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk
index 232e79ec..621e42c5 100644
--- a/src/ge/ge_inference.mk
+++ b/src/ge/ge_inference.mk
@@ -355,7 +355,7 @@ LOCAL_MODULE := libge_compiler
 
 LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
 # from ome_inference.mk
-LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP
+LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE
 ifeq ($(DEBUG), 1)
 LOCAL_CFLAGS += -g -O0
 endif
@@ -418,7 +418,7 @@ include $(CLEAR_VARS)
 LOCAL_MODULE := libge_compiler
 LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE
 LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
-LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP
+LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE
 LOCAL_CFLAGS += -DOMG_DEVICE_VERSION
 LOCAL_CFLAGS += -O2
 LOCAL_MODULE_CLASS := SHARED_LIBRARIES
diff --git a/src/ge/ge_local_engine/CMakeLists.txt b/src/ge/ge_local_engine/CMakeLists.txt
index e685c301..bcbc3e4c 100755
--- a/src/ge/ge_local_engine/CMakeLists.txt
+++ b/src/ge/ge_local_engine/CMakeLists.txt
@@ -42,7 +42,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge)
 
 ######### libge_local_engine.so #############
 add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
-target_compile_definitions(ge_local_engine PRIVATE Werror)
+target_compile_definitions(ge_local_engine PRIVATE Werror COMPILE_OMG_PACKAGE)
 target_link_libraries(ge_local_engine
         graph
         ${PROTOBUF_LIBRARY}
diff --git a/src/ge/ge_local_engine/module.mk b/src/ge/ge_local_engine/module.mk
index ee6b15c1..3307f780 100644
--- a/src/ge/ge_local_engine/module.mk
+++ b/src/ge/ge_local_engine/module.mk
@@ -42,7 +42,7 @@ include ${BUILD_HOST_SHARED_LIBRARY}
 include $(CLEAR_VARS)
 LOCAL_MODULE := atclib/libge_local_engine
 LOCAL_CFLAGS += -Werror
-LOCAL_CFLAGS += -std=c++11
+LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE
 LOCAL_LDFLAGS :=
 
 LOCAL_STATIC_LIBRARIES :=
diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk
index 04182070..956bab0b 100644
--- a/src/ge/ge_runner.mk
+++ b/src/ge/ge_runner.mk
@@ -356,6 +356,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)
 
 LOCAL_STATIC_LIBRARIES := libge_memory \
                           libadump_server \
+                          libmsprofiler \
 
 LOCAL_SHARED_LIBRARIES := \
     libc_sec \
diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc
index edd7a155..bef93333 100644
--- a/src/ge/generator/ge_generator.cc
+++ b/src/ge/generator/ge_generator.cc
@@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen
                         bool attr) {
   GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
   GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
+
+  auto format = tensor.GetFormat();
+  auto data_type = tensor.GetDataType();
+  if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) {
+    return SUCCESS;
+  }
+
   string op_type;
   if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) {
     op_type = DATA;
@@ -521,8 +528,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
                                   const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
                                   bool is_offline) {
   GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
-  if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) {
-    GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize());
+  if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) {
+    GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize());
     return PARAM_INVALID;
   }
   if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) {
diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc
index 1518714f..1cdb2efa 100644
--- a/src/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/src/ge/graph/build/memory/graph_mem_assigner.cc
@@ -322,11 +322,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
           GELOGE(ge::FAILED,
                  "There is an atomic conflict between the current node and the peer out node, not supported!");
           return ge::FAILED;
-        } else if (is_loop_graph) {
-          GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start));
-        } else {
-          GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}),
-                            "SetAtomicCleanAttr failed.");
+        }
+
+        const auto &in_control_anchor = node->GetInControlAnchor();
+        GE_CHECK_NOTNULL(in_control_anchor);
+        for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
+          auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
+          if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
+            ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size});
+            if (ret != SUCCESS) {
+              GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
+              return ret;
+            }
+          }
         }
       }
     }
@@ -840,68 +848,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt
 }
 
 Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
-  GE_CHECK_NOTNULL(compute_graph_);
-  // Atomic op memory start addr
-  int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
-  GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_);
-
-  vector<NodePtr> connect_netoutput_nodes;
-  for (auto &node : compute_graph_->GetAllNodes()) {
-    auto node_op_desc = node->GetOpDesc();
-    if (node_op_desc == nullptr) {
-      continue;
-    }
-
-    bool is_atomic = false;
-    // If GetBool fail, is_atomic is false.
-    (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
-    if (!is_atomic) {
-      continue;
-    }
-
-    bool is_ref = false;
-    // If GetBool fail, is_ref is false.
-    (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref);
-    if (is_ref) {
-      GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.",
-             node_op_desc->GetName().c_str());
-      return ge::PARAM_INVALID;
-    }
-
-    vector<int> is_connect_netoutput;
-    // If GetBool fail, attr is_connect_netoutput is an empty vector.
-    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
-    if (!is_connect_netoutput.empty()) {
-      connect_netoutput_nodes.emplace_back(node);
-      continue;
-    }
+  map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map;
+  vector<NodePtr> connecting_output_atomic_nodes;
+  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
+  if (status != SUCCESS) {
+    GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
+    return status;
+  }
 
-    // Atomic op memory start addr of loop graph
-    int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
-    vector<int64_t> mem_offset_end;
-    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
-      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
-      return FAILED;
-    }
+  for (auto &iter : normal_atomic_and_clean_nodes_map) {
+    int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
+    GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
 
-    /// In networks with loop op, atomic op uses atomic_addr_clean op independently,
-    /// so we need to set the attr separately.
-    if (is_loop_graph) {
-      GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start));
+    for (auto &atomic_node : iter.second) {
+      vector<int64_t> mem_offset_end;
+      status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
+      if (status != SUCCESS) {
+        GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
+               atomic_node->GetName().c_str());
+        return status;
+      }
     }
-  }
 
-  // In networks without loop op, the same atomic addr clean op is used for atomic op
-  if (!is_loop_graph) {
-    // Set the address attr of atomic clean operator
-    int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
-    if (atomic_mem_size != 0) {
-      GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}),
-                        "SetAtomicCleanAttr failed.");
+    int64_t atomic_mem_size = static_cast<int64_t>(memory_offset_[0].mem_offset_) - atomic_mem_start;
+    status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size});
+    if (status != SUCCESS) {
+      GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
+      return status;
     }
   }
 
-  if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) {
+  if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) {
     GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
     return FAILED;
   }
@@ -909,6 +886,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
   return SUCCESS;
 }
 
+/// @brief Group atomic nodes by the AtomicAddrClean node controlling them, separating output-connected ones.
+/// @param [out] normal_atomic_nodes_map  AtomicAddrClean node -> atomic nodes without ATTR_NAME_NODE_CONNECT_OUTPUT
+/// @param [out] connecting_output_atomic_nodes  atomic nodes that carry ATTR_NAME_NODE_CONNECT_OUTPUT
+/// @return SUCCESS, or PARAM_INVALID when an atomic node is also marked ATTR_NAME_REFERENCE
+Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
+                                                             vector<NodePtr> &connecting_output_atomic_nodes) {
+  GE_CHECK_NOTNULL(compute_graph_);
+  for (const auto &node : compute_graph_->GetAllNodes()) {
+    if (node->GetType() == ATOMICADDRCLEAN) {
+      vector<NodePtr> tmp_normal_atomic_nodes;
+      const auto &out_control_anchor = node->GetOutControlAnchor();
+      GE_CHECK_NOTNULL(out_control_anchor);
+      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
+        if (peer_in_control_anchor != nullptr) {
+          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
+          auto peer_in_node_desc = (peer_in_node != nullptr) ? peer_in_node->GetOpDesc() : nullptr;
+          if (peer_in_node_desc != nullptr) {
+            // A failed attribute lookup below leaves the flag false / the list empty.
+            bool is_atomic_node = false;
+            (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
+            if (is_atomic_node) {
+              bool is_reference = false;
+              (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
+              if (is_reference) {
+                GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.",
+                       peer_in_node_desc->GetName().c_str());
+                return ge::PARAM_INVALID;
+              }
+              vector<int> is_connecting_output;
+              (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
+              if (!is_connecting_output.empty()) {
+                // Output-connected: record it, discard nodes gathered for this clean node, stop scanning.
+                connecting_output_atomic_nodes.emplace_back(peer_in_node);
+                tmp_normal_atomic_nodes.clear();
+                break;
+              }
+              tmp_normal_atomic_nodes.emplace_back(peer_in_node);
+            }
+          }
+        }
+      }
+      if (!tmp_normal_atomic_nodes.empty()) {
+        normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes;
+      }
+    }
+  }
+  return SUCCESS;
+}
+
 Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                  vector<int64_t> &mem_offset_end) {
   auto node_op_desc = node->GetOpDesc();
@@ -1331,6 +1357,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
   vector<int64_t> memory_type;
   auto tmp_op_desc = node->GetOpDesc();
   origin_input_list = tmp_op_desc->GetInputOffset();
+  int64_t valid_input_index = 0;
   bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
   for (const auto &anchor : node->GetAllInDataAnchors()) {
     vector<int64_t> output_list;
@@ -1344,8 +1371,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
     auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
     GE_CHECK_NOTNULL(last_peer_out_op_desc);
     output_list = last_peer_out_op_desc->GetOutputOffset();
-    if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
-      auto input_index = anchor->GetIdx();
+    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
+    if (output_list.size() > static_cast<size_t>(out_index)) {
+      int64_t input_offset = output_list.at(out_index);
       if (has_mem_type_attr) {
         auto input_size = tmp_op_desc->GetInputsSize();
         auto ori_input_offset_list_size = origin_input_list.size();
@@ -1359,26 +1387,21 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
         }
         // not hbm keep orignal inputoffest
         // hbm inputoffset = original inputoffset + outputoffset
-        input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1
-                                  ? origin_input_list[input_index]
-                                  : origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx()));
-        GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]",
-               tmp_op_desc->GetName().c_str(), input_index,
-               peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
-               input_list.back());
-      } else {
-        int64_t output_offset = output_list.at(peer_out_anchor->GetIdx());
-        const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
-        if (in_node->GetType() == CONSTANT) {
-          GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index);
-          GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset));
-        }
-
-        GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(),
-               input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
-               output_offset);
-        input_list.emplace_back(output_offset);
+        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1
+                          ? origin_input_list[valid_input_index]
+                          : origin_input_list[valid_input_index] + output_list.at(out_index));
+      }
+      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
+      if (in_node->GetType() == CONSTANT) {
+        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
+        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
       }
+
+      GELOGI("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
+             has_mem_type_attr ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index,
+             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset);
+      input_list.emplace_back(input_offset);
+      valid_input_index++;
     }
   }
   return ge::SUCCESS;
@@ -1473,125 +1496,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in
   return SUCCESS;
 }
 
-Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) {
-  // set the address attr of atomic clean operator for loop graph
-  int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
-  GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.",
-         atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_);
-  const auto &in_control_anchor = node->GetInControlAnchor();
-  if (atomic_mem_size != 0 && in_control_anchor != nullptr) {
-    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
-      if (peer_out_control_anchor == nullptr) {
-        continue;
-      }
-      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
-      auto peer_out_node_desc = peer_out_node->GetOpDesc();
-      if (peer_out_node_desc == nullptr) {
-        continue;
-      }
-
-      GELOGD("SetLoopGraphAtomicAttr,  node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(),
-             peer_out_node_desc->GetType().c_str());
-
-      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
-        GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}),
-                           GELOGE(FAILED, "SetAtomicCleanAttr failed.");
-                           return FAILED);
-      }
-    }
-  }
-  return SUCCESS;
-}
-
-ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node,
-                                                         bool &is_independent_atomic_clean_node) {
-  GE_CHECK_NOTNULL(node);
-  const auto &out_control_anchor = node->GetOutControlAnchor();
-  GE_CHECK_NOTNULL(out_control_anchor);
-  for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
-    if (peer_in_control_anchor != nullptr) {
-      auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
-      auto peer_in_node_desc = peer_in_node->GetOpDesc();
-      if (peer_in_node_desc != nullptr) {
-        bool is_atomic_node = false;
-        // If GetBool fail, is_atomic_node is false.
-        (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
-        if (is_atomic_node) {
-          vector<int> is_connect_netoutput;
-          // If GetBool fail, attr is_connect_netoutput is an empty vector.
-          (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
-          if (!is_connect_netoutput.empty()) {
-            GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str());
-            is_independent_atomic_clean_node = true;
-            break;
-          }
-        }
-      }
-    }
-  }
-
-  return SUCCESS;
-}
-
-ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start,
+ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                    const vector<int64_t> &atomic_mem_size) {
-  for (ge::NodePtr &node : compute_graph_->GetAllNodes()) {
-    auto node_op_desc = node->GetOpDesc();
-    GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
-
-    bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName());
-
-    if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) {
-      bool is_independent_atomic_clean = false;
-      if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) {
-        GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node.");
-        return PARAM_INVALID;
-      }
-
-      is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean);
+  auto node_op_desc = node->GetOpDesc();
+  if (node_op_desc != nullptr) {
+    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
+    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
+    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
+    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
+    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
+    node_op_desc->SetWorkspace(workspace_vector);
+    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
+
+    std::vector<int64_t> mem_start_vector;
+    // If GetListInt fail, mem_start_vector is empty.
+    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
+    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
+    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
+                     GELOGE(FAILED, "SetListInt failed.");
+                     return FAILED);
+
+    std::vector<int64_t> mem_size_vector;
+    // If GetListInt fail, mem_size_vector is empty.
+    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
+    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
+    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
+                     GELOGE(FAILED, "SetListInt failed.");
+                     return FAILED);
+
+    std::stringstream ss;
+    for (auto iter : atomic_mem_start) {
+      ss << iter << " ";
     }
-
-    if (is_valid_atomic_clean_node) {
-      GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
-      vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
-      vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
-      workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
-      workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
-      node_op_desc->SetWorkspace(workspace_vector);
-      node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
-
-      std::vector<int64_t> mem_start_vector;
-      // If GetListInt fail, mem_start_vector is empty.
-      (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
-      mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
-      GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
-                       GELOGE(FAILED, "SetListInt failed.");
-                       return FAILED);
-
-      std::vector<int64_t> mem_size_vector;
-      // If GetListInt fail, mem_size_vector is empty.
-      (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
-      mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
-      GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
-                       GELOGE(FAILED, "SetListInt failed.");
-                       return FAILED);
-
-      std::stringstream ss;
-      for (auto iter : atomic_mem_start) {
-        ss << iter << " ";
-      }
-      string atomic_mem_start_str = ss.str();
-      ss.clear();
-      ss.str("");
-      for (auto iter : atomic_mem_size) {
-        ss << iter << " ";
-      }
-      string atomic_mem_size_str = ss.str();
-
-      GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
-             node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
-             atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
+    string atomic_mem_start_str = ss.str();
+    ss.clear();
+    ss.str("");
+    for (auto iter : atomic_mem_size) {
+      ss << iter << " ";
     }
+    string atomic_mem_size_str = ss.str();
+
+    GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
+           node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
+           atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
   }
   return SUCCESS;
 }
diff --git a/src/ge/graph/build/memory/graph_mem_assigner.h b/src/ge/graph/build/memory/graph_mem_assigner.h
index e1e408be..201e6d01 100644
--- a/src/ge/graph/build/memory/graph_mem_assigner.h
+++ b/src/ge/graph/build/memory/graph_mem_assigner.h
@@ -135,6 +135,9 @@ class GraphMemoryAssigner {
 
   ge::Status ReAssignAtomicMemory(bool is_loop_graph);
 
+  ge::Status FilterAtomicNodesForMemoryAssign(std::map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
+                                              std::vector<NodePtr> &connecting_output_atomic_nodes);
+
   ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                          int64_t &continuous_mem_size);
 
@@ -165,14 +168,8 @@ class GraphMemoryAssigner {
 
   ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                       const std::vector<int64_t> &mem_offset_end);
-  ///
-  /// @brief set loop graph atomic attr
-  /// @param node, atomic memory assignment start offset
-  /// @param atomic_mem_start: atomic op memory start address
-  ///
-  ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start);
 
-  ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start,
+  ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start,
                                 const std::vector<int64_t> &atomic_mem_size);
 
   ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node);
diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc
index e4e3a63f..c6283d92 100644
--- a/src/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/src/ge/graph/load/new_model_manager/data_dumper.cc
@@ -695,11 +695,7 @@ Status DataDumper::LoadDumpInfo() {
     }
     if (dump_properties_.GetDumpMode() == kDumpInput) {
       if (op_iter.is_task) {
-        Status ret = DumpInput(op_iter, task);
-        if (ret != SUCCESS) {
-          GELOGE(ret, "Dump input failed");
-          return ret;
-        }
+        GE_CHK_STATUS_RET(DumpInput(op_iter, task), "Dump input failed");
       }
       op_mapping_info.mutable_task()->Add(std::move(task));
       continue;
@@ -726,7 +722,7 @@ Status DataDumper::LoadDumpInfo() {
 
   SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info);
 
-  if (!op_list_.empty() || is_op_debug_) {
+  if (!op_list_.empty() || is_op_debug_ || is_end_graph_) {
     auto ret = ExecuteLoadDumpInfo(op_mapping_info);
     if (ret != SUCCESS) {
       GELOGE(ret, "Execute load dump info failed");
@@ -740,7 +736,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
                                       aicpu::dump::OpMappingInfo &op_mapping_info) {
   if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput ||
       dump_properties_.GetDumpMode() == kDumpAll) {
-    GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
     aicpu::dump::Task task;
     task.set_end_graph(true);
     task.set_task_id(end_graph_task_id_);
@@ -748,6 +743,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
     task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH);
     task.mutable_op()->set_op_type(ENDGRAPH);
     op_mapping_info.mutable_task()->Add(std::move(task));
+
+    is_end_graph_ = true;
+    if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) {
+      GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u",
+             op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_);
+      return;
+    }
+    GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
   }
 }
 
diff --git a/src/ge/graph/load/new_model_manager/data_dumper.h b/src/ge/graph/load/new_model_manager/data_dumper.h
index 0a1c2274..30218416 100644
--- a/src/ge/graph/load/new_model_manager/data_dumper.h
+++ b/src/ge/graph/load/new_model_manager/data_dumper.h
@@ -116,6 +116,7 @@ class DataDumper {
   std::vector<InnerDumpInfo> op_list_;
   uint32_t end_graph_task_id_ = 0;
   uint32_t end_graph_stream_id_ = 0;
+  bool is_end_graph_ = false;
   std::multimap<std::string, InnerInputMapping> input_map_;
   bool load_flag_;
   uint32_t device_id_;
diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc
index 81eb4bc9..50867782 100644
--- a/src/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/src/ge/graph/load/new_model_manager/davinci_model.cc
@@ -1928,13 +1928,7 @@ Status DavinciModel::SinkModelProfile() {
     name = name_;
   }
   size_t name_len = name.size();
-  // phy device id
-  uint32_t phy_device_id = 0;
-  rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
-  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
-                  GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
-                  return FAILED);
-  reporter_data.deviceId = phy_device_id;
+  reporter_data.deviceId = device_id_;
   reporter_data.data = (unsigned char *)&name_len;
   reporter_data.dataLen = sizeof(int32_t);
   GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.",
@@ -2103,12 +2097,7 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
   GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
                    return FAILED, "Sink model tag memcpy error.");
   // device id
-  uint32_t phy_device_id = 0;
-  rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
-  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
-                  GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id);
-                  return FAILED);
-  reporter_data.deviceId = phy_device_id;
+  reporter_data.deviceId = device_id_;
 
   // Model Header
   string name;
diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc
index f6995052..4a596738 100644
--- a/src/ge/graph/load/new_model_manager/model_manager.cc
+++ b/src/ge/graph/load/new_model_manager/model_manager.cc
@@ -236,7 +236,6 @@ ModelManager::~ModelManager() {
   std::lock_guard<std::mutex> lock(map_mutex_);
   model_map_.clear();
   model_aicpu_kernel_.clear();
-  cust_aicpu_so_.clear();
 
   GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0)));
 }
@@ -400,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) {
   }
   std::lock_guard<std::mutex> lock(exeception_infos_mutex_);
   exception_infos_.clear();
+  cust_aicpu_so_.clear();
   return SUCCESS;
 }
 
diff --git a/src/ge/graph/load/new_model_manager/model_utils.cc b/src/ge/graph/load/new_model_manager/model_utils.cc
index 9cbb684f..2bb111f3 100644
--- a/src/ge/graph/load/new_model_manager/model_utils.cc
+++ b/src/ge/graph/load/new_model_manager/model_utils.cc
@@ -328,15 +328,14 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
            op_desc->GetName().c_str(), v_memory_type.size(), inputs_size);
     return v_input_data_addr;
   }
-  for (size_t i = 0; i < inputs_size; ++i) {
+  for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
+    const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
+    if (tensor_desc == nullptr) {
+      GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i);
+      continue;
+    }
     if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) {
       // TBE: add weights address to input
-      const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
-      if (tensor_desc == nullptr) {
-        GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
-        continue;
-      }
-
       int64_t tensor_size = 0;
       GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size));
       if (tensor_size) {
diff --git a/src/ge/graph/passes/attach_stream_label_pass.cc b/src/ge/graph/passes/attach_stream_label_pass.cc
index b8065325..6b718418 100644
--- a/src/ge/graph/passes/attach_stream_label_pass.cc
+++ b/src/ge/graph/passes/attach_stream_label_pass.cc
@@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
   nodes.push(node);
 
   static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE};
-  bool merge_flag = false;
-  bool exit_flag = false;
-  bool net_output_flag = false;
   while (!nodes.empty()) {
     NodePtr cur_node = nodes.top();
     nodes.pop();
     if (visited.count(cur_node) > 0) {
       continue;
     }
-    if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) {
+    if (AttachFlag(cur_node, stream_label) != SUCCESS) {
       GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str());
       return FAILED;
     }
@@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
     GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed.");
   }
 
-  bool attach_flag = (merge_flag || exit_flag) && net_output_flag;
-  if (attach_flag) {
-    GELOGI("No need to keep on attaching label.");
-    return SUCCESS;
-  }
-
   for (const NodePtr &tmp_node : branch_nodes) {
     GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str());
     GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed.");
@@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
 /// @brief attach flag
 /// @param [in] node
 /// @param [out] stream_label
-/// @param [out] merge_flag
-/// @param [out] exit_flag
-/// @param [out] net_output_flag
 /// @return Status
 ///
-Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag,
-                                         bool &exit_flag, bool &net_output_flag) {
+Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) {
   const std::string &type = node->GetType();
   if (type == STREAMSWITCH) {
     if (node->GetInDataNodes().empty()) {
@@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea
   } else if (type == STREAMMERGE) {
     stream_label = node->GetName();
     GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
-    merge_flag = true;
   } else if ((type == EXIT) || (type == REFEXIT)) {
     GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
-    exit_flag = true;
-  } else if (type == NETOUTPUT) {
-    net_output_flag = true;
   }
 
   return SUCCESS;
diff --git a/src/ge/graph/passes/attach_stream_label_pass.h b/src/ge/graph/passes/attach_stream_label_pass.h
index 5820480d..28e828b5 100644
--- a/src/ge/graph/passes/attach_stream_label_pass.h
+++ b/src/ge/graph/passes/attach_stream_label_pass.h
@@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass {
   /// @brief attach flag
   /// @param [in] node
   /// @param [out] stream_label
-  /// @param [out] merge_flag
-  /// @param [out] exit_flag
-  /// @param [out] net_output_flag
   /// @return Status
   ///
-  static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag,
-                           bool &net_output_flag);
+  static Status AttachFlag(const NodePtr &node, std::string &stream_label);
 
   ///
   /// @brief Update stream_label for loop_branch
diff --git a/src/ge/graph/passes/enter_pass.cc b/src/ge/graph/passes/enter_pass.cc
index 84621689..ad3d78fc 100644
--- a/src/ge/graph/passes/enter_pass.cc
+++ b/src/ge/graph/passes/enter_pass.cc
@@ -20,13 +20,14 @@
 #include "framework/common/debug/log.h"
 #include "graph/utils/graph_utils.h"
 
+namespace {
+const size_t kOutNodesNum = 1;  // out-node count the Enter's input must have for OptimizeEnter to rewire it
+}  // namespace
+
 namespace ge {
 Status EnterPass::Run(NodePtr &node) {
   GELOGD("EnterPass running");
-  if (node == nullptr) {
-    GELOGE(PARAM_INVALID, "param [node] must not be null.");
-    return PARAM_INVALID;
-  }
+  GE_CHECK_NOTNULL(node);
 
   if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) {
     return SUCCESS;
@@ -38,18 +39,17 @@ Status EnterPass::Run(NodePtr &node) {
     return PARAM_INVALID;
   }
   NodePtr in_node = node->GetInDataNodes().at(0);
-  if (in_node == nullptr) {
-    GELOGE(PARAM_INVALID, "param [in_node] must not be null");
-    return PARAM_INVALID;
-  }
+  GE_CHECK_NOTNULL(in_node);
 
   if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) {
     return SUCCESS;
   }
 
-  bool need_remove_flag =
-    in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty();
-  if (need_remove_flag) {
+  bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty();
+  if (!need_remove_flag) {
+    return SUCCESS;
+  }
+  if (node->GetOutDataNodes().empty()) {
     for (auto &out_ctrl_node : node->GetOutControlNodes()) {
       if (out_ctrl_node == nullptr) {
         continue;
@@ -60,9 +60,47 @@ Status EnterPass::Run(NodePtr &node) {
         return FAILED;
       }
     }
+  } else {
+    if (OptimizeEnter(node, in_node) != SUCCESS) {
+      GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str());
+      return FAILED;
+    }
   }
 
   GELOGD("EnterPass success");
   return SUCCESS;
 }
+
+/// @brief Relink the input of an Enter node directly to the Enter's data consumers, then remove the Enter.
+/// @param [in] node     Enter node to bypass
+/// @param [in] in_node  node feeding data input 0 of `node`
+/// @return SUCCESS if the graph was rewritten or deliberately left untouched, otherwise the failing status
+Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) {
+  // Rewrite only when in_node has exactly kOutNodesNum successors and the Enter has no control successors.
+  if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) {
+    return SUCCESS;
+  }
+  // An Enter that feeds a Merge is left in place.
+  for (const auto &out_node : node->GetOutDataNodes()) {
+    GE_CHECK_NOTNULL(out_node);
+    if (out_node->GetType() == MERGE) {
+      return SUCCESS;
+    }
+  }
+
+  const auto in_out_anchor = in_node->GetOutDataAnchor(0);
+  GE_CHECK_NOTNULL(in_out_anchor);
+  GE_CHK_STATUS_RET(in_out_anchor->Unlink(node->GetInDataAnchor(0)));
+  auto out_data_anchor = node->GetOutDataAnchor(0);
+  GE_CHECK_NOTNULL(out_data_anchor);
+  for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
+    GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor));
+    GE_CHK_STATUS_RET(in_out_anchor->LinkTo(peer_in_data_anchor));
+  }
+
+  auto graph = node->GetOwnerComputeGraph();
+  GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node));
+  AddRePassNodesWithInOut(in_node);
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/src/ge/graph/passes/enter_pass.h b/src/ge/graph/passes/enter_pass.h
index 04ac62ee..73702c38 100644
--- a/src/ge/graph/passes/enter_pass.h
+++ b/src/ge/graph/passes/enter_pass.h
@@ -23,6 +23,9 @@ namespace ge {
 class EnterPass : public BaseNodePass {
  public:
   Status Run(NodePtr &node) override;
+
+ private:
+  Status OptimizeEnter(NodePtr &node, NodePtr &in_node);
 };
 }  // namespace ge
 #endif  // GE_GRAPH_PASSES_ENTER_PASS_H_
diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc
index 331d9c31..336527fb 100644
--- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -41,7 +41,6 @@
 #include "inc/pass_manager.h"
 #include "graph/common/local_context.h"
 
-using std::map;
 using std::set;
 using std::string;
 using std::vector;
@@ -266,24 +265,27 @@ Status MultiBatchGraphCopyer::Init() {
 }
 
 Status MultiBatchGraphCopyer::LabelStatus() {
-  map<string, vector<NodePtr>> frame_enters;
-  InitStatus(frame_enters);
-
+  for (const auto &data : origin_data_nodes_) {
+    auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
+    if (!IsAllDimsPositive(data_shape.GetDims())) {
+      origin_nodes_status_[data.get()] = kNodeInBatchBranch;
+    }
+  }
   bool changed = true;
   // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch
   while (changed) {
     changed = false;
     for (const auto &node : origin_all_nodes_) {
+      auto iter = origin_nodes_status_.find(node.get());
+      if (iter != origin_nodes_status_.end()) {
+        continue;
+      }
       for (auto &in_node : node->GetInAllNodes()) {
         bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() &&
                            origin_nodes_status_[in_node.get()] == kNodeInBatchBranch;
         if (is_in_batch) {
-          if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() ||
-              origin_nodes_status_[node.get()] != kNodeInBatchBranch) {
-            origin_nodes_status_[node.get()] = kNodeInBatchBranch;
-            ResetEnterStatus(frame_enters, node);
-            changed = true;
-          }
+          origin_nodes_status_[node.get()] = kNodeInBatchBranch;
+          changed = true;
           break;
         }
       }
@@ -314,45 +316,6 @@ Status MultiBatchGraphCopyer::LabelStatus() {
   return SUCCESS;
 }
 
-void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) {
-  for (const auto &node : origin_all_nodes_) {
-    if (node->GetType() != ENTER && node->GetType() != REFENTER) {
-      continue;
-    }
-    auto op_desc = node->GetOpDesc();
-    if (op_desc == nullptr) {
-      continue;
-    }
-    string frame_name;
-    if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) {
-      frame_enters[frame_name].emplace_back(node);
-    }
-  }
-
-  for (const auto &data : origin_data_nodes_) {
-    auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
-    if (!IsAllDimsPositive(data_shape.GetDims())) {
-      origin_nodes_status_[data.get()] = kNodeInBatchBranch;
-    }
-  }
-}
-
-void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) {
-  if (node->GetType() != ENTER && node->GetType() != REFENTER) {
-    return;
-  }
-
-  for (const auto &frame_enter : frame_enters) {
-    auto &enters = frame_enter.second;
-    if (std::find(enters.begin(), enters.end(), node) != enters.end()) {
-      for (const auto &enter : enters) {
-        origin_nodes_status_[enter.get()] = kNodeInBatchBranch;
-      }
-      break;
-    }
-  }
-}
-
 Status MultiBatchGraphCopyer::CreateNewNodes() {
   shape_data_ = InsertShapeDataNode();
   if (shape_data_ == nullptr) {
@@ -1200,7 +1163,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s
   }
 }
 
-// Connect NetOutput directly: DTS2020070612498
+// Connect NetOutput directly
 void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index,
                           vector<string> &dynamic_output_dims) {
   GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str());
diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h
index f665b65e..062b98d2 100644
--- a/src/ge/graph/preprocess/multi_batch_copy_graph.h
+++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h
@@ -68,8 +68,6 @@ class MultiBatchGraphCopyer {
 
   // label status for origin_all_nodes_
   Status LabelStatus();
-  void InitStatus(std::map<string, vector<NodePtr>> &frame_enters);
-  void ResetEnterStatus(std::map<string, vector<NodePtr>> &frame_enters, const NodePtr &node);
   // add nodes functions
   Status CreateNewNodes();
 
diff --git a/src/ge/host_cpu_engine/module.mk b/src/ge/host_cpu_engine/module.mk
index 41de4503..e35c68c9 100644
--- a/src/ge/host_cpu_engine/module.mk
+++ b/src/ge/host_cpu_engine/module.mk
@@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY}
 include $(CLEAR_VARS)
 LOCAL_MODULE := atclib/libhost_cpu_engine
 LOCAL_CFLAGS += -Werror
-LOCAL_CFLAGS += -std=c++11
+LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE
 LOCAL_LDFLAGS :=
 
 LOCAL_STATIC_LIBRARIES :=
diff --git a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc
index ec56cc0a..e00268ea 100644
--- a/src/ge/init/gelib.cc
+++ b/src/ge/init/gelib.cc
@@ -165,8 +165,10 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
     }
   }
 
-  // In train and infer, profiling is always needed.
   InitOptions(options);
+
+  // In train and infer, profiling is always needed.
+  InitProfiling(this->options_);
   auto model_manager = ModelManager::GetInstance();
   GE_CHECK_NOTNULL(model_manager);
   GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS,
@@ -176,21 +178,19 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
   // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer
   // these two case with logical device id
   if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) {
-    InitProfiling(this->options_, true);
     status = InitSystemWithOptions(this->options_);
   } else {
-    InitProfiling(this->options_);
     status = InitSystemWithoutOptions();
   }
   return status;
 }
 
-void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) {
+void GELib::InitProfiling(Options &options) {
   GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id);
   std::lock_guard<std::mutex> lock(status_mutex_);
   GetContext().Init();
   // Profiling init
-  if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) {
+  if (ProfilingManager::Instance().Init(options) != SUCCESS) {
     GELOGW("Profiling init failed.");
   }
 }
diff --git a/src/ge/init/gelib.h b/src/ge/init/gelib.h
index c8b3ff8a..b5621dfd 100644
--- a/src/ge/init/gelib.h
+++ b/src/ge/init/gelib.h
@@ -68,7 +68,7 @@ class GELib {
   // get incre build cache path
   const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; }
 
-  void InitProfiling(Options &options, bool convert_2_phy_device_id = false);
+  void InitProfiling(Options &options);
   void ShutDownProfiling();
 
   Status InitSystemWithoutOptions();
diff --git a/src/ge/ir_build/atc_ir_common.cc b/src/ge/ir_build/atc_ir_common.cc
index 82ed40bd..1f8abf37 100644
--- a/src/ge/ir_build/atc_ir_common.cc
+++ b/src/ge/ir_build/atc_ir_common.cc
@@ -522,7 +522,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip
   for (auto iter = options.begin(); iter != options.end(); iter++) {
     std::string key = iter->first;
     std::string option_name = iter->second;
-    GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str());
+    GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str());
   }
 }
 
diff --git a/src/ge/ir_build/ge_ir_build.cc b/src/ge/ir_build/ge_ir_build.cc
index 90f7a8ca..86b304c1 100644
--- a/src/ge/ir_build/ge_ir_build.cc
+++ b/src/ge/ir_build/ge_ir_build.cc
@@ -96,6 +96,12 @@ static graphStatus CheckGlobalOptions(std::map<std::string, std::string> &global
                    return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!");
   global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode;
 
+  // set precision mode default value
+  std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == global_options.end()
+                                 ? "force_fp16"
+                                 : global_options[ge::ir_option::PRECISION_MODE];
+  global_options[ge::ir_option::PRECISION_MODE] = precision_mode;
+
   return GRAPH_SUCCESS;
 }
 
diff --git a/src/ge/opskernel_manager/ops_kernel_manager.cc b/src/ge/opskernel_manager/ops_kernel_manager.cc
index 51e8f438..11eb3061 100644
--- a/src/ge/opskernel_manager/ops_kernel_manager.cc
+++ b/src/ge/opskernel_manager/ops_kernel_manager.cc
@@ -175,25 +175,25 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options,
       } else if (flag == 1) {
         enable_flag = true;
       } else {
-        GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(),
-               iter->second.c_str());
+        GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.",
+               plugin_name.c_str(), iter->second.c_str());
         return GE_GRAPH_OPTIONS_INVALID;
       }
     } catch (std::invalid_argument &) {
-      GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.",
+      GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.",
              iter->second.c_str());
       return GE_GRAPH_OPTIONS_INVALID;
     } catch (std::out_of_range &) {
-      GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
+      GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
              iter->second.c_str());
       return GE_GRAPH_OPTIONS_INVALID;
     } catch (...) {
-      GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(),
-             iter->second.c_str());
+      GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.",
+             plugin_name.c_str(), iter->second.c_str());
       return GE_GRAPH_OPTIONS_INVALID;
     }
   } else {
-    GELOGI("Not find key %s, set to default value false.", plugin_name.c_str());
+    GELOGI("Not find option_key %s, set to default value false.", plugin_name.c_str());
     enable_flag = false;
   }
 
diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc
index bcf42032..0fb342e1 100644
--- a/src/ge/session/omg.cc
+++ b/src/ge/session/omg.cc
@@ -618,11 +618,16 @@ Status ParseOutNodes(const string &out_nodes) {
     if (!out_nodes.empty()) {
       domi::GetContext().out_nodes_map.clear();
       domi::GetContext().user_out_nodes.clear();
+      domi::GetContext().user_out_nodes_top_vec.clear();
 
       vector<string> nodes_v = StringUtils::Split(out_nodes, ';');
       for (const string &node : nodes_v) {
         vector<string> key_value_v = StringUtils::Split(node, ':');
         if (key_value_v.size() != 2) {  // The size must be 2.
+          if (key_value_v.size() == 1 && domi::GetContext().type == domi::CAFFE) {
+            domi::GetContext().user_out_nodes_top_vec.push_back(node);
+            continue;
+          }
           ErrorManager::GetInstance().ATCReportErrMessage(
             "E10001", {"parameter", "value", "reason"},
             {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""});
@@ -632,7 +637,13 @@ Status ParseOutNodes(const string &out_nodes) {
                  node.c_str());
           return PARAM_INVALID;
         }
-        auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]);
+        if (!domi::GetContext().user_out_nodes_top_vec.empty()) {
+          ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
+                                                          {"--out_nodes", out_nodes, "is not all index or top_name"});
+          GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s",
+                 out_nodes.c_str());
+          return PARAM_INVALID;
+        }
         // stoi: The method may throw an exception: invalid_argument/out_of_range
         if (!CheckDigitStr(key_value_v[1])) {
           ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
@@ -640,7 +651,10 @@ Status ParseOutNodes(const string &out_nodes) {
           GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str());
           return PARAM_INVALID;
         }
+
+        auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]);
         int32_t index = stoi(StringUtils::Trim(key_value_v[1]));
+        GELOGD("Get output info: node[%s] and index[%d]", key_value_v[0].c_str(), index);
         if (iter != domi::GetContext().out_nodes_map.end()) {
           iter->second.emplace_back(index);
         } else {
diff --git a/src/ge/single_op/single_op.cc b/src/ge/single_op/single_op.cc
index 8e68208d..f59fb7bd 100644
--- a/src/ge/single_op/single_op.cc
+++ b/src/ge/single_op/single_op.cc
@@ -279,7 +279,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, con
   if (op_task_->GetOpTaskType() == OP_TASK_TBE) {
     return ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
   } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) {
-    return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_);
+    return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
   } else {
     GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u",
            op_task_->GetOpTaskType());
diff --git a/src/ge/single_op/task/build_task_utils.cc b/src/ge/single_op/task/build_task_utils.cc
index 9e97ee57..268cbfd1 100644
--- a/src/ge/single_op/task/build_task_utils.cc
+++ b/src/ge/single_op/task/build_task_utils.cc
@@ -75,8 +75,11 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) {
     // Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]]
     // OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]]
     ss << op_type << " IN[";
-    for (uint32_t idx = 0; idx < op_desc->GetInputsSize(); idx++) {
+    for (uint32_t idx = 0; idx < op_desc->GetAllInputsSize(); idx++) {
       const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx);
+      if (input == nullptr) {
+        continue;
+      }
       ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " ";
       ss << TypeUtils::FormatToSerialString(input->GetFormat());
       ss << VectorToString(input->GetShape().GetDims());
diff --git a/src/ge/single_op/task/op_task.cc b/src/ge/single_op/task/op_task.cc
index 0c489aa4..78db835e 100644
--- a/src/ge/single_op/task/op_task.cc
+++ b/src/ge/single_op/task/op_task.cc
@@ -34,6 +34,11 @@ constexpr int kLaunchRetryTimes = 1000;
 constexpr int kSleepTime = 10;
 constexpr uint64_t kReleaseFlag = 1;
 constexpr int kCopyNum = 2;
+void FreeHbm(void *var) {
+  if (var) {
+    (void)rtFree(var);
+  }
+}
 }  // namespace
 
 Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) {
@@ -336,49 +341,23 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor
 }
 
 AiCpuTask::~AiCpuTask() {
-  if (args_ != nullptr) {
-    (void)rtFree(args_);
-  }
-
-  if (io_addr_ != nullptr) {
-    (void)rtFree(io_addr_);
-  }
-
-  if (dynamic_flag_ && workspace_addr_ != nullptr) {
-    (void)rtFree(workspace_addr_);
-  }
-  if (copy_workspace_buf_ != nullptr) {
-    (void)rtFree(copy_workspace_buf_);
-  }
-
-  if (copy_ioaddr_dev_ != nullptr) {
-    (void)rtFree(copy_ioaddr_dev_);
-  }
-
-  if (copy_input_release_flag_dev_ != nullptr) {
-    (void)rtFree(copy_input_release_flag_dev_);
-  }
-
-  if (copy_input_data_size_dev_ != nullptr) {
-    (void)rtFree(copy_input_data_size_dev_);
-  }
-
-  if (copy_input_src_dev_ != nullptr) {
-    (void)rtFree(copy_input_src_dev_);
-  }
-
-  if (copy_input_dst_dev_ != nullptr) {
-    (void)rtFree(copy_input_dst_dev_);
-  }
-
-  if (copy_task_args_buf_ != nullptr) {
-    (void)rtFree(copy_task_args_buf_);
-  }
-
+  FreeHbm(args_);
+  FreeHbm(io_addr_);
+  if (dynamic_flag_) {
+    FreeHbm(workspace_addr_);
+  }
+  FreeHbm(copy_workspace_buf_);
+  FreeHbm(copy_ioaddr_dev_);
+  FreeHbm(copy_input_release_flag_dev_);
+  FreeHbm(copy_input_data_size_dev_);
+  FreeHbm(copy_input_src_dev_);
+  FreeHbm(copy_input_dst_dev_);
+  FreeHbm(copy_task_args_buf_);
   for (auto summary : output_summary_) {
-    if (summary != nullptr) {
-      (void)rtFree(summary);
-    }
+    FreeHbm(summary);
+  }
+  for (auto out_shape : out_shape_hbm_) {
+    FreeHbm(out_shape);
   }
 }
 
@@ -405,7 +384,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) {
   return SUCCESS;
 }
 
-Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm) {
+Status AiCpuTask::PrepareCopyInputs(vector<DataBuffer> &outputs) {
   std::vector<uint64_t> copy_input_release_flag;
   std::vector<uint64_t> copy_input_data_size;
   std::vector<uint64_t> copy_input_src;
@@ -417,11 +396,15 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v
            summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size);
     auto output = outputs[i];
     copy_input_release_flag.emplace_back(kReleaseFlag);
-    copy_input_data_size.emplace_back(summary.raw_data_size);
+    if (summary.raw_data_size > 0) {
+      copy_input_data_size.emplace_back(output.length);
+    } else {
+      copy_input_data_size.emplace_back(summary.raw_data_size);
+    }
     copy_input_src.emplace_back(summary.raw_data_ptr);
-    copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output));
+    copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output.data));
 
-    const auto &shape_buffer = out_shape_hbm[i];
+    const auto &shape_buffer = out_shape_hbm_[i];
     copy_input_release_flag.emplace_back(kReleaseFlag);
     copy_input_data_size.emplace_back(summary.shape_data_size);
     copy_input_src.emplace_back(summary.shape_data_ptr);
@@ -441,7 +424,7 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v
   return SUCCESS;
 }
 
-Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm) {
+Status AiCpuTask::ReadResultSummaryAndPrepareMemory() {
   for (size_t i = 0; i < num_outputs_; ++i) {
     auto &result_summary = output_summary_host_[i];
 
@@ -449,36 +432,39 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_sha
                            sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST));
     auto shape_data_size = result_summary.shape_data_size;
     void *shape_buffer = nullptr;
-    GE_MAKE_GUARD_RTMEM(shape_buffer);
-    GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM));
-    out_shape_hbm.emplace_back(shape_buffer);
+    if (shape_data_size > 0) {
+      GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM));
+    }
+    out_shape_hbm_.emplace_back(shape_buffer);
   }
   return SUCCESS;
 }
 
-Status AiCpuTask::CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream) {
-  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm));
+Status AiCpuTask::CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) {
+  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs));
 
   GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream));
   GE_CHK_RT_RET(rtStreamSynchronize(stream));
   return SUCCESS;
 }
 
-Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm) {
+Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
   for (size_t i = 0; i < num_outputs_; ++i) {
     const auto &result_summary = output_summary_host_[i];
     std::vector<int64_t> shape_dims;
-    const auto &shape_hbm = out_shape_hbm[i];
-
-    uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
-    std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
-    GE_CHECK_NOTNULL(shape_addr);
-    GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size,
-                           RT_MEMCPY_DEVICE_TO_HOST));
-
-    for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
-      shape_dims.emplace_back(shape_addr[dim_idx]);
-      GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]);
+    if (result_summary.shape_data_size > 0) {
+      const auto &shape_hbm = out_shape_hbm_[i];
+
+      uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
+      std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
+      GE_CHECK_NOTNULL(shape_addr);
+      GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm,
+                             result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
+
+      for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
+        shape_dims.emplace_back(shape_addr[dim_idx]);
+        GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]);
+      }
     }
 
     GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
@@ -487,7 +473,7 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, cons
   return SUCCESS;
 }
 
-Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs,
+Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs,
                                                     rtStream_t stream) {
   if (num_outputs_ == 0) {
     GELOGI("Output num is 0, there is no need to update the output and size.");
@@ -496,13 +482,20 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output
 
   GELOGI("Update shape and data by result summary begin.");
 
-  std::vector<void *> out_shape_hbm;
-  GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm),
-                    "Read ResultSummary and update output shape failed.");
+  for (auto out_shape : out_shape_hbm_) {
+    FreeHbm(out_shape);
+  }
+  out_shape_hbm_.clear();
+  GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and update output shape failed.");
+
+  GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed.");
 
-  GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed.");
+  GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed.");
 
-  GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed.");
+  for (auto out_shape : out_shape_hbm_) {
+    FreeHbm(out_shape);
+  }
+  out_shape_hbm_.clear();
 
   GELOGI("Update shape and data by result summary end.");
   return SUCCESS;
@@ -603,10 +596,18 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
   return SUCCESS;
 }
 
-Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
-                               std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs,
-                               rtStream_t stream) {
+Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
+                               const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc,
+                               std::vector<DataBuffer> &output_buffers, rtStream_t stream) {
   GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc));
+  std::vector<void *> inputs;
+  std::vector<void *> outputs;
+  for (auto &buffer : input_buffers) {
+    inputs.emplace_back(buffer.data);
+  }
+  for (auto &buffer : output_buffers) {
+    outputs.emplace_back(buffer.data);
+  }
   GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs));
   GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));
   GE_CHK_RT_RET(rtStreamSynchronize(stream));
@@ -614,7 +615,7 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, cons
   if (unknown_type_ == DEPEND_SHAPE_RANGE) {
     GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc));
   } else if (unknown_type_ == DEPEND_COMPUTE) {
-    GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream));
+    GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, output_buffers, stream));
   }
 
   return SUCCESS;
@@ -658,9 +659,9 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) {
   return SUCCESS;
 }
 
-Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
-                                 std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs,
-                                 rtStream_t stream) {
+Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
+                                 const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc,
+                                 std::vector<DataBuffer> &output_buffers, rtStream_t stream) {
   GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED,
                          "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_);
 
@@ -669,11 +670,11 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, co
   size_t arg_index = 0;
   auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_);
   GE_CHECK_NOTNULL(task_io_addr);
-  for (auto &input : inputs) {
-    task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input);
+  for (auto &input : input_buffers) {
+    task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input.data);
   }
-  for (auto &output : outputs) {
-    task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output);
+  for (auto &output : output_buffers) {
+    task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output.data);
   }
 
   GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));
diff --git a/src/ge/single_op/task/op_task.h b/src/ge/single_op/task/op_task.h
index b6ea9114..5f742197 100644
--- a/src/ge/single_op/task/op_task.h
+++ b/src/ge/single_op/task/op_task.h
@@ -57,8 +57,9 @@ class OpTask {
   void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
   const OpDescPtr &GetOpdesc() const { return op_desc_; }
   Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream);
-  virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
-                              std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) {
+  virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
+                              std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
+                              rtStream_t stream) {
     return UNSUPPORTED;
   }
 
@@ -138,8 +139,9 @@ class AiCpuTask : public AiCpuBaseTask {
   OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; }
   const void *GetIOAddr() const override;
 
-  Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
-                      std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override;
+  Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
+                      std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
+                      rtStream_t stream) override;
   Status SetMemCopyTask(const domi::KernelExDef &kernel_def);
 
  private:
@@ -147,14 +149,14 @@ class AiCpuTask : public AiCpuBaseTask {
 
   // for copy task.
   Status InitForSummaryAndCopy();
-  Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs,
+  Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs,
                                            rtStream_t stream);
-  Status ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm);
+  Status ReadResultSummaryAndPrepareMemory();
 
-  Status CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream);
-  Status PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm);
+  Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream);
+  Status PrepareCopyInputs(vector<DataBuffer> &outputs);
 
-  Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm);
+  Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);
 
   friend class AiCpuTaskBuilder;
   void *workspace_addr_ = nullptr;
@@ -178,6 +180,8 @@ class AiCpuTask : public AiCpuBaseTask {
   void *copy_input_data_size_dev_;
   void *copy_input_src_dev_;
   void *copy_input_dst_dev_;
+
+  vector<void *> out_shape_hbm_;
 };
 
 class AiCpuCCTask : public AiCpuBaseTask {
@@ -197,8 +201,9 @@ class AiCpuCCTask : public AiCpuBaseTask {
   void SetIoAddr(void *io_addr);
   size_t GetArgSize() const;
 
-  Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs,
-                      std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override;
+  Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
+                      std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
+                      rtStream_t stream) override;
 
  private:
   friend class AiCpuCCTaskBuilder;
diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h
index 0c1d5112..dd01ac5f 100644
--- a/third_party/fwkacllib/inc/ops/aipp.h
+++ b/third_party/fwkacllib/inc/ops/aipp.h
@@ -25,16 +25,21 @@
 
 namespace ge {
 /**
-*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC),
-image normalization (by subtracting the mean value or multiplying a factor), image cropping
-(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n
+*@brief Performs AI pre-processing (AIPP) on images including color space
+conversion (CSC),
+image normalization (by subtracting the mean value or multiplying a factor),
+image cropping
+(by specifying the crop start and cropping the image to the size required by
+the neural network), and much more. \n
 
 *@par Inputs:
-*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer.
+*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the
+data layer.
 *@li params: Dynamic AIPP configuration parameters of type uint8. \n
 
 *@par Attributes:
-*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n
+*aipp_config_path: A required string, specifying the path of the AIPP
+configuration file. \n
 
 *@par Outputs:
 *features: The AIPP-processed output tensor of type float16 or uint8.
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 5d68b977..6d865399 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -28,9 +28,10 @@ namespace ge {
 
 *@par Inputs:
 *Dynamic inputs, including:
-* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
+* @li x: A list of Tensor objects, each with same shape and type. The supported
+types are:
 *   float16, float32, double, int32, uint8, int16, int8, complex64, int64,
-*   qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n
+*   qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n
 
 *@par Outputs:
 *y: A Tensor. Has the same shape and type as the elements of "x". \n
@@ -121,7 +122,8 @@ REG_OP(MinimumGrad)
 
 *@par Inputs:
 *One input:
-*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,
+*x: A Tensor. Must be one of the following types: bool, float16, float, int8,
+int32, uint32, uint8,
    int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n
 
 *@par Attributes:
@@ -385,7 +387,8 @@ REG_OP(Sign)
 
 *@par Inputs:
 *Two inputs, including: \n
-*@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128
+*@li x1: A Tensor. Must be one of the following types: float16, float32,
+ float64, int32, int64, complex64, complex128
 *@li x2: A Tensor. Has the same type as "x1". \n
 
 *@par Outputs:
@@ -484,12 +487,16 @@ REG_OP(Equal)
 
 *@par Inputs:
 *One input:\n
-*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, 
+complex64, complex128. \n
 
 *@par Attributes:
-*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0".
-*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0".
-*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n
+*@li base: An optional attribute of type float32, specifying the base gamma. 
+Defaults to "-1.0".
+*@li scale: An optional attribute of type float32, specifying the scale alpha. 
+Defaults to "1.0".
+*@li shift: An optional attribute of type float32, specifying the shift beta. 
+Defaults to "0.0". \n
 
 *@par Outputs:
 *y: A Tensor of the same type as "x". \n
@@ -510,7 +517,8 @@ REG_OP(Exp)
 
 *@par Inputs:
 *One input:
-*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, 
+complex64, complex128. \n
 
 *@par Outputs:
 *y: A Tensor of the same type as "x". \n
@@ -527,7 +535,9 @@ REG_OP(Expm1)
 *@brief: Computes the reciprocal of "x". \n
 
 *@par Inputs:\n
-*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n
+*x: A Tensor. Must be one of the following types: float16, float32,
+int32, int64, double,
+complex64, complex128. \n
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -749,7 +759,8 @@ REG_OP(Xlogy)
 
 *@par Inputs:
 *One input: \n
-*x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128
+*x: A Tensor. Must be one of the following types: float16, float32, float64,
+int32, int64, complex64, complex128
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -790,7 +801,8 @@ REG_OP(Rsqrt)
 
 *
 *@par Inputs:
-* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128.
+* x: A tensor. Must be one of the following types: float16, float32, float64, 
+int32, int64, complex64, complex128.
 *
 *@par Outputs:
 * y: A tensor. Has the same type as "x".
@@ -811,7 +823,8 @@ REG_OP(Asin)
 
 *
 *@par Inputs:
-*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128.
+*@li y: A tensor of type float16, float32, float64, 
+int32, int64, complex64, complex128.
 *@li dy: A tensor of the same type as "y".
 *
 *@attention Constraints:
@@ -838,7 +851,8 @@ REG_OP(AsinGrad)
 
 *
 *@par Inputs:
-* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128.
+* x: A tensor. Must be one of the following types: float16, float32, float64,
+int32, int64, complex64, complex128.
 *
 *@par Outputs:
 * y: A tensor. Has the same type as "x".
@@ -883,7 +897,8 @@ REG_OP(AcosGrad)
 
 *
 *@par Inputs:
-* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128.
+* x: A tensor. Must be one of the following types: float16, float32, float64,
+ complex64, complex128.
 *
 *@attention Constraints:
 * x Given an input tensor, the function computes inverse hyperbolic cosine of every element.\n
@@ -1160,7 +1175,8 @@ REG_OP(FusedMulAdd)
 
 *
 *@par Inputs:
-*@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128.
+*@li x1: A tensor. Must be one of the following types: float16, float32, float64,
+uint8, int8, int16, int32, int64, complex64, complex128.
 *@li x2: A tensor of the same type as "x1".
 *
 *@attention Constraints:
@@ -1189,7 +1205,8 @@ REG_OP(AddV2)
 *@brief Updates "ref" by adding "value" to it. \n
 
 *@par Inputs:
-*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
+*@li ref: A Tensor. Must be one of the following types: float16, float32, int8,
+int16, int32, int64, uint8, uint16, uint32, uint64.
 *@li value: A Tensor of the same type as "ref". \n
 
 *@par Attributes:
@@ -1218,12 +1235,14 @@ REG_OP(AssignAdd)
 *@brief Updates "ref" by assigning "value" to it. \n
 
 *@par Inputs:
-*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
+*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, 
+int32, int64, uint8, uint16, uint32, uint64.
 *@li value: A Tensor of the same type as "ref". \n
 
 *@par Attributes:
 *@li validate_shape: An optional bool. Defaults to "true".
-                     If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to.
+                     If "true", the operation will validate that the shape of "value"
+                     matches the shape of the Tensor being assigned to.
 *                    If "false", "ref" will take on the shape of "value".
 *                    This attribute is reserved.
 *@li use_locking: An optional bool. Defaults to True.
@@ -1252,7 +1271,8 @@ REG_OP(Assign)
 
 *
 *@par Inputs:
-*@li var: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64
+*@li var: A tensor. Must be one of the following types: float32, float64,
+int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64
 *@li value: A tensor of the same type as "var".
 *
 *@par Attributes:
@@ -1644,7 +1664,9 @@ REG_OP(Atan2)
 
 *
 *@par Inputs:
-*@li x1: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64
+*@li x1: A tensor. Must be one of the following types: float32, float64, int32,
+ uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128,
+float16, uint32, uint64
 *@li x2: A tensor of the same type as "x1".
 *
 *@par Attributes:
@@ -1666,16 +1688,18 @@ REG_OP(ApproximateEqual)
 
 /**
 *@brief Returns the element-wise sum of a list of tensors.\n
-* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs
-to be ready before beginning to sum.\n This can save memory if inputs are ready at different times,
-since minimum temporary storage is proportional to the output size rather than the inputs size.
- Returns a Tensor of same shape and type as the elements of inputs. \n
+* AccumulateNV2 performs the same operation as AddN, but does not wait for all 
+of its inputs to be ready before beginning to sum.\n This can save memory if 
+inputs are ready at different times, since minimum temporary storage is 
+proportional to the output size rather than the inputs size.\n Returns a Tensor 
+of same shape and type as the elements of inputs. \n
 
 *
 *@par Inputs:
 *Dynamic inputs, including:
-* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64,
-qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n
+* x: A tensor. Must be one of the following types: float32, float64, int32, 
+uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, 
+complex128, float16, uint32, uint64.
 *
 *@par Outputs:
 * y: A tensor. Has the same type as "x".
@@ -1731,7 +1755,8 @@ REG_OP(FakeQuantWithMinMaxArgs)
 
 *@par Inputs:
 *Two inputs, including: \n
-*@li gradients: A Tensor of type float32. Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
+*@li gradients: A Tensor of type float32. Backpropagated gradients 
+above the FakeQuantWithMinMaxArgs operation.
 *@li x: A Tensor of type float32. Has the same type and format as "gradients".\n
 * This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n
 
@@ -2210,9 +2235,13 @@ REG_OP(BiasAdd)
 
 *@par Inputs:
 *Two inputs, including:
-*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64.
+*@li x: A Tensor. Must be one of the following types: float32, float64, int32, 
+uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, 
+complex128, float16, uint32, uint64.
 *format is ND.
-*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across.
+*@li dimension: A Tensor. Must be one of the following types: int32, int64. 
+Must be in the range [-rank(input x), rank(input x)]. Describes which dimension 
+of the input Tensor to reduce across.
 * The format is ND.
 *@par Attributes:
 *dtype: The output type, either "int32" or "int64". Defaults to "int64". \n
@@ -2286,6 +2315,7 @@ REG_OP(ArgMaxV2)
     .ATTR(dtype, Type, DT_INT64)
     .OP_END_FACTORY_REG(ArgMaxV2)
 
+
 /**
 *@brief Returns the index with the largest value across axes of a tensor. \n
 
@@ -2298,15 +2328,16 @@ REG_OP(ArgMaxV2)
 *@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n
 
 *@par Outputs:
-*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n
+*y: A multi-dimensional Tensor of type int32, specifying the index with the 
+largest value. The dimension is one less than that of "x". \n
 
 *@attention Constraints:
 *@li x: If there are multiple maximum values, the index of the first maximum value is used.
-*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the 
+dimension length of "x". \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow operator ArgMax.
-*
 * @par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
@@ -2929,9 +2960,13 @@ REG_OP(FusedMulAddN)
 *@li bias: An ND tensor of type float16 or float32. \n
 
 *@par Attributes:
-*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1".
-*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1".
-*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n
+*@li axis: An optional int32 used to compute the shape of bias input from the 
+online bottoms. Defaults to "1".
+*@li num_axes: An optional int32 used to compute the shape of bias input from a 
+Caffe model trained offline. Defaults to "1".
+*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe 
+model trained offline. If "false", bias is input from online bottoms. Defaults 
+to "true". \n
 
 *@par Outputs:
 *y: An ND tensor of type float16 or float32. \n
@@ -2939,13 +2974,25 @@ REG_OP(FusedMulAddN)
 *@attention Constraints:\n
 * Assume that the shape length of "x" is "n" and that of "bias" is "m".
 *@li "axis" is within the range [-n, n-1]. num_axes >= -1.
-*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n
-* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis).
-*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1.
-*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n
-* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
-*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n
-* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m).
+*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis 
+of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < 
+n-axis).\n
+* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must 
+have the same size (0 <= i < -axis).
+*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with 
+shape length 1 and dimension size 1.
+*@li If "bias_from_blob = true", "num_axes > 0", and "axis >= 0", "axis + 
+num_axes" must be less than or equal to "n" and the ith axis of "bias" and the 
+(i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n
+* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and 
+the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same 
+size (0 <= i < num_axes).
+*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0", 
+"axis + m" must be less than or equal to "n" and the ith axis of "bias" and the 
+(i+"axis")th axis of "x" must have the same size (0 <= i < m).\n
+* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith 
+axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= 
+i < m).
 *@par Third-party framework compatibility
 * Compatible with the Caffe operator Bias.
 */
@@ -3023,10 +3070,12 @@ REG_OP(FusedMulAddNL2loss)
 *@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n
 
 *@par Attributes:
-*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
+*@li threshold: A required float32. Defaults to "0.0". "x" is compared with 
+"threshold", outputs "1" for inputs above threshold; "0" otherwise. \n
 
 *@par Outputs:
-*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32.
+*@li y: A Tensor with any format. Has the same type as the input. Must be one 
+of the following types: float16, float32.
 *@par Third-party framework compatibility
 * Compatible with the Caffe operator Threshold.
 */
@@ -3044,11 +3093,16 @@ REG_OP(FusedMulAddNL2loss)
 *@li x: A tensor. Must be one of the following types: float16, float32. \n
 
 *@par Attributes:
-*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000
-*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output.
+*@li axis: An optional int. Specify the axis to be cut at the input tensor. If 
+this parameter is not provided, find the topk for each batch. Defaults to 10000
+*@li out_max_val: An optional bool. Whether to output the maximum value. If it 
+is True, the maximum value and index are output, otherwise only the index is 
+output.
 * Defaults to False
-*@li topk: An optional int. It means the number of top tok in each axis (the value is greater than or equal to 1), and the value range must be in [1,x.shape(axis)].
-* Defaults to 1
+*@li topk: An optional int. It means the number of top-k values in each axis 
+(the value is greater than or equal to 1), and the value range must be in 
+[1, x.shape(axis)].
+* Defaults to 1 \n
 
 *@par Outputs:
 *@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output.
@@ -3168,7 +3222,8 @@ REG_OP(Axpy)
     .OP_END_FACTORY_REG(Axpy)
 
 /**
-*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. \n
+*@brief Creates a criterion that measures the loss given input tensors x1 x2 
+and a Tensor label y with values 1 or -1. \n
 
 *@par Inputs:
 *@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32.
diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h
index 1e67c41f..bf5ebd51 100644
--- a/third_party/fwkacllib/inc/ops/functional_ops.h
+++ b/third_party/fwkacllib/inc/ops/functional_ops.h
@@ -36,7 +36,7 @@ namespace ge {
  *          if "cond" is a numerical scalar, non-zero means True and zero means False;
  *          if "cond" is a string scalar, non-empty means True and empty means False;
  *          if "cond" is not a scalar, non-empty means True and empty means False.
- *@li input: The input tensors . It's a dynamic input. \n
+ *@li input: The input tensors . \n
 
  *@par Graphs:
  *@li then_branch: A subgraph takes 'input' and returns a list of tensors,
@@ -69,7 +69,7 @@ REG_OP(_If)
  *          if "cond" is a numerical scalar, non-zero means True and zero means False;
  *          if "cond" is a string scalar, non-empty means True and empty means False;
  *          if "cond" is not a scalar, non-empty means True and empty means False.
- *@li input: The input tensors . It's a dynamic input. \n
+ *@li input: The input tensors . \n
 
  *@par Graphs:
  *@li then_branch: A subgraph takes 'input' and returns a list of tensors,
@@ -102,7 +102,7 @@ REG_OP(StatelessIf)
  *          if "cond" is a numerical scalar, non-zero means True and zero means False;
  *          if "cond" is a string scalar, non-empty means True and empty means False;
  *          if "cond" is not a scalar, non-empty means True and empty means False.
- *@li input: The input tensors . It's a dynamic input. \n
+ *@li input: The input tensors . \n
 
  *@par Graphs:
  *@li then_branch: A subgraph takes 'input' and returns a list of tensors,
@@ -129,7 +129,7 @@ REG_OP(If)
 
  *@par Inputs:
  *@li branch_index: A int32 scalar which determines the selected subgraph.
- *@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n
+ *@li input: The input tensors, which will be passed to the subgraph . \n
 
  *@par Graphs:
  *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors,
@@ -152,7 +152,7 @@ REG_OP(Case)
  *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n
 
  *@par Inputs:
- *input: The input tensors . It's a dynamic input. \n
+ *input: The input tensors . \n
 
  *@par Graphs:
  *@li cond: A subgraph takes 'input' and returns a tensor.
@@ -183,7 +183,7 @@ REG_OP(_While)
  *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n
 
  *@par Inputs:
- *input: The input tensors . It's a dynamic input. \n
+ *input: The input tensors . \n
 
  *@par Graphs:
  *@li cond: A subgraph takes 'input' and returns a tensor.
@@ -215,7 +215,7 @@ REG_OP(While)
  *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n
 
  *@par Inputs:
- *input: The input tensors . It's a dynamic input. \n
+ *input: The input tensors . \n
 
  *@par Graphs:
  *@li cond: A subgraph takes 'input' and returns a tensor.
@@ -250,7 +250,7 @@ REG_OP(StatelessWhile)
  *@li start: A int32 scalar. The lower bound.
  *@li limit: A int32 scalar. The upper bound.
  *@li delta: A int32 scalar. The step size.
- *@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n
+ *@li input: The input tensors, which will be passed to "body" . \n
 
  *@par Graphs:
  *body: A subgraph takes 'input' and returns a another list of tensors . \n
@@ -274,7 +274,7 @@ REG_OP(For)
  *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n
 
  *@par Inputs:
- *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n
+ *args: The input tensors, which will be passed to "f" . \n
 
  *@par Graphs:
  *f: A subgraph takes 'args' and returns a another list of tensors . \n
@@ -303,7 +303,7 @@ REG_OP(PartitionedCall)
  *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n
 
  *@par Inputs:
- *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n
+ *args: The input tensors, which will be passed to "f" . \n
 
  *@par Graphs:
  *f: A subgraph takes 'args' and returns a another list of tensors . \n
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index 27fb79a9..302823a2 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -160,8 +160,10 @@ REG_OP(CropAndResize)
 *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n
 
 *@par Attributes:
-*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size.
-*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable.
+*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches 
+are resized to this size.
+*@li extrapolation_value: An optional float. Defaults to 0. Value used for 
+extrapolation, when applicable.
 *@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n
 
 *@par Outputs:
@@ -172,7 +174,6 @@ REG_OP(CropAndResize)
 
 *@par Third-party framework compatibility
 *Compatible with tensorflow CropAndResize operator.
-
 * @par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead.
 */
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index 4fa85cbc..073d541d 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -87,39 +87,58 @@ REG_OP(L2NormalizeGrad)
 
 *@par Inputs:
 * Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
-*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW 
+for 4D or NC1HWC0 for 5D.
+*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format 
+NHWC or NCHW. Must be 5D
 if input "x" is with format NC1HWC0. Specifies the scaling factor.
 *@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
 if input "x" is with format NC1HWC0. Specifies the offset.
-*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
-if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
+*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format 
+NHWC or NCHW. Must be 5D
+if input "x" is with format NC1HWC0. Specifies the mean used for inference. 
+Must be "None" if the
 operation is used for training.
-*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
-5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
+*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format 
+NHWC or NCHW. Must be
+5D if input "x" is with format NC1HWC0. Specifies the variance used for 
+inference. Must be "None"
 if the operation is used for training . \n
 
 *@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
-*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
-*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
+*@li epsilon: An optional float32, specifying the small value added to variance 
+to avoid dividing by zero. Defaults to "0.0001".
+*@li data_format: An optional string, specifying the format of "x". Defaults to 
+"NHWC".
+*@li is_training: An optional bool, specifying if the operation is used for 
+training or inference. Defaults to "True" . \n
 
 *@par Outputs:
 * Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
-*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
-*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
+*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", 
+with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
+*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with 
+format NHWC or NCHW. Must be 5D
 if input "x" is with format NC1HWC0. Specifies the mean of "x".
-*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
+*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with 
+format NHWC or NCHW.
 Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
-*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
-Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
-*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
-Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input 
+"x" is with format NHWC or NCHW.
+Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for 
+gradient computation. Pass "None" to skip this output.
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input 
+"x" is with format NHWC or NCHW.
+Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" 
+for gradient computation. Pass "None" to skip this output . \n
 
 *@attention Constraints:
-*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
-then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
+*@li If the operation is used for inference and outputs "reserve_space_1" and 
+"reserve_space_2" are available,
+then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has 
+the same value as "variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square 
+root instruction . \n
 
 *@par Third-party framework compatibility
 *@li Compatible with the TensorFlow operator fused_batch_norm.
@@ -166,13 +185,17 @@ is used for training or inference. Defaults to "True" . \n
 *@li y: A 4D Tensor of type float16 or float32, for the normalized "x".
 *@li batch_mean: A 1D Tensor of type float32, for the mean of "x".
 *@li batch_variance: A 1D Tensor of type float32, for the variance of "x".
-*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation.
-*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n
+*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for
+gradient computation.
+*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" 
+for gradient computation . \n
 
 *@attention Constraints:
 *@li If the operation is used for inference, then output "reserve_space_1"
-has the same value as "mean" and output "reserve_space_2" has the same value as "variance".
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
+has the same value as "mean" and output "reserve_space_2" has the same value as
+"variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square 
+root instruction . \n
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator fused_batch_norm_v2.
@@ -198,23 +221,34 @@ REG_OP(BatchNormExt2)
 
 *@par Inputs:
 * Five inputs, including:
-*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient.
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
-*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
-*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
-*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n
+*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format 
+NHWC, NCHW, or NC1HWC0, for the gradient.
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, 
+or NC1HWC0.
+*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or 
+NC1HWC0.
+*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, 
+NCHW, or NC1HWC0. It is an output of BatchNorm.
+*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, 
+NCHW, or NC1HWC0. It is an output of BatchNorm . \n
 
 *@par Attributes:
-*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
+*@li epsilon: An optional float32. Defaults to "0.0001". A small float number 
+added to the variance of "x".
 *@li data_format: An optional string. Defaults to "NHWC".
 *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n
 
 *@par Outputs:
-*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x".
-*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale".
-*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset".
-*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output.
-*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n
+*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, 
+or NC1HWC0, for the offset of "x".
+*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or 
+NC1HWC0, for the offset of "scale".
+*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or 
+NC1HWC0, for the offset of "offset".
+*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or 
+NC1HWC0. Pass "None" to skip this output.
+*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or 
+NC1HWC0. Pass "None" to skip this output . \n
 
 *@attention Constraints:
 * The preceding layer of this operator must be operator BatchNorm . \n
@@ -244,21 +278,28 @@ REG_OP(BatchNormGrad)
 
 *@par Inputs:
 * Five inputs, including:
-*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.
+*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or 
+NCHW, for the gradient.
 *@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW.
 *@li scale: A 4D Tensor of type float32, with format NHWC or NCHW.
-*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2.
-*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n
+*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It 
+is an output of BatchNormExt2.
+*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It 
+is an output of BatchNormExt2 . \n
 
 *@par Attributes:
 *@li epsilon: A required float32. A small float number added to the variance of "x".
 *@li data_format: A required string for the format.
-*@li is_training: A required bool for specifying the operation is for training (true) or inference (false) . \n
+*@li is_training: A required bool for specifying the operation is for training 
+(true) or inference (false) . \n
 
 *@par Outputs:
-*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x".
-*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale".
-*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset".
+*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, 
+for the offset of "x".
+*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for 
+the offset of "scale".
+*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for 
+the offset of "offset".
 *@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW.
 *@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n
 
@@ -290,14 +331,18 @@ REG_OP(BatchNormGradExt2)
 *@brief Performs batch normalization . \n
 
 *@par Inputs:
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW 
+for 4D or NC1HWC0 for 5D.
+*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  
+Specifies the mean used for inference.
+*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  
+Specifies the variance used for inference.
 *@li momentum: A Tensor,represents the mean and the variance's scale factor
 *@li scale: An optional tensor of type float16 or float32, no use
 *@li offset: An optional tensor of type float16 or float32, no use
 *@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
+*@li epsilon: An optional float32, specifying the small value added to variance 
+to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
 *@li mode: An optional input, not use
 *@par Outputs:
@@ -315,16 +360,20 @@ REG_OP(BNInference)
     .ATTR(use_global_stats, Bool,true)
     .ATTR(mode, Int,1)
     .OP_END_FACTORY_REG(BNInference)
+
 /**
 *@brief aicpu batch normalization host  . \n
 
 *@par Inputs:
 
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
+*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  
+Specifies the mean used for inference.
+*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  
+Specifies the variance used for inference.
 *@li momentum: An optional float, mean and variance's Scale factor
 *@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
+*@li epsilon: An optional float32, specifying the small value added to variance 
+to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
 *@li mode: An optional attr, not use
 *@par Outputs:
@@ -348,14 +397,19 @@ REG_OP(BnHost)
 *@brief Performs batch normalization . \n
 
 *@par Inputs:
-*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW 
+for 4D or NC1HWC0 for 5D.
+*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" 
+Specifies the mean used for inference.
+*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" 
+Specifies the variance used for inference.
 *@li scale: An optional tensor of type float16 or float32, no use
 *@li offset: An optional tensor of type float16 or float32, no use
 *@par Attributes:
-*@li momentum: An optional float32 num, represents the mean and the variance's scale factor
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
+*@li momentum: An optional float32 num, represents the mean and the variance's 
+scale factor
+*@li epsilon: An optional float32, specifying the small value added to variance 
+to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
 *@li mode: An optional attr, not use
 *@par Outputs:
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 12412516..6307889d 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -310,9 +310,6 @@ REG_OP(DepthwiseConv2DBackpropInputD)
 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator DepthwiseConv2D.
 * @li Compatible with the Caffe operator DepthwiseConv2D.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(DepthwiseConv2D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
@@ -460,9 +457,9 @@ REG_OP(Conv2DBackpropInputD)
 *@par Attributes:
  * Six attributes:
  * @li strides: A tuple or list of 2 integers. The stride of the sliding window
- * for H/W dimension.
+ * for H/W dimension, defaults to [1,1].
  * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right]
- * padding on the feature map.
+ * padding on the feature map, defaults to [0,0,0,0].
  * @li dilations: A tuple or list of 4 integers. The dilation factor for each
  * dimension of input, defaults to [1,1,1,1].
  * @li groups: Number of blocked connections from input channels to
@@ -482,8 +479,8 @@ REG_OP(Deconvolution)
     .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32}))
     .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
-    .REQUIRED_ATTR(strides, ListInt)
-    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(strides, ListInt, {1, 1})
+    .ATTR(pads, ListInt, {0, 0, 0, 0})
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
     .ATTR(groups, Int, 1)
     .ATTR(data_format, String, "NCHW")
@@ -593,7 +590,7 @@ REG_OP(Conv2DBackpropFilterD)
 
 *@li bias: An optional 1D tensor. Shape is [out_channels].
 *@li offset_w: An optional 1D tensor for quantized convolution. Shape is
-* [out_channels]. Reserved.
+* [out_channels]. Not supported.
 *\n
 *\n
 * Note that there is a strict data type mapping between the input and output
@@ -622,7 +619,8 @@ REG_OP(Conv2DBackpropFilterD)
 * and right padding.
 * @li dilations: Optional. A list of 4 integers. Specifying the dilation rate
 * to use for dilated convolution. Has the same dimension order and value as
-* "strides". Defaults to [1, 1, 1, 1].
+* "strides". Dilation > 1 is not supported for quantized convolution. Defaults
+* to [1, 1, 1, 1].
 * @li groups: Optional. An integer of type int32, for the number of blocked
 * connections from input channels to output channels. Input channels and output
 * channels must both be divisible by "groups". "x" in_channels must be equal to
@@ -704,13 +702,62 @@ REG_OP(Conv2D)
     .ATTR(offset_x, Int, 0)
     .OP_END_FACTORY_REG(Conv2D)
 
+/**
+*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors.
+*@par Inputs:
+* @li x: A 4D tensor of input images.
+* @li filter_compress: A 4D tensor of compressed filters.
+* @li compress_index: A 1D tensor of type int8.
+* @li bias: An optional 1D tensor.
+* @li offset_w: An optional 1D tensor for quantized convolution. Reserved.
+*
+* The input and output tensor attributes are listed as follows:
+* @verbatim
+    |Tensor    | x       | filter_compress | bias    | offset_w | y
+    -----------|---------|-----------------|---------|----------|--------
+    |Data Type | float16 | float16         | float16 | _        | float16
+    |          |---------|-----------------|---------|----------|--------
+    |          | float32 | float32         | float32 | _        | float32
+    |          |---------|-----------------|---------|----------|--------
+    |          | int8    | int8            | int32   | int8     | int32
+    -----------|---------|-----------------|---------|----------|--------
+    |Format    | NCHW    | NCHW            | ND      | ND       | NCHW
+    |          | NHWC    | NHWC            |         |          | NHWC
+    |          |         | HWCN            |         |          |
+@endverbatim
+* It should be noted that the data types must correspond to each other, but the
+* format does not need to . \n
+
+*@par Attributes:
+* @li strides: A list of 4 integers. Specifying the strides of the
+* convolution along the height and width. The dimension order is determined
+* by the data format of "x". By default the N and C dimensions are set to 1.
+* @li pads: A list of 4 integers. Specifying the top, bottom, left and right
+* padding.
+* @li dilations: A list of 4 integers. Specifying the dilation rate to use
+* for dilated convolution. Has the same dimension order and value as "strides".
+* @li groups: Number of blocked connections from input channels to output
+* channels. Input channels and output channels must both be divisible by
+* "groups". Type is int32.
+* @li offset_x: An optional integer for quantized convolution. Type is int32.
+* Defaults to "0".
+* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the
+* data format of the input and output images. Type is string.
+* Defaults to "NHWC". Reserved . \n
+
+*@par Outputs:
+* @li y: A 4D Tensor of output images . \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED.
+*/
 REG_OP(Conv2DCompress)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
-    .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
+    .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
     .INPUT(compress_index, TensorType({DT_INT8}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(pads, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 415cc4ef..bd8bb9bf 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -158,18 +158,25 @@ REG_OP(Iou)
 *@par Inputs:
 * Three inputs, including:
 *@li ydiff: A 5HD gradient input of type float32.
-*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs,
-the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1".
-*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n
+*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" 
+indicates the number of ROIs,
+the value "5" indicates the indexes of images where the ROIs are located, "x0", 
+"x1", "y0", and "y1".
+*@li rois_n: An optional input, specifying the number of valid ROIs. This 
+parameter is reserved . \n
 
 *@par Attributes:
 *@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
 *@li pooled_width: A required attribute of type int, specifying the W dimension.
 *@li pooled_height: A required attribute of type int, specifying the H dimension.
-*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
-*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical
-sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
-equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n
+*@li spatial_scale: A required attribute of type float, specifying the scaling 
+ratio of "features" to the original image.
+*@li sample_num: An optional attribute of type int, specifying the horizontal 
+and vertical
+sampling frequency of each output. If this attribute is set to "0", the 
+sampling frequency is
+equal to the rounded up value of "rois", which is a floating point number. 
+Defaults to "2" . \n
 
 *@par Outputs:
 *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
@@ -876,9 +883,7 @@ REG_OP(YoloV3DetectionOutputV2)
 A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
 *@li imginfo: A float16, describing the image information including the required image height and width
 and the actual image height and width.
-*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs.
-[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)]
-is formed for the three Yolo outputs, respectively .It's a dynamic input. \n
+*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(weight-1)]...[0,1,2...(weight-1)]] consisting of height groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n
 
 *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
 
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 14949c54..0d0032cf 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -896,29 +896,7 @@ REG_OP(InstanceNormV2)
     .ATTR(epsilon, Float, 0.00001)
     .OP_END_FACTORY_REG(InstanceNormV2)
 
-/**
-*@brief Performs instance normalization for inference.
-
-*@par Inputs:\n
-* Five inputs, including: (NC1HWC0 supported)
-*@li x: A Tensor of type float16 or float32.
-*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
-*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
-*@li mean: A [N, C1, 1, 1, C0] ensor of type float32, for the mean.
-*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
-*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
-
-*@par Outputs:\n
-*y: A Tensor of type float16 or float32 for the normalized "x".
-*batch_mean: A Tensor of type float32 for the result mean.
-*batch_ variance: A Tensor of type float32 for the result variance.
 
-*@attention Constraints:
-*For Ascend 310, the result accuracy fails to reach 1<89> due to the square root instruction.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use INInferV2 instead.
-*/
 REG_OP(INInferV2D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT}))
@@ -931,6 +909,20 @@ REG_OP(INInferV2D)
     .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(INInferV2D)
 
+/**
+*@brief Performs instance normalization for inference of InHost part.
+
+*@par Inputs:\n
+* One input, including: (NC1HWC0 supported)
+* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
+
+*@par Attributes:
+* epsilon: An optional float32, specifying the small value added to
+variance to avoid dividing by zero. Defaults to "0.00001" . \n
+
+*@par Outputs:\n
+* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
+*/
 REG_OP(InHost)
      .INPUT(variance, TensorType({DT_FLOAT}))
      .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index 5d3cd931..fb7fc127 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -128,9 +128,6 @@ REG_OP(AvgPool)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator AvgPool3D.
-*
-* @par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(AvgPool3D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h
index 65fb462e..0621a96c 100644
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -111,9 +111,6 @@ REG_OP(ApplyAdaMax)
 *
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyAdaMax.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead.
 */
 REG_OP(ApplyAdaMaxD)
     .INPUT(var, TensorType::NumberType())
@@ -352,9 +349,6 @@ REG_OP(ApplyMomentum)
 * accum: A mutable tensor. Has the same type as input "accum".
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyMomentum.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead.
 */
 
 REG_OP(ApplyMomentumD)
@@ -681,9 +675,6 @@ REG_OP(ApplyPowerSign)
 *
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyPowerSign.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead.
 */
 REG_OP(ApplyPowerSignD)
     .INPUT(var, TensorType::NumberType())
@@ -804,9 +795,6 @@ REG_OP(ApplyAddSign)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator ApplyAddSign.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead.
 */
 REG_OP(ApplyAddSignD)
     .INPUT(var, TensorType::NumberType())
@@ -928,9 +916,6 @@ REG_OP(ApplyCenteredRMSProp)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyCenteredRMSPropD.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead.
 */
 REG_OP(ApplyCenteredRMSPropD)
     .INPUT(var, TensorType::NumberType())
@@ -1049,9 +1034,6 @@ REG_OP(ApplyAdagrad)
 *
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyAdagrad.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead.
 */
 REG_OP(ApplyAdagradD)
     .INPUT(var, TensorType::NumberType())
@@ -1236,9 +1218,6 @@ REG_OP(ApplyAdagradDA)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyAdagradDA.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead.
 */
 REG_OP(ApplyAdagradDAD)
     .INPUT(var, TensorType::NumberType())
@@ -1496,9 +1475,6 @@ REG_OP(ApplyProximalAdagrad)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyProximalAdagradD.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead.
 */
 REG_OP(ApplyProximalAdagradD)
     .INPUT(var, TensorType::NumberType())
@@ -1592,9 +1568,6 @@ REG_OP(SparseApplyProximalAdagrad)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator SparseApplyProximalAdagrad.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead.
 */
 REG_OP(SparseApplyProximalAdagradD)
     .INPUT(var, TensorType::NumberType())
@@ -1681,9 +1654,6 @@ REG_OP(ApplyFtrl)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyFtrl.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead.
 */
 REG_OP(ApplyFtrlD)
     .INPUT(var, TensorType::NumberType())
@@ -1775,9 +1745,6 @@ REG_OP(ApplyFtrlV2)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyFtrlV2.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead.
 */
 REG_OP(ApplyFtrlV2D)
     .INPUT(var, TensorType::NumberType())
@@ -1890,9 +1857,6 @@ REG_OP(ApplyAdam)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ApplyAdam.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead.
 */
 REG_OP(ApplyAdamD)
     .INPUT(var, TensorType::NumberType())
@@ -1981,9 +1945,6 @@ REG_OP(ApplyAdadelta)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator ApplyAdadelta.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead.
 */
 REG_OP(ApplyAdadeltaD)
     .INPUT(var, TensorType::NumberType())
diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h
index 4f42008e..567bc63d 100644
--- a/third_party/fwkacllib/inc/ops/pad_ops.h
+++ b/third_party/fwkacllib/inc/ops/pad_ops.h
@@ -65,9 +65,6 @@ REG_OP(Fill)
 *
 *@par Outputs:
 * y: A tensor. Has the same type as "value".
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead.
 */
 REG_OP(FillD)
     .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
@@ -125,9 +122,6 @@ REG_OP(BroadcastTo)
 *
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator BroadcastTo.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead.
 */
 REG_OP(BroadcastToD)
     .INPUT(x, TensorType::BasicType())
@@ -175,9 +169,6 @@ REG_OP(Pad)
 
 *@par Third-party framework compatibility:
 * Compatible with TensorFlow operator Pad.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
 */
 REG_OP(PadD)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
@@ -269,9 +260,6 @@ REG_OP(PadV3D)
 *@see Diag()
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator Diag.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead.
 */
 REG_OP(DiagD)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
index cd6cfdfe..ec88c618 100644
--- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
@@ -30,7 +30,7 @@ namespace ge {
 *@par Inputs:
 *Two inputs, including:
 *@li rt_nested_splits: A list of at least 1 Tensor objects with the same type
-in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input.
+in: int32, int64. The row_splits for the RaggedTensor.
 *@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor
 Must be one of the following types: bool, int8, int16, uint16, int32,
 int64, double, float, float16 . \n
@@ -66,7 +66,7 @@ REG_OP(RaggedTensorToSparse)
 *@li values:A 1D tensor representing the values of the ragged tensor.
 *@li default_value:A `Tensor`. Must have the same type as `values`.
 *@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same
-type in: `int64`, `int32` . It's a dynamic input.\n
+type in: `int64`, `int32` .\n
 
 *@par Attributes:
 *@li num_row_partition_tensors:Numbers of row partition tensors.
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index b97d824f..24a9edd1 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -374,9 +374,6 @@ REG_OP(DropOutGenMask)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator lin_space.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead.
 */
 REG_OP(LinSpaceD)
     .INPUT(assist, TensorType({DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 626dda59..80169344 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -353,9 +353,6 @@ REG_OP(ReduceSum)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator Sum.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead.
 */
 REG_OP(ReduceSumD)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -381,9 +378,6 @@ REG_OP(ReduceSumD)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator ReduceAll.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead.
 */
 REG_OP(ReduceAllD)
     .INPUT(x, TensorType({DT_BOOL}))
@@ -459,9 +453,6 @@ REG_OP(ReduceProd)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator ReduceProd.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead.
 */
 REG_OP(ReduceProdD)
     .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16}))
@@ -516,9 +507,6 @@ REG_OP(ReduceMean)
 
 *@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator ReduceMean.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead.
 */
 REG_OP(ReduceMeanD)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -573,9 +561,6 @@ REG_OP(ReduceMax)
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow operator Max.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead.
 */
 REG_OP(ReduceMaxD)
     .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8,
@@ -630,9 +615,6 @@ REG_OP(ReduceMin)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator reduce_min.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead.
 */
 REG_OP(ReduceMinD)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
@@ -699,9 +681,6 @@ REG_OP(ReduceAny)
 *
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator reduce_any.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead.
 */
 REG_OP(ReduceAnyD)
     .INPUT(x, TensorType({DT_BOOL}))
@@ -787,9 +766,6 @@ REG_OP(EuclideanNorm)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator EuclideanNorm.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead.
 */
 REG_OP(EuclideanNormD)
     .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16}))
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index e1a83f43..0766d2c6 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -92,6 +92,7 @@ REG_OP(DynamicLSTM)
     .OUTPUT(output_h, TensorType({DT_FLOAT32}))
     .OP_END_FACTORY_REG(DynamicLSTM)
 
+
 /**
 *@brief: DynamicRNNGrad calculation.
 *@par Inputs:
@@ -126,7 +127,7 @@ REG_OP(DynamicLSTM)
 *@li keep_prob:An float identifying the keep prob in the op. Default to 1.
 *@li cell_clip:An float identifying the cell clip in the op. Default to -1.
 *@li num_proj:An integer identifying the num projection in the op. Default to 0.
-*@li time_major:An bool identifying the time major in the op. Default to false.
+*@li time_major:A bool identifying the time major in the op. Default to true.
 *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
 *@li forget_bias:An float identifying the forget bias in the op. Default to 0.
 *@li is_training:An bool identifying is training in the op. Default to true.
@@ -138,6 +139,9 @@ REG_OP(DynamicLSTM)
 *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 */
 REG_OP(DynamicRNNGrad)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h
index 159e7382..7fd853d3 100644
--- a/third_party/fwkacllib/inc/ops/save_ops.h
+++ b/third_party/fwkacllib/inc/ops/save_ops.h
@@ -28,7 +28,7 @@ namespace ge {
 /**
 *@brief Mark which tensors need to be saved to the ckpt file.
 *@par Inputs:
-*tensors: A list of input tensor.It's a dynamic input.
+*tensors: A list of input tensors.
 *@par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h
index dc6852d4..acf1c34d 100644
--- a/third_party/fwkacllib/inc/ops/sdca_ops.h
+++ b/third_party/fwkacllib/inc/ops/sdca_ops.h
@@ -35,16 +35,16 @@ namespace ge {
 *rate . \n
 
 *@par Inputs:
-*@li sparse_example_indices: a list of vectors which contain example indices.It's a dynamic input.
-*@li sparse_feature_indices: a list of vectors which contain feature indices.It's a dynamic input.
-*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group.It's a dynamic input.
-*@li dense_features: a list of matrices which contains the dense feature values.It's a dynamic input.
+*@li sparse_example_indices: a list of vectors which contain example indices.
+*@li sparse_feature_indices: a list of vectors which contain feature indices.
+*@li sparse_feature_values: a list of vectors which contain the feature values associated with each feature group.
+*@li dense_features: a list of matrices which contains the dense feature values.
 *@li example_weights: a vector which contains the weight associated with each example.
 *@li example_labels: a vector which contains the label/target associated with each example.
 *@li sparse_indices: a list of vectors where each value is the indices which has
-*corresponding weights in sparse_weights. This field maybe omitted for the dense approach.It's a dynamic input.
+*corresponding weights in sparse_weights. This field may be omitted for the dense approach.
 *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group.
-*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input.
+*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.
 *@li example_state_data: a list of vectors containing the example state data.
 *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
 *@li l1: Symmetric l1 regularization strength.
@@ -61,7 +61,6 @@ namespace ge {
 *@par Third-party framework compatibility
 * Compatible with tensorflow SdcaOptimizerV2 operator.
 */
-
 REG_OP(SdcaOptimizerV2)
     .DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64}))
     .DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64}))
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 613ce358..8ef4a42c 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -79,9 +79,6 @@ REG_OP(Range)
 
 *@see Range()
 *@since V100R001C33
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead.
 */
 REG_OP(RangeD)
     .INPUT(x, TensorType({DT_FLOAT,DT_INT32}))
@@ -186,7 +183,8 @@ REG_OP(GatherNd)
 *     uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16,
 *     uint16, complex128, float16, uint32, uint64, complex64, complex128.
 * @li indices: A Tensor of type int32 or int64.
-* @li axis: A Tensor of type as int32 . \n
+* @li axis: A Tensor of type int32 or int64.
+*     Must be in the range [-rank(x), rank(x)) . \n
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n
@@ -225,9 +223,6 @@ REG_OP(GatherV2)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator GatherV2.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead.
 */
 REG_OP(GatherV2D)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8,
@@ -330,9 +325,6 @@ REG_OP(StridedSlice)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSlice.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead.
 */
 REG_OP(StridedSliceD)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8,
@@ -388,9 +380,6 @@ REG_OP(StridedSliceD)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSliceGradD.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead.
 */
 REG_OP(StridedSliceGradD)
     .INPUT(dy, TensorType::BasicType())
@@ -502,9 +491,6 @@ REG_OP(UnsortedSegmentSum)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator UnsortedSegmentSum.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead.
 */
 REG_OP(UnsortedSegmentSumD)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8}))
@@ -729,9 +715,6 @@ REG_OP(OneHot)
 
 *@par Third-party framework compatibility:
 * Compatible with the TensorFlow operator OneHot.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead.
 */
 REG_OP(OneHotD)
     .INPUT(x, TensorType({DT_UINT8, DT_INT32}))
@@ -807,7 +790,7 @@ REG_OP(SliceD)
 * @li assist_seq: A 1D tensor of type float16.
 * with size of 2N, which "N" is the last dimension.
 * The first N numbers is indices, and the next N numbers is deviation of casting
-* int32 to float16. \n
+* float16 to int32 . \n
 
 * @par Attributes:
 * @li k: A required int that is at least 0, specifying the number of top elements
@@ -816,7 +799,7 @@ REG_OP(SliceD)
 * If true, the resulting "k" elements will be sorted by the values in descending
 * order.
 * @li dim: An optional int. Defaults to -1. For reserved use.
-* @li largest: An optional bool. Defaults to true. For reserved use. \n
+* @li largest: An optional bool. Defaults to true. For reserved use.
 
 * @par Outputs:
 * @li values: A Tensor, specifying the sorted data. Has the same type as "input".
@@ -1270,9 +1253,6 @@ REG_OP(InplaceUpdate)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator InplaceUpdate.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead.
 */
 REG_OP(InplaceUpdateD)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
@@ -1325,9 +1305,6 @@ REG_OP(InplaceAdd)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator InplaceAdd.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead.
 */
 REG_OP(InplaceAddD)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
@@ -1379,9 +1356,6 @@ REG_OP(InplaceSub)
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator InplaceSub.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead.
 */
 REG_OP(InplaceSubD)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
@@ -1433,9 +1407,6 @@ REG_OP(ScatterNonAliasingAdd)
 * @par Outputs:
 * y: A Tensor of type RealNumberType . \n
 
-* @attention Constraints:
-* @li segment_ids must be non-negative tensor.
-
 * @see UnsortedSegmentSum(), UnsortedSegmentProd(),
 
 * @par Third-party framework compatibility
@@ -1463,9 +1434,6 @@ REG_OP(UnsortedSegmentMin)
 * @par Outputs:
 * y: A Tensor.Must have the same type as input "x" . \n
 
-* @attention Constraints:
-* @li segment_ids must be non-negative tensor.
-
 * @see UnsortedSegmentProdD(), UnsortedSegmentSumD(),
 *
 * @par Restrictions:
@@ -1491,9 +1459,6 @@ REG_OP(UnsortedSegmentMinD)
 * @par Outputs:
 * y: A Tensor of type RealNumberType . \n
 
-* @attention Constraints:
-* @li segment_ids must be non-negative tensor.
-
 * @see UnsortedSegmentSum(), UnsortedSegmentProd(),
 
 * @par Third-party framework compatibility
@@ -1521,9 +1486,6 @@ REG_OP(UnsortedSegmentMax)
 * @par Outputs:
 * y: A Tensor.Must have the same type as input "x" . \n
 
-* @attention Constraints:
-* @li segment_ids must be non-negative tensor.
-
 * @see UnsortedSegmentProdD(),
 *
 * @par Restrictions:
@@ -1548,9 +1510,6 @@ REG_OP(UnsortedSegmentMaxD)
 * @par Outputs:
 * y: A Tensor of type NumberType . \n
 
-* @attention Constraints:
-* @li segment_ids must be non-negative tensor.
-
 * @see UnsortedSegmentSum(), UnsortedSegmentMin(),
 
 * @par Third-party framework compatibility
@@ -1582,9 +1541,6 @@ REG_OP(UnsortedSegmentProd)
 * @li segment_ids must be non-negative tensor.
 
 * @see UnsortedSegmentMinD()
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentProd instead.
 */
 REG_OP(UnsortedSegmentProdD)
     .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16}))
@@ -1900,9 +1856,6 @@ REG_OP(CumulativeLogsumexp)
 *y: A Tensor. Has the same type as "x".
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator Cumsum.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use CumulativeLogsumexp instead.
 */
 REG_OP(CumulativeLogsumexpD)
     .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h
index b0bd14c0..b66a0213 100644
--- a/third_party/fwkacllib/inc/ops/split_combination_ops.h
+++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h
@@ -75,9 +75,6 @@ REG_OP(Split)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator Split.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead.
 */
 REG_OP(SplitD)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
@@ -144,9 +141,6 @@ Under the caffe framework, the conversion of slice_point through the cut point t
 Under the caffe framework,size_splits or axis transformat to split_dim.Only one can effect.
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator SplitV.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead.
 */
 REG_OP(SplitVD)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
@@ -164,8 +158,7 @@ REG_OP(SplitVD)
 * Two inputs, including:
 * @li values: A list of Tensors. Must be one of the following types: int8, int16, int32,
 *     int64, uint8, uint16, uint32, uint64, float16, float32.
-*     Tensors to be concatenated. All must have size 1 in the first dimension and same shape.
-*     It's a dynamic input.
+*     Tensors to be concatenated. All must have size 1 in the first dimension and same shape.
 * @li shape: A Tensor of the same type as "x".
 * The final shape of the result. Should be equal to the shapes of any input
 * but with the number of input values in the first dimension . \n
@@ -314,7 +307,7 @@ REG_OP(Concat)
 
 *@par Inputs:
 * x: A list of N Tensors. Must be one of the following types: int8, int16, int32,
-*     int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n
+*     int64, uint8, uint16, uint32, uint64, float16, float32, bool . \n
 
 *@par Attributes:
 *@li axis: A optional int, defaultvalue is 0.
@@ -340,7 +333,7 @@ REG_OP(Pack)
 *@par Inputs:
 *Two inputs, including:
 * @li concat_dim: A Tensor of type int32.
-* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n
+* @li x: A list of 1D Tensor objects of type int32 . \n
 
 *@par Attributes:
 *N: A required int . \n
@@ -364,7 +357,7 @@ REG_OP(ConcatOffset)
 *@par Inputs:
 *Two inputs, including:
 * @li concat_dim: A Tensor of type int32.
-* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n
+* @li x: A list of 1D Tensor objects of type int32 . \n
 
 *@par Attributes:
 *@li Concat_dim: A required int. Must be within the rank of input "x".
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index edc55820..ed46d95c 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -235,12 +235,8 @@ REG_OP(BatchToSpaceND)
 *@par Outputs:
 *y: A Tensor with format NC1HWC0. Has the same type as input "x".
 
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator BatchToSpaceND.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead.
 */
 REG_OP(BatchToSpaceNDD)
     .INPUT(x, TensorType::BasicType())
@@ -287,9 +283,6 @@ REG_OP(SpaceToBatchND)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator SpaceToBatchND.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead.
 */
 REG_OP(SpaceToBatchNDD)
     .INPUT(x, TensorType::BasicType())
@@ -411,9 +404,6 @@ REG_OP(BatchToSpace)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator BatchToSpace.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead.
 */
 REG_OP(BatchToSpaceD)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
@@ -467,9 +457,6 @@ REG_OP(SpaceToBatch)
 *y: A Tensor. Has the same type as input "x".
 *@par Third-party framework compatibility
 *@ Compatible with the TensorFlow operator SpaceToBatch.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead.
 */
 REG_OP(SpaceToBatchD)
     .INPUT(x, TensorType::BasicType())
@@ -598,9 +585,6 @@ REG_OP(ExtractVolumePatches)
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead.
 */
 REG_OP(ConfusionTransposeD)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
@@ -664,11 +648,6 @@ REG_OP(FlattenV2)
     .ATTR(end_axis, Int, -1)
     .OP_END_FACTORY_REG(FlattenV2)
 
-REG_OP(DeConvTrans)
-    .INPUT(x, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_INT8}))
-    .OP_END_FACTORY_REG(DeConvTrans)
-
 /**
 *@brief Compress large weight to small one. Usually inserted before Conv2d.
 *
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 4b08916e..17243802 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++ b/third_party/fwkacllib/inc/runtime/base.h
@@ -19,7 +19,7 @@
 
 #include <stdint.h>
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -580,7 +580,8 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d
  * @return RT_ERROR_INVALID_VALUE for error input
  */
 RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream);
-#ifdef __cplusplus
+
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h
index c64ed16f..6de84c02 100644
--- a/third_party/fwkacllib/inc/runtime/config.h
+++ b/third_party/fwkacllib/inc/runtime/config.h
@@ -19,7 +19,7 @@
 
 #include "base.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -185,7 +185,7 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
  */
 RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h
index cc74a5ed..39651817 100644
--- a/third_party/fwkacllib/inc/runtime/context.h
+++ b/third_party/fwkacllib/inc/runtime/context.h
@@ -19,7 +19,7 @@
 
 #include "base.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -149,7 +149,7 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint
  */
 RTS_API rtError_t rtGetGroupCount(uint32_t *count);
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h
index 048be69a..0bff548b 100644
--- a/third_party/fwkacllib/inc/runtime/dev.h
+++ b/third_party/fwkacllib/inc/runtime/dev.h
@@ -19,7 +19,7 @@
 
 #include "base.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -339,7 +339,7 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3
  * @return RT_ERROR_NONE for ok
  */
 RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value);
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h
index 60f400b3..e27cd832 100644
--- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h
+++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h
@@ -19,7 +19,7 @@
 
 #include "base.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile();
  */
 RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode);
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h
index 9dc44766..af7b16d8 100644
--- a/third_party/fwkacllib/inc/runtime/event.h
+++ b/third_party/fwkacllib/inc/runtime/event.h
@@ -19,7 +19,7 @@
 
 #include "base.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -229,7 +229,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs
  */
 RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num);
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h
index 956e033b..2030634a 100644
--- a/third_party/fwkacllib/inc/runtime/kernel.h
+++ b/third_party/fwkacllib/inc/runtime/kernel.h
@@ -20,7 +20,7 @@
 #include "base.h"
 #include "stream.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -529,7 +529,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream);
  * @return RT_ERROR_INVALID_VALUE for error input 
  */
 RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum);
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h
index 8c1a4326..a506e94a 100644
--- a/third_party/fwkacllib/inc/runtime/mem.h
+++ b/third_party/fwkacllib/inc/runtime/mem.h
@@ -24,7 +24,7 @@
 #include "config.h"
 #include "stream.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -491,7 +491,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  */
 RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h
index 089a90b7..59a1ba7d 100644
--- a/third_party/fwkacllib/inc/runtime/rt_model.h
+++ b/third_party/fwkacllib/inc/runtime/rt_model.h
@@ -19,7 +19,7 @@
 
 #include "base.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -430,7 +430,7 @@ rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint
  */
 RTS_API rtError_t rtDebugUnRegister(rtModel_t model);
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h
index 3123c3a9..ab542d89 100644
--- a/third_party/fwkacllib/inc/runtime/stream.h
+++ b/third_party/fwkacllib/inc/runtime/stream.h
@@ -20,7 +20,7 @@
 #include "base.h"
 #include "event.h"
 
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
 #endif
 
@@ -188,7 +188,7 @@ RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream);
  */
 RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr,
                                   uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType);
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
 
diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
index a1c39a51..67adecd9 100644
--- a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
+++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
@@ -1,12 +1,18 @@
 /**
-* @file adx_datadump_server.h
-*
-* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-*/
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef ADX_DATADUMP_SERVER_H
 #define ADX_DATADUMP_SERVER_H
diff --git a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001
index 666f28ce..01c2d769 100644
--- a/third_party/patch/securec/securec.patch001
+++ b/third_party/patch/securec/securec.patch001
@@ -1,5 +1,5 @@
-diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt
---- bounds_checking_function/CMakeLists.txt	1970-01-01 08:00:00.000000000 +0800
+diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt
+--- libboundscheck/CMakeLists.txt	1970-01-01 08:00:00.000000000 +0800
 +++ securec/CMakeLists.txt	2020-09-19 16:53:48.689460700 +0800
 @@ -0,0 +1,18 @@
 +cmake_minimum_required(VERSION 3.14)

From 0667dcfa3fdd763102aadf0ad95dbb97667fb598 Mon Sep 17 00:00:00 2001
From: zhoufeng <zhoufeng54@huawei.com>
Date: Wed, 14 Oct 2020 18:38:08 +0800
Subject: [PATCH 5/7] handle empty label resource

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
---
 src/ge/ge_runtime/runtime_model.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/ge/ge_runtime/runtime_model.cc b/src/ge/ge_runtime/runtime_model.cc
index f0405056..9f549313 100644
--- a/src/ge/ge_runtime/runtime_model.cc
+++ b/src/ge/ge_runtime/runtime_model.cc
@@ -344,6 +344,9 @@ void RuntimeModel::RtStreamDestory() noexcept {
 
 void RuntimeModel::RtLabelDestory() noexcept {
   for (size_t i = 0; i < label_list_.size(); i++) {
+    if (label_list_[i] == nullptr) {
+      continue;
+    }
     if (rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE) {
       GELOGE(RT_FAILED, "Destroy label failed! Index: %zu.", i);
       return;

From 1f4f0a47b1aec2d32435431d6faf0de2ab81a25b Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Thu, 22 Oct 2020 09:38:44 +0800
Subject: [PATCH 6/7] speed up third party download via domestic source

---
 cmake/external_libs/eigen.cmake    | 13 +++++++++++--
 cmake/external_libs/gtest.cmake    | 12 ++++++++++--
 cmake/external_libs/json.cmake     | 17 ++++++++++++++---
 cmake/external_libs/onnx.cmake     | 12 ++++++++++--
 cmake/external_libs/protobuf.cmake | 13 +++++++++++--
 5 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/cmake/external_libs/eigen.cmake b/cmake/external_libs/eigen.cmake
index b43e70b4..5cdfc346 100644
--- a/cmake/external_libs/eigen.cmake
+++ b/cmake/external_libs/eigen.cmake
@@ -1,10 +1,19 @@
 set(Eigen3_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
 set(Eigen3_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
 set(Eigen3_NS "ge_")
+
+if (ENABLE_GITEE)
+    set(REQ_URL "https://gitee.com/mirrors/eigen-git-mirrorsource/repository/archive/3.3.7.tar.gz")
+    set(MD5 "cf6552a5d90c1aca4b5e0b011f65ea93")
+else()
+    set(REQ_URL "https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz")
+    set(MD5 "9e30f67e8531477de4117506fe44669b")
+endif ()
+
 graphengine_add_pkg(Eigen3
         VER 3.3.7
-        URL https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz
-        MD5 9e30f67e8531477de4117506fe44669b
+        URL ${REQ_URL}
+        MD5 ${MD5}
         CMAKE_OPTION -DBUILD_TESTING=OFF)
 
 find_package(Eigen3 3.3.7 REQUIRED ${GE_FIND_NO_DEFAULT_PATH})
diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake
index e6d1ab1d..5e175fd2 100644
--- a/cmake/external_libs/gtest.cmake
+++ b/cmake/external_libs/gtest.cmake
@@ -1,11 +1,19 @@
 set(ge_gtest_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
 set(ge_gtest_CFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
 
+if (ENABLE_GITEE)
+    set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
+    set(MD5 "89e13ca1aa48d370719d58010b83f62c")
+else()
+    set(REQ_URL "https://github.com/google/googletest/archive/release-1.8.0.tar.gz")
+    set(MD5 "16877098823401d1bf2ed7891d7dce36")
+endif ()
+
 graphengine_add_pkg(ge_gtest
         VER 1.8.0
         LIBS gtest gtest_main
-        URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz
-        MD5 16877098823401d1bf2ed7891d7dce36
+        URL ${REQ_URL}
+        MD5 ${MD5}
         CMAKE_OPTION -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON
         -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON)
 
diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake
index 4b9fa4e3..f2ae5310 100644
--- a/cmake/external_libs/json.cmake
+++ b/cmake/external_libs/json.cmake
@@ -1,9 +1,20 @@
 set(nlohmann_json_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
 set(nlohmann_json_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
+
+if (ENABLE_GITEE)
+    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
+    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
+    set(INCLUDE "./include")
+else()
+    set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
+    set(MD5 "0dc903888211db3a0f170304cd9f3a89")
+    set(INCLUDE "./")
+endif ()
+
 graphengine_add_pkg(ge_nlohmann_json
         VER 3.6.1
-        HEAD_ONLY ./
-        URL https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip
-        MD5 0dc903888211db3a0f170304cd9f3a89)
+        HEAD_ONLY ${INCLUDE}
+        URL ${REQ_URL}
+        MD5 ${MD5})
 include_directories(${ge_nlohmann_json_INC})
 add_library(graphengine::json ALIAS ge_nlohmann_json)
\ No newline at end of file
diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake
index 621f67c6..a092f964 100644
--- a/cmake/external_libs/onnx.cmake
+++ b/cmake/external_libs/onnx.cmake
@@ -1,5 +1,13 @@
+if (ENABLE_GITEE)
+    set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
+    set(MD5 "1bdbcecdd68ea8392630467646776e02")
+else()
+    set(REQ_URL "https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz")
+    set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
+endif ()
+
 graphengine_add_pkg(onnx
         VER 1.6.0
         HEAD_ONLY ./
-        URL https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz
-        MD5 512f2779d6215d4a36f366b6b9acdf1e)
\ No newline at end of file
+        URL ${REQ_URL}
+        MD5 ${MD5})
diff --git a/cmake/external_libs/protobuf.cmake b/cmake/external_libs/protobuf.cmake
index bbd86bc4..8be594c7 100644
--- a/cmake/external_libs/protobuf.cmake
+++ b/cmake/external_libs/protobuf.cmake
@@ -5,12 +5,21 @@ set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
 set(_ge_tmp_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
 string(REPLACE " -Wall" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+if (ENABLE_GITEE)
+    set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
+    set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
+else()
+    set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
+    set(MD5 "3d9e32700639618a4d2d342c99d4507a")
+endif ()
+
 graphengine_add_pkg(protobuf
         VER 3.8.0
         LIBS protobuf
         EXE protoc
-        URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz
-        MD5 3d9e32700639618a4d2d342c99d4507a
+        URL ${REQ_URL}
+        MD5 ${MD5}
         CMAKE_PATH ../cmake/
         CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF)
 set(CMAKE_CXX_FLAGS ${_ge_tmp_CMAKE_CXX_FLAGS})

From b25b70c9c2e6d09aba0815234fbe76f7e9b28d43 Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Fri, 23 Oct 2020 20:08:45 +0800
Subject: [PATCH 7/7] add stub files

---
 src/common/graph/stub/Makefile       |   6 +
 src/common/graph/stub/gen_stubapi.py | 578 +++++++++++++++++++++++++++++++++++
 src/ge/stub/Makefile                 |   6 +
 src/ge/stub/README                   |   4 +
 src/ge/stub/README.md                |  44 +++
 src/ge/stub/gen_stubapi.py           | 578 +++++++++++++++++++++++++++++++++++
 6 files changed, 1216 insertions(+)
 create mode 100644 src/common/graph/stub/Makefile
 create mode 100644 src/common/graph/stub/gen_stubapi.py
 create mode 100644 src/ge/stub/Makefile
 create mode 100644 src/ge/stub/README
 create mode 100755 src/ge/stub/README.md
 create mode 100644 src/ge/stub/gen_stubapi.py

diff --git a/src/common/graph/stub/Makefile b/src/common/graph/stub/Makefile
new file mode 100644
index 00000000..f339fa33
--- /dev/null
+++ b/src/common/graph/stub/Makefile
@@ -0,0 +1,6 @@
+inc_path := $(shell pwd)/metadef/inc/external/
+out_path := $(shell pwd)/out/graph/lib64/stub/
+stub_path := $(shell pwd)/metadef/graph/stub/
+
+mkdir_stub := $(shell mkdir -p $(out_path))
+graph_local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path))
diff --git a/src/common/graph/stub/gen_stubapi.py b/src/common/graph/stub/gen_stubapi.py
new file mode 100644
index 00000000..7263ff17
--- /dev/null
+++ b/src/common/graph/stub/gen_stubapi.py
@@ -0,0 +1,578 @@
+import os
+import re
+import sys
+import logging
+
+logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s',
+                    level=logging.INFO)
+
+"""
+    this attr is used for symbol table visible
+"""
+GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
+
+"""
+    generate stub func body by return type
+"""
+RETURN_STATEMENTS = {
+    'graphStatus': '    std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "\n '
+                   '        << "environment variables and compilation options to make sure you use the correct library."\n'
+                   '        << std::endl;\n'
+                   '    return ACL_ERROR_COMPILING_STUB_MODE;',
+    'Status': '    return SUCCESS;',
+    'Graph': '    return Graph();',
+    'Graph&': '    return *this;',
+    'Format': '    return Format();',
+    'Format&': '    return *this;',
+    'Shape': '    return Shape();',
+    'Shape&': '    return *this;',
+    'TensorDesc': '    return TensorDesc();',
+    'TensorDesc&': '    return *this;',
+    'Tensor': '    return Tensor();',
+    'Tensor&': '    return *this;',
+    'Operator': '    return Operator();',
+    'Operator&': '    return *this;',
+    'Ptr': '    return nullptr;',
+    'std::string': '    return "";',
+    'std::string&': '    return "";',
+    'string': ' return "";',
+    'int': '    return 0;',
+    'DataType': '    return DT_FLOAT;',
+    'InferenceContextPtr': '    return nullptr;',
+    'SubgraphBuilder': '    return nullptr;',
+    'OperatorImplPtr': '    return nullptr;',
+    'OutHandler': '    return nullptr;',
+    'std::vector<std::string>': '    return {};',
+    'std::vector<int64_t>': '    return {};',
+    'std::map': '    return {};',
+    'uint32_t': '    return 0;',
+    'int64_t': '    return 0;',
+    'uint64_t': '    return 0;',
+    'size_t': '    return 0;',
+    'float': '    return 0.0f;',
+    'bool': '    return false;',
+}
+
+"""
+    max code len per line in hua_wei software programming specifications
+"""
+max_code_len_per_line = 100
+
+"""
+    white_list_for_debug, include_dir_key_words is to
+    determines which header files to generate cc files from
+    when DEBUG on
+"""
+white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h", "inference_context.h",
+                        "ge_ir_build.h", "ge_api.h", "ascend_string.h", "gnode.h"]
+include_dir_key_words = ["ge", "graph"]
+DEBUG = True
+
+
+def need_generate_func(func_line):
+    """
+    Decide whether a stub body should be emitted for a declaration.
+    :param func_line: declaration text (surrounding whitespace is ignored)
+    :return: False for lines ending in default/delete or starting with typedef/using, else True
+    """
+    stripped = func_line.strip()
+    # str.endswith / str.startswith accept a tuple of alternatives
+    return not (stripped.endswith(("default", "delete")) or stripped.startswith(("typedef", "using")))
+
+
+def file_endswith_white_list_suffix(file):
+    """
+    Tell whether a header should be turned into a stub source file.
+    :param file: header file name or path
+    :return: True when DEBUG is off, otherwise True only if file ends with
+             one of the entries of white_list_for_debug
+    """
+    if not DEBUG:
+        # the white list is only enforced while DEBUG is on
+        return True
+    # any() stops at the first matching suffix, like the explicit loop did
+    return any(file.endswith(suffix) for suffix in white_list_for_debug)
+
+
+"""
+    belows are patterns used for analyse .h file
+"""
+# pattern function
+pattern_func = re.compile(r"""(^[\s]*)          #leading with space,we will find and delete after
+([a-zA-Z~_]            # void int likely
+.*
+[)]                     #we find )
+(?!.*{)                 # we do not want the case int abc() const
+.*)
+(;.*)                   #we want to find ; and after for we will replace these later
+\n$
+""", re.VERBOSE | re.MULTILINE | re.DOTALL)
+
+# comments: a line starting with '//', a line starting with '/*', a line ending with '*/'
+pattern_comment = re.compile(r'^\s*//')
+pattern_comment_2_start = re.compile(r'^\s*/[*]')
+pattern_comment_2_end = re.compile(r'[*]/\s*$')
+# '#define' line, and a line ending with a backslash (macro continuation)
+pattern_define = re.compile(r'^\s*#define')
+pattern_define_return = re.compile(r'\\\s*$')
+# blank line
+pattern_blank_line = re.compile(r'^\s*$')
+# keywords removed from a declaration: virtual, explicit, friend, static
+pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)')
+# leading whitespace (group 1) in front of the first identifier character
+pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]')
+# function names look like 'func (' or 'func('
+# operators are the exception: the text between 'operator' and '(' is not an identifier,
+# for example "operator = ()"
+pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]')
+# a line starting with 'template', and a line ending with '>'
+pattern_template = re.compile(r'^\s*template')
+pattern_template_end = re.compile(r'>\s*$')
+# namespace opened on the same line as its '{'
+pattern_namespace = re.compile(r'namespace.*{')
+# class/struct header; the '{' may be on a later line, but a ';' on the same line rejects the match
+pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR)
+# opening and closing brace
+pattern_start = re.compile('{')
+pattern_end = re.compile('}')
+
+line_index = 0
+
+
+class H2CC(object):
+    def __init__(self, input_file, output_file, shared_includes_content):
+        """Load every line of the header and open the stub source file for writing.
+        :param input_file: path of the header file to read
+        :param output_file: path of the file to generate (opened with mode 'w')
+        :param shared_includes_content: strings written first by write_inc_content()
+        """
+        self.input_file = input_file
+        self.output_file = output_file
+        self.shared_includes_content = shared_includes_content
+        self.line_index = 0
+        self.input_fd = open(self.input_file, 'r')
+        self.input_content = self.input_fd.readlines()
+        self.output_fd = open(self.output_file, 'w')
+
+        # scope stack; entries are 'normal_now' (inside a plain {}), 'class_now' or 'namespace_now'
+        self.stack = []
+        self.stack_class = []
+        self.stack_template = []
+        # signatures already written; write_func_content checks it to avoid duplicates
+        self.func_list_exist = []
+
+    def __del__(self):
+        """Close both file handles; tolerate an __init__ that failed part-way."""
+        # getattr guards: __del__ also runs when open() raised inside __init__
+        for handle in (getattr(self, 'input_fd', None), getattr(self, 'output_fd', None)):
+            if handle is not None:
+                handle.close()
+        # the bookkeeping lists are released together with the instance itself
+
+    def just_skip(self):
+        """Advance line_index past one blank or '//' line, then a '/* */' block, then a #define."""
+        if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search(
+                self.input_content[self.line_index]):  # /n or comment using //
+            self.line_index += 1  # NOTE(review): the index is not re-checked against the file length
+        if pattern_comment_2_start.search(self.input_content[self.line_index]):  # comment using /*
+            while not pattern_comment_2_end.search(self.input_content[self.line_index]):  # */
+                self.line_index += 1
+            self.line_index += 1
+        # skip define, including the lines it continues onto with a trailing backslash
+        if pattern_define.search(self.input_content[self.line_index]):
+            while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search(
+                    self.input_content[self.line_index]):
+                self.line_index += 1
+            self.line_index += 1
+
+    def write_inc_content(self):
+        """Emit every shared include line into the generated file."""
+        self.output_fd.writelines(self.shared_includes_content)
+
+    def h2cc(self):
+        """
+        Convert the loaded header line by line and write the stub definitions to output_fd.
+        """
+        logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file)
+        global pattern_comment
+        global pattern_comment_2_start
+        global pattern_comment_2_end
+        global pattern_blank_line
+        global pattern_func
+        global pattern_keyword
+        global pattern_leading_space
+        global pattern_func_name
+        global pattern_template
+        global pattern_template_end
+        global pattern_namespace
+        global pattern_class
+        global pattern_start
+        global pattern_end
+        global line_index
+        # write inc content
+        self.write_inc_content()
+        # core processing cycle, process the input .h file by line
+        while self.line_index < len(self.input_content):
+            # handle comment and blank line
+            self.just_skip()
+
+            # match namespace
+            self.handle_namespace()
+
+            # match template
+            template_string = self.handle_template()
+            # match class
+            line = self.input_content[self.line_index]
+            match_class = pattern_class.search(line)
+            match_start = pattern_start.search(line)
+            handle_class_result = self.handle_class(template_string, line, match_start, match_class)
+            if handle_class_result == "continue":
+                continue
+
+            # match "}"
+            handle_stack_result = self.handle_stack(match_start)
+            if handle_stack_result == "continue":
+                continue
+            # handle func
+            handle_func1_result, line, start_i = self.handle_func1(line)
+            if handle_func1_result == "continue":
+                continue
+
+            # here means func is found
+            # look for 'friend' before the keywords are stripped, otherwise it can never match
+            friend_match = re.search('friend ', line)
+            # delete key word
+            line = pattern_keyword.sub('', line)
+            logging.info("line[%s]", line)
+
+            # Class member function; a friend is emitted without the class name
+            if len(self.stack_class) > 0 and not friend_match:
+                line, func_name = self.handle_class_member_func(line, template_string)
+            # Normal functions
+            else:
+                line, func_name = self.handle_normal_func(line, template_string)
+
+            need_generate = need_generate_func(line)
+            # func body
+            line += self.implement_function(line)
+            # comment
+            line = self.gen_comment(start_i) + line
+            # write to out file
+            self.write_func_content(line, func_name, need_generate)
+            # next loop
+            self.line_index += 1
+
+        logging.info('Added %s functions', len(self.func_list_exist))
+        logging.info('Successfully converted,please see ' + self.output_file)
+
+    def handle_func1(self, line):
+        """
+        Join a declaration with its continuation lines and strip it down to 'ret name(args)'.
+        May advance self.line_index and push 'normal_now' on self.stack.
+        :param line: text of the line at self.line_index
+        :return: ("continue" or "pass", processed text, index of the first line or None)
+        """
+        find1 = re.search('[(]', line)
+        if not find1:
+            self.line_index += 1
+            return "continue", line, None
+        find2 = re.search('[)]', line)
+        start_i = self.line_index
+        space_match = pattern_leading_space.search(line)
+        # indentation of the first line; removed from every continuation line when known
+        indent = '^' + space_match.group(1) if space_match else None
+        # deal with
+        # int abc(int a,
+        #        int b)
+        while not find2 and self.line_index + 1 < len(self.input_content):
+            self.line_index += 1
+            line2 = self.input_content[self.line_index]
+            if indent is not None:
+                line2 = re.sub(indent, '', line2)
+            line += line2
+            find2 = re.search('[)]', line2)
+
+        # a '{' on the last joined line means the header already carries a body
+        match_start = pattern_start.search(self.input_content[self.line_index])
+        match_end = pattern_end.search(self.input_content[self.line_index])
+        if match_start:  # like  ) {  or ) {}    int the last line
+            if not match_end:
+                # the body goes on; handle_stack then skips the lines of that scope
+                self.stack.append('normal_now')
+            self.line_index += 1
+            return "continue", line, start_i
+        logging.info("line[%s]", line)
+        # '  int abc();'->'int abc()'
+        (line, match) = pattern_func.subn(r'\2\n', line)
+        logging.info("line[%s]", line)
+        # deal with case:
+        # 'int \n abc(int a, int b)'
+        if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]):
+            line = self.input_content[start_i - 1] + line
+        line = line.lstrip()
+        if not match:
+            # pattern_func did not match, so this is not a declaration to stub
+            self.line_index += 1
+            return "continue", line, start_i
+        return "pass", line, start_i
+
+    def handle_stack(self, match_start):
+        """Track '{' / '}' on the current line and skip lines inside a plain brace scope.
+        :param match_start: match object for '{' on the current line, or None
+        :return: "continue" when the line was consumed (line_index advanced), else "pass"
+        """
+        line = self.input_content[self.line_index]
+        match_end = pattern_end.search(line)
+        if match_start:
+            self.stack.append('normal_now')
+        if match_end:
+            top_status = self.stack.pop()
+            if top_status == 'namespace_now':
+                self.output_fd.write(line + '\n')
+            elif top_status == 'class_now':
+                self.stack_class.pop()
+                self.stack_template.pop()
+        if match_start or match_end:
+            self.line_index += 1
+            return "continue"
+
+        if len(self.stack) > 0 and self.stack[-1] == 'normal_now':
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_class(self, template_string, line, match_start, match_class):
+        """Open a class scope when the current line starts a class/struct definition.
+        :param template_string: template header collected just before this line, or ''
+        :param line: text of the current line
+        :param match_start: match object for '{' on the current line, or None
+        :param match_class: pattern_class match for the current line, or None
+        :return: "continue" when a class/struct header was consumed, else "pass"
+        """
+        if match_class:  # we face a class
+            self.stack_template.append(template_string)
+            self.stack.append('class_now')
+            class_name = match_class.group(3)
+
+            # class template specializations: class A<u,Node<u> >
+            if '<' in class_name:
+                k = line.index('<')
+                fit = 1
+                for ii in range(k + 1, len(line)):
+                    if line[ii] == '<':
+                        fit += 1
+                    if line[ii] == '>':
+                        fit -= 1
+                    if fit == 0:
+                        break
+                class_name += line[k + 1:ii + 1]
+            logging.info('class_name[%s]', class_name)
+            self.stack_class.append(class_name)
+            while not match_start:
+                self.line_index += 1
+                line = self.input_content[self.line_index]
+                match_start = pattern_start.search(line)
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_template(self):
+        """Collect a (possibly multi-line) 'template<...>' header; return '' if there is none."""
+        current = self.input_content[self.line_index]
+        if not pattern_template.search(current):
+            return ''
+        collected = current
+        # keep appending lines until one of them ends with '>'
+        while not pattern_template_end.search(current):
+            self.line_index += 1
+            current = self.input_content[self.line_index]
+            collected += current
+        # step past the last line of the template header
+        self.line_index += 1
+        return collected
+
+    def handle_namespace(self):
+        """Copy a 'namespace ... {' line to the output and record the open scope."""
+        text = self.input_content[self.line_index]
+        if pattern_namespace.search(text) is not None:
+            self.output_fd.write(text + '\n')
+            self.stack.append('namespace_now')
+            self.line_index += 1
+
+    def handle_normal_func(self, line, template_string):
+        """Build the definition header of a function that is not qualified with a class name.
+        :param line: declaration text
+        :param template_string: template header collected before the declaration, or ''
+        :return: (definition header, text up to the last ')' used to detect duplicates)
+        """
+        template_line = ''
+        if template_string != '':
+            template_line = re.sub(r'\s*template', 'template', template_string)
+            # change '< class T = a, class U = A(3)>' to '<class T, class U>'
+            for default_pattern, kept in ((r'\s*=.*>(\s*)$', r'>\1'), (r'\s*=.*,', ','), (r'\s*=.*', '')):
+                template_line = re.sub(default_pattern, kept, template_line)
+        line = template_line + re.sub(r'\s*=.*\)', ')', re.sub(r'\s*=.*,', ',', line))
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def handle_class_member_func(self, line, template_string):
+        """Turn a member declaration into 'Class::func(...)', prefixed by any template headers."""
+        template_line = x = ''
+        if template_string != '':
+            template_string = re.sub(r'\s*template', 'template', template_string)
+            template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
+            template_string = re.sub(r'\s*=.*,', ',', template_string)
+            template_string = re.sub(r'\s*=.*', '', template_string)
+        if self.stack_template[-1] != '':
+            # stack_template is an instance attribute; the bare name raised NameError here
+            if not (re.search(r'<\s*>', self.stack_template[-1])):
+                template_line = re.sub(r'^\s*template', 'template', self.stack_template[-1])
+                if not (re.search(r'<.*>', self.stack_class[-1])):
+                    # for x we get like template<class T, typename U> -> <T,U>
+                    x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
+                    x = re.sub(r'\n', '', x)
+                    x = re.sub(r'\s*=.*,', ',', x)
+                    x = re.sub(r'\s*=.*\>', '>', x)
+                    x = x.rstrip()  # remove \n
+                    x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '', x)  # -> <T, U>
+                    x = re.sub(r'<\s+', '<', x)
+                    x = re.sub(r'\s+>', '>', x)
+                    x = re.sub(r'\s+,', ',', x)
+                    x = re.sub(r',\s+', ', ', x)
+        line = re.sub(r'\s*=\s+0', '', line)
+        line = re.sub(r'\s*=\s+.*,', ',', line)
+        line = re.sub(r'\s*=\s+.*\)', ')', line)
+        logging.info("x[%s]\nline[%s]", x, line)
+        # if the function is long, void ABC::foo()
+        # breaks into two lines void ABC::\n foo()
+        temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
+        if len(temp_line) > max_code_len_per_line:
+            line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
+        else:
+            line = temp_line
+        logging.info("line[%s]", line)
+        # add template as the above if there is one
+        template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
+        template_line = re.sub(r'\s*=.*,', ',', template_line)
+        template_line = re.sub(r'\s*=.*', '', template_line)
+        line = template_line + template_string + line
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def write_func_content(self, content, func_name, need_generate):
+        if need_generate and func_name not in self.func_list_exist:  # emit each signature once
+            self.output_fd.write(content)
+            self.func_list_exist.append(func_name)
+            logging.info('add func:[%s]', func_name)
+
+    def gen_comment(self, start_i):
+        comment_line = ''
+        # Collect the comment block that sits directly above the declaration starting at start_i
+        k = start_i - 1  # one line before this func start
+        if pattern_template.search(self.input_content[k]):
+            k -= 1  # step over a line that starts with 'template'
+        if pattern_comment_2_end.search(self.input_content[k]):
+            comment_line = self.input_content[k].lstrip()
+            while not pattern_comment_2_start.search(self.input_content[k]):
+                k -= 1
+                comment_line = self.input_content[k].lstrip() + comment_line
+        else:
+            for j in range(k, 0, -1):  # NOTE(review): stops before index 0, so line 0 is never copied
+                c_line = self.input_content[j]
+                if pattern_comment.search(c_line):
+                    c_line = re.sub(r'\s*//', '//', c_line)
+                    comment_line = c_line + comment_line
+                else:
+                    break
+        return comment_line
+
+    @staticmethod
+    def implement_function(func):
+        """
+        Produce the '{ ... }' stub body for a definition header.
+        :param func: definition header; its first token(s) are read as the return type
+        :return: body text holding the RETURN_STATEMENTS entry, or an empty body if none matches
+        """
+        tokens = func.split()
+        # a leading 'const' is skipped when picking the return type
+        type_pos = 1 if tokens[0] == "const" else 0
+        return_type = tokens[type_pos]
+        # token right after the type; it may carry a detached '*' or '&'
+        follower = tokens[type_pos + 1] if len(tokens) > type_pos + 1 else ''
+
+        if return_type.startswith(('std::map', 'std::set', 'std::vector')):
+            return_type = "std::map"
+        if return_type.endswith('*') or follower.startswith('*'):
+            return_type = "Ptr"
+        if follower.startswith('&'):
+            return_type += "&"
+        statement = RETURN_STATEMENTS.get(return_type)
+        if statement is None:
+            logging.warning("Unhandled return type[%s]", return_type)
+            statement = ''
+        # opening brace, return statement (may be empty), closing brace, blank line
+        return '{\n' + statement + '\n}\n\n'
+
+
+def collect_header_files(path):
+    """
+    Walk path and gather every '.h' file whose name does not contain "git".
+    :param path: directory to scan; must contain '/' (the part after the last '/' prefixes includes)
+    :return: (header paths, '#include' lines followed by the lines needed for the stub error code)
+    """
+    header_files = []
+    shared_includes_content = []
+    for root, dirs, files in os.walk(path):
+        # os.walk visits dirs in the order left in this list; sorting makes the output reproducible
+        dirs.sort()
+        for file in sorted(files):
+            if "git" in file or not file.endswith('.h'):
+                continue
+            # normalise the separators to '/' before slicing the include path
+            file_path = os.path.join(root, file).replace('\\', '/')
+            header_files.append(file_path)
+            include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:])
+            shared_includes_content.append(include_str)
+    # for acl error code
+    shared_includes_content.append('#include <iostream>\n')
+    shared_includes_content.append('const int ACL_ERROR_COMPILING_STUB_MODE = 100039;\n')
+    return header_files, shared_includes_content
+
+
+def generate_stub_file(inc_dir, out_cc_dir):
+    """Generate one stub .cc file for every white-listed header found below inc_dir.
+    :param inc_dir: directory scanned for header files
+    :param out_cc_dir: output directory prefix, joined by plain concatenation
+    """
+    target_header_files, shared_includes_content = collect_header_files(inc_dir)
+    for header_file in target_header_files:
+        if not file_endswith_white_list_suffix(header_file):
+            continue
+        # the dot is escaped and 'h' is matched exactly once: only a trailing '.h' is replaced
+        cc_file = re.sub(r'\.h$', '.cc', header_file)
+        h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content)
+        h_2_cc.h2cc()
+
+
+def gen_code(inc_dir, out_cc_dir):
+    """
+    Run stub generation for every sub-directory named in include_dir_key_words.
+    :param inc_dir: root include directory (a trailing '/' is added if missing)
+    :param out_cc_dir: output directory (a trailing '/' is added if missing)
+    :return: None
+    """
+    # both values are used as plain string prefixes, hence the trailing '/'
+    inc_root = inc_dir if inc_dir.endswith('/') else inc_dir + '/'
+    out_root = out_cc_dir if out_cc_dir.endswith('/') else out_cc_dir + '/'
+    for sub_dir in include_dir_key_words:
+        generate_stub_file(inc_root + sub_dir, out_root)
+
+
+if __name__ == '__main__':
+    inc_dir = sys.argv[1]  # first argument: root include directory to scan
+    out_cc_dir = sys.argv[2]  # second argument: directory that receives the generated .cc files
+    gen_code(inc_dir, out_cc_dir)
diff --git a/src/ge/stub/Makefile b/src/ge/stub/Makefile
new file mode 100644
index 00000000..820fc70d
--- /dev/null
+++ b/src/ge/stub/Makefile
@@ -0,0 +1,6 @@
+inc_path := $(shell pwd)/inc/external/
+out_path := $(shell pwd)/out/ge/lib64/stub/
+stub_path := $(shell pwd)/framework/domi/stub/
+
+mkdir_stub := $(shell mkdir -p $(out_path))
+local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path))
diff --git a/src/ge/stub/README b/src/ge/stub/README
new file mode 100644
index 00000000..ca98ce85
--- /dev/null
+++ b/src/ge/stub/README
@@ -0,0 +1,4 @@
+###################################################################################
+The stub directory holds the files used to build the stub libraries.
+gen_stubapi.py is used to retrieve the API declarations and generate stub functions
+###################################################################################
diff --git a/src/ge/stub/README.md b/src/ge/stub/README.md
new file mode 100755
index 00000000..a085e537
--- /dev/null
+++ b/src/ge/stub/README.md
@@ -0,0 +1,44 @@
+# "stub"  usage:
+
+## Description
+
+- The files libge_compiler.so and libgraph.so are used by applications that call the IR build interface.
+
+# Attention
+
+- Do not link against any library other than libge_compiler.so and libgraph.so, as the other libraries may change in the future.
+
+# Usage
+
+## Compile:   compile  the application invoking the IR build API.
+
+Makefile:
+
+```
+
+ATC_INCLUDE_DIR := $(ASCEND_PATH)/atc/include
+OPP_INCLUDE_DIR := $(ASCEND_PATH)/opp/op_proto/built-in/inc
+LOCAL_MODULE_NAME := ir_build
+CC := g++
+CFLAGS := -std=c++11 -g -Wall
+SRCS := $(wildcard $(LOCAL_DIR)/main.cpp)
+INCLUDES := -I $(ASCEND_OPP_PATH)/op_proto/built-in/inc \
+            -I $(ATC_INCLUDE_DIR)/graph \
+            -I $(ATC_INCLUDE_DIR)/ge \
+
+LIBS := -L ${ASCEND_PATH}/atc/lib64/stub \
+    -lgraph \
+    -lge_compiler
+ir_build:
+    mkdir -p out
+    $(CC) $(SRCS) $(INCLUDES) $(LIBS) $(CFLAGS) -o ./out/$(LOCAL_MODULE_NAME)
+clean:
+    rm -rf out
+
+```
+make
+
+## Run: before starting the application, set LD_LIBRARY_PATH to the real library directory (atc/lib64)
+
+- export LD_LIBRARY_PATH=${ASCEND_PATH}/atc/lib64
+- ./out/ir_build
diff --git a/src/ge/stub/gen_stubapi.py b/src/ge/stub/gen_stubapi.py
new file mode 100644
index 00000000..b6e1e70c
--- /dev/null
+++ b/src/ge/stub/gen_stubapi.py
@@ -0,0 +1,578 @@
+import os
+import re
+import sys
+import logging
+
+logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s',
+                    level=logging.INFO)
+
+"""
+    this attr is used for symbol table visible
+"""
+GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
+
+"""
+    generate stub func body by return type
+"""
+RETURN_STATEMENTS = {
+    'graphStatus': '    std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "\n '
+                   '        << "environment variables and compilation options to make sure you use the correct library."\n'
+                   '        << std::endl;\n'
+                   '    return ACL_ERROR_COMPILING_STUB_MODE;',
+    'Status': '    return SUCCESS;',
+    'Graph': '    return Graph();',
+    'Graph&': '    return *this;',
+    'Format': '    return Format();',
+    'Format&': '    return *this;',
+    'Shape': '    return Shape();',
+    'Shape&': '    return *this;',
+    'TensorDesc': '    return TensorDesc();',
+    'TensorDesc&': '    return *this;',
+    'Tensor': '    return Tensor();',
+    'Tensor&': '    return *this;',
+    'Operator': '    return Operator();',
+    'Operator&': '    return *this;',
+    'Ptr': '    return nullptr;',
+    'std::string': '    return "";',
+    'std::string&': '    return "";',
+    'string': ' return "";',
+    'int': '    return 0;',
+    'DataType': '    return DT_FLOAT;',
+    'InferenceContextPtr': '    return nullptr;',
+    'SubgraphBuilder': '    return nullptr;',
+    'OperatorImplPtr': '    return nullptr;',
+    'OutHandler': '    return nullptr;',
+    'std::vector<std::string>': '    return {};',
+    'std::vector<int64_t>': '    return {};',
+    'std::map': '    return {};',
+    'uint32_t': '    return 0;',
+    'int64_t': '    return 0;',
+    'uint64_t': '    return 0;',
+    'size_t': '    return 0;',
+    'float': '    return 0.0f;',
+    'bool': '    return false;',
+}
+
+"""
+    max code len per line in hua_wei software programming specifications
+"""
+max_code_len_per_line = 100
+
+"""
+    white_list_for_debug, include_dir_key_words is to
+    determines which header files to generate cc files from
+    when DEBUG on
+"""
+white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h",
+                        "ge_ir_build.h", "ge_api.h", "tensorflow_parser.h", "caffe_parser.h"]
+include_dir_key_words = ["ge", "graph", "parser"]
+DEBUG = True
+
+
+def need_generate_func(func_line):
+    """
+    Decide whether a stub body should be emitted for a declaration.
+    :param func_line: declaration text (surrounding whitespace is ignored)
+    :return: False for lines ending in default/delete or starting with typedef/using, else True
+    """
+    stripped = func_line.strip()
+    # str.endswith / str.startswith accept a tuple of alternatives
+    return not (stripped.endswith(("default", "delete")) or stripped.startswith(("typedef", "using")))
+
+
+def file_endswith_white_list_suffix(file):
+    """
+    Tell whether a header should be turned into a stub source file.
+    :param file: header file name or path
+    :return: True when DEBUG is off, otherwise True only if file ends with
+             one of the entries of white_list_for_debug
+    """
+    if not DEBUG:
+        # the white list is only enforced while DEBUG is on
+        return True
+    # any() stops at the first matching suffix, like the explicit loop did
+    return any(file.endswith(suffix) for suffix in white_list_for_debug)
+
+
+"""
+    belows are patterns used for analyse .h file
+"""
+# pattern function
+pattern_func = re.compile(r"""(^[\s]*)          #leading with space,we will find and delete after
+([a-zA-Z~_]            # void int likely
+.*
+[)]                     #we find )
+(?!.*{)                 # we do not want the case int abc() const { return 1;}
+.*)
+(;.*)                   #we want to find ; and after for we will replace these later
+\n$
+""", re.VERBOSE | re.MULTILINE | re.DOTALL)
+
+# pattern comment
+pattern_comment = re.compile(r'^\s*//')
+pattern_comment_2_start = re.compile(r'^\s*/[*]')
+pattern_comment_2_end = re.compile(r'[*]/\s*$')
+# pattern define
+pattern_define = re.compile(r'^\s*#define')
+pattern_define_return = re.compile(r'\\\s*$')
+# blank line
+pattern_blank_line = re.compile(r'^\s*$')
+# virtual,explicit,friend,static
+pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)')
+# lead space
+pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]')
+# functions will have patterns such as func ( or func(
+# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist
+# format like :"operator = ()"
+pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]')
+# template
+pattern_template = re.compile(r'^\s*template')
+pattern_template_end = re.compile(r'>\s*$')
+# namespace
+pattern_namespace = re.compile(r'namespace.*{')
+# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with
+pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR)
+# {}
+pattern_start = re.compile('{')
+pattern_end = re.compile('}')
+
+line_index = 0
+
+
+class H2CC(object):
+    def __init__(self, input_file, output_file, shared_includes_content):
+        """Load every line of the header and open the stub source file for writing.
+        :param input_file: path of the header file to read
+        :param output_file: path of the file to generate (opened with mode 'w')
+        :param shared_includes_content: strings written first by write_inc_content()
+        """
+        self.input_file = input_file
+        self.output_file = output_file
+        self.shared_includes_content = shared_includes_content
+        self.line_index = 0
+        self.input_fd = open(self.input_file, 'r')
+        self.input_content = self.input_fd.readlines()
+        self.output_fd = open(self.output_file, 'w')
+
+        # scope stack; entries are 'normal_now' (inside a plain {}), 'class_now' or 'namespace_now'
+        self.stack = []
+        self.stack_class = []
+        self.stack_template = []
+        # signatures that were already generated for this file
+        self.func_list_exist = []
+
+    def __del__(self):
+        """Close both file handles; tolerate an __init__ that failed part-way."""
+        # getattr guards: __del__ also runs when open() raised inside __init__
+        for handle in (getattr(self, 'input_fd', None), getattr(self, 'output_fd', None)):
+            if handle is not None:
+                handle.close()
+        # the bookkeeping lists are released together with the instance itself
+
+    def just_skip(self):
+        """Advance line_index past one blank or '//' line, then a '/* */' block, then a #define."""
+        if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search(
+                self.input_content[self.line_index]):  # /n or comment using //
+            self.line_index += 1  # NOTE(review): the index is not re-checked against the file length
+        if pattern_comment_2_start.search(self.input_content[self.line_index]):  # comment using /*
+            while not pattern_comment_2_end.search(self.input_content[self.line_index]):  # */
+                self.line_index += 1
+            self.line_index += 1
+        # skip define, including the lines it continues onto with a trailing backslash
+        if pattern_define.search(self.input_content[self.line_index]):
+            while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search(
+                    self.input_content[self.line_index]):
+                self.line_index += 1
+            self.line_index += 1
+
+    def write_inc_content(self):
+        """Emit every shared include line into the generated file."""
+        self.output_fd.writelines(self.shared_includes_content)
+
+    def h2cc(self):
+        """Convert the loaded header into stub definitions written to self.output_fd.
+        :return: None
+        """
+        logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file)
+        global pattern_comment  # the pattern_* names are only read in this method
+        global pattern_comment_2_start
+        global pattern_comment_2_end
+        global pattern_blank_line
+        global pattern_func
+        global pattern_keyword
+        global pattern_leading_space
+        global pattern_func_name
+        global pattern_template
+        global pattern_template_end
+        global pattern_namespace
+        global pattern_class
+        global pattern_start
+        global pattern_end
+        global line_index  # NOTE(review): the loop below uses self.line_index, not this name
+        # write inc content
+        self.write_inc_content()
+        # core processing cycle, process the input .h file by line
+        while self.line_index < len(self.input_content):
+            # handle comment and blank line
+            self.just_skip()
+
+            # match namespace
+            self.handle_namespace()
+
+            # match template
+            template_string = self.handle_template()
+            # match class
+            line = self.input_content[self.line_index]
+            match_class = pattern_class.search(line)
+            match_start = pattern_start.search(line)
+            handle_class_result = self.handle_class(template_string, line, match_start, match_class)
+            if handle_class_result == "continue":  # the handler already advanced line_index
+                continue
+
+            # match "}"
+            handle_stack_result = self.handle_stack(match_start)
+            if handle_stack_result == "continue":
+                continue
+            # handle func
+            handle_func1_result, line, start_i = self.handle_func1(line)
+            if handle_func1_result == "continue":
+                continue
+
+            # here means func is found
+            # delete key word
+            line = pattern_keyword.sub('', line)
+            logging.info("line[%s]", line)
+
+            # Class member function
+            # if friend we will not add class name
+            friend_match = re.search('friend ', line)
+            if len(self.stack_class) > 0 and not friend_match:
+                line, func_name = self.handle_class_member_func(line, template_string)
+            # Normal functions
+            else:
+                line, func_name = self.handle_normal_func(line, template_string)
+
+            need_generate = need_generate_func(line)  # decided before the body is appended to line
+            # func body
+            line += self.implement_function(line)
+            # comment
+            line = self.gen_comment(start_i) + line  # start_i: index where the declaration began
+            # write to out file
+            self.write_func_content(line, func_name, need_generate)
+            # next loop
+            self.line_index += 1
+
+        logging.info('Added %s functions', len(self.func_list_exist))
+        logging.info('Successfully converted,please see ' + self.output_file)
+
+    def handle_func1(self, line):
+        """Detect a function declaration starting at the current line and normalise its text.
+        :param line: text of self.input_content[self.line_index]
+        :return: (status, text, start_i); status "continue" means nothing to emit and line_index
+                 was already advanced, "pass" means text is a declaration; start_i is None without '('
+        """
+        if '(' not in line:
+            self.line_index += 1
+            return "continue", line, None
+        start_i = self.line_index
+        space_match = pattern_leading_space.search(line)
+        # Regex removing the declaration's own indent from continuation lines; the
+        # match is tested once here so a failed match cannot raise inside the loop.
+        indent = '^' + space_match.group(1) if space_match else None
+        # deal with
+        # int abc(int a,
+        #        int b)
+        # Join following lines until one holds ')'; stop at the last input line
+        # rather than indexing past the end of a truncated header.
+        line2 = line
+        while ')' not in line2 and self.line_index + 1 < len(self.input_content):
+            self.line_index += 1
+            line2 = self.input_content[self.line_index]
+            if indent is not None:
+                line2 = re.sub(indent, '', line2)
+            line += line2
+
+        # When the last line of the declaration matches pattern_start nothing is
+        # emitted for it; only the block state is recorded before moving on.
+        match_start = pattern_start.search(self.input_content[self.line_index])
+        match_end = pattern_end.search(self.input_content[self.line_index])
+        if match_start:  # like  ) {  or ) {}    int the last line
+            if not match_end:
+                self.stack.append('normal_now')
+            self.line_index += 1
+            return "continue", line, start_i
+        logging.info("line[%s]", line)
+        # '  int abc();'->'int abc()'
+        (line, match) = pattern_func.subn(r'\2\n', line)
+        logging.info("line[%s]", line)
+        # deal with case:
+        # 'int \n abc(int a, int b)'
+        # start_i == 0 has no previous line; index -1 would wrap to the file's last line.
+        if start_i > 0 and re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]):
+            line = self.input_content[start_i - 1] + line
+        line = line.lstrip()
+        if not match:
+            self.line_index += 1
+            return "continue", line, start_i
+        return "pass", line, start_i
+
+    def handle_stack(self, match_start):
+        """Update the block-state stack for the current line and skip lines inside plain blocks.
+        :param match_start: result of pattern_start.search() on the current line (None if no match)
+        :return: "continue" when the line was consumed (line_index advanced), otherwise "pass"
+        """
+        line = self.input_content[self.line_index]
+        match_end = pattern_end.search(line)
+        if match_start:
+            self.stack.append('normal_now')
+        if match_end:
+            top_status = self.stack.pop()  # NOTE(review): IndexError if the stack is empty (unbalanced input?)
+            if top_status == 'namespace_now':
+                self.output_fd.write(line + '\n')  # echo the line that closes the namespace
+            elif top_status == 'class_now':
+                self.stack_class.pop()  # drop the entries handle_class pushed for this class
+                self.stack_template.pop()
+        if match_start or match_end:
+            self.line_index += 1
+            return "continue"
+
+        if len(self.stack) > 0 and self.stack[-1] == 'normal_now':  # inside a plain block: skip the line
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_class(self, template_string, line, match_start, match_class):
+        """Register a class header found on the current line and move past its opening line.
+        :param template_string: template text pushed on stack_template for this class ('' if none)
+        :param line: text of the current input line
+        :param match_start: result of pattern_start.search(line)
+        :param match_class: result of pattern_class.search(line); group(3) is used as the class name
+        :return: "continue" when a class header was consumed, otherwise "pass"
+        """
+        if match_class:  # we face a class
+            self.stack_template.append(template_string)
+            self.stack.append('class_now')
+            class_name = match_class.group(3)
+
+            # class template specializations: class A<u,Node<u> >
+            if '<' in class_name:
+                k = line.index('<')
+                fit = 1  # number of '<' still waiting for their '>'
+                for ii in range(k + 1, len(line)):
+                    if line[ii] == '<':
+                        fit += 1
+                    if line[ii] == '>':
+                        fit -= 1
+                    if fit == 0:
+                        break
+                class_name += line[k + 1:ii + 1]  # text after the first '<' up to index ii inclusive
+            logging.info('class_name[%s]', class_name)
+            self.stack_class.append(class_name)
+            while not match_start:  # advance to the first line matching pattern_start (not bounds-checked)
+                self.line_index += 1
+                line = self.input_content[self.line_index]
+                match_start = pattern_start.search(line)
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_template(self):
+        """Consume a template header starting at the current line; return its text or ''."""
+        current = self.input_content[self.line_index]
+        if not pattern_template.search(current):
+            return ''
+        pieces = [current]
+        # a header may span several lines: read on until pattern_template_end matches
+        while not pattern_template_end.search(current):
+            self.line_index += 1
+            current = self.input_content[self.line_index]
+            pieces.append(current)
+        # leave line_index on the line after the header
+        self.line_index += 1
+        return ''.join(pieces)
+
+    def handle_namespace(self):
+        """Echo a namespace opening line to the output, push 'namespace_now' and move on."""
+        text = self.input_content[self.line_index]
+        if pattern_namespace.search(text):  # any other line leaves all state untouched
+            self.output_fd.write(text + '\n')
+            self.stack.append('namespace_now')
+            self.line_index += 1
+
+    def handle_normal_func(self, line, template_string):
+        """Build the definition text of a non-member function; return (text, func_name)."""
+        header = template_string
+        if header != '':
+            header = re.sub(r'\s*template', 'template', header)
+            # change '< class T = a, class U = A(3)>' to '<class T, class U>'
+            for regex, repl in ((r'\s*=.*>(\s*)$', r'>\1'), (r'\s*=.*,', ','), (r'\s*=.*', '')):
+                header = re.sub(regex, repl, header)
+        # strip the '= value' parts from the parameter list as well
+        for regex, repl in ((r'\s*=.*,', ','), (r'\s*=.*\)', ')')):
+            line = re.sub(regex, repl, line)
+        line = header + line
+        # func_name is everything from the start of the text up to the last ')'
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def handle_class_member_func(self, line, template_string):
+        """Rewrite a member declaration as 'Class<args>::name(...)'; return (text, func_name).
+        line is the declaration, template_string the member's own template header ('' if none)."""
+        template_line, x = '', ''
+        if template_string != '':
+            template_string = re.sub(r'\s*template', 'template', template_string)
+            template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
+            template_string = re.sub(r'\s*=.*,', ',', template_string)
+            template_string = re.sub(r'\s*=.*', '', template_string)
+        if self.stack_template[-1] != '':  # template text recorded for the enclosing class
+            if not (re.search(r'<\s*>', self.stack_template[-1])):
+                template_line = re.sub(r'^\s*template', 'template', self.stack_template[-1])
+                if not (re.search(r'<.*>', self.stack_class[-1])):
+                    # for x we get like template<class T, typename U> -> <T,U>
+                    x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
+                    x = re.sub(r'\n', '', x)
+                    x = re.sub(r'\s*=.*,', ',', x)
+                    x = re.sub(r'\s*=.*\>', '>', x)
+                    x = x.rstrip()  # remove \n
+                    x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '', x)  # remove class,typename ->  <T, U>
+                    x = re.sub(r'<\s+', '<', x)
+                    x = re.sub(r'\s+>', '>', x)
+                    x = re.sub(r'\s+,', ',', x)
+                    x = re.sub(r',\s+', ', ', x)
+        line = re.sub(r'\s*=\s+0', '', line)
+        line = re.sub(r'\s*=\s+.*,', ',', line)
+        line = re.sub(r'\s*=\s+.*\)', ')', line)
+        logging.info("x[%s]\nline[%s]", x, line)
+        # if the function is long, void ABC::foo()
+        # breaks into two lines void ABC::\n foo()
+        temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
+        if len(temp_line) > max_code_len_per_line:
+            line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
+        else:
+            line = temp_line
+        logging.info("line[%s]", line)
+        # add template as the above if there is one
+        template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
+        template_line = re.sub(r'\s*=.*,', ',', template_line)
+        template_line = re.sub(r'\s*=.*', '', template_line)
+        line = template_line + template_string + line  # class template, member template, then declaration
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def write_func_content(self, content, func_name, need_generate):  # emits each func_name at most once
+        if need_generate and func_name not in self.func_list_exist:
+            self.output_fd.write(content)
+            self.func_list_exist += [func_name]
+            logging.info('add func:[%s]', func_name)
+
+    def gen_comment(self, start_i):  # returns the comment text found directly above input line start_i
+        comment_line = ''
+        # Function comments are on top of function declarations, copy them over
+        k = start_i - 1  # one line before this func start; NOTE(review): -1 (last line) when start_i is 0
+        if pattern_template.search(self.input_content[k]):
+            k -= 1  # look above a template line
+        if pattern_comment_2_end.search(self.input_content[k]):
+            comment_line = self.input_content[k].lstrip()
+            while not pattern_comment_2_start.search(self.input_content[k]):  # walk up to the opening line
+                k -= 1
+                comment_line = self.input_content[k].lstrip() + comment_line
+        else:
+            for j in range(k, 0, -1):  # NOTE(review): stops before index 0, so line 0 is never copied
+                c_line = self.input_content[j]
+                if pattern_comment.search(c_line):
+                    c_line = re.sub(r'\s*//', '//', c_line)  # drop whitespace in front of '//'
+                    comment_line = c_line + comment_line
+                else:
+                    break
+        return comment_line
+
+    @staticmethod
+    def implement_function(func):
+        """Return the stub body text for declaration func: '{', one statement line, '}', blank.
+
+        The statement is RETURN_STATEMENTS[key], with key derived from the leading
+        token(s) of func; when no entry exists the line stays empty and a warning
+        is logged.
+        """
+        tokens = func.split()
+        pos = 1 if tokens[0] == "const" else 0  # skip one leading 'const'
+        key = tokens[pos]
+        follower = tokens[pos + 1] if len(tokens) > pos + 1 else ''
+        if key.startswith(('std::map', 'std::set', 'std::vector')):
+            key = "std::map"  # these containers share a single entry
+        # the '*' may be glued to the type ('T* f') or to the next token ('T *f')
+        if key.endswith('*') or follower.startswith('*'):
+            key = "Ptr"
+        if follower.startswith('&'):
+            key += "&"
+        if key in RETURN_STATEMENTS:
+            statement = RETURN_STATEMENTS[key]
+        else:
+            logging.warning("Unhandled return type[%s]", key)
+            statement = ''
+
+        return '{\n' + statement + '\n' + '}\n' + '\n'
+
+
+def collect_header_files(path):
+    """Collect every '.h' file below path plus the include lines shared by all stubs.
+    :param path: directory to walk; the text after its last '/' starts each include path
+    :return: (header_files, shared_includes_content), both lists of str
+    """
+    header_files = []
+    shared_includes_content = []
+    for root, dirs, files in os.walk(path):
+        # os.walk reports entries in arbitrary order; sorting both lists in place
+        # makes the traversal, and so the generated include order, reproducible.
+        dirs.sort()
+        files.sort()
+        for file in files:
+            # NOTE(review): skips every name containing "git", e.g. "digit.h" - confirm intent
+            if "git" in file or not file.endswith('.h'):
+                continue
+            file_path = os.path.join(root, file).replace('\\', '/')
+            header_files.append(file_path)
+            shared_includes_content.append('#include "{}"\n'.format(file_path[path.rindex('/') + 1:]))
+    # for acl error code
+    shared_includes_content.append('#include <iostream>\n')
+    shared_includes_content.append('const int ACL_ERROR_COMPILING_STUB_MODE = 100039;\n')
+    return header_files, shared_includes_content
+
+
+def generate_stub_file(inc_dir, out_cc_dir):
+    """Write one stub .cc for each white-listed header found under inc_dir.
+    :param inc_dir: directory passed to collect_header_files
+    :param out_cc_dir: output prefix, joined by plain concatenation (pass a trailing '/')
+    """
+    target_header_files, shared_includes_content = collect_header_files(inc_dir)
+    for header_file in target_header_files:
+        if not file_endswith_white_list_suffix(header_file):
+            continue
+        # escape the dot and anchor the match so only a trailing '.h' is rewritten
+        cc_file = re.sub(r'\.h$', '.cc', header_file)
+        h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content)
+        h_2_cc.h2cc()
+
+
+def gen_code(inc_dir, out_cc_dir):
+    """Generate stubs for every entry of include_dir_key_words below inc_dir.
+    :param inc_dir: base include directory; '/' is appended when it is missing
+    :param out_cc_dir: output directory; '/' is appended when it is missing
+    :return: None
+    """
+    inc_root = inc_dir if inc_dir.endswith('/') else inc_dir + '/'
+    out_root = out_cc_dir if out_cc_dir.endswith('/') else out_cc_dir + '/'
+    # every key word is appended to the include root and processed on its own;
+    # all passes write into the same output directory
+    for key_word in include_dir_key_words:
+        generate_stub_file(inc_root + key_word, out_root)
+
+
+if __name__ == '__main__':  # command line: <script> <inc_dir> <out_cc_dir>
+    inc_dir = sys.argv[1]  # first argument: base include directory
+    out_cc_dir = sys.argv[2]  # second argument: directory prefix for the generated .cc files
+    gen_code(inc_dir, out_cc_dir)