diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake index 58321f04..421f2632 100755 --- a/cmake/external_libs/protoc.cmake +++ b/cmake/external_libs/protoc.cmake @@ -48,8 +48,14 @@ function(protobuf_generate comp c_var h_var) endif() set(${c_var}) set(${h_var}) + set(_add_target FALSE) foreach(file ${ARGN}) + if("${file}" STREQUAL "TARGET") + set(_add_target TRUE) + continue() + endif() + get_filename_component(abs_file ${file} ABSOLUTE) get_filename_component(file_name ${file} NAME_WE) get_filename_component(file_dir ${abs_file} PATH) @@ -67,11 +73,18 @@ function(protobuf_generate comp c_var h_var) OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${CMAKE_COMMAND} -E echo "generate proto cpp_out ${comp} by ${abs_file}" COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} DEPENDS protoc_build ${abs_file} COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) endforeach() + if(_add_target) + add_custom_target( + ${comp} DEPENDS ${${c_var}} ${${h_var}} + ) + endif() + set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) set(${c_var} ${${c_var}} PARENT_SCOPE) set(${h_var} ${${h_var}} PARENT_SCOPE) @@ -84,8 +97,14 @@ function(protobuf_generate_py comp py_var) return() endif() set(${py_var}) + set(_add_target FALSE) foreach(file ${ARGN}) + if("${file}" STREQUAL "TARGET") + set(_add_target TRUE) + continue() + endif() + get_filename_component(abs_file ${file} ABSOLUTE) get_filename_component(file_name ${file} NAME_WE) get_filename_component(file_dir ${abs_file} PATH) @@ -102,11 +121,18 @@ function(protobuf_generate_py comp py_var) OUTPUT "${proto_output_path}/${file_name}_pb2.py" WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${CMAKE_COMMAND} -E echo "generate proto cpp_out ${comp} by ${abs_file}" COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} DEPENDS protoc_build ${abs_file} COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) endforeach() + if(_add_target) + add_custom_target( + ${comp} DEPENDS ${${py_var}} + ) + endif() + set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) set(${py_var} ${${py_var}} PARENT_SCOPE) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 1698710e..d10d73e4 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -173,10 +173,12 @@ set(TRAIN_SRC_LIST "graph/manager/graph_manager_utils.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" + "graph/manager/session_scope_mem_allocator.cc" "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" "graph/manager/host_mem_allocator.cc" + "graph/manager/graph_mem_manager.cc" "graph/manager/memory_api.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/trans_var_data_utils.cc" @@ -270,7 +272,6 @@ set(TRAIN_SRC_LIST "graph/passes/identity_pass.cc" "graph/passes/ref_identity_delete_op_pass.cc" "graph/passes/infershape_pass.cc" - "graph/passes/isolated_op_remove_pass.cc" "graph/passes/iterator_op_pass.cc" "graph/passes/link_gen_mask_nodes_pass.cc" "graph/passes/merge_pass.cc" @@ -307,6 +308,7 @@ set(TRAIN_SRC_LIST "graph/passes/merge_to_stream_merge_pass.cc" "graph/passes/merge_input_memcpy_pass.cc" 
"graph/passes/switch_to_stream_switch_pass.cc" + "graph/passes/mark_force_unknown_for_cond_pass.cc" "graph/passes/attach_stream_label_pass.cc" "graph/passes/switch_dead_branch_elimination.cc" "graph/passes/replace_transshape_pass.cc" @@ -316,13 +318,11 @@ set(TRAIN_SRC_LIST "graph/passes/transop_without_reshape_fusion_pass.cc" "graph/passes/transpose_transdata_pass.cc" "graph/passes/unused_const_pass.cc" - "graph/passes/unused_op_remove_pass.cc" "graph/passes/var_is_initialized_op_pass.cc" "graph/passes/parallel_concat_start_op_pass.cc" "graph/passes/cond_pass.cc" "graph/passes/cond_remove_pass.cc" "graph/passes/for_pass.cc" - "graph/passes/variable_format_pass.cc" "graph/passes/variable_op_pass.cc" "graph/passes/variable_prepare_op_pass.cc" "graph/passes/variable_ref_delete_op_pass.cc" @@ -391,6 +391,8 @@ set(TRAIN_SRC_LIST "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc" "hybrid/node_executor/rts/rts_node_executor.cc" + "hybrid/node_executor/rts/rts_node_task.cc" + "hybrid/node_executor/rts/rts_task_factory.cc" "hybrid/node_executor/node_executor.cc" "hybrid/node_executor/task_context.cc" "hybrid/hybrid_davinci_model.cc" @@ -475,6 +477,8 @@ set(INFER_SRC_LIST "graph/manager/host_mem_allocator.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" + "graph/manager/session_scope_mem_allocator.cc" + "graph/manager/graph_mem_manager.cc" "model/ge_model.cc" "model/ge_root_model.cc" "graph/common/transop_util.cc" @@ -519,12 +523,10 @@ set(INFER_SRC_LIST "graph/passes/dimension_adjust_pass.cc" "graph/passes/get_original_format_pass.cc" "graph/passes/shape_operate_op_remove_pass.cc" - "graph/passes/unused_op_remove_pass.cc" "graph/passes/assert_pass.cc" "graph/passes/dropout_pass.cc" "graph/passes/infershape_pass.cc" "graph/passes/unused_const_pass.cc" - "graph/passes/isolated_op_remove_pass.cc" "graph/passes/permute_pass.cc" "graph/passes/ctrl_edge_transfer_pass.cc" "graph/passes/end_of_sequence_add_control_pass.cc" @@ -582,6 +584,7 @@ set(INFER_SRC_LIST "graph/passes/merge_to_stream_merge_pass.cc" "graph/passes/merge_input_memcpy_pass.cc" "graph/passes/switch_to_stream_switch_pass.cc" + "graph/passes/mark_force_unknown_for_cond_pass.cc" "graph/passes/attach_stream_label_pass.cc" "graph/passes/multi_batch_pass.cc" "graph/passes/multi_batch_clone_pass.cc" @@ -606,7 +609,6 @@ set(INFER_SRC_LIST "graph/passes/switch_logic_remove_pass.cc" "graph/passes/switch_data_edges_bypass.cc" "graph/passes/merge_pass.cc" - "graph/passes/variable_format_pass.cc" "graph/passes/variable_op_pass.cc" "graph/passes/cast_remove_pass.cc" "graph/passes/transpose_transdata_pass.cc" @@ -746,6 +748,7 @@ target_include_directories(ge_runner SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/external ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + ${GE_CODE_DIR}/../abl/adump/external #### blue zone ${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include @@ -822,6 +825,7 @@ target_include_directories(ge_compiler SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/external ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + ${GE_CODE_DIR}/../abl/adump/external #### blue zone #### ${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include @@ -982,6 +986,7 @@ target_include_directories(atc_stub_ge_compiler PRIVATE #### yellow zone #### ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + ${GE_CODE_DIR}/../abl/adump/external #### blue zone #### 
${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include @@ -1022,6 +1027,7 @@ target_include_directories(fwk_stub_ge_runner PRIVATE #### yellow zone #### ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + ${GE_CODE_DIR}/../abl/adump/external #### blue zone #### ${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 8f6fba95..9cbd2d06 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -598,6 +598,47 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector &inputs, s return ret; } +// Run Graph with stream Asynchronously +Status Session::RunGraphWithStreamAsync(uint32_t graph_id, void *stream, const std::vector &inputs, + std::vector &outputs) { + ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); + GELOGT(TRACE_INIT, "Session run graph with stream async start"); + + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, + "[Run][Graph]Run graph with stream asyn failed, the GELib instance is nullptr," + "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); + REPORT_INNER_ERROR("E19999", + "Run graph with stream asyn failed, the GELib instance is nullptr" + "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); + return FAILED; + } + if (!instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, + "[Run][Graph]Run graph with stream asyn failed, the GELib instance is not init," + "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); + REPORT_INNER_ERROR("E19999", + "Run graph with stream asyn failed, the GELib instance is not init," + "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); + return FAILED; + } + GELOGT(TRACE_RUNNING, "Run Graph Run graph with stream asyn."); + Status ret = instance_ptr->SessionManagerObj().RunGraphWithStreamAsync(sessionId_, graph_id, stream, inputs, + outputs); + if (ret != SUCCESS) { + GELOGE(ret, "[Run][Graph]Run graph with stream asyn Failed," + "error code = %u, session id = %lu, graph id = %u, stream = %p.", ret, sessionId_, graph_id, stream); + REPORT_CALL_ERROR("E19999", "[Run][Graph]Run graph with stream asyn failed, error code = %u, session id = %lu," + "graph id = %u, stream = %p.", ret, sessionId_, graph_id, stream); + return FAILED; + } + + GELOGT(TRACE_STOP, "Session run graph with stream async finished"); + return SUCCESS; +} + // Register Call Back Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) { ErrorManager::GetInstance().GenWorkStreamIdDefault(); @@ -640,8 +681,35 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector return SUCCESS; } +// Build Graph +Status Session::BuildGraph(uint32_t graph_id, const std::vector &inputs) { + ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, + "[Build][Graph]Failed, the GELib instance is nullptr or is not InitFlag, " + "session_id %lu, graph_id %u", sessionId_, graph_id); + REPORT_INNER_ERROR("E19999", + "Build graph failed, the GELib instance is nullptr or is 
not InitFlag, " + "session_id %lu, graph_id %u", sessionId_, graph_id); + return FAILED; + } + GELOGT(TRACE_RUNNING, "Building Graph"); + Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs); + if (ret != SUCCESS) { + GELOGE(ret, + "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", + ret, sessionId_, graph_id); + REPORT_CALL_ERROR("E19999", "Build graph failed , error code:%u, " + "session_id:%lu, graph_id:%u", ret, sessionId_, graph_id); + return FAILED; + } + return SUCCESS; +} + // Run Graph Asynchronously -Status Session::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, +Status Session::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback) { ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index 50dcf776..b1e94c59 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -128,7 +128,6 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, - const std::vector &partition_datas) { GE_CHK_BOOL_RET_STATUS(!partition_datas.empty() && model_partition_table.num != 0 && model_partition_table.num == partition_datas.size(), FAILED, diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc index 51936260..a6944fc6 100644 --- a/ge/common/dump/dump_manager.cc +++ b/ge/common/dump/dump_manager.cc @@ -23,6 +23,7 @@ const char *const kDumpOFF = "OFF"; const char *const kDumpoff = "off"; const char *const kDumpOn = "on"; const uint64_t kInferSessionId = 0; +const uint32_t kAllOverflow = 3; } // namespace namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { @@ -30,78 +31,103 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetIn return instance; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { - DumpProperties dump_properties; - std::string dump_status; - std::string dump_path; - std::string dump_mode; - std::string dump_op_switch; - - if (dump_config.dump_status.empty()) { +bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties) { + if (dump_config.dump_status.empty() && dump_config.dump_debug.empty()) { dump_properties_map_.emplace(kInferSessionId, dump_properties); GELOGI("Dump does not open"); - return SUCCESS; + return false; } - - dump_status = dump_config.dump_status; - GELOGI("Dump status is %s", dump_status.c_str()); - if (dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) { + GELOGI("Dump status is %s, dump debug is %s.", dump_config.dump_status.c_str(), dump_config.dump_debug.c_str()); + if ((dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) && + dump_config.dump_debug == kDumpoff) { dump_properties.ClearDumpPropertyValue(); dump_properties_map_.emplace(kInferSessionId, dump_properties); - return SUCCESS; + return false; + } + if (dump_config.dump_status == kDumpOn && dump_config.dump_debug == kDumpOn) { + GELOGW("Not support coexistence of dump debug and dump status."); + return false; } - dump_properties.SetDumpStatus(dump_status); + return true; +} - 
dump_op_switch = dump_config.dump_op_switch; - dump_properties.SetDumpOpSwitch(dump_op_switch); - if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { - dump_properties_map_.emplace(kInferSessionId, dump_properties); - GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", - dump_op_switch.c_str()); - REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", - dump_op_switch.c_str()); - return PARAM_INVALID; +void DumpManager::SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties) { + if (dump_config.dump_debug == kDumpOn) { + GELOGI("Only do overflow detection, dump debug is %s.", dump_config.dump_debug.c_str()); + dump_properties.InitInferOpDebug(); + dump_properties.SetOpDebugMode(kAllOverflow); } +} - if (!dump_config.dump_list.empty()) { - for (auto model_dump : dump_config.dump_list) { - std::string model_name = model_dump.model_name; - GELOGI("Dump model is %s", model_name.c_str()); - std::set dump_layers; - for (auto layer : model_dump.layers) { - GELOGI("Dump layer is %s in model", layer.c_str()); - dump_layers.insert(layer); - } - dump_properties.AddPropertyValue(model_name, dump_layers); +void DumpManager::SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties) { + for (const auto &model_dump : dump_config.dump_list) { + std::string model_name = model_dump.model_name; + GELOGI("Dump model is %s", model_name.c_str()); + std::set dump_layers; + for (const auto &layer : model_dump.layers) { + GELOGI("Dump layer is %s in model", layer.c_str()); + dump_layers.insert(layer); + } + dump_properties.AddPropertyValue(model_name, dump_layers); + } +} + +Status DumpManager::SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties) { + if (dump_config.dump_status == kDumpOn) { + GELOGI("Only do normal dump process, dump status is %s.", dump_config.dump_status.c_str()); + dump_properties.SetDumpStatus(dump_config.dump_status); + std::string dump_op_switch = dump_config.dump_op_switch; + dump_properties.SetDumpOpSwitch(dump_op_switch); + if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { + dump_properties_map_.emplace(kInferSessionId, dump_properties); + GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", dump_op_switch.c_str()); + REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", dump_op_switch.c_str()); + return PARAM_INVALID; } - if (dump_op_switch == kDumpOn) { - GELOGI("Start to dump model and single op,dump op switch is %s", dump_op_switch.c_str()); + + if (!dump_config.dump_list.empty()) { + if (dump_op_switch == kDumpOn) { + GELOGI("Start to dump model and single op, dump op switch is %s", dump_op_switch.c_str()); + } else { + GELOGI("Only dump model, dump op switch is %s", dump_op_switch.c_str()); + } + SetDumpList(dump_config, dump_properties); } else { - GELOGI("Only dump model,dump op switch is %s", dump_op_switch.c_str()); + GELOGI("Only dump single op, dump op switch is %s", dump_op_switch.c_str()); } - } else { - GELOGI("Only dump single op,dump op switch is %s", dump_op_switch.c_str()); + GELOGI("Dump mode is %s", dump_config.dump_mode.c_str()); + dump_properties.SetDumpMode(dump_config.dump_mode); } + return SUCCESS; +} - dump_path = dump_config.dump_path; +Status DumpManager::SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties) { + std::string dump_path = dump_config.dump_path; if (dump_path.empty()) { GELOGE(PARAM_INVALID, "[Check][DumpPath]It is empty"); 
REPORT_INNER_ERROR("E19999", "Dump path check is empty"); return PARAM_INVALID; } - if (dump_path[dump_path.size() - 1] != '/') { dump_path = dump_path + "/"; } dump_path = dump_path + CurrentTimeInStr() + "/"; GELOGI("Dump path is %s", dump_path.c_str()); dump_properties.SetDumpPath(dump_path); + return SUCCESS; +} - dump_mode = dump_config.dump_mode; - GELOGI("Dump mode is %s", dump_mode.c_str()); - dump_properties.SetDumpMode(dump_mode); +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { + DumpProperties dump_properties; + if (!NeedDoDump(dump_config, dump_properties)) { + GELOGD("No need do dump process."); + return SUCCESS; + } + SetDumpDebugConf(dump_config, dump_properties); + GE_CHK_STATUS_RET(SetNormalDumpConf(dump_config, dump_properties), "[Init][DumpConf] failed when dump status is on."); + GE_CHK_STATUS_RET(SetDumpPath(dump_config, dump_properties), "[Init][DumpPath] failed."); dump_properties_map_[kInferSessionId] = dump_properties; - + return SUCCESS; } diff --git a/ge/common/dump/dump_manager.h b/ge/common/dump/dump_manager.h index 095344b7..fa96de93 100644 --- a/ge/common/dump/dump_manager.h +++ b/ge/common/dump/dump_manager.h @@ -34,6 +34,11 @@ class DumpManager { void RemoveDumpProperties(uint64_t session_id); private: + bool NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties); + void SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties); + Status SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties); + Status SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties); + void SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties); std::mutex mutex_; std::map dump_properties_map_; }; diff --git a/ge/common/dump/dump_properties.cc b/ge/common/dump/dump_properties.cc index 65b1e89a..08bddf43 100644 --- a/ge/common/dump/dump_properties.cc +++ b/ge/common/dump/dump_properties.cc @@ -53,7 +53,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti dump_path_.clear(); dump_step_.clear(); dump_mode_.clear(); - is_op_debug_ = false; + is_train_op_debug_ = false; + is_infer_op_debug_ = false; op_debug_mode_ = 0; std::string enable_dump; @@ -124,7 +125,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpI dump_mode_.clear(); dump_op_switch_.clear(); dump_status_.clear(); - is_op_debug_ = false; + is_train_op_debug_ = false; + is_infer_op_debug_ = false; op_debug_mode_ = 0; } @@ -203,6 +205,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti return dump_status_; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitInferOpDebug() { + is_infer_op_debug_ = true; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetOpDebugMode(const uint32_t &op_debug_mode) { + op_debug_mode_ = op_debug_mode; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( const std::string &dump_op_switch) { dump_op_switch_ = dump_op_switch; @@ -237,7 +247,8 @@ void DumpProperties::CopyFrom(const DumpProperties &other) { dump_op_switch_ = other.dump_op_switch_; model_dump_properties_map_ = other.model_dump_properties_map_; - is_op_debug_ = other.is_op_debug_; + is_train_op_debug_ = other.is_train_op_debug_; + is_infer_op_debug_ = other.is_infer_op_debug_; op_debug_mode_ = other.op_debug_mode_; } } @@ -254,15 +265,15 @@ void 
DumpProperties::SetDumpDebugOptions() { if (dump_debug_mode == OP_DEBUG_AICORE) { GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); - is_op_debug_ = true; + is_train_op_debug_ = true; op_debug_mode_ = kAicoreOverflow; } else if (dump_debug_mode == OP_DEBUG_ATOMIC) { GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); - is_op_debug_ = true; + is_train_op_debug_ = true; op_debug_mode_ = kAtomicOverflow; } else if (dump_debug_mode == OP_DEBUG_ALL) { GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); - is_op_debug_ = true; + is_train_op_debug_ = true; op_debug_mode_ = kAllOverflow; } else { GELOGW("ge.exec.dumpDebugMode is invalid."); diff --git a/ge/common/dump/dump_properties.h b/ge/common/dump/dump_properties.h index 8c064d58..98487491 100644 --- a/ge/common/dump/dump_properties.h +++ b/ge/common/dump/dump_properties.h @@ -65,16 +65,26 @@ class DumpProperties { const std::string &GetDumpStatus() const; + void InitInferOpDebug(); + + bool IsInferOpDebug() const { + return is_infer_op_debug_; + } + void SetDumpOpSwitch(const std::string &dump_op_switch); const std::string &GetDumpOpSwitch() const; - bool IsOpDebugOpen() const { return is_op_debug_; } + bool IsOpDebugOpen() const { + return is_train_op_debug_ || is_infer_op_debug_; + } bool IsDumpOpen() const; bool IsSingleOpNeedDump() const; + void SetOpDebugMode(const uint32_t &op_debug_mode); + uint32_t GetOpDebugMode() const { return op_debug_mode_; } const std::string &GetEnableDump() const {return enable_dump_;} @@ -96,7 +106,8 @@ class DumpProperties { std::string dump_op_switch_; std::map> model_dump_properties_map_; - bool is_op_debug_ = false; + bool is_train_op_debug_ = false; + bool is_infer_op_debug_ = false; uint32_t op_debug_mode_ = 0; }; } diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index ac3343ed..ff32f123 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -21,6 +21,7 @@ #include "framework/omg/version.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" +#include "framework/omg/omg_inner_types.h" using std::string; using domi::ModelTaskDef; @@ -304,7 +305,6 @@ Status ModelHelper::SaveAllModelPartiton(std::shared_ptr& om_f return FAILED; } - if (SaveModelTaskDef(om_file_save_helper, ge_model, task_buffer, model_index) != SUCCESS) { GELOGE(FAILED, "[Save][TaskDef]Failed, model %s, model index %zu", ge_model->GetName().c_str(), model_index); @@ -333,6 +333,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod ge::Buffer model_buffer; ge::Buffer task_buffer; + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetStr(*(ge_model.get()), ATTR_MODEL_ATC_CMDLINE, + domi::GetContext().atc_cmdline), + GELOGE(FAILED, "SetStr for atc_cmdline failed."); + return FAILED); auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffer, task_buffer); if (ret != SUCCESS) { GELOGE(ret, "[Save][AllModelPartition]Failed, model %s, error_code %u", @@ -386,9 +390,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmRoo REPORT_INNER_ERROR("E19999", "GraphBuilder SaveModel received invalid " "file name prefix"); return FAILED); - if (!is_unknown_shape) { auto &model_root = name_to_ge_model.begin()->second; + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetStr(*(model_root.get()), ATTR_MODEL_ATC_CMDLINE, + domi::GetContext().atc_cmdline), + GELOGE(FAILED, "SetStr for atc_cmdline failed."); + return FAILED); return SaveToOmModel(model_root, save_param, output_file, 
model); } @@ -396,6 +403,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmRoo GE_CHECK_NOTNULL(om_file_save_helper); auto &first_ge_model = name_to_ge_model.at(ge_root_model->GetRootGraph()->GetName()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetStr(*(first_ge_model.get()), ATTR_MODEL_ATC_CMDLINE, + domi::GetContext().atc_cmdline), + GELOGE(FAILED, "SetStr for atc_cmdline failed."); + return FAILED); // ge root model must be the first to be loaded vector model_names{ge_root_model->GetRootGraph()->GetName()}; diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc index d746fd10..38440d70 100755 --- a/ge/common/kernel_store.cc +++ b/ge/common/kernel_store.cc @@ -38,6 +38,10 @@ bool KernelStore::Build() { buffer_.resize(total_len); } catch (std::bad_alloc &e) { GELOGE(ge::MEMALLOC_FAILED, "All build memory failed, memory size %zu", total_len); + GELOGE(ge::MEMALLOC_FAILED, "[Malloc][Memmory]Resize buffer failed, memory size %zu, " + "exception %s", total_len, e.what()); + REPORT_CALL_ERROR("E19999", "Resize buffer failed, memory size %zu, exception %s", + total_len, e.what()); return false; } diff --git a/ge/common/model_parser/model_parser.cc b/ge/common/model_parser/model_parser.cc index 9c00ab08..ce654887 100644 --- a/ge/common/model_parser/model_parser.cc +++ b/ge/common/model_parser/model_parser.cc @@ -31,18 +31,24 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro ge::ModelData &model_data) { std::string real_path = RealPath(model_path); if (real_path.empty()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "[Check][Param]Model file path %s is invalid", + model_path); + REPORT_CALL_ERROR("E19999", "Model file path %s is invalid", model_path); return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; } if (GetFileLength(model_path) == -1) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "File size not valid, file: %s.", model_path); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "[Check][Param]File size not valid, file %s", + model_path); + REPORT_INNER_ERROR("E19999", "File size not valid, file %s", model_path); return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; } std::ifstream fs(real_path.c_str(), std::ifstream::binary); if (!fs.is_open()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Open file: %s failed, error: %s", model_path, strerror(errno)); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "[Open][File]Failed, file %s, error %s", + model_path, strerror(errno)); + REPORT_CALL_ERROR("E19999", "Open file %s failed, error %s", model_path, strerror(errno)); return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; } @@ -57,6 +63,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro char *data = new (std::nothrow) char[len]; if (data == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Load model From file failed, bad memory allocation occur. 
(need:%u)", len); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Load][ModelFromFile]Failed, " + "bad memory allocation occur(need %u), file %s", len, model_path); + REPORT_CALL_ERROR("E19999", "Load model from file %s failed, " + "bad memory allocation occur(need %u)", model_path, len); return ACL_ERROR_GE_MEMORY_ALLOCATION; } @@ -105,7 +115,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo model_len = file_header->length; GELOGD("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); } else { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Invalid model. ModelEncryptType not supported."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param]Invalid, model encrypt type not supported"); + REPORT_CALL_ERROR("E19999","Invalid model, encrypt type not supported"); res = ACL_ERROR_GE_PARAM_INVALID; } diff --git a/ge/common/model_saver.cc b/ge/common/model_saver.cc index 49382e7f..42cdf26e 100755 --- a/ge/common/model_saver.cc +++ b/ge/common/model_saver.cc @@ -33,7 +33,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi const Json &model) { Status ret = SUCCESS; if (file_path == nullptr || SUCCESS != CheckPath(file_path)) { - GELOGE(FAILED, "Check output file failed."); + GELOGE(FAILED, "[Check][OutputFile]Failed, file %s", file_path); + REPORT_CALL_ERROR("E19999", "Output file %s check invalid", file_path); return FAILED; } std::string model_str; @@ -41,11 +42,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi model_str = model.dump(kInteval, ' ', false, Json::error_handler_t::ignore); } catch (std::exception &e) { ErrorManager::GetInstance().ATCReportErrMessage("E19007", {"exception"}, {e.what()}); - GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + GELOGE(FAILED, "[Convert][File]Failed to convert JSON to string, file %s, reason %s", + file_path, e.what()); return FAILED; } catch (...) { ErrorManager::GetInstance().ATCReportErrMessage("E19008"); - GELOGE(FAILED, "Failed to convert JSON to string."); + GELOGE(FAILED, "[Convert][File]Failed to convert JSON to string, file %s", file_path); return FAILED; } @@ -59,7 +61,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi int32_t fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode); if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {file_path, strerror(errno)}); - GELOGE(FAILED, "Open file[%s] failed. errmsg:%s", file_path, strerror(errno)); + GELOGE(FAILED, "[Open][File]Failed, file %s, errmsg %s", file_path, strerror(errno)); return FAILED; } const char *model_char = model_str.c_str(); @@ -70,12 +72,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi ErrorManager::GetInstance().ATCReportErrMessage( "E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose - GELOGE(FAILED, "Write to file failed. errno:%ld, errmsg:%s", mmpa_ret, strerror(errno)); + GELOGE(FAILED, "[Write][Data]To file %s failed. errno %ld, errmsg %s", + file_path, mmpa_ret, strerror(errno)); ret = FAILED; } // Close file if (mmClose(fd) != EN_OK) { - GELOGE(FAILED, "Close file failed. 
errmsg:%s", strerror(errno)); + GELOGE(FAILED, "[Close][File]Failed, file %s, errmsg %s", file_path, strerror(errno)); + REPORT_CALL_ERROR("E19999", "Close file %s failed, errmsg %s", file_path, strerror(errno)); ret = FAILED; } return ret; diff --git a/ge/common/op/ge_op_utils.cc b/ge/common/op/ge_op_utils.cc index fc2990b6..ee5f6d34 100644 --- a/ge/common/op/ge_op_utils.cc +++ b/ge/common/op/ge_op_utils.cc @@ -62,6 +62,10 @@ const uint32_t SWITCH_TRUE_OUTPUT = 1; const uint32_t SWITCH_DATA_INPUT = 0; const uint32_t SWITCH_PRED_INPUT = 1; +// Merge +const uint32_t MERGE_DATA_OUTPUT = 0; +const uint32_t MERGE_INDEX_OUTPUT = 1; + // FunctionOp const uint32_t IF_COND_INPUT = 0; const uint32_t FOR_START_INPUT = 0; @@ -239,7 +243,8 @@ Status OpUtils::SetDataByDataType(size_t out_size, const std::vector &ch const std::vector &chunk_output, GeTensor *output) { unique_ptr output_data(new (std::nothrow) T[out_size]()); if (output_data == nullptr) { - GELOGE(MEMALLOC_FAILED, "New buf failed"); + GELOGE(MEMALLOC_FAILED, "[Malloc][Data]New buf failed"); + REPORT_CALL_ERROR("E19999", "New buf failed"); return INTERNAL_ERROR; } @@ -275,7 +280,8 @@ Status OpUtils::SetOutputSliceDataByDataType(void *data, int64_t data_size, cons int64_t dim_i = input_dims[i]; int64_t stride_i = stride[i]; if (dim_i == 0) { - GELOGE(PARAM_INVALID, "Dim_i of size tensor can't be 0."); + GELOGE(PARAM_INVALID, "[Check][Param]Invalid, Dim_i of size tensor is 0"); + REPORT_INNER_ERROR("E19999", "Dim_i of size tensor is 0, invalid"); return PARAM_INVALID; } chunk_size = chunk_size / dim_i; @@ -299,7 +305,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::SetOutputSliceD void *data, int64_t data_size, int32_t data_type, std::vector &input_dims, std::vector &begin, std::vector &output_dims, GeTensor *output, std::vector &stride) { if (data == nullptr || output == nullptr) { - GELOGE(PARAM_INVALID, "Input param is nullptr."); + GELOGE(PARAM_INVALID, "[Check][Param]Input param is nullptr"); + REPORT_INNER_ERROR("E19999", "Input param is nullptr"); return PARAM_INVALID; } @@ -436,14 +443,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::SetWeights(ge:: FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector &dims) { if (tensor == nullptr) { - GELOGE(PARAM_INVALID, "Input tensor is nullptr"); + GELOGE(PARAM_INVALID, "[Check][Param]Input tensor is nullptr"); + REPORT_INNER_ERROR("E19999","Input tensor is nullptr"); return PARAM_INVALID; } // If the tensor data is a vector, the shape dimension must be 1 if (tensor->GetTensorDesc().GetShape().GetDims().size() > 1) { - GELOGE(PARAM_INVALID, "The dimension of the input tensor shape cannot be more than 1, it is %zu", + GELOGE(PARAM_INVALID, "[Check][Param]The dimension of the input tensor shape " + "cannot be more than 1, it is %zu", tensor->GetTensorDesc().GetShape().GetDims().size()); + REPORT_CALL_ERROR("E19999", "The dimension of the input tensor shape %zu invalid, " + "more than 1", tensor->GetTensorDesc().GetShape().GetDims().size()); return PARAM_INVALID; } @@ -462,8 +473,10 @@ OpUtils::GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType ty dims.push_back(shape_data[i]); } } else { - GELOGE(PARAM_INVALID, "Data type only can be DT_INT32 or DT_INT64. 
type is %s", - TypeUtils::DataTypeToSerialString(type).c_str()); + GELOGE(PARAM_INVALID, "[Check][DataType]Invalid, type only can be DT_INT32 or DT_INT64, " + "type is %s", TypeUtils::DataTypeToSerialString(type).c_str()); + REPORT_INNER_ERROR("E19999", "Data type %s check invalid, only can be DT_INT32 or DT_INT64", + TypeUtils::DataTypeToSerialString(type).c_str()); return PARAM_INVALID; } diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 08fdc0ae..5e892ff8 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -67,11 +67,13 @@ bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector MAX_DEV_NUM) { - GELOGE(ge::PARAM_INVALID, "The device nums: %u is invalid.", device_nums); + GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); + REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); return false; } @@ -79,12 +81,16 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { int32_t dev_count = 0; rtError_t rt_err = rtGetDeviceCount(&dev_count); if (rt_err != RT_ERROR_NONE) { - GELOGE(ge::INTERNAL_ERROR, "Get the Device count fail."); + GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); + REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); return false; } if (device_nums > static_cast(dev_count)) { - GELOGE(ge::PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); + GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", + device_nums, dev_count); + REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", + device_nums, dev_count); return false; } @@ -92,11 +98,14 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { for (size_t i = 0; i < device_nums; ++i) { uint32_t dev_id = deviceid_list[i]; if (dev_id >= static_cast(dev_count)) { - GELOGE(ge::PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); + GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", + dev_id, dev_count); + REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); return false; } if (record.count(dev_id) > 0) { - GELOGE(ge::PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); + GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); + REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id); return false; } record.insert(dev_id); @@ -106,7 +115,8 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { if (func == nullptr) { - GELOGE(ge::PARAM_INVALID, "Msprof ctrl callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]Msprof ctrl callback is nullptr"); + REPORT_INNER_ERROR("E19999", "Msprof ctrl callback is nullptr"); return ge::PARAM_INVALID; } if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { @@ -119,13 +129,15 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { if (func == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofSetDeviceCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofSetDeviceCallback callback is nullptr"); + REPORT_INNER_ERROR("E19999", "MsprofSetDeviceCallback callback 
is nullptr"); return ge::PARAM_INVALID; } // Pass MsprofSetDeviceCallback to runtime ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast(func)); if (rt_ret != ge::SUCCESS) { - GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); + GELOGE(rt_ret, "[Pass][MsprofSetDeviceCallback]To runtime failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Pass MsprofSetDeviceCallback to runtime failed, ret 0x%X", rt_ret); return rt_ret; } return ge::SUCCESS; @@ -133,7 +145,8 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { ge::Status RegProfReporterCallback(MsprofReporterCallback func) { if (func == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); + REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); return ge::PARAM_INVALID; } if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) { @@ -144,7 +157,10 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) { // Pass MsprofReporterCallback to runtime ge::Status rt_ret = rtSetMsprofReporterCallback(func); if (rt_ret != ge::SUCCESS) { - GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!"); + GELOGE(rt_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u", + rt_ret); + REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u", + rt_ret); return rt_ret; } // Pass MsprofReporterCallback to hccl @@ -167,9 +183,10 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { return ge::FAILED; } - + if (!TransProfConfigToParam(*prof_config_param, prof_params)) { - GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed"); + GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed"); + REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed"); return ge::PARAM_INVALID; } } @@ -188,7 +205,10 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le } ge::Status ret = graph_loader.CommandHandle(command); if (ret != ge::SUCCESS) { - GELOGE(ret, "Handle profiling command failed"); + GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u", + iter->second.c_str(), ret); + REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u", + iter->second.c_str(), ret); return ge::FAILED; } diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 443f5213..f7015525 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -87,21 +87,26 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In struct MsprofGeOptions prof_conf = {{ 0 }}; Status ret = InitFromOptions(options, prof_conf); if (ret != SUCCESS) { - GELOGE(ret, "Failed to init profiling."); + GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret); + REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret); return ret; } if (is_execute_profiling_) { if (prof_cb_.msprofCtrlCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); + 
REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); return ge::PARAM_INVALID; } int32_t cb_ret = prof_cb_.msprofCtrlCallback( static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), static_cast(&prof_conf), sizeof(MsprofGeOptions)); if (cb_ret != 0) { - GELOGE(FAILED, "Call msprofCtrlCallback failed, type:%u, return:%d", + GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); + REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), + cb_ret); return FAILED; } GELOGI("Profiling init success"); @@ -122,7 +127,10 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt // enable profiling by ge option if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { - GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); + GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", + options.profiling_options.c_str()); + REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", + options.profiling_options.c_str()); return INTERNAL_ERROR; } is_execute_profiling_ = true; @@ -147,13 +155,17 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt // Parse json str for bp fp Status ret = ParseOptions(prof_conf.options); if (ret != ge::SUCCESS) { - GELOGE(ge::PARAM_INVALID, "Parse training trace param failed."); + GELOGE(ge::PARAM_INVALID, "[Parse][Options]Parse training trace param %s failed, error_code %u", + prof_conf.options, ret); + REPORT_CALL_ERROR("E19999", "Parse training trace param %s failed, error_code %u", + prof_conf.options, ret); return ge::PARAM_INVALID; } if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { - GELOGE(INTERNAL_ERROR, "copy job_id failed."); + GELOGE(INTERNAL_ERROR, "[Copy][JobId]Failed, original job_id %s", options.job_id.c_str()); + REPORT_CALL_ERROR("E19999", "Copy job_id %s failed", options.job_id.c_str()); return INTERNAL_ERROR; } GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str()); @@ -163,7 +175,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt ge::Status ProfilingManager::ParseOptions(const std::string &options) { if (options.empty()) { - GELOGE(ge::PARAM_INVALID, "Profiling options is empty."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]Profiling options is empty"); + REPORT_INNER_ERROR("E19999", "Profiling options is empty"); return ge::PARAM_INVALID; } try { @@ -178,7 +191,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { } GELOGI("GE profiling training trace:%s", training_trace.c_str()); if (training_trace != "on") { - GELOGE(ge::PARAM_INVALID, "Training trace param:%s is invalid.", training_trace.c_str()); + GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", + training_trace.c_str()); + REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str()); return ge::PARAM_INVALID; } fp_point_ = prof_options[kFpPoint]; @@ -188,7 +203,8 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { } is_training_trace_ = true; } catch (...) 
{ - GELOGE(FAILED, "Json prof_conf options is invalid."); + GELOGE(FAILED, "[Check][Param]Json prof_conf options is invalid"); + REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid"); return ge::PARAM_INVALID; } return ge::SUCCESS; @@ -202,7 +218,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf if (device_num != 0) { auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Stop profiling: device id ptr is null."); + GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); + REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); return; } for (int32_t i = 0; i < device_num; i++) { @@ -216,7 +233,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf // stop profiling if (prof_cb_.msprofCtrlCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); + REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); return; } int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), @@ -278,10 +296,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin try { reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); } catch (std::exception &e) { - GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + GELOGE(FAILED, "[Convert][ReportData]Failed to convert json to string, reason %s.", + e.what()); + REPORT_CALL_ERROR("E19999", "Failed to convert reported_data from json to string, reason %s", + e.what()); return ; } catch (...) { - GELOGE(FAILED, "Failed to convert JSON to string."); + GELOGE(FAILED, "[Convert][ReportedData]Failed to convert JSON to string"); + REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string"); return; } reported_data.append(",") @@ -300,7 +322,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil index_id, model_id, tag_id); rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx]Failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtProfilerTraceEx failed, ret 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", @@ -314,7 +337,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil uint32_t stream_id = 0; rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); + GELOGE(RT_FAILED, "[Get][RtsInfo]Task_id and stream_id failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Get task_id and stream_id failed, ret 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); @@ -333,8 +357,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); } catch (std::exception &e) { GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + GELOGE(FAILED, "[Convert][ReportedData]Failed 
to convert from json to string, reason: %s", + e.what()); + REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string, reason: %s", + e.what()); } catch (...) { - GELOGE(FAILED, "Failed to convert JSON to string."); + GELOGE(FAILED, "[Convert][ReportedData]Failed to convert from json to string"); + REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string"); } reported_data.append(",") .append("\n"); @@ -390,7 +419,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); + GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); return; } GELOGD("current logic_device_id:%d", logic_device_id); @@ -452,7 +482,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo // register Framework to profiling int32_t cb_ret = PluginInit(); if (cb_ret != 0) { - GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret); + GELOGE(cb_ret, "[Init][ProfilingPlugin]Failed, ret %d", cb_ret); + REPORT_CALL_ERROR("E19999", "Init profiling plugin failed, ret %d", cb_ret); return cb_ret; } GELOGI("Prof subscribe: model load profiling on."); @@ -465,7 +496,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo device[0] = davinci_model->GetDeviceId(); rtError_t rt_ret = rtProfilerStart(module, device_num, device); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Runtime profiler start failed."); + GELOGE(FAILED, "[Start][Profiler]Malloc buffer failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Malloc buffer failed when start profiling, ret 0x%X", rt_ret); return FAILED; } UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module); @@ -473,7 +505,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo // Report profiling data Status p_ret = davinci_model->ReportProfilingData(); if (p_ret != SUCCESS) { - GELOGE(p_ret, "Report profiling data failed."); + GELOGE(p_ret, "[Report][ProfilingData]Failed, ret %u", p_ret); + REPORT_CALL_ERROR("E19999", "Report profiling data failed, ret %u", p_ret); return p_ret; } #endif @@ -499,13 +532,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo // The same device_id, only stop at last time rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Runtime profiler stop failed."); + GELOGE(FAILED, "[Stop][Profiler]Malloc buffer Failed, ret %d", rt_ret); + REPORT_CALL_ERROR("E19999", "Malloc buffer failed when stop profiling, ret %d", rt_ret); return FAILED; } } UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module); } else { - GELOGE(FAILED, "The device_id:%u has not been subscribed, do not need to cancel.", device[0]); + GELOGE(FAILED, "[Cancel][DeviceId]The device_id %u has not been subscribed, " + "do not need to cancel", device[0]); + REPORT_CALL_ERROR("E19999", "The device_id %u has not been subscribed, do not need to cancel", + device[0]); return FAILED; } @@ -527,14 +564,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn // register Framework to profiling int32_t cb_ret = PluginInit(); if (cb_ret != 0) { 
- GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret); + GELOGE(cb_ret, "[Init][ProfilingPlugin]Failed, ret %d", cb_ret); + REPORT_CALL_ERROR("E19999", "Init profiling plugin failed, ret %d", cb_ret); return cb_ret; } int32_t device_num = -1; rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Runtime profiler start failed."); + GELOGE(FAILED, "[Start][Profiler]Malloc buffer failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Malloc buffer failed when start profiling, ret 0x%X", rt_ret); return FAILED; } is_load_profiling_ = true; @@ -563,7 +602,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi int32_t dev_num = -1; rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Runtime profiler stop failed."); + GELOGE(FAILED, "[Stop][Profiler]Malloc buffer failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Malloc buffer failed when stop profiling, ret 0x%X", rt_ret); return FAILED; } for (auto device_id_module : device_id_module_map_) { @@ -572,7 +612,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi GELOGI("Prof finalize: device_id: %u, module: 0x%lx.", device_id, device_id_module.second); rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Runtime profiler stop failed."); + GELOGE(FAILED, "[Stop][Profiler]Failed, device_id %d, ret 0x%X", device_id, rt_ret); + REPORT_CALL_ERROR("E19999", "Stop runtime profiler failed, device_id %d, ret 0x%X", + device_id,rt_ret); return FAILED; } } @@ -611,18 +653,26 @@ Status ProfilingManager::ProfParseDeviceId(const std::map try { device_num = std::stoi(iter->second); } catch (std::invalid_argument &) { - GELOGE(FAILED, "Device nun: %s is invalid.", iter->second.c_str()); + GELOGE(FAILED, "[Parse][Param]Failed, device num %s is invalid", iter->second.c_str()); + REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s is invalid", + iter->second.c_str()); return FAILED; } catch (std::out_of_range &) { - GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str()); + GELOGE(FAILED, "[Parse][Param]Failed, device num %s cannot change to int", + iter->second.c_str()); + REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s cannot change to int", + iter->second.c_str()); return FAILED; } catch (...) 
{ - GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str()); + GELOGE(FAILED, "[Parse][Param]Failed, device num %s cannot change to int", + iter->second.c_str()); + REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s cannot change to int", + iter->second.c_str()); return FAILED; } } else { - GELOGE(FAILED, "Config para not contain device num."); + GELOGE(FAILED, "[Parse][Param]Config para not contain device num %s", iter->second.c_str()); + REPORT_CALL_ERROR("E19999", "Parse param failed, config para not contain device num %s", + iter->second.c_str()); return FAILED; } // device id if (ProfParseDeviceId(config_para, device_list) != SUCCESS) { - GELOGE(FAILED, "Parse config para device id failed."); + GELOGE(FAILED, "[Parse][DeviceId]Failed"); + REPORT_CALL_ERROR("E19999", "Parse device id failed"); return FAILED; } if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast(device_list.size())) { - GELOGE(FAILED, "Config para device num: %d not equal to device list size: %zu.", device_num, device_list.size()); + GELOGE(FAILED, "[Parse][Param]Failed, config para device num %d not equal to " + "device list size %zu", device_num, device_list.size()); + REPORT_INNER_ERROR("E19999", "[Parse][Param]Failed, config para device num %d " + "not equal to device list size %zu", device_num, device_list.size()); return FAILED; } #endif @@ -676,13 +740,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt int32_t device_num = 0; vector device_list; if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { - GELOGE(FAILED, "Prof start parse param failed."); + GELOGE(FAILED, "[Parse][Param]Prof start parse param failed, device num %d, " + "device list size %zu", device_num, device_list.size()); + REPORT_CALL_ERROR("E19999", "Prof start parse param failed, device num %d, " + "device list size %zu", device_num, device_list.size()); return FAILED; } auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Prof start: device id ptr is null."); + GELOGE(FAILED, "[Start][Profiling]Malloc buffer failed when start profiling, device num %d", + device_num); + REPORT_CALL_ERROR("E19999", "Malloc buffer failed when start profiling, device num %d", + device_num); return FAILED; } for (int32_t i = 0; i < device_num; i++) { @@ -692,7 +762,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Runtime profiler config proc failed."); + GELOGE(FAILED, "[Start][Profiler]Runtime profiler config proc failed, config param 0x%lx, " + "device num %d, ret 0x%X", module, device_num, rt_ret); + REPORT_CALL_ERROR("E19999", "Runtime profiler config proc failed, config param 0x%lx, " + "device num %d, ret 0x%X", module, device_num, rt_ret); return FAILED; } if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { @@ -719,12 +792,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt int32_t device_num = 0; vector device_list; if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { - GELOGE(FAILED, "Prof stop parse param failed."); + GELOGE(FAILED, "[Stop][Profiling]Prof stop parse param failed, device num %d, " + "device list size %zu", device_num, device_list.size()); + REPORT_CALL_ERROR("E19999", "Prof stop parse param failed, device num %d, device list 
size %zu", + device_num, device_list.size()); return FAILED; } auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Prof stop: device id ptr is null."); + GELOGE(FAILED, "[Stop][Profiling]Malloc buffer failed when stop profiling, device num %d", + device_num); + REPORT_CALL_ERROR("E19999", "Malloc buffer failed when stop profiling, device num %d", + device_num); return FAILED; } for (int32_t i = 0; i < device_num; i++) { @@ -733,7 +812,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGI("Prof stop: runtime config param: 0x%lx, device num: %d", module, device_num); rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); + GELOGE(FAILED, "[Stop][Profiler]Runtime profiler config proc failed, config param 0x%lx, " + "device num: %d, ret 0x%X", module, device_num, rt_ret); + REPORT_CALL_ERROR("E19999", "Runtime profiler config proc failed, config param 0x%lx, " + "device num %d, ret 0x%X", module, device_num, rt_ret); return FAILED; } uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; @@ -790,7 +872,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); + GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); } GELOGI("Current logic_device_id:%d", logic_device_id); @@ -805,7 +888,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() { if (prof_cb_.msprofReporterCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); + REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); return ge::PARAM_INVALID; } int32_t cb_ret = prof_cb_.msprofReporterCallback( @@ -813,8 +897,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Plugin static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), nullptr, 0); if (cb_ret != MSPROF_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Profiling reporter init failed, ret = %d.", cb_ret); - GELOGE(INTERNAL_ERROR, "[Init][ProfilingReporter] profiling init failed, ret = %d.", cb_ret); + REPORT_CALL_ERROR("E19999", "Profiling reporter init failed, ret 0x%X", cb_ret); + GELOGE(INTERNAL_ERROR, "[Init][ProfilingReporter]Failed, ret 0x%X", cb_ret); return INTERNAL_ERROR; } @@ -823,8 +907,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Plugin static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_DATA_MAX_LEN), &reporter_max_len_, sizeof(uint32_t)); if (cb_ret != MSPROF_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Get profiling reporter data max len failed, ret = %d.", cb_ret); - GELOGE(INTERNAL_ERROR, "[Init][ProfilingReporter] Get profiling reporter data max len failed, ret = %d.", cb_ret); + REPORT_CALL_ERROR("E19999", "Get profiling reporter data max len failed, ret 0x%X", cb_ret); + GELOGE(INTERNAL_ERROR, "[Get][ProfilingDataMaxLen]Failed, ret 0x%X", cb_ret); return 
INTERNAL_ERROR; } @@ -834,7 +918,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Plugin FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const { #ifdef DAVINCI_SUPPORT_PROFILING if (prof_cb_.msprofReporterCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); + REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); return; } int32_t cb_ret = prof_cb_.msprofReporterCallback( @@ -850,7 +935,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( ReporterData &reporter_data) const { if (prof_cb_.msprofReporterCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); + REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); return ge::PARAM_INVALID; } return prof_cb_.msprofReporterCallback( @@ -946,5 +1032,4 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP return; } - } // namespace ge diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index af9fce06..12293fc5 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -81,7 +81,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { Status ProfModelUnsubscribe(void *model); void StopProfiling(); bool ProfilingTrainingTraceOn() const { return is_training_trace_; } + // report model load profiling data flag, data contain task desc info, step info, model load fusion op info bool ProfilingModelLoadOn() const { return is_load_profiling_; } + // report model execute profiling data flag, data contain model execute time info bool ProfilingModelExecuteOn() const; // is_execute_profiling_ only used by ge option and env bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } diff --git a/ge/common/properties_manager.cc b/ge/common/properties_manager.cc index eae29e34..e1f4c66e 100644 --- a/ge/common/properties_manager.cc +++ b/ge/common/properties_manager.cc @@ -69,7 +69,8 @@ bool PropertiesManager::LoadFileContent(const std::string &file_path) { std::ifstream fs(resolved_file_path, std::ifstream::in); if (!fs.is_open()) { - GELOGE(PARAM_INVALID, "Open %s failed.", file_path.c_str()); + GELOGE(PARAM_INVALID, "[Open][File]Failed, file path %s invalid", file_path.c_str()); + REPORT_CALL_ERROR("E19999", "Open file failed, path %s invalid", file_path.c_str()); return false; } @@ -77,7 +78,8 @@ bool PropertiesManager::LoadFileContent(const std::string &file_path) { while (getline(fs, line)) { // line not with \n if (!ParseLine(line)) { - GELOGE(PARAM_INVALID, "Parse line failed. 
content is [%s].", line.c_str()); + GELOGE(PARAM_INVALID, "[Parse][Line]Failed, content is %s", line.c_str()); + REPORT_CALL_ERROR("E19999", "Parse line failed, content is %s", line.c_str()); fs.close(); return false; } @@ -100,15 +102,18 @@ bool PropertiesManager::ParseLine(const std::string &line) { if (!temp.empty()) { std::string::size_type pos = temp.find_first_of(delimiter); if (pos == std::string::npos) { - GELOGE(PARAM_INVALID, "Incorrect line [%s], it must include [%s].Perhaps you use illegal chinese symbol", + GELOGE(PARAM_INVALID, "[Check][Param]Incorrect line %s, it must include %s", line.c_str(), delimiter.c_str()); + REPORT_CALL_ERROR("E19999", "Incorrect line %s, it must include %s", + line.c_str(), delimiter.c_str()); return false; } std::string map_key = Trim(temp.substr(0, pos)); std::string value = Trim(temp.substr(pos + 1)); if (map_key.empty() || value.empty()) { - GELOGE(PARAM_INVALID, "Map_key or value empty. %s", line.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param]Map_key or value empty, line %s", line.c_str()); + REPORT_CALL_ERROR("E19999", "Map_key or value empty, line %s", line.c_str()); return false; } diff --git a/ge/common/util.cc b/ge/common/util.cc index 63d75de1..6887b096 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -83,7 +83,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(co std::ifstream fs(real_path, std::ifstream::in | std::ifstream::binary); if (!fs.is_open()) { ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {file, "ifstream is_open failed"}); - GELOGE(ge::FAILED, "Open real path[%s] failed.", file); + GELOGE(ge::FAILED, "[Open][File]Failed, file path %s", file); return false; } @@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(co if (!ret) { ErrorManager::GetInstance().ATCReportErrMessage("E19005", {"file"}, {file}); - GELOGE(ge::FAILED, "Parse file[%s] failed.", file); + GELOGE(ge::FAILED, "[Parse][File]Failed, file %s", file); return ret; } @@ -155,7 +155,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co std::ifstream file(real_path.c_str(), std::ios::binary | std::ios::ate); if (!file.is_open()) { - GELOGE(ge::FAILED, "Read file %s failed.", file_name); + GELOGE(ge::FAILED, "[Read][File]Failed, file %s", file_name); + REPORT_CALL_ERROR("E19999", "Read file %s failed", file_name); return false; } @@ -182,7 +183,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co std::ifstream file(real_path.c_str(), std::ios::binary | std::ios::ate); if (!file.is_open()) { - GELOGE(ge::FAILED, "Read file %s failed.", file_name); + GELOGE(ge::FAILED, "[Read][File]Failed, file %s", file_name); + REPORT_CALL_ERROR("E19999", "Read file %s failed", file_name); return false; } @@ -250,7 +252,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string CurrentTimeInStr() std::time_t now = std::time(nullptr); std::tm *ptm = std::localtime(&now); if (ptm == nullptr) { - GELOGE(ge::FAILED, "Localtime failed."); + GELOGE(ge::FAILED, "[Check][Param]Localtime incorrect, errmsg %s", strerror(errno)); + REPORT_CALL_ERROR("E19999", "Localtime incorrect, errmsg %s", strerror(errno)); return ""; } @@ -277,18 +280,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromText(const ch if (!fs.is_open()) { ErrorManager::GetInstance().ATCReportErrMessage("E19017", {"realpth", "protofile"}, {real_path, file}); - GELOGE(ge::FAILED, "Fail to open proto file real path is '%s' 
when orginal file path is '%s'.", real_path.c_str(), - file); + GELOGE(ge::FAILED, "[Open][ProtoFile]Failed, real path %s, original file path %s", + real_path.c_str(), file); return false; } google::protobuf::io::IstreamInputStream input(&fs); bool ret = google::protobuf::TextFormat::Parse(&input, message); GE_IF_BOOL_EXEC(!ret, ErrorManager::GetInstance().ATCReportErrMessage("E19018", {"protofile"}, {file}); - GELOGE(ret, - "Parse file[%s] through [google::protobuf::TextFormat::Parse] failed, " - "please check whether the file is a valid protobuf format file.", - file)); + GELOGE(ret, "[Parse][File]Through [google::protobuf::TextFormat::Parse] failed, " + "file %s", file)); fs.close(); return ret; @@ -490,7 +491,8 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str ret = regexec(&reg, str.c_str(), 0, NULL, 0); if (ret) { regerror(ret, &reg, ebuff, kMaxBuffSize); - GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); + GELOGE(ge::PARAM_INVALID, "[Regexec][Param]Failed, reason %s", ebuff); + REPORT_CALL_ERROR("E19999", "Regexec failed, reason %s", ebuff); regfree(&reg); return false; } @@ -518,35 +520,44 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { if (file_path == nullptr) { - GELOGE(PARAM_INVALID, "Config path is null."); + GELOGE(PARAM_INVALID, "[Check][Param]Config path is null"); + REPORT_INNER_ERROR("E19999", "Config path is null"); return false; } if (!CheckInputPathValid(file_path)) { - GELOGE(PARAM_INVALID, "Config path is invalid: %s", file_path); + GELOGE(PARAM_INVALID, "[Check][Param]Config path %s is invalid", file_path); + REPORT_CALL_ERROR("E19999", "Config path %s is invalid", file_path); return false; } // Normalize the path std::string resolved_file_path = RealPath(file_path); if (resolved_file_path.empty()) { - GELOGE(PARAM_INVALID, "Invalid input file path [%s], make sure that the file path is correct.", file_path); + GELOGE(PARAM_INVALID, "[Check][Param]Invalid input file path %s, errmsg %s", file_path, strerror(errno)); + REPORT_CALL_ERROR("E19999", "Invalid input file path %s, errmsg %s", file_path, strerror(errno)); return false; } mmStat_t stat = {0}; int32_t ret = mmStatGet(resolved_file_path.c_str(), &stat); if (ret != EN_OK) { - GELOGE(PARAM_INVALID, "cannot get config file status, which path is %s, maybe not exist, return %d, errcode %d", - resolved_file_path.c_str(), ret, mmGetErrorCode()); + GELOGE(PARAM_INVALID, "[Get][FileStatus]Failed, which path %s maybe not exist, " + "return %d, errcode %d", resolved_file_path.c_str(), ret, mmGetErrorCode()); + REPORT_CALL_ERROR("E19999", "Get config file status failed, which path %s maybe not exist, " + "return %d, errcode %d", resolved_file_path.c_str(), ret, mmGetErrorCode()); return false; } if ((stat.st_mode & S_IFMT) != S_IFREG) { - GELOGE(PARAM_INVALID, "config file is not a common file, which path is %s, mode is %u", resolved_file_path.c_str(), - stat.st_mode); + GELOGE(PARAM_INVALID, "[Check][Param]Config file is not a common file, which path is %s, " + "mode is %u", resolved_file_path.c_str(), stat.st_mode); + REPORT_CALL_ERROR("E19999", "Config file is not a common file, which path is %s, " + "mode is %u", resolved_file_path.c_str(), stat.st_mode); return false; } if (stat.st_size > kMaxConfigFileByte) { - GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", - resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); +
GELOGE(PARAM_INVALID, "[Check][Param]Config file %s size %ld is larger than max config " + "file Bytes %u", resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); + REPORT_CALL_ERROR("E19999", "Config file %s size %ld is larger than max config file Bytes %u", + resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); return false; } return true; @@ -554,29 +565,36 @@ FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status CheckPath(const char *path, size_t length) { if (path == nullptr) { - GELOGE(PARAM_INVALID, "Config path is invalid."); + GELOGE(PARAM_INVALID, "[Check][Param]Config path is invalid"); + REPORT_CALL_ERROR("E19999", "Config path is invalid"); return PARAM_INVALID; } if (strlen(path) != length) { - GELOGE(PARAM_INVALID, "Path is invalid or length of config path is not equal to given length."); + GELOGE(PARAM_INVALID, "[Check][Param]Path %s is invalid or length %zu " + "not equal to given length %zu", path, strlen(path), length); + REPORT_CALL_ERROR("E19999", "Path %s is invalid or length %zu " + "not equal to given length %zu", path, strlen(path), length); return PARAM_INVALID; } if (length == 0 || length > MMPA_MAX_PATH) { - GELOGE(PARAM_INVALID, "Length of config path is invalid."); + GELOGE(PARAM_INVALID, "[Check][Param]Length of config path %zu is invalid", length); + REPORT_INNER_ERROR("E19999", "Length of config path %zu is invalid", length); return PARAM_INVALID; } INT32 is_dir = mmIsDir(path); if (is_dir != EN_OK) { - GELOGE(PATH_INVALID, "Open directory %s failed, maybe it is not exit or not a dir. errmsg:%s", + GELOGE(PATH_INVALID, "[Open][Directory]Failed, directory path %s, errmsg %s", path, strerror(errno)); + REPORT_CALL_ERROR("E19999", "Open directory %s failed, errmsg %s", path, strerror(errno)); return PATH_INVALID; } if (mmAccess2(path, M_R_OK) != EN_OK) { - GELOGE(PATH_INVALID, "Read path[%s] failed, errmsg[%s]", path, strerror(errno)); + GELOGE(PATH_INVALID, "[Read][Path]Failed, path %s, errmsg %s", path, strerror(errno)); + REPORT_CALL_ERROR("E19999", "Read path %s failed, errmsg %s", path, strerror(errno)); return PATH_INVALID; } return SUCCESS; diff --git a/ge/engine_manager/dnnengine_manager.cc b/ge/engine_manager/dnnengine_manager.cc index 2bd9b3e5..7a3f7aec 100644 --- a/ge/engine_manager/dnnengine_manager.cc +++ b/ge/engine_manager/dnnengine_manager.cc @@ -71,13 +71,15 @@ Status DNNEngineManager::Initialize(const std::map &op std::vector so_func{so_api_func}; Status status = plugin_mgr_.Load(path, so_func); if (status != SUCCESS) { - GELOGE(status, "Load engine's so failed. 
LibPath is %s", path.c_str()); + GELOGE(status, "[Load][EngineSo]Failed, lib path %s", path.c_str()); + REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", path.c_str()); return status; } status = plugin_mgr_.InvokeAll &>(so_api_func, engines_map_); if (status != SUCCESS) { - GELOGE(status, "Get DNNEngineObjs failed."); + GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", so_api_func.c_str()); + REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", so_api_func.c_str()); return status; } @@ -94,16 +96,21 @@ Status DNNEngineManager::Initialize(const std::map &op status = iter->second->Initialize(options); if (status != SUCCESS) { - GELOGE(status, "Engine: %s initialize failed.", (iter->first).c_str()); + GELOGE(status, "[Init][Engine]Failed, engine %s", (iter->first).c_str()); + REPORT_CALL_ERROR("E19999", "Initialize engine %s failed", (iter->first).c_str()); return status; } + // Check engines' attribute DNNEngineAttribute attrs; iter->second->GetAttributes(attrs); if (attrs.runtime_type == RuntimeType::DEVICE) { if ((attrs.mem_type.size()) != 1 || (attrs.mem_type[0] != GE_ENGINE_ATTR_MEM_TYPE_HBM)) { - GELOGE(GE_ENG_MEMTYPE_ERROR, "Engine: %s in aicore, but the memory type is not HBM", (iter->first).c_str()); + GELOGE(GE_ENG_MEMTYPE_ERROR, "[Check][Param]Engine %s in aicore, but the memory type is " + "not HBM, mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size()); + REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, " + "mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size()); return GE_ENG_MEMTYPE_ERROR; } } @@ -111,13 +118,13 @@ Status DNNEngineManager::Initialize(const std::map &op status = ParserJsonFile(); if (status != SUCCESS) { - GELOGE(status, "parse json file failed"); + GELOGE(status, "[Parse][JsonFile]Failed"); return status; } status = CheckJsonFile(); if (status != SUCCESS) { - GELOGE(status, "check json file failed"); + GELOGE(status, "[Check][JsonFile]Failed"); return status; } @@ -138,7 +145,8 @@ Status DNNEngineManager::Finalize() { GELOGI("DNNEngine name: %s.", (iter->first).c_str()); Status status = iter->second->Finalize(); if (status != SUCCESS) { - GELOGE(status, "Engine finalize failed."); + GELOGE(status, "[Finalize][Engine]Failed, engine %s", (iter->first).c_str()); + REPORT_CALL_ERROR("E19999", "Finalize engine %s failed", (iter->first).c_str()); return status; } } @@ -188,7 +196,8 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { // Use the OpsKernelManager in GELib to get the opInfos for this opCode std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GetDNNEngineName failed."); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][DNNEngineName]Failed, gelib not init before"); + REPORT_INNER_ERROR("E19999", "Get DNNEngineName failed, gelib not init before"); return ""; } OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); @@ -234,8 +243,9 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { ErrorManager::GetInstance().ATCReportErrMessage("E13001", {"kernelname", "optype", "opname"}, {kernel_name, op_desc->GetType(), op_desc->GetName()}); GELOGE(FAILED, - "The custom operator registered by the user does not support the logic function delivered by this " - "network. 
Check support failed, kernel_name is %s, op type is %s, op name is %s", + "[Check][Param]The custom operator registered by the user does not support " + "the logic function delivered by this network, kernel_name %s, op type %s, " + "op name %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); std::string error_info = "The custom operator registered by the user does not support the logic function" "delivered by this network"; @@ -262,7 +272,8 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { reason += it.first + ":" + it.second + ";"; ErrorManager::GetInstance().ATCReportErrMessage( "E13002", {"optype", "opskernel", "reason"}, {op_desc->GetType(), it.first, it.second}); - GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s", + GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Check][OpSupported]Op type %s of ops kernel %s " + "is unsupported, reason %s", op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); } @@ -273,7 +284,8 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { ErrorManager::GetInstance().ATCReportErrMessage( "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()}); - GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s", + GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Get][DNNEngineName]Can't find any supported ops kernel " + "and engine of %s, type is %s", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ""; } @@ -289,8 +301,10 @@ std::string DNNEngineManager::GetHostCpuEngineName(const std::vector &op return kHostCpuEngineName; } } - GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not support [%s, %s].", + GELOGE(FAILED, "[Get][HostCpuEngineName]Failed, HostCpuEngine not support [%s, %s]", op_desc->GetName().c_str(), op_desc->GetType().c_str()); + REPORT_INNER_ERROR("E19999", "Get HostCpuEngineName failed, HostCpuEngine not support [%s, %s]", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ""; } @@ -304,7 +318,8 @@ Status DNNEngineManager::ParserJsonFile() { nlohmann::json scheduler_json_file; Status status = ReadJsonFile(path, &scheduler_json_file); if (status != SUCCESS) { - GELOGE(FAILED, "Read scheduler json file failed and the file path is %s", path.c_str()); + GELOGE(FAILED, "[Read][JsonFile]Failed, file %s", path.c_str()); + REPORT_CALL_ERROR("E19999", "Read json file %s failed", path.c_str()); return FAILED; } if (scheduler_json_file.is_null()) { @@ -316,11 +331,15 @@ Status DNNEngineManager::ParserJsonFile() { try { nlohmann::json scheduler_utils_json = scheduler_json_file[kSchedulerUnits]; if (scheduler_utils_json.is_null()) { - GELOGE(FAILED, "The message of scheduler units is not found"); + GELOGE(FAILED, "[Check][Param]Find scheduler units failed, the message is null, file %s", path.c_str()); + REPORT_INNER_ERROR("E19999", "Find scheduler units failed, the message is null, file %s", path.c_str()); return FAILED; } if (!scheduler_utils_json.is_array()) { - GELOGE(FAILED, "The message of kSchedulerUnits is not array and the file path is %s", json_file_path.c_str()); + GELOGE(FAILED, "[Check][Param]The message of kSchedulerUnits is not array and " + "the file path is %s", path.c_str()); + REPORT_INNER_ERROR("E19999", "The message of kSchedulerUnits is not array and " + "the file path is %s", path.c_str()); return FAILED; } auto size = scheduler_json_file[kSchedulerUnits].size(); @@ -329,19 +348,23 @@ Status
DNNEngineManager::ParserJsonFile() { std::map engine_conf_map; nlohmann::json engines_json_map = scheduler_utils_json[i][kCalEngines]; if (engines_json_map.is_null()) { - GELOGE(FAILED, "The message of cal_engines is not found"); + GELOGE(FAILED, "[Check][Param]The message of cal_engines is null, file %s", path.c_str()); + REPORT_INNER_ERROR("E19999", "The message of cal_engines is null, file %s", path.c_str()); return FAILED; } std::string scheduler_id_temp = scheduler_utils_json[i][kId]; if (!scheduler_id_temp.empty()) { scheduler_conf.id = scheduler_id_temp; } else { - GELOGE(FAILED, "Scheduler ID is null"); + GELOGE(FAILED, "[Check][Param]Scheduler ID is null, file %s", path.c_str()); + REPORT_INNER_ERROR("E19999", "Scheduler ID is null, file %s", path.c_str()); return FAILED; } status = ParserEngineMessage(engines_json_map, scheduler_id_temp, engine_conf_map); if (status != SUCCESS) { - GELOGE(FAILED, "Parser engines messages failed"); + GELOGE(FAILED, "[Parse][EngineMessage]Failed, scheduler_id_temp %s", scheduler_id_temp.c_str()); + REPORT_CALL_ERROR("E19999", "Parse engine message failed, scheduler_id_temp %s", + scheduler_id_temp.c_str()); return FAILED; } scheduler_conf.name = scheduler_utils_json[i][kName]; @@ -349,13 +372,17 @@ Status DNNEngineManager::ParserJsonFile() { scheduler_conf.cal_engines = engine_conf_map; auto it = schedulers_.find(scheduler_id_temp); if (it != schedulers_.end()) { - GELOGE(FAILED, "There are the same scheduler ts %s in the json file", scheduler_id_temp.c_str()); + GELOGE(FAILED, "[Check][Param]There are the same scheduler ts %s in the json file", + scheduler_id_temp.c_str()); + REPORT_INNER_ERROR("E19999", "[Check][Param]There are the same scheduler ts %s " + "in the json file", scheduler_id_temp.c_str()); return FAILED; } schedulers_.emplace(scheduler_id_temp, scheduler_conf); } } catch (const nlohmann::detail::type_error &e) { - GELOGE(FAILED, "Parser json file failed"); + GELOGE(FAILED, "[Parse][JsonFile]Failed, file %s, reason %s", path.c_str(), e.what()); + REPORT_CALL_ERROR("E19999", "Parse json file %s failed, reason %s", path.c_str(), e.what()); return FAILED; } @@ -367,7 +394,8 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: std::map &engines) { GELOGI("Begin to parser engine massage"); if (engines_json.is_null()) { - GELOGE(FAILED, "The message of cal_engines is null"); + GELOGE(FAILED, "[Check][Param]The message of cal_engines is null"); + REPORT_INNER_ERROR("E19999", "The message of cal_engines is null"); return FAILED; } try { @@ -382,7 +410,8 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: if (!engine_id.empty()) { engine_conf_ptr->id = engine_id; } else { - GELOGE(FAILED, "engineID is null"); + GELOGE(FAILED, "[Check][Param]Engine ID is null"); + REPORT_INNER_ERROR("E19999", "Engine ID is null"); return FAILED; } if (engines_elems.find(kName) != engines_elems.end()) { @@ -404,17 +433,22 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: engine_conf_ptr->scheduler_id = scheduler_mark; auto it = engines.find(engine_id); if (it != engines.end()) { - GELOGE(FAILED, "There are the same engine %s message in the json file", engine_id.c_str()); + GELOGE(FAILED, "[Check][Param]There are the same engine %s message in the json file", + engine_id.c_str()); + REPORT_INNER_ERROR("E19999", "There are the same engine %s message in the json file", + engine_id.c_str()); return FAILED; } engines.emplace(engine_id, engine_conf_ptr); } } else { - 
GELOGE(FAILED, "The message of cal_engines is not array in the json file"); + GELOGE(FAILED, "[Check][Param]The message of cal_engines is not array in the json file"); + REPORT_INNER_ERROR("E19999", "The message of cal_engines is not array in the json file"); return FAILED; } } catch (const json::exception &e) { - GELOGE(FAILED, "construct json content failed"); + GELOGE(FAILED, "[Construct][JsonContent]Failed, reason %s", e.what()); + REPORT_INNER_ERROR("E19999", "Construct json content failed, reason %s", e.what()); return FAILED; } GELOGI("Parser engine massage success"); @@ -424,18 +458,23 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) { GELOGD("Begin to read json file"); if (file_path.empty()) { - GELOGE(FAILED, "Json path %s is not valid", file_path.c_str()); + GELOGE(FAILED, "[Check][Param]Json path is empty"); + REPORT_INNER_ERROR("E19999", "Json path is empty"); return FAILED; } nlohmann::json *json_file = reinterpret_cast(handle); if (json_file == nullptr) { - GELOGE(FAILED, "JsonFile is nullptr"); + GELOGE(FAILED, "[Check][Param]Json file is nullptr"); + REPORT_CALL_ERROR("E19999", "Json file is nullptr"); return FAILED; } const char *file = file_path.data(); if ((mmAccess2(file, M_F_OK)) != EN_OK) { if (engines_map_.size() != 0) { - GELOGE(FAILED, "The json file %s is not exist, errmsg:%s", file_path.c_str(), strerror(errno)); + GELOGE(FAILED, "[Check][Param]The json file %s not exists, err %s", + file_path.c_str(), strerror(errno)); + REPORT_CALL_ERROR("E19999", "Json file %s not exists, err %s", + file_path.c_str(), strerror(errno)); return FAILED; } else { GELOGW("The json file %s is not needed.", file_path.c_str()); @@ -445,14 +484,16 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h std::ifstream ifs(file_path); if (!ifs.is_open()) { - GELOGE(FAILED, "Open json file %s failed", file_path.c_str()); + GELOGE(FAILED, "[Open][JsonFile]Failed, file %s", file_path.c_str()); + REPORT_CALL_ERROR("E19999", "Open json file %s failed", file_path.c_str()); return FAILED; } try { ifs >> *json_file; } catch (const json::exception &e) { - GELOGE(FAILED, "Read json file failed"); + GELOGE(FAILED, "[Read][JsonFile]Failed, reason %s", e.what()); + REPORT_CALL_ERROR("E19999", "Read json file failed, reason %s", e.what()); ifs.close(); return FAILED; } @@ -474,11 +515,17 @@ Status DNNEngineManager::CheckJsonFile() { } } if (count == 0) { - GELOGE(FAILED, "The engine message %s is not found in the json file", engine_name.c_str()); + GELOGE(FAILED, "[Check][JsonFile]The engine message %s is not found in the json file", + engine_name.c_str()); + REPORT_INNER_ERROR("E19999", "The engine message %s is not found in the json file", + engine_name.c_str()); return FAILED; } if (count > 1) { - GELOGE(FAILED, "The same engine message %s is existed in the json file", engine_name.c_str()); + GELOGE(FAILED, "[Check][JsonFile]The same engine message %s exists in the json file", + engine_name.c_str()); + REPORT_INNER_ERROR("E19999", "The same engine message %s exists in the json file", + engine_name.c_str()); return FAILED; } } diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 1782d497..820518ad 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -19,6 +19,7 @@ set(SRC_LIST "../common/dump/exception_dumper.cc" "../common/dump/dump_manager.cc" "../common/dump/dump_op.cc" + "../common/dump/dump_server.cc" 
"../common/dump/opdebug_register.cc" "../common/profiling/ge_profiling.cc" "../graph/load/graph_loader.cc" @@ -27,6 +28,8 @@ set(SRC_LIST "../graph/manager/graph_var_manager.cc" "../graph/manager/graph_mem_allocator.cc" "../graph/manager/graph_caching_allocator.cc" + "../graph/manager/session_scope_mem_allocator.cc" + "../graph/manager/graph_mem_manager.cc" "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" @@ -110,6 +113,8 @@ set(SRC_LIST "../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc" + "../hybrid/node_executor/rts/rts_node_task.cc" + "../hybrid/node_executor/rts/rts_task_factory.cc" "../hybrid/node_executor/node_executor.cc" "../hybrid/node_executor/task_context.cc" "../hybrid/hybrid_davinci_model.cc" @@ -199,6 +204,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/cce #### blue zone #### ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_libraries(ge_executor PRIVATE @@ -245,6 +251,7 @@ target_include_directories(ge_executor_shared PRIVATE ${GE_CODE_DIR}/../inc/cce #### blue zone #### ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_options(ge_executor_shared PRIVATE diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 4081bdf2..e66dcb58 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -26,7 +26,7 @@ #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "single_op/single_op_manager.h" #include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" @@ -731,6 +731,23 @@ Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType return SUCCESS; } +Status GeExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value) { + GELOGI("Begin to get op attr."); + if (!isInit_) { + GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Init][GeExecutor]Ge executor not inited yet!"); + REPORT_INNER_ERROR("E19999", "Ge executor not inited yet!"); + return ACL_ERROR_GE_EXEC_NOT_INIT; + } + Status ret = GraphExecutor::GetOpAttr(model_id, op_name, attr_name, attr_value); + if (ret != SUCCESS) { + GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); + REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); + return ret; + } + return SUCCESS; +} + Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { if (!isInit_) { GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index ae1288f5..a56eaadf 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -122,12 +122,10 @@ OMG_HOST_SRC_FILES := \ graph/passes/dimension_adjust_pass.cc \ graph/passes/get_original_format_pass.cc \ graph/passes/shape_operate_op_remove_pass.cc \ - graph/passes/unused_op_remove_pass.cc \ graph/passes/assert_pass.cc \ graph/passes/dropout_pass.cc \ graph/passes/infershape_pass.cc \ graph/passes/unused_const_pass.cc \ - graph/passes/isolated_op_remove_pass.cc \ 
graph/passes/permute_pass.cc \ graph/passes/ctrl_edge_transfer_pass.cc \ graph/passes/end_of_sequence_add_control_pass.cc \ @@ -209,7 +207,6 @@ OMG_HOST_SRC_FILES := \ graph/passes/switch_logic_remove_pass.cc \ graph/passes/switch_data_edges_bypass.cc \ graph/passes/merge_pass.cc \ - graph/passes/variable_format_pass.cc \ graph/passes/variable_op_pass.cc \ graph/passes/cast_remove_pass.cc \ graph/passes/transpose_transdata_pass.cc \ diff --git a/ge/ge_local_engine/engine/ge_local_engine.cc b/ge/ge_local_engine/engine/ge_local_engine.cc index 58f24d45..ac3e5473 100755 --- a/ge/ge_local_engine/engine/ge_local_engine.cc +++ b/ge/ge_local_engine/engine/ge_local_engine.cc @@ -35,7 +35,8 @@ Status GeLocalEngine::Initialize(const std::map &options) { if (ops_kernel_store_ == nullptr) { ops_kernel_store_ = MakeShared(); if (ops_kernel_store_ == nullptr) { - GELOGE(FAILED, "Make GeLocalOpsKernelInfoStore failed."); + REPORT_CALL_ERROR("E19999", "create GeLocalOpsKernelInfoStore failed."); + GELOGE(FAILED, "[Call][MakeShared] Make GeLocalOpsKernelInfoStore failed."); return FAILED; } } diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 8bc159dc..8f01a166 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -43,7 +43,7 @@ namespace { } \ auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "[Get][OutputName] failed. node = %s, index = %zu", \ op_desc->GetName().c_str(), i); \ named_outputs.emplace(tensor_name, tensor); \ break; \ @@ -61,7 +61,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { if (out_desc.GetShape().IsUnknownShape()) { std::vector> range; if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get shape range failed."); + REPORT_CALL_ERROR("E19999", "GetShapeRange failed."); + GELOGE(INTERNAL_ERROR, "[Get][ShapeRange] failed."); return INTERNAL_ERROR; } int64_t max_range_size = 1; @@ -72,7 +73,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { num_size = max_range_size; } if (num_size < 0) { - GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size); + REPORT_INNER_ERROR("E19999", "Get negative size, num_size=%ld.", num_size); + GELOGE(INTERNAL_ERROR, "[Check][Param] Get negative size, num_size=%ld.", num_size); return INTERNAL_ERROR; } data_num = static_cast(num_size); @@ -137,10 +139,10 @@ Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, map &named_inputs) { auto num_inputs = op_desc->GetInputsSize(); if (num_inputs != inputs.size()) { - GELOGE(PARAM_INVALID, - "Mismatching input sizes. op_desc has %zu input(s), but given %zu", - num_inputs, - inputs.size()); + REPORT_INNER_ERROR("E19999", "Mismatching input sizes. op_desc:%s(%s) has %zu input(s), but given %zu", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), num_inputs, inputs.size()); + GELOGE(PARAM_INVALID, "[Check][Param] Mismatching input sizes. 
op_desc:%s(%s) has %zu input(s), but given %zu", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), num_inputs, inputs.size()); return PARAM_INVALID; } @@ -149,8 +151,8 @@ Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, GE_CHECK_NOTNULL(ge_tensor); auto tensor = TensorAdapter::AsTensor(*ge_tensor); auto tensor_name = op_desc->GetInputNameByIndex(i); - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), - "Failed to get input name. node = %s, index = %zu", op_desc->GetName().c_str(), i); + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "[Get][InputName] failed. node = %s, index = %zu", + op_desc->GetName().c_str(), i); GELOGD("Successfully inserted input tensor. node = %s, index = %zu, input name = %s", op_desc->GetName().c_str(), i, tensor_name.c_str()); named_inputs.emplace(tensor_name, tensor); @@ -173,7 +175,7 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, uint64_t data_num = 0; if (need_create_flag) { if (GetDataNumber(out_desc, data_num) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed", op_desc->GetName().c_str(), i); + GELOGE(INTERNAL_ERROR, "[Get][Number] node:%s get size for output %zu failed", op_desc->GetName().c_str(), i); return INTERNAL_ERROR; } } @@ -234,12 +236,16 @@ Status HostCpuEngine::Run(NodePtr &node, const vector &inputs, for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) { auto tensor_name = op_desc->GetOutputNameByIndex(i); if (tensor_name.empty()) { - GELOGE(INTERNAL_ERROR, "Failed to get output name. node = %s, index = %zu", op_desc->GetName().c_str(), i); + REPORT_INNER_ERROR("E19999", "GetOutputNameByIndex failed, node = %s, index = %zu", + op_desc->GetName().c_str(), i); + GELOGE(INTERNAL_ERROR, "[Get][OutputName] failed. node = %s, index = %zu", op_desc->GetName().c_str(), i); return INTERNAL_ERROR; } auto iter = named_outputs.find(tensor_name); if (iter == named_outputs.end()) { - GELOGE(INTERNAL_ERROR, "Failed to get output tensor. node = %s, index = %zu, tensor_name = %s", + REPORT_INNER_ERROR("E19999", "get output tensor failed, node = %s, index = %zu, tensor_name = %s", + op_desc->GetName().c_str(), i, tensor_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][OutputTensor] failed. node = %s, index = %zu, tensor_name = %s", op_desc->GetName().c_str(), i, tensor_name.c_str()); return INTERNAL_ERROR; } @@ -328,7 +334,8 @@ Status HostCpuEngine::LoadLib(const std::string &lib_path) { if (handle == nullptr) { const char *error = mmDlerror(); error = (error == nullptr) ? "" : error; - GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), error); + REPORT_CALL_ERROR("E19999", "mmDlopen failed, path = %s, error = %s", lib_path.c_str(), error); + GELOGE(INTERNAL_ERROR, "[Invoke][DlOpen] failed. 
path = %s, error = %s", lib_path.c_str(), error); return INTERNAL_ERROR; } diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc index 602dc35f..5842fe29 100644 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc @@ -52,7 +52,8 @@ Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { GELOGD("[%s] CalcOpRunningParam In.", ge_node.GetName().c_str()); OpDescPtr op_desc = ge_node.GetOpDesc(); if (op_desc == nullptr) { - GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); + REPORT_CALL_ERROR("E19999", "param ge_node has no opdesc, check invalid."); + GELOGE(FAILED, "[Get][OpDesc] CalcOpRunningParam failed, as op desc is null"); return FAILED; } @@ -97,15 +98,21 @@ Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { } if (graph_status != GRAPH_SUCCESS) { - GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), - node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + REPORT_CALL_ERROR("E19999", "calc op[%s:%s] out[%zu] mem size failed, format=%s, data_type=%s, error=%u.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); + GELOGE(FAILED, "[Calc][MemSize] for op[%s:%s] out[%zu] failed, format=%s, data_type=%s, error=%u.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); return FAILED; } if (output_mem_size < 0) { - GELOGE(FAILED, - "Calc op[%s:%s] out[%zu] mem size is negative(not support)," + REPORT_INNER_ERROR("E19999", "Calc op[%s:%s] out[%zu] mem size is negative(not support)," + " format=%s, data_type=%s, mem_size=%ld.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); + GELOGE(FAILED, "[Calc][MemSize] op[%s:%s] out[%zu] mem size is negative(not support)," " format=%s, data_type=%s, mem_size=%ld.", node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); @@ -133,17 +140,20 @@ Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { Status GeLocalOpsKernelBuilder::CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size) { if (op_desc == nullptr) { - GELOGE(FAILED, "CalcConstantStrMemSize failed, as op desc is null"); + REPORT_INNER_ERROR("E19999", "param op_desc is nullptr, check invalid"); + GELOGE(FAILED, "[Check][Param] CalcConstantStrMemSize failed, as op desc is null"); return FAILED; } ConstGeTensorPtr value = MakeShared(); if (value == nullptr) { - GELOGE(FAILED, "make shared ConstGeTensor exception."); + REPORT_CALL_ERROR("E19999", "make shared ConstGeTensor exception."); + GELOGE(FAILED, "[Create][GeTensor] make shared ConstGeTensor exception."); return FAILED; } // Constant op attr name is "value" if (!AttrUtils::GetTensor(op_desc, kConstantOpAttrName, value)) { - GELOGE(FAILED, "Get Constant op attr value failed"); + REPORT_CALL_ERROR("E19999", "get op:%s attr value failed", op_desc->GetName().c_str()); + GELOGE(FAILED, "[Get][Value] of Constant op attr failed"); return FAILED; } mem_size = 
static_cast(value->GetData().size()); @@ -165,13 +175,15 @@ Status GeLocalOpsKernelBuilder::GenerateTask(const Node &node, RunContext &conte auto op = OpFactory::Instance().CreateOp(node, context); if (op == nullptr) { - GELOGE(FAILED, "CreateOp for node:%s(%s) failed.", name.c_str(), type.c_str()); + REPORT_CALL_ERROR("E19999", "create op for node:%s(%s) failed.", name.c_str(), type.c_str()); + GELOGE(FAILED, "[Create][Op] for node:%s(%s) failed.", name.c_str(), type.c_str()); return FAILED; } Status ret = op->Run(); if (ret != SUCCESS) { - GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); + REPORT_CALL_ERROR("E19999", "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); + GELOGE(ret, "[Call][Run] for Node:%s(%s) op failed.", name.c_str(), type.c_str()); return ret; } GELOGD("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc index 90d95217..ee601a99 100755 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc @@ -24,7 +24,8 @@ namespace ge_local { GeDeletedOp::GeDeletedOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} Status GeDeletedOp::Run() { - GELOGE(FAILED, "Node:%s type is %s, should be deleted by ge.", name_.c_str(), type_.c_str()); + REPORT_INNER_ERROR("E19999", "Node:%s type is %s, should be deleted by ge.", name_.c_str(), type_.c_str()); + GELOGE(FAILED, "[Delete][Node] Node:%s type is %s, should be deleted by ge.", name_.c_str(), type_.c_str()); // Do nothing return FAILED; } diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc index c57b4f4d..2e56b7bb 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc @@ -31,8 +31,10 @@ std::shared_ptr OpFactory::CreateOp(const Node &node, RunContext &run_contex if (iter != op_creator_map_.end()) { return iter->second(node, run_context); } - - GELOGE(FAILED, "Not supported OP, type = %s, name = %s", node.GetType().c_str(), node.GetName().c_str()); + REPORT_INNER_ERROR("E19999", "Not supported OP, type = %s, name = %s", + node.GetType().c_str(), node.GetName().c_str()); + GELOGE(FAILED, "[Check][Param] Not supported OP, type = %s, name = %s", + node.GetType().c_str(), node.GetName().c_str()); return nullptr; } diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 2aa19e7a..8ca8572c 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -187,7 +187,6 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/identity_pass.cc \ graph/passes/ref_identity_delete_op_pass.cc \ graph/passes/infershape_pass.cc \ - graph/passes/isolated_op_remove_pass.cc \ graph/passes/iterator_op_pass.cc \ graph/passes/link_gen_mask_nodes_pass.cc \ graph/passes/merge_pass.cc \ @@ -233,13 +232,11 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/transop_without_reshape_fusion_pass.cc \ graph/passes/transpose_transdata_pass.cc \ graph/passes/unused_const_pass.cc \ - graph/passes/unused_op_remove_pass.cc \ graph/passes/var_is_initialized_op_pass.cc \ graph/passes/parallel_concat_start_op_pass.cc \ graph/passes/cond_pass.cc \ graph/passes/cond_remove_pass.cc \ graph/passes/for_pass.cc \ - graph/passes/variable_format_pass.cc \ graph/passes/variable_op_pass.cc \ graph/passes/variable_prepare_op_pass.cc \ graph/passes/variable_ref_delete_op_pass.cc \ diff --git
a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 23b9b78a..a12f3cf7 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -94,7 +94,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, {op_desc->GetName(), op_desc->GetType(), "engine type", "it only support default/AIcoreEngine/VectorEngine"}); - GELOGE(FAILED, "[Check][EngineType]value:%d not support, " + GELOGE(FAILED, "[Check][Param] value:%d not support, " "only support default/AIcoreEngine/VectorEngine now", static_cast(engine_type)); return FAILED; } @@ -107,7 +107,8 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty // set op engine name and opkernelLib. when engine support std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "CheckEngineType failed."); + REPORT_INNER_ERROR("E19999", "get gelib failed, as get instance failed or initflag failed."); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] CheckEngineType failed, as get gelib failed."); return FAILED; } OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); @@ -115,7 +116,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty if (op_infos.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, {op_desc->GetName(), op_desc->GetType(), "optype", "it can not find"}); - GELOGE(FAILED, "CheckEngineType: Can not get op info by op type %s", op_desc->GetType().c_str()); + GELOGE(FAILED, "[Get][OpInfo] by op type %s failed.", op_desc->GetType().c_str()); return FAILED; } string kernel_name; @@ -128,7 +129,8 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty if (kernel_name.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, {op_desc->GetName(), op_desc->GetType(), "engine name" + FmtToStr(op_engine_name), "it can not find"}); - GELOGE(FAILED, "CheckEngineType:Can not find ops kernel, engine name: %s.", op_engine_name.c_str()); + GELOGE(FAILED, "[Check][Param] Can not find ops kernel, engine name:%s. 
op:%s(%s)", + op_engine_name.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } auto &kernel_map = ops_kernel_manager.GetAllOpsKernelInfoStores(); @@ -144,15 +146,14 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty } else { ErrorManager::GetInstance().ATCReportErrMessage( "E13002", {"optype", "opskernel", "reason"}, {op_desc->GetType(), kernel_name, unsupported_reason}); - GELOGE(FAILED, "CheckEngineType: check support failed, Op type %s of ops kernel %s is unsupported, reason:%s", + GELOGE(FAILED, "[Call][CheckSupported] failed, Op type %s of ops kernel %s is unsupported, reason:%s", op_desc->GetType().c_str(), kernel_name.c_str(), unsupported_reason.c_str()); return FAILED; } } else { ErrorManager::GetInstance().ATCReportErrMessage( "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()}); - GELOGE(FAILED, - "CheckEngineType:Can not find any supported ops kernel info store by kernel_name %s," + GELOGE(FAILED, "[Check][Param] Can not find any supported ops kernel info store by kernel_name %s," "op type is %s, op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); } @@ -183,34 +184,47 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const string op_name = node->GetName() + "_in_" + std::to_string(index); OpDescPtr data_op = MakeShared(op_name, op_type); if (data_op == nullptr) { + REPORT_CALL_ERROR("E19999", "create OpDesc failed, name:%s", op_name.c_str()); + GELOGE(FAILED, "[Create][OpDesc] failed, name:%s", op_name.c_str()); return FAILED; } if (is_const) { ConstGeTensorPtr tensor_value; if (!AttrUtils::GetTensor(tensor, ge::ATTR_NAME_WEIGHTS, tensor_value)) { - GELOGE(FAILED, "Get value failed, node name:%s.", tensor.GetName().c_str()); + REPORT_CALL_ERROR("E19999", "get attr %s failed, tensor:%s.", + ge::ATTR_NAME_WEIGHTS.c_str(), tensor.GetName().c_str()); + GELOGE(FAILED, "[Get][Attr] %s failed, tensor:%s.", ge::ATTR_NAME_WEIGHTS.c_str(), tensor.GetName().c_str()); return FAILED; } if (!AttrUtils::SetTensor(data_op, ge::ATTR_NAME_WEIGHTS, tensor_value)) { - GELOGE(FAILED, "Set attr ATTR_NAME_WEIGHTS fail."); + REPORT_CALL_ERROR("E19999", "set attr %s failed, op:%s.", ge::ATTR_NAME_WEIGHTS.c_str(), op_name.c_str()); + GELOGE(FAILED, "[Set][Attr] %s failed, op:%s.", ge::ATTR_NAME_WEIGHTS.c_str(), op_name.c_str()); return FAILED; } } (void)AttrUtils::SetBool(data_op, "_is_single_op", true); - GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, - "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); - GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, - "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); + GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "AddInputDesc failed for node:%s", data_op->GetName().c_str()); + return FAILED, "[Add][InputDesc] fail for node:%s", data_op->GetName().c_str()); + GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "AddOutputDesc failed for node:%s", data_op->GetName().c_str()); + return FAILED, "[Add][OutputDesc] fail for node:%s", data_op->GetName().c_str()); if (attr && !is_const) { - GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED, - "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); + GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, 
data_index), + REPORT_CALL_ERROR("E19999", "set attr %s failed for node:%s", + ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); + return FAILED, + "[Set][Attr:%s] fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); ++data_index; } ge::NodePtr arg_node = graph->AddNode(data_op); - GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail"); + GE_CHK_BOOL_EXEC(arg_node != nullptr, + REPORT_CALL_ERROR("E19999", "add node:%s to graph:%s failed", data_op->GetName().c_str(), + graph->GetName().c_str()); + return FAILED, "[Add][Node] Insert Data node:%s fail", data_op->GetName().c_str()); GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)), "[Add][Edge]fail from node:%s to node:%s", data_op->GetName().c_str(), node->GetName().c_str()); @@ -221,6 +235,8 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, const vector &outputs) { OpDescPtr op_desc = MakeShared(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT, NETOUTPUT); if (op_desc == nullptr) { + REPORT_CALL_ERROR("E19999", "create OpDesc failed, graph:%s", graph->GetName().c_str()); + GELOGE(FAILED, "[Create][OpDesc] failed, graph:%s", graph->GetName().c_str()); return FAILED; } (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); @@ -228,18 +244,23 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons for (const auto &out_desc : outputs) { GeTensorDesc tensor = out_desc.GetTensorDesc(); TensorUtils::SetInputTensor(tensor, true); - GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, - "[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str()); + GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "AddInputDesc failed for node:%s", op_desc->GetName().c_str()); + return FAILED, "[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str()); TensorUtils::SetInputTensor(tensor, false); TensorUtils::SetOutputTensor(tensor, true); - GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, - "[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str()); + GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "AddOutputDesc failed for node:%s", op_desc->GetName().c_str()); + return FAILED, "[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str()); count++; } GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); ge::NodePtr out_node = graph->AddNode(op_desc); - GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, + GE_CHK_BOOL_EXEC(out_node != nullptr, + REPORT_CALL_ERROR("E19999", "add node:%s to graph:%u failed.", + op_desc->GetName().c_str(), graph->GetGraphID()); + return FAILED, "[Add][Node:%s]fail in graph:%u", op_desc->GetName().c_str(), graph->GetGraphID()); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); for (int32_t i = 0; i < count; ++i) { @@ -256,7 +277,8 @@ static void GetOpsProtoPath(string &opsproto_path) { string path = path_env; string file_path = RealPath(path.c_str()); if (file_path.empty()) { - GELOGE(FAILED, "File path %s is invalid.", path.c_str()); + REPORT_CALL_ERROR("E19999", "File path %s is invalid.", path.c_str()); + GELOGE(FAILED, "[Call][RealPath] File path %s is invalid.", path.c_str()); return; } opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); @@ -288,7 +310,8 @@ static Status ResetTensorVecShape(const vector 
&inputs, vector &options) { Status GeGenerator::Initialize(const map &options, OmgContext &omg_context) { impl_ = ge::MakeShared(omg_context); if (impl_ == nullptr) { - GELOGE(MEMALLOC_FAILED, "Make shared failed"); + REPORT_CALL_ERROR("E19999", "create Impl failed."); + GELOGE(MEMALLOC_FAILED, "[Create][Impl] Make shared failed"); return MEMALLOC_FAILED; } @@ -388,7 +412,7 @@ Status GeGenerator::Initialize(const map &options, OmgContext &o Status ret = impl_->graph_manager_.Initialize(options); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, "Graph manager initialize failed."); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, "[Call][Initialize] Graph manager initialize failed."); return GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED; } // get ek file @@ -430,7 +454,7 @@ Status GeGenerator::Finalize() { GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); Status ret = impl_->graph_manager_.Finalize(); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, "Graph manager finalize failed."); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, "[Call][Finalize] Graph manager finalize failed."); return GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED; } return SUCCESS; @@ -454,9 +478,9 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { Status ret = impl_->GenerateInfershapeGraph(graph); if (ret != SUCCESS) { - GELOGE(ret, "Dump infershape json failed"); + GELOGE(ret, "[Call][GenerateInfershapeGraph] Dump infershape json failed"); if (impl_->graph_manager_.Finalize() != SUCCESS) { - GELOGE(FAILED, "graph_manager finalize fail."); + GELOGE(FAILED, "[Call][Finalize] graph_manager finalize fail."); } return ret; } @@ -653,9 +677,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr impl_->is_offline_ = is_offline; Status ret = impl_->BuildModel(graph, inputs, ge_root_model); if (ret != SUCCESS) { - GELOGE(ret, "Build model failed."); + GELOGE(ret, "[Build][Model] failed, ret:%d.", ret); if (impl_->graph_manager_.Finalize() != SUCCESS) { - GELOGE(FAILED, "graph_manager finalize fail."); + GELOGE(FAILED, "[Call][Finalize] graph_manager finalize fail."); } return ret; } @@ -679,7 +703,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr } ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); if (ret != SUCCESS) { - GELOGE(ret, "Save model failed"); + GELOGE(ret, "[Save][RootModel] failed, ret:%d, file:%s", ret, file_name_prefix.c_str()); if (impl_->graph_manager_.Finalize() != SUCCESS) { GELOGE(FAILED, "graph_manager finalize fail."); } @@ -764,14 +788,16 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, {op_desc->GetName(), op_desc->GetType(), "inputs size" + FmtToStr(op_desc->GetAllInputsSize()), "tensor size is " + FmtToStr(inputs.size())}); - GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); + GELOGE(PARAM_INVALID, "[Check][Param] Tensor size: %zu, op:%s(%s) Inputs size: %zu, not equal", + inputs.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_desc->GetAllInputsSize()); return PARAM_INVALID; } if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, {op_desc->GetName(), op_desc->GetType(), "outputs size" + FmtToStr(op_desc->GetOutputsSize()), "tensor size is " + 
FmtToStr(outputs.size())}); - GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size: %zu", outputs.size(), op_desc->GetOutputsSize()); + GELOGE(PARAM_INVALID, "[Check][Param] Tensor size: %zu, op:%s(%s) Outputs size: %zu, not equal", + outputs.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_desc->GetOutputsSize()); return PARAM_INVALID; } return SUCCESS; @@ -786,7 +812,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { - GELOGE(PARAM_INVALID, "input param is invalid when build single op!"); + GELOGE(PARAM_INVALID, "[Check][Param] input param is invalid when build single op:%s!", + op_desc->GetName().c_str()); return PARAM_INVALID; } OmgContext &omg_context = (impl_ == nullptr) ? domi::GetContext() : impl_->omg_context_; @@ -805,6 +832,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in fuzz_compile_flag = true; } if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { + REPORT_CALL_ERROR("E19999", "set ATTR_NAME_FUZZ_BUILD failed for %s.", op_desc->GetName().c_str()); GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); return FAILED; } @@ -813,7 +841,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in // 1. Create ComputeGraph. string name = ge::CurrentTimeInStr() + "_" + model_file_name; Graph graph; - GE_CHK_STATUS(BuildSingleOpGraph(op_desc, inputs, outputs, name, graph), "make graph fail."); + GE_CHK_STATUS(BuildSingleOpGraph(op_desc, inputs, outputs, name, graph), + "[Build][Graph] for single op:%s fail.", op_desc->GetName().c_str()); // 2. check engine type when compile online if (model_file_name == kFileNameSuffix) { @@ -838,7 +867,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); if (name_to_ge_model.empty()) { - GELOGE(PARAM_INVALID, "GetSubgraphInstanceNameToModel is empty."); + REPORT_CALL_ERROR("E19999", "GetSubgraphInstanceNameToModel failed."); + GELOGE(PARAM_INVALID, "[Get][Name] GetSubgraphInstanceNameToModel is empty."); return PARAM_INVALID; } const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph(); @@ -869,7 +899,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in } if (!fuzz_build_attrs.empty()) { GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs), - return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed."); + REPORT_CALL_ERROR("E19999", "Set model:%s(id:%u) attr:%s failed.", + ge_model->GetName().c_str(), ge_model->GetModelId(), + ATTR_NAME_FUZZ_BUILD_RES_ATTRS.c_str()); + return FAILED, "Set model:%s(id:%u) attr:%s failed.", + ge_model->GetName().c_str(), ge_model->GetModelId(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS.c_str()); } GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); } else { @@ -998,7 +1032,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & model_helper.SetSaveMode(is_offline_); Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff); if (ret != SUCCESS) { - GELOGE(ret, "Save to om model failed"); + GELOGE(ret, "[Call][SaveToOmModel] Save to om model failed"); return ret; } return SUCCESS; @@ -1009,12 +1043,15 @@ Status 
GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo bool is_unknown_shape = false; auto ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape); if (ret != SUCCESS) { - GELOGE(FAILED, "Check root model is unkonwn shape failed"); + REPORT_CALL_ERROR("E19999", "root model(id:%u) CheckIsUnknownShape failed, ret:%d", + ge_root_model->GetModelId(), ret); + GELOGE(FAILED, "[Check][RootModel] is unkonwn shape failed, ret:%d", ret); return FAILED; } GELOGD("begin save root model, cur model is unkonwn shape model ? : %d", is_unknown_shape); - GE_CHK_BOOL_EXEC(!ge_root_model->GetSubgraphInstanceNameToModel().empty(), return FAILED, - "ge root model has no sub model") + GE_CHK_BOOL_EXEC(!ge_root_model->GetSubgraphInstanceNameToModel().empty(), + REPORT_CALL_ERROR("E19999", "root model(id:%u) has no sub model.", ge_root_model->GetModelId()); + return FAILED, "[Get][SubModel] ge root model has no sub model") GeModelPtr model_root = nullptr; if (is_unknown_shape) { auto name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); @@ -1038,7 +1075,8 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo model_helper.SetSaveMode(is_offline_); ret = model_helper.SaveToOmRootModel(ge_root_model, save_param_, file_name_prefix, model_buff, is_unknown_shape); if (ret != SUCCESS) { - GELOGE(ret, "Save to om model failed"); + REPORT_CALL_ERROR("E19999", "SaveToOmRootModel failed, ret:%d, model id:%u", ret, ge_root_model->GetModelId()); + GELOGE(ret, "[Call][SaveToOmRootModel] failed, ret:%d, model id:%u", ret, ge_root_model->GetModelId()); return ret; } return SUCCESS; @@ -1051,7 +1089,8 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector const std::map options; Status ret = graph_manager_.AddGraph(graph_id, graph, options, omg_context_); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", graph_id); + REPORT_CALL_ERROR("E19999", "add graph(id:%u) failed, ret:%d", graph_id, ret); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "[Add][Graph] fail, graph id: %u", graph_id); (void)graph_manager_.Finalize(); return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; } @@ -1075,7 +1114,8 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", graph_id); + REPORT_CALL_ERROR("E19999", "build graph failed, graph id:%u, ret:%d", graph_id, ret); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "[Build][Graph] fail, graph id: %u", graph_id); ret = GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } @@ -1091,14 +1131,17 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { const std::map options; Status ret = graph_manager_.AddGraph(graph_id, graph, options, omg_context_); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, graph id: %u", graph_id); + REPORT_CALL_ERROR("E19999", "add graph failed, graph id:%u, ret:%d", graph_id, ret); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "[Add][Graph] failed, graph id: %u", graph_id); (void)graph_manager_.Finalize(); return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; } ret = graph_manager_.GenerateInfershapeGraph(graph_id); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager 
generate graph failed"); + REPORT_CALL_ERROR("E19999", "GenerateInfershapeGraph failed, graph id:%u, ret:%d", graph_id, ret); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, + "[Generate][Graph] failed, graph id:%u, ret:%d", graph_id, ret); return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } diff --git a/ge/generator/generator_api.cc b/ge/generator/generator_api.cc index 675b8811..b64a9eb3 100644 --- a/ge/generator/generator_api.cc +++ b/ge/generator/generator_api.cc @@ -23,22 +23,24 @@ #include "graph/op_desc.h" #include "graph/utils/tensor_utils.h" -#define CHECK_PARAM_NOT_NULL(param) \ - do { \ - if (param == nullptr) { \ - GELOGE(ge::PARAM_INVALID, "Param: %s is null.", #param); \ - return ge::PARAM_INVALID; \ - } \ +#define CHECK_PARAM_NOT_NULL(param) \ + do { \ + if (param == nullptr) { \ + REPORT_INNER_ERROR("E19999", "param:%s is null", #param); \ + GELOGE(ge::PARAM_INVALID, "[Check][Param] %s is null.", #param); \ + return ge::PARAM_INVALID; \ + } \ } while (0) -#define CHECK_PARAM_OBJECT(object, param) \ - ({ \ - object *obj_value = reinterpret_cast(param); \ - if (obj_value == nullptr) { \ - GELOGE(ge::PARAM_INVALID, "Param: %s is null.", #param); \ - return ge::PARAM_INVALID; \ - } \ - obj_value; \ +#define CHECK_PARAM_OBJECT(object, param) \ + ({ \ + object *obj_value = reinterpret_cast(param); \ + if (obj_value == nullptr) { \ + REPORT_INNER_ERROR("E19999", "param:%s is null.", #param); \ + GELOGE(ge::PARAM_INVALID, "[Check][Param] %s is null.", #param); \ + return ge::PARAM_INVALID; \ + } \ + obj_value; \ }) class OpAttr { @@ -118,6 +120,8 @@ Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int std::string op_name = std::string(op_type) + "_" + std::to_string(ge::GetCurrentTimestamp()); ge::OpDescPtr op_desc = ge::MakeShared(op_name, op_type); if (op_desc == nullptr) { + REPORT_CALL_ERROR("E19999", "MakeShared ge::OpDesc failed, as return nullptr"); + GELOGE(ge::FAILED, "[Call][MakeShared] create ge::OpDesc failed."); return ge::FAILED; } std::vector inputs; @@ -132,7 +136,8 @@ Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int ge::TensorUtils::SetOutputTensor(tensor_desc, false); if (op_desc->AddInputDesc(tensor_desc) != ge::GRAPH_SUCCESS) { - GELOGE(ge::FAILED, "AddInputDesc fail."); + REPORT_CALL_ERROR("E19999", "add inputdesc failed, op:%s", op_desc->GetName().c_str()); + GELOGE(ge::FAILED, "[Add][InputDesc] fail, op:%s.", op_desc->GetName().c_str()); return ge::FAILED; } inputs.emplace_back(tensor_desc); @@ -157,6 +162,8 @@ Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int OpAttr *op_attr = CHECK_PARAM_OBJECT(OpAttr, attr); for (const auto &it : op_attr->Attrs()) { GE_IF_BOOL_EXEC(op_desc->SetAttr(it.first, it.second) != ge::SUCCESS, GELOGE(ge::FAILED, "SetAttr failed."); + REPORT_CALL_ERROR("E19999", "set attr:%s failed, op:%s", + it.first.c_str(), op_desc->GetName().c_str()); return ge::FAILED); } } diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index bcd80b0c..8b172e63 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -79,7 +79,8 @@ Status HandleSubgraphDataNode(NodePtr &src_node, OutDataAnchorPtr &src_out_ancho if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, index)) { REPORT_INNER_ERROR("E19999", "get attr:%s failed from node:%s", ATTR_NAME_PARENT_NODE_INDEX.c_str(), src_node->GetName().c_str()); - GELOGE(FAILED, "Get attr ATTR_NAME_PARENT_NODE_INDEX failed, node:%s.", 
src_node->GetName().c_str()); + GELOGE(FAILED, "[Get][Attr] %s failed, node:%s.", ATTR_NAME_PARENT_NODE_INDEX.c_str(), + src_node->GetName().c_str()); return FAILED; } const NodePtr &parent_node = src_node->GetOwnerComputeGraph()->GetParentNode(); @@ -113,7 +114,8 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { REPORT_INNER_ERROR("E19999", "check gelib instance null, graph:%s", graph->GetName().c_str()); - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GraphBuilder: GE is not initialized"); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][GELib] GraphBuilder: GE is not initialized, graph:%s", + graph->GetName().c_str()); return GE_CLI_GE_NOT_INITIALIZED; } @@ -127,7 +129,7 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { if (kernel_lib_name.empty()) { REPORT_INNER_ERROR("E19999", "op kernel lib is empty in node:%s(%s)", node_ptr->GetName().c_str(), node_ptr->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Get node:%s(%s) kernel lib failed.", node_ptr->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "[Get][KernelLibName] of node:%s(%s) failed.", node_ptr->GetName().c_str(), node_ptr->GetType().c_str()); return INTERNAL_ERROR; } @@ -137,7 +139,7 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "Set node:%s(%s) inputDesc size failed", node_ptr->GetName().c_str(), node_ptr->GetType().c_str()); - GELOGE(ret, "Set node inputDesc size failed, node name is %s", node_ptr->GetName().c_str()); + GELOGE(ret, "[Set][InputSize] to node:%s failed.", node_ptr->GetName().c_str()); return ret; } @@ -145,7 +147,7 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call Calculate op:%s(%s) running param failed", node_ptr->GetName().c_str(), node_ptr->GetType().c_str()); - GELOGE(ret, "Calculate op running param failed, node name is %s", node_ptr->GetName().c_str()); + GELOGE(ret, "[Call][Calculate] op running param failed, node name is %s", node_ptr->GetName().c_str()); return ret; } GE_CHK_STATUS_RET(AddOutputMemTypeForNode(node_ptr)); @@ -202,7 +204,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { if (comp_graph == nullptr) { REPORT_INNER_ERROR("E19999", "check compute_graph nullptr, session_id:%lu", session_id); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] comp_graph is null, session_id:%lu", session_id); return GE_GRAPH_PARAM_NULLPTR; } ge_root_model_ptr = MakeShared(comp_graph); @@ -216,12 +218,13 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_ if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) { GE_CHK_STATUS_RET( BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), - "Build for dynamic shape graph failed."); + "[Build][DynamicShapeGraph] failed, graph:%s, session id:%lu.", comp_graph->GetName().c_str(), session_id); return SUCCESS; } GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), - "Build for known shape graph failed."); + "[Build][KnownShapeGraph] failed, graph:%s, session id:%lu.", + comp_graph->GetName().c_str(), session_id); ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); return SUCCESS; } @@ -229,28 +232,29 
@@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { if (ge::GetContext().GetHostExecFlag()) { - GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); + GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), + "[Build][HostCpuGraph] failed, graph:%s, session id:%lu.", + comp_graph->GetName().c_str(), session_id); return SUCCESS; } ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kPreBuild); GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); Status ret = SecondPartition(comp_graph); - GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(ret, "[Call][SecondPartition] for Graph[%s] failed.", comp_graph->GetName().c_str()); auto subgraph_map = graph_partitioner_.GetSubGraphMap(); GE_TIMESTAMP_START(BuildSubgraph); ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); GE_DUMP(comp_graph, "BeforePreBuildModel"); GE_TIMESTAMP_START(PreBuildModel); - GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", + GE_CHK_STATUS_RET(builder.PreBuildModel(), "[PreBuild][Model] failed, Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); GE_DUMP(comp_graph, "AfterPreBuildModel"); GE_TIMESTAMP_START(CalcOpParam); - GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.", - comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "[Calc][OpParam] fail, Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); GE_DUMP(comp_graph, "AfterCalcOpParam"); @@ -259,7 +263,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, return MEMALLOC_FAILED; } GE_TIMESTAMP_START(BuildModelForGetTask); - GE_CHK_STATUS_RET(builder.BuildModelForGetTask(*model_ptr), "Graph[%s] builder BuildModelForGetTask() return fail.", + GE_CHK_STATUS_RET(builder.BuildModelForGetTask(*model_ptr), "[Build][Model] ForGetTask fail, Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(BuildModelForGetTask, "GraphBuilder::BuildModelForGetTask"); GE_DUMP(comp_graph, "AfterBuildModel"); @@ -270,7 +274,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GE_TIMESTAMP_END(GetTaskInfo, "GraphBuilder::GetTaskInfo"); GE_DUMP(comp_graph, "AfterGetTask"); if (ret != SUCCESS) { - GELOGE(ret, "Graph[%s] builder GetTaskInfo() return fail.", comp_graph->GetName().c_str()); + GELOGE(ret, "[Get][TaskInfo] fail, Graph[%s].", comp_graph->GetName().c_str()); return ret; } @@ -280,7 +284,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, return MEMALLOC_FAILED; } GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), - "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); + "[Save][Data] ToModel fail, Graph[%s].", comp_graph->GetName().c_str()); GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(BuildSubgraph, "GraphBuilder::Build"); return SUCCESS; @@ -315,7 +319,7 @@ Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) { if (weights.empty()) { 
REPORT_INNER_ERROR("E19999", "check weights size of node %s(%s) is empty", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] weights size of node %s is empty", node->GetName().c_str()); return FAILED; } GeTensorPtr weight = weights[0]; @@ -342,23 +346,21 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); GE_DUMP(comp_graph, "BeforePreBuildModel"); GE_TIMESTAMP_START(PreBuildModel); - GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", - comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(builder.PreBuildModel(), "[PreBuild][Model] fail, Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); GE_DUMP(comp_graph, "AfterPreBuildModel"); GE_TIMESTAMP_START(CalcOpParam); - GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.", - comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "[Calc][OpParam] fail, Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); GE_DUMP(comp_graph, "AfterCalcOpParam"); GE_TIMESTAMP_START(SetConstantInputOffset); GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph), - "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); + "[Set][Offset] Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(SetConstantInputOffset, "GraphBuilder::SetConstantInputOffset"); GE_TIMESTAMP_START(MergeWeights); - GE_CHK_STATUS_RET(builder.MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(builder.MergeWeights(), "[Merge][Weights] failed for Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); ModelPtr model_ptr = MakeShared(); @@ -367,7 +369,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo } GE_TIMESTAMP_START(BuildModelForGetDynShapeTask); GE_CHK_STATUS_RET(builder.BuildModelForGetDynShapeTask(*model_ptr), - "Graph[%s] builder BuildModelForGetDynShapeTask() return fail.", comp_graph->GetName().c_str()); + "[Build][Model] ForGetDynShapeTask fail, Graph[%s].", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(BuildModelForGetDynShapeTask, "GraphBuilder::BuildModelForGetDynShapeTask"); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kTaskGenerate); GE_TIMESTAMP_START(GetTaskInfo); @@ -378,7 +380,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo GraphUtils::DumpGEGraph(comp_graph, "AfterGetTask"); GraphUtils::DumpGEGraphToOnnx(*comp_graph, "AfterGetTask"); if (ret != SUCCESS) { - GELOGE(ret, "Graph[%s] builder GetTaskInfo() return fail.", comp_graph->GetName().c_str()); + GELOGE(ret, "[Get][TaskInfo] fail, Graph[%s].", comp_graph->GetName().c_str()); return ret; } ge_model_ptr = MakeShared(); @@ -386,7 +388,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo return MEMALLOC_FAILED; } GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), - "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); + "[Save][Data] ToModel fail, Graph[%s].", comp_graph->GetName().c_str()); 
GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str()); return SUCCESS; } @@ -433,8 +435,9 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), REPORT_INNER_ERROR("E19999", "Multiply result is out of range when calc profiling ar log id " "for node:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); + GELOGE(FAILED, "[Check][Param] Multiply result is out of range, node:%s(%s)", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); + return FAILED); int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid; (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); continue; @@ -458,14 +461,14 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GE_CHECK_NOTNULL(op_desc); op_desc->SetStreamId(kInvalidStream); if (node->GetType() == DATA) { - GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "Calc dynamic shape root graph data[%s] size failed.", + GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "[Calc][DynShapeRootGraphDataSize] failed, op:%s.", op_desc->GetName().c_str()); } } // Set fp bp profiling task attr for graph if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) { - GELOGE(FAILED, "Set fp bp profiling task attr for graph."); + GELOGE(FAILED, "[Mark][TaskAttr]Set fp bp profiling task attr for graph:%s failed.", comp_graph->GetName().c_str()); return FAILED; } @@ -482,18 +485,20 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, if (sub_graph->GetGraphUnknownFlag()) { // unknown shape build flow GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), - "Build for unknown shape graph failed."); + "[Build][Graph] as unknown shape failed, session id:%lu.", session_id); } else { // reset functional subgraph parent graph as known subgraph for (const auto &node : sub_graph->GetDirectNode()) { for (const auto &sub_graph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) { auto sub_sub_graph = comp_graph->GetSubgraph(sub_graph_name); - GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), "Failed add subgraph to known graph."); + GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), + "[Add][SubGraph] %s to known graph:%s failed.", sub_sub_graph->GetName().c_str(), + sub_graph->GetName().c_str()); } } // known shape build flow GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), - "Build for known shape graph failed."); + "[Build][Graph] for known shape failed, session id:%lu.", session_id); } ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); } @@ -510,19 +515,20 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr int64_t memory_size = 0; if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_MEMORY_SIZE, memory_size)) { REPORT_INNER_ERROR("E19999", "Get Attr:%s fail in model", ATTR_MODEL_MEMORY_SIZE.c_str()); - GELOGE(INTERNAL_ERROR, "Get memory size fail."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] memory size fail, graph:%s, session id:%lu.", comp_graph->GetName().c_str(), + session_id); return INTERNAL_ERROR; } int64_t p2p_memory_size = 0; if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_memory_size)) { REPORT_INNER_ERROR("E19999", "Get Attr:%s fail in model", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); - GELOGE(INTERNAL_ERROR, "Get p2p memory size 
fail."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s fail in model", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); return INTERNAL_ERROR; } int64_t weight_size = 0; if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_WEIGHT_SIZE, weight_size)) { REPORT_INNER_ERROR("E19999", "Get Attr:%s fail in model", ATTR_MODEL_WEIGHT_SIZE.c_str()); - GELOGE(INTERNAL_ERROR, "Get weight memory size fail."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s fail in model", ATTR_MODEL_WEIGHT_SIZE.c_str()); return INTERNAL_ERROR; } @@ -548,20 +554,20 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr Status ret = run_context.InitMemInfo(get_mem_base, memory_size, mem_type_to_data_mem_base, mem_type_to_data_mem_size, get_weight_mem_base, weight_size); if (ret != SUCCESS) { - GELOGE(ret, "task_generator init mem info fail."); + GELOGE(ret, "[Init][MemInfo] fail, ret:%d.", ret); return ret; } auto weight_buffer = builder.GetWeightBuffer(); ret = run_context.CreateRunContext(*model_ptr, comp_graph, weight_buffer, session_id); if (ret != SUCCESS) { - GELOGE(ret, "runContext create run context fail."); + GELOGE(ret, "[Create][RunContext] fail, ret:%d, graph:%s.", ret, comp_graph->GetName().c_str()); return ret; } StreamGraphOptimizer stream_optimizer; ret = stream_optimizer.OptimizeStreamedSubGraph(comp_graph, subgraph_map, run_context.GetRunContext()); if (ret != SUCCESS) { - GELOGE(ret, "Optimize streamed subGraph fail."); + GELOGE(ret, "[Optimize][StreamedSubGraph] fail, graph:%s.", comp_graph->GetName().c_str()); return ret; } GE_DUMP(comp_graph, "AfterOptimizeStreamedSubGraph"); @@ -578,13 +584,13 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { if (node_ptr->GetType() == DATA) { bool is_unknown_shape = false; GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape), - "Get data node[%s] shape status failed!", node_ptr->GetName().c_str()); + "[Get][Status] of data node[%s] shape failed!", node_ptr->GetName().c_str()); if (is_unknown_shape) { GELOGD("data node: %s is unknown shape, do not set input size!", node_ptr->GetName().c_str()); return SUCCESS; } if (UpdateDataInputSize(node_ptr) != SUCCESS) { - GELOGE(FAILED, "Update data input size failed."); + GELOGE(FAILED, "[Update][Data] input size failed, node:%s.", node_ptr->GetName().c_str()); return FAILED; } } @@ -632,7 +638,7 @@ Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) { const auto &op_desc = node_ptr->GetOpDesc(); if (op_desc == nullptr) { REPORT_INNER_ERROR("E19999", "check op_desc is nullptr"); - GELOGE(FAILED, "Op desc is nullptr."); + GELOGE(FAILED, "[Check][Param] Op desc is nullptr."); return FAILED; } // data op only has one output anchor @@ -651,7 +657,7 @@ Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) { if (graph_status != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed for op:%s(%s) index:0", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Get tensor size in bytes failed."); + GELOGE(FAILED, "[Get][TensorSize] in bytes failed, op:%s.", op_desc->GetName().c_str()); return FAILED; } // data op only has one input anchor @@ -660,7 +666,7 @@ Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) { if (op_desc->UpdateInputDesc(0, input_desc) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Update input desc size failed for op:%s(%s) index:0", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Update input desc size failed."); + GELOGE(FAILED, 
"[Update][InputDesc] failed, op:%s.", op_desc->GetName().c_str()); return FAILED; } } @@ -690,7 +696,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) if (graph_status != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed for op:%s(%s) index:0 ", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Get tensor size in bytes failed."); + GELOGE(FAILED, "[Get][TensorSize] in bytes failed, op:%s.", op_desc->GetName().c_str()); return FAILED; } @@ -699,7 +705,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) if (op_desc->UpdateOutputDesc(0, output_desc) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Update output desc size failed for op:%s(%s) index:0 ", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Update dynamic shape graph data output desc size failed."); + GELOGE(FAILED, "[Update][OutputDesc] for dynamic shape graph data failed, op:%s.", op_desc->GetName().c_str()); return FAILED; } } @@ -710,15 +716,13 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { GE_TIMESTAMP_START(GraphPartition2); auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); if (ret != SUCCESS) { - GELOGE(ret, "Graph partition Failed"); + GELOGE(ret, "[Call][Partition] for Graph Failed"); return ret; } - GE_CHK_STATUS_RET(ret, "Graph partition Failed."); const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) { - REPORT_INNER_ERROR("E19999", "find subgraphlis in graph:%s failed", - comp_graph->GetName().c_str()); - GELOGE(FAILED, "Find subgraph failed."); + REPORT_INNER_ERROR("E19999", "find subgraphlis in graph:%s failed", comp_graph->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] Find subgraph graph:%s failed.", comp_graph->GetName().c_str()); return FAILED; } GE_TIMESTAMP_END(GraphPartition2, "GraphPartitioner::Partition2"); @@ -749,18 +753,18 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { REPORT_INNER_ERROR("E19999", "Set Attr:%s for node:%s(%s) out_index:%u failed", ATTR_OUTPUT_MEMORY_TYPE.c_str(), src_desc->GetName().c_str(), src_desc->GetType().c_str(), src_out_anchor->GetIdx()); - GELOGE(INTERNAL_ERROR, "Set out_memory_type attr for [%s:%d] failed.", src_desc->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "[Set][Attr] out_memory_type for [%s:%d] failed.", src_desc->GetName().c_str(), src_out_anchor->GetIdx()); return INTERNAL_ERROR; } switch (TransferNodeType(src_node)) { case kSubgraphNode: - GE_CHK_STATUS_RET(HandleSubgraphNode(src_node, src_out_anchor), "Handle subgraph node %s failed", - src_node->GetName().c_str()); + GE_CHK_STATUS_RET(HandleSubgraphNode(src_node, src_out_anchor), + "[Handle][Node] %s in subgraph failed", src_node->GetName().c_str()); break; case kSubgraphData: - GE_CHK_STATUS_RET(HandleSubgraphDataNode(src_node, src_out_anchor), "Handle Data node %s in subgraph failed", - src_node->GetName().c_str()); + GE_CHK_STATUS_RET(HandleSubgraphDataNode(src_node, src_out_anchor), + "[Handle][DataNode] %s in subgraph failed", src_node->GetName().c_str()); break; case kOthers: default: diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc index b6ef8dc9..32bdd0a3 100644 --- a/ge/graph/build/label_allocator.cc +++ b/ge/graph/build/label_allocator.cc @@ -29,7 +29,7 @@ LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(gr 
Status LabelAllocator::AssignFunctionalLabels() { if (compute_graph_ == nullptr) { REPORT_INNER_ERROR("E19999", "check param compute_graph nullptr"); - GELOGE(INTERNAL_ERROR, "ComputeGraph not set, Assign labels failed."); + GELOGE(INTERNAL_ERROR, "[Check][Param] ComputeGraph not set, Assign labels failed."); return INTERNAL_ERROR; } @@ -49,14 +49,14 @@ Status LabelAllocator::AssignFunctionalLabels() { if (maker == nullptr) { REPORT_CALL_ERROR("E19999", "Check Node:%s(%s) label maker not registed", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Node: %s label maker not registed.", node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "[Create][LabelMaker] Node: %s label maker not registed.", node->GetType().c_str()); return INTERNAL_ERROR; } if (maker->Run(label_index) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Node:%s(%s) run label maker failed", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Node: %s run label maker failed.", node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "[Call][Run] Node: %s run label maker failed.", node->GetType().c_str()); return INTERNAL_ERROR; } } @@ -69,7 +69,7 @@ Status LabelAllocator::AssignFunctionalLabels() { bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set &functional_nodes) { if (graph == nullptr) { REPORT_INNER_ERROR("E19999", "check param compute_graph nullptr"); - GELOGE(INTERNAL_ERROR, "Sub ComputeGraph is null."); + GELOGE(INTERNAL_ERROR, "[Check][Param] Sub ComputeGraph is null."); return false; } @@ -82,7 +82,7 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::setGetName().c_str(), func_node->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Parent functional node not set: %s.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Node] Parent functional node not set: %s.", graph->GetName().c_str()); return false; } @@ -90,7 +90,7 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::setGetName().c_str(), func_node->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", func_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Graph] ComputeGraph owner not set: %s.", func_node->GetName().c_str()); return false; } diff --git a/ge/graph/build/logical_stream_allocator.cc b/ge/graph/build/logical_stream_allocator.cc index 88b4a97f..c74cdf7a 100644 --- a/ge/graph/build/logical_stream_allocator.cc +++ b/ge/graph/build/logical_stream_allocator.cc @@ -322,7 +322,7 @@ Status SingleStreamPass::Run(ComputeGraphPtr graph, const vector &s if (!stream_label.empty()) { REPORT_INNER_ERROR("E19999", "Stream labels are not supported in SingleStream mode " "(subgraph: %s, stream label: %s)", subgraph->name.c_str(), stream_label.c_str()); - GELOGE(INTERNAL_ERROR, "Stream labels are not supported (subgraph: %s, stream label: %s).", + GELOGE(INTERNAL_ERROR, "[Get][Label] Stream labels are not supported (subgraph: %s, stream label: %s).", subgraph->name.c_str(), stream_label.c_str()); return INTERNAL_ERROR; } @@ -341,8 +341,8 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vectorname.c_str(), engine_name.c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph %s has not yet been assigned a stream (engine: %s).", subgraph->name.c_str(), - engine_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Subgraph %s has not yet been assigned a stream (engine: %s).", + subgraph->name.c_str(), engine_name.c_str()); return INTERNAL_ERROR; } else { 
GELOGI("[Assign][StreamId] %ld for Subgraph %s (engine: %s).", subgraph->stream_id, subgraph->name.c_str(), @@ -402,7 +402,7 @@ Status UpdateForParallelGroupPass::Run(ComputeGraphPtr graph, const vectorGetName().c_str()); + GELOGE(FAILED, "[Get][Attr] ATTR_NAME_PARALLEL_GROUP of node %s failed.", op_desc->GetName().c_str()); REPORT_INNER_ERROR("E19999", "Get node %s ATTR_NAME_PARALLEL_GROUP failed.", op_desc->GetName().c_str()); return FAILED; } @@ -606,7 +606,7 @@ Status LogicalStreamAllocator::Assign(const ComputeGraphPtr &root_graph, const G Status status = DoAssign(root_graph, subgraph_map, engine_confs); if (status != SUCCESS) { - GELOGE(status, "Assign streams failed."); + GELOGE(status, "[Assign][Streams] failed, graph:%s.", root_graph->GetName().c_str()); return status; } @@ -614,7 +614,7 @@ Status LogicalStreamAllocator::Assign(const ComputeGraphPtr &root_graph, const G for (const ComputeGraphPtr &subgraph : subgraphs) { Status status = DoAssign(subgraph, subgraph_map, engine_confs); if (status != SUCCESS) { - GELOGE(status, "Assign streams failed."); + GELOGE(status, "[Assign][Streams] failed, graph:%s.", subgraph->GetName().c_str()); return status; } } @@ -642,7 +642,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap if (iter == subgraph_map.end()) { REPORT_INNER_ERROR("E19999", "Graph %s not found in subgraph_map when do logical stream assign ", graph->GetName().c_str()); - GELOGE(FAILED, "Graph %s not found.", graph->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] Graph %s not found.", graph->GetName().c_str()); return FAILED; } @@ -652,7 +652,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap Status status = ConvertSubgraphs(subgraph_info_list, engine_confs, subgraphs); GE_TIMESTAMP_END(ConvertSubgraphs, "GraphBuilder::AssignStreamConvertSubgraphs"); if (status != SUCCESS) { - GELOGE(status, "Create subgraphs failed."); + GELOGE(status, "[Convert][SubGraphs] failed."); return status; } @@ -683,8 +683,8 @@ Status LogicalStreamAllocator::ConvertSubgraphs(const vector &s if ((engine_conf_iter == engine_confs.end()) || (engine_conf_iter->second == nullptr)) { REPORT_INNER_ERROR("E19999", "Engine conf of subgraph %s not found (engine name: %s)", subgraph_name.c_str(), engine_name.c_str()); - GELOGE(INTERNAL_ERROR, "Engine conf of subgraph %s not found (engine name: %s).", subgraph_name.c_str(), - engine_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Engine conf of subgraph %s not found (engine name: %s).", + subgraph_name.c_str(), engine_name.c_str()); return INTERNAL_ERROR; } @@ -731,7 +731,7 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec GELOGD("[Show][Status]Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); } else { REPORT_CALL_ERROR("E19999", "Stream pass %s run failed.", pass->GetName().c_str()); - GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str()); + GELOGE(status, "[Call][Run] Stream pass %s failed.", pass->GetName().c_str()); return status; } } diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 9825d1ed..9b81eae3 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -239,6 +239,10 @@ bool MemoryBlock::IsSameBatchLabel() { return all_same_label; } +bool MemoryBlock::CanReuse(int32_t thread_scope_id) const { + return (thread_scope_id_.find(thread_scope_id) == thread_scope_id_.end()); +} + bool 
CanNotLifeReuse(MemoryBlock *block) { if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) { return true; } @@ -283,6 +287,14 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ if (CanNotLifeReuse(this) || CanNotLifeReuse(block) || (batch_label_ != block->batch_label_)) { return; } + + // not same thread scope id can reuse + for (auto thread_scope_id : ThreadScopeId()) { + if (!block->CanReuse(thread_scope_id)) { + return; + } + } + if (block->continuous_block_) { AddContinuousLifeReuseBlock(block, total_node_depend_stream_life); return; } @@ -431,7 +443,7 @@ void SetLastUsedInputMemAttr(NodePtr &node, int input_index) { auto node_op_desc = node->GetOpDesc(); if (node_op_desc != nullptr) { auto input_desc = node_op_desc->MutableInputDesc(input_index); - if (!ge::AttrUtils::SetInt(*input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) { + if (!ge::AttrUtils::SetBool(*input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) { GELOGW("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true failed.", node_op_desc->GetName().c_str(), input_index); return; @@ -488,6 +500,7 @@ string MemoryBlock::String() { ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " "; ss << "real_size_list: " << ToString(real_size_list_) << " "; ss << "ref_count: " << ref_count_ << " "; + ss << "reuse_mem_: " << reuse_mem_ << " "; ss << "members: "; for (auto x : NodeTypeIndexList()) { ss << "__node: " << ToString(x) << " "; @@ -501,8 +514,8 @@ string MemoryBlock::String() { BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), - symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} + : compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), + anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); @@ -659,7 +672,12 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { return false; } -bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { +bool CanReuseBlock(int32_t thread_scope_id, size_t continuous_life_begin, const MemoryBlock &reusable_block, + size_t block_size) { + if (!reusable_block.CanReuse(thread_scope_id)) { + return false; + } + bool can_reuse = false; if (reusable_block.Size() == block_size) { // in some continuous input case, continuous first input node's is not same as topo first node.
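The block_mem_assigner changes above add a thread-scope guard to memory reuse: each block records the thread scope ids of the nodes already placed in it, MemoryBlock::CanReuse rejects a candidate whose scope id is already present, and both CanReuseBlock and AddLifeReuseBlock consult that check before sharing a block. A self-contained sketch of the idea follows, using a reduced Block type rather than the real MemoryBlock class.

#include <cstdint>
#include <iostream>
#include <set>

// Simplified stand-in for the bookkeeping added in this patch: a block keeps
// the thread scope ids of the nodes assigned to it, and may only be reused by
// a node whose scope id is not yet in that set.
constexpr int32_t kInvalidThreadScopeId = -1;

struct Block {
  std::set<int32_t> thread_scope_ids;

  void AddNode(int32_t thread_scope_id) {
    if (thread_scope_id != kInvalidThreadScopeId) {
      thread_scope_ids.insert(thread_scope_id);
    }
  }

  // Mirrors MemoryBlock::CanReuse: reusable only if the scope id is unseen.
  bool CanReuse(int32_t thread_scope_id) const {
    return thread_scope_ids.find(thread_scope_id) == thread_scope_ids.end();
  }
};

// Mirrors the loop added to AddLifeReuseBlock: two blocks may share memory
// only when none of this block's scope ids already appears in the candidate.
bool CanLifeReuse(const Block &self, const Block &candidate) {
  for (int32_t id : self.thread_scope_ids) {
    if (!candidate.CanReuse(id)) {
      return false;
    }
  }
  return true;
}

int main() {
  Block a, b;
  a.AddNode(1);
  b.AddNode(2);
  std::cout << CanLifeReuse(a, b) << "\n";  // 1: disjoint scope ids, reuse allowed
  b.AddNode(1);
  std::cout << CanLifeReuse(a, b) << "\n";  // 0: scope id 1 overlaps, no reuse
  return 0;
}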
@@ -781,7 +799,8 @@ bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { } auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); + GELOGE(FAILED, "[Get][OpDesc] Node[%s] output[%u] peer in node desc is null.", + n->GetName().c_str(), out_index); return false;); if(peer_in_node_desc->GetId() > max_node_life_time) { @@ -1057,7 +1076,8 @@ void BlockMemAssigner::UpdateOpTensorMemType(std::list node_index_i bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { if (n == nullptr) { - GELOGE(FAILED, "Node is null."); + REPORT_INNER_ERROR("E19999", "param n is nullptr, check invalid."); + GELOGE(FAILED, "[Check][Param] Node is null."); return false; } @@ -1065,7 +1085,8 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { bool is_output_continuous = false; auto node_desc = n->GetOpDesc(); if (node_desc == nullptr) { - GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "param node:%s opdesc is nullptr, check invalid.", n->GetName().c_str()); + GELOGE(FAILED, "[Get][OpDesc] Node[%s] nodedesc is null.", n->GetName().c_str()); return false; } @@ -1103,7 +1124,7 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, - const bool continuous, int64_t memory_type) { + const bool continuous, uint64_t memory_type) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( n == nullptr, REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); @@ -1122,6 +1143,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } bool is_reuse_memory = false; + int32_t thread_scope_id = kInvalidThreadScopeId; + (void)ge::AttrUtils::GetInt(node_op_desc, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id); if (ge_disable_reuse_mem_env_ != "1") { bool reuse_mem_flag = (mem_type == kOutput) ? 
IsPreReuse(n, out_index) : !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); @@ -1141,8 +1164,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams - if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { - reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, + if (CanReuseBlock(thread_scope_id, continuous_life_begin_, *reusable_block, block_size)) { + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_, thread_scope_id}, real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); @@ -1168,7 +1191,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); + block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_, thread_scope_id}, + real_size, no_align_size); block->stream_id_ = node_op_desc->GetStreamId(); block->continuous_block_ = continuous; block->batch_label_ = batch_label; @@ -1430,7 +1454,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, auto op_desc = owner_node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, continue); Params *instance = Params::Instance(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(instance == nullptr, return nullptr, "Params instance is nullptr."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(instance == nullptr, + REPORT_INNER_ERROR("E19999", "Params instance is nullptr."); + return nullptr, "[Get][Instance] Params instance is nullptr."); if (!((instance->GetTarget() == TARGET_TYPE_TINY) && (op_desc->GetType() == NETOUTPUT))) { out_count++; } @@ -1442,7 +1468,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, bool IsOutputBlock(const ge::InDataAnchorPtr &in_data_anchor) { auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, GELOGE(FAILED, "Peer out anchor is nullptr."); return false); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, + REPORT_INNER_ERROR("E19999", "Peer out anchor is nullptr."); + GELOGE(FAILED, "[Check][Param] Peer out anchor is nullptr."); return false); auto src = peer_out_anchor->GetOwnerNode(); int32_t index = peer_out_anchor->GetIdx(); auto iter = GetLocalOmgContext().out_nodes_map.find(src->GetName()); @@ -1491,10 +1519,13 @@ bool IsKnownSubgraphData(const NodePtr &node) { void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, bool same_stream) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); - GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); - GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, + return, "[Check][Param] Input parameter to_release is null."); + GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, + return, "[Check][Param] to_release->ref_count_ must greater than 0"); + GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "[Check][Param] doesn't reuse memory"); --to_release->ref_count_; + 
GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory"); if (!same_stream) { to_release->same_stream_ = false; } @@ -1593,13 +1624,13 @@ void CheckAndGetOpReuseEnv(const string &env, vector &env_vec, bool &op_ string env_str; env_str = string(env); if (env_str.size() > kReuseMaxCharNum) { - GELOGE(FAILED, "The OP_NO_REUSE_MEM has more than %d characters.", kReuseMaxCharNum); + GELOGE(FAILED, "[Check][Param] The OP_NO_REUSE_MEM has more than %d characters.", kReuseMaxCharNum); return; } SplitStringByComma(env_str, env_vec); if (env_vec.size() > kReuseMaxOpNum) { - GELOGE(FAILED, "The OP_NO_REUSE_MEM has more than %d nodes.", kReuseMaxOpNum); + GELOGE(FAILED, "[Check][Param] The OP_NO_REUSE_MEM has more than %d nodes.", kReuseMaxOpNum); return; } @@ -1794,8 +1825,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } - int64_t memory_type = RT_MEMORY_HBM; - if (!GetWorkSpaceMemoryType(n, i, memory_type)) { + uint64_t memory_type = RT_MEMORY_HBM; + if (!GetWorkSpaceMemoryType(n, i, memory_type, workspace_reuse_flag)) { GELOGW("Get workspace memory type failed."); return; } @@ -1830,7 +1861,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { } void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type) { + MemoryBlock *mem_block, uint64_t memory_type) { bool reuse_mem_flag = ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; if (reuse_mem_flag) { @@ -1840,7 +1871,9 @@ void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_f void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector &workspace_memory, int64_t &total_size) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, + REPORT_INNER_ERROR("E19999", "param node opdesc is nullptr, check invalid."); + return, "[Check][Param] Op desc is null."); vector workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes(); GELOGD("node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); @@ -1960,24 +1993,29 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } } -void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { - if (block.memory_type_ == RT_MEMORY_HBM) { - if (block.first_continuous_block_) { - mem_offset += MEM_ALIGN_SIZE; - } - block.Resize(); - block.SetHeadOffset(mem_offset); - mem_offset += block.Size(); - block.SetTailOffset(mem_offset - 1); - } else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { - if (block.first_continuous_block_) { - p2p_mem_offset += MEM_ALIGN_SIZE; +void AddBlockMemOffset(std::map &mem_offsets, MemoryBlock &block) { + auto it = mem_offsets.find(block.memory_type_); + if (it == mem_offsets.end()) { + auto result = mem_offsets.insert(std::pair(block.memory_type_, 0)); + // Insert failure is unlikely + if (!result.second) { + return; } - block.Resize(); - block.SetHeadOffset(p2p_mem_offset); - p2p_mem_offset += block.Size(); - block.SetTailOffset(p2p_mem_offset - 1); + it = result.first; } + + if (it == mem_offsets.end()) { + return; + } + + auto &mem_offset = it->second; + if (block.first_continuous_block_) { + mem_offset += MEM_ALIGN_SIZE; + } + block.Resize(); + block.SetHeadOffset(mem_offset); + mem_offset += block.Size(); + 
block.SetTailOffset(mem_offset - 1); } bool DynamicBatchBlockReuse(MemoryBlock &block) { @@ -2004,27 +2042,27 @@ void BlockMemAssigner::ResizeDynamicBatchBlocks() { } } - size_t max_mem_offset = mem_offset_; - size_t max_p2p_mem_offset = p2p_mem_offset_; + std::map max_mem_offsets = mem_offsets_; for (auto &batch_blocks : dynamic_batch_blocks) { - size_t mem_offset = mem_offset_; - size_t p2p_mem_offset = p2p_mem_offset_; + std::map mem_offsets = mem_offsets_; for (auto block : batch_blocks.second) { if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { continue; } - AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); - } - if (mem_offset > max_mem_offset) { - max_mem_offset = mem_offset; + AddBlockMemOffset(mem_offsets, *block); } - if (p2p_mem_offset > max_p2p_mem_offset) { - max_p2p_mem_offset = p2p_mem_offset; + + for (auto &it : mem_offsets) { + auto itmax = max_mem_offsets.find(it.first); + if (itmax == max_mem_offsets.end()) { + max_mem_offsets[it.first] = it.second; + } else if (it.second > itmax->second) { + itmax->second = it.second; + } + GELOGI("Batch:%s memory type:%ld offset:%zu", batch_blocks.first.c_str(), it.first, it.second); } - GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); } - mem_offset_ = max_mem_offset; - p2p_mem_offset_ = max_p2p_mem_offset; + mem_offsets_ = max_mem_offsets; } /// @@ -2042,11 +2080,13 @@ void BlockMemAssigner::ResizeMemoryBlocks() { continue; } - AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block); + AddBlockMemOffset(mem_offsets_, *memory_block); } ResizeDynamicBatchBlocks(); - GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," - "theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); + for (auto it : mem_offsets_) { + GELOGI("Memory type:%ld mem_offset exclude zero_copy_memory:%zu, theory_min_memory_size:%zu", it.first, it.second, + theory_min_memory_size_); + } } /// @@ -2062,7 +2102,13 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, size_t real_size, size_t no_align_size, int32_t child_block_level) { ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); - string graph_name = node_type.node->GetOwnerComputeGraph()->GetName(); + auto owner_graph = node_type.node->GetOwnerComputeGraph(); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(owner_graph == nullptr, return, "owner_graph is null."); + string graph_name = owner_graph->GetName(); + if (owner_graph->GetParentGraph() != nullptr) { + graph_name = owner_graph->GetParentGraph()->GetName(); + } + vector memorys_type; int64_t offset = block->HeadOffset(); size_t end = node_type.life_time_end; @@ -2108,12 +2154,12 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, op_desc->SetWorkspace(workspace_list); } GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " - "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", + "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s] scope[%d]", graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, 
child_block_level, block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, - block->batch_label_.c_str()); + block->batch_label_.c_str(), node_type.thread_scope_id); } void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) { @@ -2176,11 +2222,11 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || - (node_type == ASSIGN) || (node_type == HVDWAIT); + (node_type == CONSTANTOP) || (node_type == HVDWAIT); } -bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { +bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, + vector &workspace_reuse_flag) { memory_type = RT_MEMORY_HBM; vector workspace_memory_type; auto op_desc = node->GetOpDesc(); @@ -2196,6 +2242,20 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, return false; } memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + if (has_workspace_no_reuse_scope && (index < workspace_no_reuse_scope.size()) + && (workspace_no_reuse_scope[index] == kSessionNoReuse)) { + memory_type |= kSessionScopeMemory; + if (workspace_reuse_flag.empty()) { + workspace_reuse_flag.assign(workspace_no_reuse_scope.size(), true); + } + // set to no reuse + workspace_reuse_flag[index] = false; + GELOGI("%s's workspace is session scope no reuse, memory type:%lu.", node->GetName().c_str(), memory_type); + } return true; } } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 474db17c..231cce09 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -33,14 +33,21 @@ namespace ge { const size_t kMaxLifeTime = 0xffffffff; +const int32_t kInvalidThreadScopeId = -1; +const uint64_t kSessionScopeMemory = 0x100000000; +const uint64_t kMemoryTypeMask = 0xffffffff; + +enum MemoryNoReuseScope { kReuse, kSessionNoReuse, kGraphNoReuse }; using DependStreamLife = std::map>; enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) - : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0, + int32_t thread_scope_id = kInvalidThreadScopeId) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin), + thread_scope_id(thread_scope_id) {} ge::NodePtr node = nullptr; OpMemoryType mem_type = kOutput; @@ -48,6 +55,7 @@ struct NodeTypeIndex { bool ref_input = false; size_t life_time_begin = 0; size_t life_time_end = kMaxLifeTime; + int32_t thread_scope_id = kInvalidThreadScopeId; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -143,6 +151,9 @@ class MemoryBlock { same_stream_ = false; } } + if (node_type_index.thread_scope_id != kInvalidThreadScopeId) { + 
thread_scope_id_.insert(node_type_index.thread_scope_id); + } } void AddSymbol(const std::string &symbol) { @@ -154,6 +165,7 @@ class MemoryBlock { const std::vector &RealSizeList() const { return real_size_list_; } const std::vector &ChildBlockList() const { return child_blocks_; } const std::vector &NoAlignSizeList() const { return no_align_size_list_; } + const std::set &ThreadScopeId() const { return thread_scope_id_; } void Resize(); @@ -175,6 +187,8 @@ class MemoryBlock { size_t GetDependLifeBegin(int64_t stream_id, DependStreamLife &node_depend_stream_life); + bool CanReuse(int32_t thread_scope_id) const; + int ref_count_; int64_t stream_id_; bool deleted_block_; @@ -198,6 +212,7 @@ class MemoryBlock { std::vector node_type_index_list_; std::vector symbol_list_; std::vector child_blocks_; + std::set thread_scope_id_; }; class BlockMemAssigner : public MemAssigner { @@ -213,9 +228,7 @@ class BlockMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } @@ -318,14 +331,10 @@ class BlockMemAssigner : public MemAssigner { /// void UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type); - size_t mem_offset_; - size_t p2p_mem_offset_; - + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; - std::vector memory_blocks_; std::vector blocks_store_; - std::vector zero_memory_list_; // ref mapping @@ -369,7 +378,7 @@ class BlockMemAssigner : public MemAssigner { /// MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const ge::NodePtr &n, uint32_t out_index, const std::vector &workspace_reuse_flag, - const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); + const bool is_op_reuse_mem, const bool continuous, uint64_t memory_type); /// /// @ingroup GE @@ -383,7 +392,7 @@ class BlockMemAssigner : public MemAssigner { /// @author /// void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type); + MemoryBlock *mem_block, uint64_t memory_type); /// /// @ingroup GE @@ -446,7 +455,8 @@ class BlockMemAssigner : public MemAssigner { bool IsContinuousOutput(const NodePtr &n); - bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); + bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, + vector &workspace_reuse_flag); void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index b9f80070..8becd90e 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -36,6 +36,9 @@ namespace { const int kAllInputAddrIsAtomic = -1; const int kVirtualInputNodeMemoryReuse = 0; const int kVirtualOutputNodeMemoryReuse = 1; +const int kPrevNextDistanceNum = 2; +const int64_t kInvalidStream = -1; +const char *const kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; // One state per bit cannot be repeated enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 }; @@ -104,11 +107,22 @@ Status GraphMemoryAssigner::AssignMemory() { compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); return 
ge::FAILED; } - MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); - memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - if (mem_assigner->GetP2PMemOffset() >= 0) { - MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); + for (auto pair : mem_assigner->GetMemOffsets()) { + MemoryOffset offset(pair.first, pair.second); + memory_offset_.emplace(pair.first, offset); + } + + // base memtype offset must be exist + auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + } + + it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0); memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); } @@ -221,7 +235,7 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out return SUCCESS; } -Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { +Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); @@ -230,9 +244,12 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, mapGetName().c_str()); + GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), + "[ReAssign][AtomicMemory] Failed! graph:%s", compute_graph_->GetName().c_str()); + GE_CHK_STATUS_RET(AssignBufferPoolMemory(), + "[Assign][BufferPoolMemory] Failed! graph:%s", compute_graph_->GetName().c_str()); size_t total_mem_offset = 0; for (auto pair : memory_offset_) { @@ -258,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_mem_copy_size) { +Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size) { BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); if (priority_assigner == nullptr) { REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s", @@ -1006,7 +1023,9 @@ Status GraphMemoryAssigner::AssignReferenceMemory() { node->GetName().c_str()); auto out_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); + GE_IF_BOOL_EXEC(out_op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "out_op_desc is null."); + GELOGE(ge::FAILED, "[Check][Param] out_op_desc is null."); return ge::FAILED); vector output_list = out_op_desc->GetOutputOffset(); if (out_op_desc->GetOutputsSize() > output_list.size()) { @@ -1231,6 +1250,7 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; + AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); } } @@ -1273,6 +1293,7 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; + AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); 
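[Reviewer note, not part of the patch] With this change the assigners report offsets as a map keyed by a 64-bit memory type: the low 32 bits carry the RT memory kind (e.g. RT_MEMORY_HBM, RT_MEMORY_P2P_DDR) and bit 32 is the session-scope flag. A minimal standalone sketch of how a consumer of GetMemOffsets() could decode that key is below; the constant names follow the diff, while DecodeMemType/CollectOffsets are hypothetical helpers for illustration only.

#include <cstddef>
#include <cstdint>
#include <map>

constexpr uint64_t kSessionScopeMemory = 0x100000000ULL;  // bit 32: session-scope, not reused per graph
constexpr uint64_t kMemoryTypeMask = 0xffffffffULL;       // low 32 bits: RT_MEMORY_* kind

struct DecodedMemType {
  uint64_t base_rt_type;   // e.g. RT_MEMORY_HBM or RT_MEMORY_P2P_DDR
  bool session_scope;      // true when the kSessionScopeMemory bit is set
};

inline DecodedMemType DecodeMemType(uint64_t key) {
  return {key & kMemoryTypeMask, (key & kSessionScopeMemory) != 0};
}

// Usage pattern mirroring the new AssignMemory() loop: every map entry becomes one
// MemoryOffset, and absent RT_MEMORY_HBM / RT_MEMORY_P2P_DDR entries default to 0.
void CollectOffsets(const std::map<uint64_t, std::size_t> &mem_offsets) {
  for (const auto &pair : mem_offsets) {
    DecodedMemType t = DecodeMemType(pair.first);
    (void)t;  // a real caller would emplace MemoryOffset(pair.first, pair.second) here
  }
}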
index_offset.insert(std::make_pair(workspace_index, workspace_offset)); } @@ -1388,6 +1409,9 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); } for (auto pair : memory_offset_) { + if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) { + continue; + } GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } @@ -1944,4 +1968,281 @@ Status GraphMemoryAssigner::AssignBufferPoolMemory() { compute_graph_->GetName().c_str(), mem_type, buffer_pool_mem_assigner.GetMemOffset()); return SUCCESS; } + +// if producer and customers in the same stream, or customers on the same stream when producer not assign a stream, +// then return false. +bool GraphMemoryAssigner::IsOutputVisitedByMultiStream(const NodePtr &peer_out_node, int64_t out_anchor_index) { + GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, return true); + int64_t unique_stream_id = peer_out_node->GetOpDesc()->GetStreamId(); + + GE_IF_BOOL_EXEC(peer_out_node->GetOutDataAnchor(out_anchor_index) == nullptr, return true); + for (const auto &in_data_anchor : peer_out_node->GetOutDataAnchor(out_anchor_index)->GetPeerInDataAnchors()) { + auto node = in_data_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, continue); + if (node->GetOpDesc()->GetStreamId() == kInvalidStream) { + continue; + } + if (unique_stream_id == kInvalidStream) { // peer_out_node not belong to any stream + unique_stream_id = node->GetOpDesc()->GetStreamId(); + continue; + } + if (node->GetOpDesc()->GetStreamId() != unique_stream_id) { + return true; + } + } + return false; +} + +void GraphMemoryAssigner::UpdatePrevNodeInputDesc(const NodePtr &prev_node, + const vector &prev_node_input_index_vec, + int64_t distance) { + GE_IF_BOOL_EXEC(prev_node == nullptr, return); + auto prev_node_op_desc = prev_node->GetOpDesc(); + GE_IF_BOOL_EXEC(prev_node_op_desc == nullptr, return); + + for (const auto prev_node_input_index : prev_node_input_index_vec) { + auto input_desc = prev_node_op_desc->GetInputDesc(prev_node_input_index); + vector prev_next_distances; + if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) { + GELOGW("Get [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed", + prev_node_op_desc->GetName().c_str(), + prev_node_input_index); + continue; + } + + if (prev_next_distances.size() == kPrevNextDistanceNum) { + prev_next_distances[1] = distance; + } else { + GELOGW("Size of prev_next_distances is not %d.", kPrevNextDistanceNum); + continue; + } + if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) { + GELOGW("Set [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.", + prev_node_op_desc->GetName().c_str(), + prev_node_input_index); + continue; + } + + if (prev_node_op_desc->UpdateInputDesc(prev_node_input_index, input_desc) != GRAPH_SUCCESS) { + GELOGW("Update [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.", + prev_node_op_desc->GetName().c_str(), + prev_node_input_index); + continue; + } + GELOGD("Set the next distance[%ld] to node[%s], input index[%ld]", + distance, + prev_node->GetName().c_str(), + prev_node_input_index); + } + return; +} + +void GraphMemoryAssigner::UpdateCurNodeInputDesc(const NodePtr &cur_node, + int64_t cur_node_input_index, + int64_t distance) { + GE_IF_BOOL_EXEC(cur_node == nullptr, return); + 
GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, return); + auto input_desc = cur_node->GetOpDesc()->GetInputDesc(cur_node_input_index); + vector prev_next_distances{distance, -1}; + + if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) { + GELOGW("Set [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.", + cur_node->GetOpDesc()->GetName().c_str(), + cur_node_input_index); + return; + } + if (cur_node->GetOpDesc()->UpdateInputDesc(cur_node_input_index, input_desc) != GRAPH_SUCCESS) { + GELOGW("Update [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.", + cur_node->GetOpDesc()->GetName().c_str(), + cur_node_input_index); + return; + } + GELOGD("Set the prev distance[%ld] to node[%s], input index[%ld]", + distance, + cur_node->GetName().c_str(), + cur_node_input_index); + return; +} + +void GraphMemoryAssigner::CheckNeedCalcDistAndUpdateVisitInfo( + const NodePtr &peer_out_node, + const OutDataAnchorPtr &peer_out_anchor, + size_t matched_mem_offset, + map>> &mem_block_visit_info, + bool &is_need_calc_distance) { + auto iter = mem_block_visit_info.find(matched_mem_offset); + // cannot find visit info, peer_out_node must be a producer and this data is the first time to be visited. + if (iter == mem_block_visit_info.end()) { + if (IsOutputVisitedByMultiStream(peer_out_node, peer_out_anchor->GetIdx())) { + vector temp; + mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(nullptr, temp))); + is_need_calc_distance = false; + return; + } else { + vector temp = {-1}; + // producer's prev_node_index set to -1 as default + mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(peer_out_node, temp))); + is_need_calc_distance = true; + return; + } + } else { + if (mem_block_visit_info[matched_mem_offset].first == nullptr) { + // multi-stream visit, no need to calculate + is_need_calc_distance = false; + return; + } + if (peer_out_node->GetOpDesc()->GetStreamId() != + mem_block_visit_info[matched_mem_offset].first->GetOpDesc()->GetStreamId()) { + // cur node and peer_out_node not in the same stream, no need to calculate + is_need_calc_distance = false; + return; + } + } + is_need_calc_distance = true; + return; +} + +// calculate distance, update visit info, update prev_node input desc, update cur node input desc +void GraphMemoryAssigner::CalcDistanceAndUpdateDesc(const map &node_index_in_stream, + const InDataAnchorPtr &in_data_anchor, + size_t matched_mem_offset, + NodePtr &node, + map>> &mem_block_visit_info, + bool &is_need_skip) { + int64_t distance = -1; + auto prev_node = mem_block_visit_info[matched_mem_offset].first; + auto prev_node_input_index_vec = mem_block_visit_info[matched_mem_offset].second; + GE_IF_BOOL_EXEC(prev_node == nullptr, is_need_skip = true; return); + if (prev_node_input_index_vec.size() == 1 && prev_node_input_index_vec[0] == -1) { + // prev_node is producer and the data is just be produced(not visited by other node) + GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return); + if (prev_node->GetOpDesc()->GetStreamId() == -1) { // producer not assigned a stream + distance = 0; + } else { + auto iter = node_index_in_stream.find(prev_node->GetName()); + if (iter == node_index_in_stream.end()) { + distance = 0; + } else { + distance = node_index_in_stream.at(node->GetName()) - iter->second - 1; + } + } + mem_block_visit_info[matched_mem_offset].first = node; + mem_block_visit_info[matched_mem_offset].second.clear(); + 
mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx()); + } else { // the data is visit by other customer just before. + if (prev_node_input_index_vec.empty()) { + GELOGW("Missing prev node[%s] input index.", prev_node->GetName().c_str()); + is_need_skip = true; + return; + } + if (prev_node == node) { // scene: multiple anchors of a node access the same data + vector prev_next_distances; + GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return); + auto input_desc = prev_node->GetOpDesc()->GetInputDesc(prev_node_input_index_vec[0]); + if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) { + GELOGW("Get ATTR_NAME_DATA_VISIT_DISTANCE failed."); + is_need_skip = true; + return; + } + if (prev_next_distances.size() != kPrevNextDistanceNum) { + GELOGW("Size of prev_next_distance is not %d.", kPrevNextDistanceNum); + is_need_skip = true; + return; + } else { + distance = prev_next_distances[0]; // use the same prev_distance as previous anchor + } + mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx()); + } else { + distance = node_index_in_stream.at(node->GetName()) - node_index_in_stream.at(prev_node->GetName()) - 1; + UpdatePrevNodeInputDesc(prev_node, prev_node_input_index_vec, distance); + mem_block_visit_info[matched_mem_offset].first = node; + mem_block_visit_info[matched_mem_offset].second.clear(); + mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx()); + } + } + UpdateCurNodeInputDesc(node, in_data_anchor->GetIdx(), distance); +} + +void GraphMemoryAssigner::DeleteVisitInfoWhenLifecycleEnded( + const NodePtr &node, + const InDataAnchorPtr &in_data_anchor, + size_t matched_mem_offset, + map>> &mem_block_visit_info) { + GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, return); + auto input_desc = node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()); + bool is_end_of_inputmem_lifecycle = false; + // if is_end_of_inputmem_lifecycle is true, indicating that cur node is the last customer of this data, + // then we need to delete the visit info of the block in case that the memblock be reused and visited. 
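[Reviewer note, not part of the patch] The distance written into ATTR_NAME_DATA_VISIT_DISTANCE is the number of same-stream nodes executed between two visits of the same memory block: the consumer input records the pair [prev_distance, -1], and when the previous visitor was another consumer on the same stream, that consumer's input has its second (next-distance) slot filled in. A minimal sketch of the arithmetic, with an illustrative map layout and function name, follows.

#include <cstdint>
#include <map>
#include <string>

// prev_distance = topo_index(curr) - topo_index(prev) - 1, counted within one stream.
int64_t VisitDistance(const std::map<std::string, int64_t> &node_index_in_stream,
                      const std::string &prev_name, const std::string &curr_name) {
  auto prev_it = node_index_in_stream.find(prev_name);
  auto curr_it = node_index_in_stream.find(curr_name);
  if (prev_it == node_index_in_stream.end() || curr_it == node_index_in_stream.end()) {
    return 0;  // matches the fallback when the producer has no recorded stream index
  }
  return curr_it->second - prev_it->second - 1;
}

// Example: producer at stream index 3, consumer at index 7 -> the consumer input stores
// [3, -1]; a later consumer at index 9 stores [1, -1] and back-fills 1 as the next
// distance on the index-7 consumer's input.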
+ if (ge::AttrUtils::GetBool(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, is_end_of_inputmem_lifecycle) && + is_end_of_inputmem_lifecycle) { + GELOGD("ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE is true, node name is [%s], in_data_anchor index is [%d]", + node->GetName().c_str(), + in_data_anchor->GetIdx()); + auto iter = mem_block_visit_info.find(matched_mem_offset); + if (iter != mem_block_visit_info.end()) { + mem_block_visit_info.erase(iter); + } + } +} + + +void GraphMemoryAssigner::MarkNodeDistanceAttr(const ComputeGraphPtr &compute_graph, + NodePtr &node, + map>> &mem_block_visit_info, + const map &node_index_in_stream) { + GELOGD("Begin to mark node distance attr, node name is [%s]", node->GetName().c_str()); + GE_IF_BOOL_EXEC(node == nullptr, return); + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + auto peer_out_node = peer_out_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(peer_out_node == nullptr, continue); + + GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, continue); + auto matched_mem_offset = peer_out_node->GetOpDesc()->GetOutputOffset().at(peer_out_anchor->GetIdx()); + + bool is_need_calc_distance = false; + CheckNeedCalcDistAndUpdateVisitInfo(peer_out_node, peer_out_anchor, matched_mem_offset, + mem_block_visit_info, is_need_calc_distance); + if (!is_need_calc_distance) { + continue; + } + + bool is_need_skip = false; + CalcDistanceAndUpdateDesc(node_index_in_stream, in_data_anchor, matched_mem_offset, node, + mem_block_visit_info, is_need_skip); + if (is_need_skip) { + continue; + } + + DeleteVisitInfoWhenLifecycleEnded(node, in_data_anchor, matched_mem_offset, mem_block_visit_info); + } +} + +void GraphMemoryAssigner::MarkDistanceAttr() { + // key: mem_offset of the memory which we visited. 
value: node we visited and input index of this node + map>> mem_block_visit_info; + // key: node name, value: topo order of node in it's belonged stream(exclude ge_local_op) + map node_index_in_stream; + // key: stream id, value: cur nodes num in that stream + map stream_nodes_num; + + for (auto &node : compute_graph_->GetAllNodes()) { + auto node_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); + int64_t stream_id = node_op_desc->GetStreamId(); + if (node_op_desc->GetOpKernelLibName() != kEngineNameGeLocal) { + if (stream_nodes_num.find(stream_id) == stream_nodes_num.end()) { + stream_nodes_num.insert(std::make_pair(stream_id, 1)); + } else { + ++stream_nodes_num[stream_id]; + } + node_index_in_stream.insert(std::make_pair(node->GetName(), stream_nodes_num[stream_id] - 1)); + + MarkNodeDistanceAttr(compute_graph_, node, mem_block_visit_info, node_index_in_stream); + } else { + GELOGD("node[%s] is ge_local_op, no need to calculate distance.", node->GetName().c_str()); + } + } +} } // namespace ge diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 33a5b6d3..a6a2a686 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -103,9 +103,9 @@ class GraphMemoryAssigner { ge::Status AssignMemory2HasRefAttrNode(); - ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); - ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); + ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); @@ -118,6 +118,13 @@ class GraphMemoryAssigner { ge::Status AssignReferenceMemory(); + void MarkDistanceAttr(); + + void MarkNodeDistanceAttr(const ComputeGraphPtr &compute_graph, + NodePtr &node, + map>> &mem_block_visit_info, + const map &node_index_in_stream); + private: /// /// @ingroup ge_graph @@ -197,6 +204,32 @@ class GraphMemoryAssigner { Status UpdateRefOpOffsetReverse(const NodePtr &node); + bool IsOutputVisitedByMultiStream(const NodePtr &peer_out_node, int64_t out_anchor_index); + + void UpdatePrevNodeInputDesc(const NodePtr &prev_node, + const vector &prev_node_input_index_vec, + int64_t distance); + + void UpdateCurNodeInputDesc(const NodePtr &cur_node, int64_t cur_node_input_index, int64_t distance); + + void CheckNeedCalcDistAndUpdateVisitInfo(const NodePtr &peer_out_node, + const OutDataAnchorPtr &peer_out_anchor, + size_t matched_mem_offset, + map>> &mem_block_visit_info, + bool &is_need_calc_distance); + + void CalcDistanceAndUpdateDesc(const map &node_index_in_stream, + const InDataAnchorPtr &in_data_anchor, + size_t matched_mem_offset, + NodePtr &node, + map>> &mem_block_visit_info, + bool &is_need_skip); + + void DeleteVisitInfoWhenLifecycleEnded(const NodePtr &node, + const InDataAnchorPtr &in_data_anchor, + size_t matched_mem_offset, + map>> &mem_block_visit_info); + MemoryOffsetMap memory_offset_; ge::ComputeGraphPtr compute_graph_; HybridMemAssignerPtr mem_assigner_; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc index 4ea52d9d..ccf673b3 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ -23,27 +23,30 @@ namespace ge { HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), 
priority_assigner_(nullptr) {} + : compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size) { vector ranges; GE_CHECK_NOTNULL(block_assigner); if (block_assigner->GetMemoryRanges(ranges) != SUCCESS) { - GELOGE(FAILED, "GetMemoryRanges Fail!"); + GELOGE(FAILED, "[Get][MemoryRanges] Fail!"); return FAILED; } GE_IF_BOOL_EXEC(ranges.empty(), return SUCCESS); block_assigner->AssignMemoryWithReuse(ranges); - mem_size = block_assigner->GetMemOffset(); + // total size + for (auto it : block_assigner->GetMemOffsets()) { + mem_size += it.second; + } return SUCCESS; } Status HybridMemAssigner::Assign() { if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors_, anchor_to_symbol_) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str()); - GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); + GELOGE(FAILED, "[Get][RefMapping] for graph %s failed.", compute_graph_->GetName().c_str()); return FAILED; } @@ -58,8 +61,8 @@ Status HybridMemAssigner::Assign() { size_t bin_mem_size = 0; size_t max_mem_size = 0; - GE_CHK_STATUS_RET(AssignMemory(binary_assigner, bin_mem_size), "BinaryBlock Method AssignMemory Fail!"); - GE_CHK_STATUS_RET(AssignMemory(max_assigner, max_mem_size), "MaxBlock Method AssignMemory Fail!"); + GE_CHK_STATUS_RET(AssignMemory(binary_assigner, bin_mem_size), "[Assign][Memory] Fail!"); + GE_CHK_STATUS_RET(AssignMemory(max_assigner, max_mem_size), "[Assign][Memory] Fail!"); std::unique_ptr priority_assigner; @@ -73,8 +76,7 @@ Status HybridMemAssigner::Assign() { } priority_assigner->SetOpMemOffset(false); - mem_offset_ = priority_assigner->GetMemOffset(); - p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); + mem_offsets_ = priority_assigner->GetMemOffsets(); priority_assigner_ = std::move(priority_assigner); return SUCCESS; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h index 7baece44..2bdfd5c5 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -42,16 +42,14 @@ class HybridMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } private: Status AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size); - size_t mem_offset_; - size_t p2p_mem_offset_; + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 0f58a040..570aae07 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -20,51 +20,54 @@ #include "graph/build/memory/graph_mem_assigner.h" namespace ge { -Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { +Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { GraphMemoryAssigner graph_mem_assigner(compute_graph_); if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { - GELOGE(ge::FAILED, "Memory assigner failed"); + GELOGE(ge::FAILED, "[Assign][Memory] failed, graph:%s", compute_graph_->GetName().c_str()); return ge::FAILED; } // 
Reassign memory for special nodes if (graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset) != ge::SUCCESS) { - GELOGE(ge::FAILED, "Memory assigner failed"); + GELOGE(ge::FAILED, "[ReAssign][Memory] failed, graph:%s", compute_graph_->GetName().c_str()); return ge::FAILED; } // Assign memory (block and offset) for zero copy nodes if (graph_mem_assigner.AssignZeroCopyMemory(mem_offset, zero_copy_mem_size) != ge::SUCCESS) { - GELOGE(ge::FAILED, "Zero copy memory assigner failed"); + GELOGE(ge::FAILED, "[Assign][ZeroCopyMemory] failed, graph:%s", compute_graph_->GetName().c_str()); return ge::FAILED; } if (graph_mem_assigner.AssignMemory2HasRefAttrNode() != ge::SUCCESS) { - GELOGE(ge::FAILED, "Assign memory to node which has ref attr failed!"); + GELOGE(ge::FAILED, "[Assign][Memory] to node which has ref attr failed! graph:%s", + compute_graph_->GetName().c_str()); return ge::FAILED; } // Assign memory for reference if (graph_mem_assigner.AssignReferenceMemory() != ge::SUCCESS) { - GELOGE(ge::FAILED, "Assign reference memory failed!"); + GELOGE(ge::FAILED, "[Assign][ReferenceMemory] failed! graph:%s", compute_graph_->GetName().c_str()); return ge::FAILED; } // Must do variable attr assign after all the memory assigned if (graph_mem_assigner.AssignVarAttr2Nodes() != SUCCESS) { - GELOGE(FAILED, "Variable Memory assigner failed"); + GELOGE(FAILED, "[Variable][Memory] assigner failed, graph:%s", compute_graph_->GetName().c_str()); return FAILED; } if (graph_mem_assigner.SetInputOffset() != ge::SUCCESS) { - GELOGE(ge::FAILED, "SetInputOffset Fail!"); + GELOGE(ge::FAILED, "[Set][InputOffset] Fail! graph:%s", compute_graph_->GetName().c_str()); return ge::FAILED; } if (graph_mem_assigner.CheckOffset() != SUCCESS) { - GELOGE(FAILED, "CheckOffset Fail!"); + GELOGE(FAILED, "[Check][Offset] Fail! 
graph:%s", compute_graph_->GetName().c_str()); return FAILED; } + + graph_mem_assigner.MarkDistanceAttr(); return SUCCESS; } } // namespace ge diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 4cbde2af..b8138a30 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -53,9 +53,8 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr GE_IF_BOOL_EXEC(ge::AttrUtils::GetStr(n->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name), continue); string node_name = n->GetName(); GE_IF_BOOL_EXEC(n->GetOpDesc()->GetAllOutputsDesc().empty(), - REPORT_INNER_ERROR("E19999", "check node:%s has no OutputDesc", - n->GetName().c_str()); - GELOGE(FAILED, "node:%s has no OutputDesc.", n->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "check node:%s has no OutputDesc", n->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] node:%s has no OutputDesc.", n->GetName().c_str()); return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); @@ -118,9 +117,8 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N GE_CHECK_NOTNULL(node->GetOpDesc()); output_list = node->GetOpDesc()->GetOutputOffset(); if (output_list.empty()) { - REPORT_INNER_ERROR("E19999", "check node:%s output_offset_list is empty", - node->GetName().c_str()); - GELOGE(PARAM_INVALID, "Output_list is empty"); + REPORT_INNER_ERROR("E19999", "check node:%s output_offset_list is empty", node->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] node:%s Output_list is empty", node->GetName().c_str()); return PARAM_INVALID; } GE_CHECK_NOTNULL(var_node->GetOpDesc()); @@ -133,7 +131,8 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N if (index >= out_list_size) { REPORT_INNER_ERROR("E19999", "param index:%d >= output_list.size() %d in node %s, check invalid", index, out_list_size, node->GetName().c_str()); - GELOGE(FAILED, "index %d >= output_list.size() %d", index, out_list_size); + GELOGE(FAILED, "[Check][Param] index %d >= output_list.size() %d in node %s", index, out_list_size, + node->GetName().c_str()); return FAILED; } @@ -169,7 +168,8 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr broad_cast_info.broadcast_name = node->GetName(); auto op_desc = node->GetOpDesc(); - GE_CHK_BOOL_RET_STATUS(op_desc != nullptr, FAILED, "Get broadcast op %s desc is nullptr", node->GetName().c_str()); + GE_CHK_BOOL_RET_STATUS(op_desc != nullptr, FAILED, + "[Check][Param] Get broadcast op %s desc is nullptr", node->GetName().c_str()); GE_IF_BOOL_EXEC(broad_cast_info.idx < 0, GELOGI("Broadcast input index must be positive, actual %d", broad_cast_info.idx); @@ -180,8 +180,8 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr if (input_tensor_desc_ptr_vistor.size() <= broad_cast_index) { REPORT_INNER_ERROR("E19999", "Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); - GELOGE(FAILED, "Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), - input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); + GELOGE(FAILED, "[Check][Param] Get broadcast op %s input tensor desc size [%zu] < idx [%d]", + node->GetName().c_str(), input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); return FAILED; } const 
ge::GeTensorDescPtr input_tensor_desc = @@ -192,8 +192,8 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr vector output_list = op_desc->GetOutputOffset(); GE_CHK_BOOL_RET_STATUS(output_list.size() > broad_cast_index, FAILED, - "Get broadcast op %s output_list size [%zu] < idx [%d]", node->GetName().c_str(), - output_list.size(), broad_cast_info.idx); + "[Check][Param] Get broadcast op %s output_list size [%zu] < idx [%d]", + node->GetName().c_str(), output_list.size(), broad_cast_info.idx); broad_cast_info.input_offset = output_list[broad_cast_info.idx]; broad_cast_info.output_offset = output_list[broad_cast_info.idx]; @@ -201,16 +201,16 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr auto output_tensor_desc_ptr_vistor = op_desc->GetAllOutputsDescPtr(); GE_CHK_BOOL_RET_STATUS(output_tensor_desc_ptr_vistor.size() > broad_cast_index, FAILED, - "Get broadcast op %s output tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), - output_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); + "[Check][Param] Get broadcast op %s output tensor desc size [%zu] < idx [%d]", + node->GetName().c_str(), output_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr output_tensor_desc = output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); int64_t output_size = 0; - GE_CHK_STATUS(TensorUtils::GetSize(*output_tensor_desc, output_size), "get input size failed."); + GE_CHK_STATUS(TensorUtils::GetSize(*output_tensor_desc, output_size), "[Check][Param] get output size failed."); broad_cast_info.output_size = output_size; GE_CHK_BOOL_RET_STATUS(broad_cast_info.output_size == broad_cast_info.input_size, FAILED, - "Broadcast op input size[%lu] is not equal output size[%lu]", broad_cast_info.input_size, - broad_cast_info.output_size); + "[Check][Param] Broadcast op input size[%lu] is not equal output size[%lu]", + broad_cast_info.input_size, broad_cast_info.output_size); GE_CHK_STATUS_RET(VarManager::Instance(session_id)->SaveBroadCastInfo(graph_id, broad_cast_info)); return SUCCESS; @@ -298,7 +298,9 @@ Status VarMemAssignUtil::SetOutTransNodeToAssign(const ge::NodePtr &node, const vector output_list = node->GetOpDesc()->GetOutputOffset(); auto out_list_size = output_list.size(); GE_CHECK_SIZE(out_list_size); - GE_CHK_BOOL_RET_STATUS(index < out_list_size, FAILED, "index %zu >= output_list.size() %zu", index, out_list_size); + GE_CHK_BOOL_RET_STATUS(index < out_list_size, FAILED, + "[Check][Param] index %zu >= output_list.size() %zu, node:%s", + index, out_list_size, node->GetName().c_str()); // final_trans_node outputOffset[0] to assign_node outputOffset[0] GELOGI("final_trans_node outputOffset[0] is: %ld", final_trans_output_list[0]); @@ -372,7 +374,7 @@ Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, GE_CHECK_SIZE(ref_attr_node_output_list.size()); GE_CHK_BOOL_RET_STATUS(out_index < ref_attr_node_output_list.size(), FAILED, - "out_index %u >= ref_attr_node_output_list.size() %zu", out_index, + "[Check][Param] out_index %u >= ref_attr_node_output_list.size() %zu", out_index, ref_attr_node_output_list.size()); ref_attr_node_output_list[out_index] = static_cast(reinterpret_cast(dev_ptr)); diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 56cd5b5a..ce2f57f9 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -47,6 +47,7 @@ #include "omg/version.h" #include "register/op_registry.h" #include 
"graph/passes/set_input_output_offset_pass.h" +#include "graph/build/memory/block_mem_assigner.h" using std::map; using std::set; @@ -118,14 +119,16 @@ Status ModelBuilder::CalcOutputSize(const ge::NodePtr &n) { if (graph_status != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed for op:%s(%s) index:%u", node_op_desc->GetName().c_str(), node_op_desc->GetType().c_str(), index); - GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!"); + GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed for op:%s(%s) index:%u", + node_op_desc->GetName().c_str(), node_op_desc->GetType().c_str(), index); return FAILED; } TensorUtils::SetSize(desc_temp, size_temp); if (node_op_desc->UpdateOutputDesc(index, desc_temp) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Update Output desc size failed for op:%s(%s) index:%u", node_op_desc->GetName().c_str(), node_op_desc->GetType().c_str(), index); - GELOGE(FAILED, "UpdateOutputDesc failed."); + GELOGE(FAILED, "[Update][OutputDesc] failed for op:%s(%s) index:%u", + node_op_desc->GetName().c_str(), node_op_desc->GetType().c_str(), index); return FAILED; } @@ -212,14 +215,14 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_ if (weights.empty()) { REPORT_INNER_ERROR("E19999", "Check weights size of node %s(%s) is empty", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] weights size of node %s is empty", node->GetName().c_str()); return FAILED; } GeTensorPtr weight = weights[0]; if (weight == nullptr) { REPORT_INNER_ERROR("E19999", "Check weight of node %s(%s) is nullptr", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "weights[0] is null."); + GELOGE(FAILED, "[Check][Param] weights[0] is null, node:%s.", node->GetName().c_str()); return FAILED; } GeTensorDesc &tensor_desc = weight->MutableTensorDesc(); @@ -271,15 +274,16 @@ Status ModelBuilder::SetInputOutputDesc() { bool is_unknow = false; (void)NodeUtils::GetNodeUnknownShapeStatus(*n, is_unknow); if ((IsGeLocalOp(n->GetOpDesc())) && (!is_unknow)) { - GE_CHK_STATUS_RET(CalcOutputSize(n), "Calculate output size failed"); + GE_CHK_STATUS_RET(CalcOutputSize(n), "[Calc][OutputSize] failed, node:%s", n->GetName().c_str()); } ret = AdjustConstWeightSize(n, weight_offset_); - GE_CHK_STATUS_RET(ret, "AdjustConstWeightSize failed"); + GE_CHK_STATUS_RET(ret, "[Adjust][ConstWeightSize] failed, node:%s", n->GetName().c_str()); GE_IF_BOOL_EXEC(((weight_offset_ > 0) && (weight_offset_ % MEM_ALIGN_SIZE != 0)), weight_offset_ = (weight_offset_ + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE); } - GE_CHK_STATUS_RET(compute_graph_->TopologicalSorting(), "TopologicalSorting failed"); + GE_CHK_STATUS_RET(compute_graph_->TopologicalSorting(), "[Call][TopologicalSorting] failed, graph:%s", + compute_graph_->GetName().c_str()); return SUCCESS; } @@ -363,7 +367,8 @@ Status ModelBuilder::AdjustInputTensorFlag() { REPORT_CALL_ERROR("E19999", "Update Input desc size failed for op:%s(%s) index:%u", owner_node_op_desc->GetName().c_str(), owner_node_op_desc->GetType().c_str(), in_anchors->GetIdx()); - GELOGE(FAILED, "UpdateOutputDesc failed."); + GELOGE(FAILED, "[Update][InputDesc] failed for op:%s(%s) index:%u", + owner_node_op_desc->GetName().c_str(), owner_node_op_desc->GetType().c_str(), in_anchors->GetIdx()); return FAILED; } } @@ -391,61 +396,64 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { max_mem_offset_ = 
mem_type_to_mem_offset_[RT_MEMORY_HBM]; GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_MEMORY_SIZE, max_mem_offset_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_MEMORY_SIZE.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_MEMORY_SIZE failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); return FAILED); + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_mem_offset = 0; + auto it = mem_type_to_mem_offset_.find(mem_type_session_scope); + if (it != mem_type_to_mem_offset_.end()) { + session_scope_mem_offset = it->second; + } if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; } - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset), REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_P2P_MEMORY_SIZE failed."); - return FAILED); + ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.c_str()); + GELOGE(FAILED, "SetInt of ATTR_NAME_SESSION_SCOPE_MEMORY_SIZE failed."); + return FAILED); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); + return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_WEIGHT_SIZE, weight_offset_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_WEIGHT_SIZE.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_WEIGHT_SIZE failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_WEIGHT_SIZE.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_WEIGHT_SIZE.c_str()); return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_STREAM_NUM, stream_num_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_STREAM_NUM.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_STREAM_NUM failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_STREAM_NUM.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_STREAM_NUM.c_str()); return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_EVENT_NUM, event_num_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_EVENT_NUM.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_EVENT_NUM failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_EVENT_NUM.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_EVENT_NUM.c_str()); return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(&model, ATTR_MODEL_HUGE_STREAM_LIST, huge_streams_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_HUGE_STREAM_LIST.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_HUGE_STREAM_LIST failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_HUGE_STREAM_LIST.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_HUGE_STREAM_LIST.c_str()); return FAILED); 
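[Reviewer note, not part of the patch] BuildModelDef() now also serializes a session-scope memory size under ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, looked up from mem_type_to_mem_offset_ with the combined key (kSessionScopeMemory | RT_MEMORY_HBM) and defaulting to 0 when absent. A minimal sketch of that lookup is below; the numeric value used for RT_MEMORY_HBM is a placeholder assumption, the real value comes from the runtime headers.

#include <cstddef>
#include <cstdint>
#include <map>

constexpr uint64_t kSessionScopeMemory = 0x100000000ULL;
constexpr uint64_t kAssumedRtMemoryHbm = 0x2ULL;  // placeholder for RT_MEMORY_HBM (assumption)

std::size_t SessionScopeHbmSize(const std::map<uint64_t, std::size_t> &mem_type_to_mem_offset) {
  auto it = mem_type_to_mem_offset.find(kSessionScopeMemory | kAssumedRtMemoryHbm);
  return (it == mem_type_to_mem_offset.end()) ? 0 : it->second;  // absent key: no session-scope memory
}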
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_LABEL_NUM, label_num_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_LABEL_NUM.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_LABEL_NUM failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_LABEL_NUM.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_LABEL_NUM.c_str()); return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, zero_copy_mem_size_), REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_ZERO_COPY_MEMORY_SIZE.c_str()); - GELOGE(FAILED, "SetInt of ATTR_MODEL_ZERO_COPY_MEMORY_SIZE failed."); + GELOGE(FAILED, "[Set][Attr] %s in model failed.", ATTR_MODEL_ZERO_COPY_MEMORY_SIZE.c_str()); return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, ATTR_MODEL_OUT_NODES_NAME, GetLocalOmgContext().net_out_nodes), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_OUT_NODES_NAME.c_str()); - GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str()); + GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str()); return FAILED); - GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, - p2p_mem_offset_, zero_copy_mem_size_); + GELOGI("For model, max_mem_offset: %zu, p2p_mem_size: %zu, zero_copy_mem_size: %zu, session_scope_mem_size: %zu", + max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_, session_scope_mem_offset); string fp_ceiling_mode; if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_FP_CEILING_MODE.c_str()); - GELOGE(FAILED, "Failed to set attr ATTR_FP_CEILING_MODE"); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_FP_CEILING_MODE.c_str()); + GELOGE(FAILED, "[Set][Str] %s in model failed", ATTR_FP_CEILING_MODE.c_str()); return FAILED; } GELOGI("Set attr ATTR_FP_CEILING_MODE to model, value is %s.", fp_ceiling_mode.c_str()); @@ -459,31 +467,27 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { int64_t core_type = (ge_core_type == kVectorCore) ? 
1 : 0; GELOGI("core_type: %ld", core_type); if (!ge::AttrUtils::SetInt(&model, ATTR_MODEL_CORE_TYPE, core_type)) { - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_MODEL_CORE_TYPE.c_str()); - GELOGE(FAILED, "SetInt of ATTR_CORE_TYPE failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_CORE_TYPE.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_CORE_TYPE.c_str()); } InitL1FusionOption(); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(&model, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_), - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_NAME_SWITCH_FOR_L1_FUSION.c_str()); - GELOGE(FAILED, "SetBool of ATTR_NAME_SWITCH_FOR_L1_FUSION failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_NAME_SWITCH_FOR_L1_FUSION.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed.", ATTR_NAME_SWITCH_FOR_L1_FUSION.c_str()); return FAILED); const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(session_id_); bool is_op_debug = dump_properties.IsOpDebugOpen(); if (is_op_debug) { if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) { - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_OP_DEBUG_FLAG.c_str()); - GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_OP_DEBUG_FLAG.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_OP_DEBUG_FLAG.c_str()); return FAILED; } uint32_t op_debug_mode = dump_properties.GetOpDebugMode(); GELOGI("Get op debug mode:%d", op_debug_mode); if (!ge::AttrUtils::SetInt(&model, ATTR_OP_DEBUG_MODE, op_debug_mode)) { - REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", - ATTR_OP_DEBUG_MODE.c_str()); - GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_MODE failed."); + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_OP_DEBUG_MODE.c_str()); + GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_OP_DEBUG_MODE.c_str()); return FAILED; } } @@ -556,7 +560,7 @@ Status ModelBuilder::MergeWeights() { if (weight == nullptr) { REPORT_INNER_ERROR("E19999", "Can't get const weight in op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Can't get const op weight, name: %s", node->GetName().c_str()); + GELOGE(FAILED, "[Call][MutableTensor] Can't get const op weight, name:%s", node->GetName().c_str()); return FAILED; } @@ -581,14 +585,15 @@ Status ModelBuilder::MergeWeights() { GE_IF_BOOL_EXEC(base_addr == nullptr, REPORT_INNER_ERROR("E19999", "Check weight in op:%s(%s) is nullptr", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Base addr is nullptr."); + GELOGE(FAILED, "[Check][Param] weight in op:%s(%s) is nullptr", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED); if (weight_offset_ - offset < weight_data.size()) { REPORT_INNER_ERROR("E19999", "left weight size not enough for op:%s(%s) left_size:%zu, weight_size:%zu", op_desc->GetName().c_str(), op_desc->GetType().c_str(), weight_offset_ - offset, weight_data.size()); - GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", - weight_offset_ - offset, weight_data.size()); + GELOGE(FAILED, "[Check][Param] left weight size not enough for op:%s(%s). 
left_size:%lu, weight_size:%lu", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), weight_offset_ - offset, weight_data.size()); return FAILED; } uintptr_t dst_ptr = reinterpret_cast(base_addr) + offset; @@ -615,7 +620,7 @@ Status ModelBuilder::MergeWeights() { REPORT_CALL_ERROR("E19999", "mem copy failed. errret:%u, " "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu,", err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); - GELOGE(FAILED, "mem copy failed. errret:%u, " + GELOGE(FAILED, "[Update][Data] mem copy failed. errret:%u, " "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); return FAILED; @@ -647,6 +652,13 @@ Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) { std::vector data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); tbe_kernel = MakeShared(kernel_name, std::move(data)); GE_CHECK_NOTNULL(tbe_kernel); + GELOGI("Node [%s][%s] start recovery extra attr %s from %s", atomic_op_desc->GetName().c_str(), + atomic_op_desc->GetType().c_str(), ge::OP_EXTATTR_NAME_TBE_KERNEL, ATTR_NAME_TBE_KERNEL_NAME.c_str()); + if (!(atomic_op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel))) { + std::string error = "Node" + FmtToStr(atomic_op_desc->GetName()) + "set extra tbeKernel attr failed"; + GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); + return ge::FAILED; + } } } if (tbe_kernel == nullptr) { @@ -695,13 +707,22 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { GE_CHECK_NOTNULL(kernel_buffer.GetData()); std::vector data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); tbe_kernel = std::make_shared(kernel_name, std::move(data)); + GE_CHECK_NOTNULL(tbe_kernel); + GELOGI("Node [%s][%s] start recovery extra attr %s from %s", node_op_desc->GetName().c_str(), + node_op_desc->GetType().c_str(), ge::OP_EXTATTR_NAME_TBE_KERNEL, ATTR_NAME_TBE_KERNEL_NAME.c_str()); + if (!(node_op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel))) { + std::string error = "Node" + FmtToStr(node_op_desc->GetName()) + "set extra tbeKernel attr failed"; + GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); + return ge::FAILED; + } } } GE_IF_BOOL_EXEC(tbe_kernel == nullptr, continue); if (tbe_name_set.count(tbe_kernel->GetName()) > 0) { - REPORT_INNER_ERROR("E19999", "tbe_kernel name %s can't be the same, judge for op:%s(%s),", + REPORT_INNER_ERROR("E19999", "tbe_kernel name %s can't be the same, judge for op:%s(%s)", tbe_kernel->GetName().c_str(), n->GetName().c_str(), n->GetType().c_str()); - GELOGE(FAILED, "tbe_kernel name %s can't be the same", tbe_kernel->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] tbe_kernel name %s can't be the same, judge for op:%s(%s)", + tbe_kernel->GetName().c_str(), n->GetName().c_str(), n->GetType().c_str()); return FAILED; } tbe_name_set.insert(tbe_kernel->GetName()); @@ -719,9 +740,10 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); GE_IF_BOOL_EXEC(cust_aicpu_kernel == nullptr, continue); if (aicpu_name_set.count(cust_aicpu_kernel->GetName()) > 0) { - REPORT_INNER_ERROR("E19999", "aicpu_kernel name %s can't be the same, judge for op:%s(%s),", + REPORT_INNER_ERROR("E19999", "aicpu_kernel name %s can't be the same, judge for op:%s(%s)", cust_aicpu_kernel->GetName().c_str(), n->GetName().c_str(), n->GetType().c_str()); - GELOGE(FAILED, 
"aicpu_kernel name %s can't be the same", cust_aicpu_kernel->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] aicpu_kernel name %s can't be the same, judge for op:%s(%s)", + cust_aicpu_kernel->GetName().c_str(), n->GetName().c_str(), n->GetType().c_str()); return FAILED; } aicpu_name_set.insert(cust_aicpu_kernel->GetName()); @@ -730,11 +752,11 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { } if (!tbe_kernel_store_.Build()) { - GELOGE(FAILED, "TBE Kernels store build failed!"); + GELOGE(FAILED, "[Call][Build] TBE Kernels store build failed!"); return FAILED; } if (!cust_aicpu_kernel_store_.Build()) { - GELOGE(FAILED, "custom AICPU kernels store build failed!"); + GELOGE(FAILED, "[Call][Build] custom AICPU kernels store build failed!"); return FAILED; } ge_model.SetTBEKernelStore(tbe_kernel_store_); @@ -744,14 +766,14 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { GeAttrValue::BYTES task_def_bytes; if (!AttrUtils::GetZeroCopyBytes(model, MODEL_ATTR_TASKS, task_def_bytes)) { REPORT_CALL_ERROR("E19999", "Get attr:%s in model failed", MODEL_ATTR_TASKS.c_str()); - GELOGE(INTERNAL_ERROR, "Get zero copy bytes fail."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in model failed", MODEL_ATTR_TASKS.c_str()); return INTERNAL_ERROR; } int byte_size = static_cast(task_def_bytes.GetSize()); std::shared_ptr task = ge::MakeShared(); GE_CHECK_NOTNULL(task); GE_CHK_BOOL_EXEC(ReadProtoFromArray(task_def_bytes.GetData(), byte_size, task.get()), return INTERNAL_ERROR, - "ReadProtoFromArray failed."); + "[Read][Proto] From Array failed."); ge_model.SetModelTaskDef(task); // Add graph @@ -780,11 +802,12 @@ void ModelBuilder::SetModelVersion(ge::Model &model) { Status ModelBuilder::PreBuildModel() { if ((compute_graph_ == nullptr) || !(compute_graph_->IsValid())) { REPORT_INNER_ERROR("E19999", "Check compute_graph no valid"); - GELOGE(FAILED, "Graph_ is not valid."); + GELOGE(FAILED, "[Check][Param] Graph_ is not valid."); return FAILED; } - GE_CHK_STATUS_RET(SetInputOutputDesc(), "SetInputOutputDesc Failed!"); + GE_CHK_STATUS_RET(SetInputOutputDesc(), + "[Set][InputOutputDesc] Failed! graph:%s", compute_graph_->GetName().c_str()); AddNodeInputProperty(); @@ -792,14 +815,15 @@ Status ModelBuilder::PreBuildModel() { } Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { - GE_CHK_STATUS_RET(AdjustInputTensorFlag(), "AdjustInputTensorFlag failed!"); + GE_CHK_STATUS_RET(AdjustInputTensorFlag(), "[Adjust][InputTensorFlag] failed! graph:%s", + compute_graph_->GetName().c_str()); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kStreamAlloc); // Assign logical streams. StreamAllocator stream_allocator(compute_graph_, subgraphs_); GE_TIMESTAMP_START(AssignLogicalStreams); GE_CHK_STATUS_RET(stream_allocator.AssignLogicalStreams(stream_max_parallel_num_, hcom_parallel_), - "Assign logical streams failed."); + "[Assign][LogicalStreams] failed. graph:%s", compute_graph_->GetName().c_str()); GE_TIMESTAMP_END(AssignLogicalStreams, "GraphBuilder::AssignLogicalStreams"); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kMemoryAlloc); @@ -810,34 +834,36 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GE_TIMESTAMP_START(AssignMemory); MemoryAssigner mem_assigner(compute_graph_); GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_type_to_mem_offset_, zero_copy_mem_size_), - "Assign Memory Failed!"); + "[Assign][Memory] Failed! 
graph:%s", compute_graph_->GetName().c_str()); GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GE_TIMESTAMP_START(SetInputOutputOffset); SetInputOutputOffsetPass input_output_offset; - GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed."); + GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), + "[Set][InputOutputOffset] failed. graph:%s", compute_graph_->GetName().c_str()); GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run"); // Compile single op in graph build stage GE_TIMESTAMP_START(CompileSingleOp); - GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); + GE_CHK_STATUS_RET(CompileSingleOp(), "[Compile][SingleOp] fail. graph:%s", compute_graph_->GetName().c_str()); GE_TIMESTAMP_EVENT_END(CompileSingleOp, "GraphBuilder::CompileSingleOp"); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kStreamAlloc); // Refresh real streams and insert event nodes. GE_TIMESTAMP_START(RefreshRealStream); - GE_CHK_STATUS_RET(stream_allocator.RefreshRealStream(stream_num_, event_num_), "RefreshRealStream failed."); + GE_CHK_STATUS_RET(stream_allocator.RefreshRealStream(stream_num_, event_num_), + "[Refresh][RealStream] failed. graph:%s", compute_graph_->GetName().c_str()); huge_streams_ = stream_allocator.GetHugeStreams(); GE_TIMESTAMP_END(RefreshRealStream, "GraphBuilder::RefreshRealStream"); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GE_TIMESTAMP_START(MergeWeights); - GE_CHK_STATUS_RET(MergeWeights(), "MergeWeights Failed!"); + GE_CHK_STATUS_RET(MergeWeights(), "[Merge][Weights] Failed! graph:%s", compute_graph_->GetName().c_str()); GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); GE_TIMESTAMP_START(BuildModelDef); - GE_CHK_STATUS_RET(BuildModelDef(model), "BuildModelDef failed!"); + GE_CHK_STATUS_RET(BuildModelDef(model), "[Build][ModelDef] failed! 
graph:%s", compute_graph_->GetName().c_str()); GE_TIMESTAMP_END(BuildModelDef, "GraphBuilder::BuildModelDef"); SetModelVersion(model); @@ -847,7 +873,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { Status ModelBuilder::BuildModelForGetDynShapeTask(ge::Model &model_def) { GE_TIMESTAMP_START(BuildModelDef); - GE_CHK_STATUS_RET(BuildModelDef(model_def), "BuildModelDef failed!"); + GE_CHK_STATUS_RET(BuildModelDef(model_def), "[Build][ModelDef] failed!"); GE_TIMESTAMP_END(BuildModelDef, "GraphBuilder::BuildModelDef"); SetModelVersion(model_def); return SUCCESS; @@ -860,7 +886,7 @@ Status ModelBuilder::CompileSingleOp() { std::shared_ptr instance = ge::GELib::GetInstance(); if ((instance == nullptr) || !instance->InitFlag()) { REPORT_INNER_ERROR("E19999", "Check GELib instance not init before"); - GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "CompileSingleOp failed."); + GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] CompileSingleOp failed."); return ge::GE_CLI_GE_NOT_INITIALIZED; } @@ -883,7 +909,7 @@ Status ModelBuilder::CompileSingleOp() { if (kernel_lib_name.empty()) { REPORT_INNER_ERROR("E19999", "Check kernel lib name empty of op:%s(%s)", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(ge::INTERNAL_ERROR, "Get node:%s(%s) kernel lib failed.", node->GetName().c_str(), + GELOGE(ge::INTERNAL_ERROR, "[Get][Name] of node:%s(%s) kernel lib failed.", node->GetName().c_str(), node->GetType().c_str()); return ge::INTERNAL_ERROR; } @@ -895,7 +921,7 @@ Status ModelBuilder::CompileSingleOp() { } else { REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s,", node->GetName().c_str(), node->GetType().c_str(), kernel_lib_name.c_str()); - GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str()); + GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "[Get][OpsKernelInfoStore] for op %s failed", node->GetName().c_str()); return ge::GE_GRAPH_PARAM_NULLPTR; } } @@ -912,7 +938,7 @@ Status ModelBuilder::CompileSingleOp() { if (ret != ge::SUCCESS) { REPORT_CALL_ERROR("E19999", "Batch compile op failed, kernel lib name, node size:%zu,", node_vector.size()); - GELOGE(ret, "Compile op failed, kernel lib name is %s", kernel_lib_name.c_str()); + GELOGE(ret, "[Compile][Op] failed, kernel lib name is %s", kernel_lib_name.c_str()); return ret; } } @@ -960,10 +986,10 @@ void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::setGetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), aicpu_tf_optype_list.size()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return, - "Set attr needCheckCpu fail."); + "[Set][Attr] needCheckCpu fail."); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return, - "Set attr needCheckTf fail."); + "[Set][Attr] needCheckTf fail."); return; } } // namespace ge diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 67def859..6f097329 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -93,7 +93,7 @@ class ModelBuilder { uint64_t session_id_; - map mem_type_to_mem_offset_; + map mem_type_to_mem_offset_; size_t weight_offset_; diff --git a/ge/graph/build/run_context.cc b/ge/graph/build/run_context.cc index eca8b31b..05e40b63 100644 --- a/ge/graph/build/run_context.cc +++ b/ge/graph/build/run_context.cc @@ -29,13 +29,15 @@ Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_siz uint64_t 
weight_mem_size) { if ((data_mem_size > 0) && (data_mem_base == nullptr)) { REPORT_INNER_ERROR("E19999", "InitMemInfo param data_mem_base is null but data_mem_size = %lu", data_mem_size); - GELOGE(PARAM_INVALID, "InitMemInfo param data_mem_base is null but data_mem_size = %lu.", data_mem_size); + GELOGE(PARAM_INVALID, "[Check][Param] InitMemInfo param data_mem_base is null but data_mem_size = %lu.", + data_mem_size); return PARAM_INVALID; } if ((weight_mem_size > 0) && (weight_mem_base == nullptr)) { REPORT_INNER_ERROR("E19999", "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu", weight_mem_size); - GELOGE(PARAM_INVALID, "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", weight_mem_size); + GELOGE(PARAM_INVALID, "[Check][Param] InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", + weight_mem_size); return PARAM_INVALID; } if (mem_type_to_data_mem_base.empty() || mem_type_to_data_mem_size.empty() || @@ -44,9 +46,8 @@ Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_siz "is not equal to the size of mem_type_to_data_mem_size[%zu].", mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size()); GELOGE(PARAM_INVALID, - "InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal to the size of " - "mem_type_to_data_mem_size[%zu].", - mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size()); + "[Check][Param] InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal to the size of " + "mem_type_to_data_mem_size[%zu].", mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size()); return PARAM_INVALID; } data_mem_base_ = data_mem_base; @@ -63,7 +64,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even rtError_t rt_ret = rtModelCreate(&rt_model_, 0); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "call rtModelCreate failed, ret:%d,", static_cast(rt_ret)); - GELOGE(RT_FAILED, "rtModelCreate failed. rt_ret = %d", static_cast(rt_ret)); + GELOGE(RT_FAILED, "[Call][RtModelCreate] failed. rt_ret = %d", static_cast(rt_ret)); return RT_FAILED; } @@ -74,7 +75,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "call rtStreamCreate failed, ret:%d, index:%u,", static_cast(rt_ret), i); - GELOGE(RT_FAILED, "rtStreamCreate failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); + GELOGE(RT_FAILED, "[Call][RtStreamCreate] failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); return RT_FAILED; } stream_list_.emplace_back(stream); @@ -83,7 +84,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "call rtModelBindStream failed, ret:%d, index:%u,", static_cast(rt_ret), i); - GELOGE(RT_FAILED, "Bind stream and model failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); + GELOGE(RT_FAILED, "[Bind][StreamAndModel] failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); return RT_FAILED; } } @@ -97,7 +98,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "call rtEventCreate failed, ret:%d, index:%u,", static_cast(rt_ret), i); - GELOGE(RT_FAILED, "rtEventCreate failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); + GELOGE(RT_FAILED, "[Call][RtEventCreate] failed. 
rt_ret = %d, index = %u", static_cast(rt_ret), i); return RT_FAILED; } event_list_.emplace_back(event); @@ -110,7 +111,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "call rtLabelCreateV2 failed, ret:%d, index:%u,", static_cast(rt_ret), i); - GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); + GELOGE(RT_FAILED, "[Call][RtLabelCreate] failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); return RT_FAILED; } label_list_.emplace_back(label); @@ -162,40 +163,43 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra // check params if (graph == nullptr) { REPORT_INNER_ERROR("E19999", "Check param graph nullptr, session_id:%lu,", session_id); - GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id); + GELOGE(PARAM_INVALID, "[Check][Param] CreateRunContext param graph is null. session_id=%lu", session_id); return PARAM_INVALID; } uint32_t stream_num = 0; if (!AttrUtils::GetInt(&model, ATTR_MODEL_STREAM_NUM, stream_num)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s failed for model, session_id:%lu,", + REPORT_INNER_ERROR("E19999", "Get Attr:%s failed from model, session_id:%lu,", ATTR_MODEL_STREAM_NUM.c_str(), session_id); - GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s failed from model. session_id=%lu", + ATTR_MODEL_STREAM_NUM.c_str(), session_id); return INTERNAL_ERROR; } GELOGD("Stream_num = %u", stream_num); uint32_t event_num = 0; if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s failed for model, session_id:%lu,", + REPORT_INNER_ERROR("E19999", "Get Attr:%s failed from model, session_id:%lu,", ATTR_MODEL_EVENT_NUM.c_str(), session_id); - GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s failed from model, session_id:%lu,", + ATTR_MODEL_EVENT_NUM.c_str(), session_id); return INTERNAL_ERROR; } GELOGD("Event_num = %u", event_num); uint32_t label_num = 0; if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s failed for model, session_id:%lu,", + REPORT_INNER_ERROR("E19999", "Get Attr:%s failed from model, session_id:%lu,", ATTR_MODEL_LABEL_NUM.c_str(), session_id); - GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s failed from model, session_id:%lu,", + ATTR_MODEL_LABEL_NUM.c_str(), session_id); return INTERNAL_ERROR; } GELOGD("Label_num = %u", label_num); Status ret = CreateRtModelResources(stream_num, event_num, label_num); if (ret != SUCCESS) { - GELOGE(ret, "CreateRtModelResources failed. session_id=%lu", session_id); + GELOGE(ret, "[Create][RtModelResources] failed. 
session_id=%lu", session_id); DestroyRtModelResources(); return ret; } diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 8218588f..0e1a1aba 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -176,8 +176,8 @@ Status StreamAllocator::AssignLogicalStreams(const std::map &m auto gelib = GELib::GetInstance(); if (gelib == nullptr) { - REPORT_INNER_ERROR("E19999", "Check GELib instance nullptr"); - GELOGE(FAILED, "Get GELib instance failed."); + REPORT_INNER_ERROR("E19999", "Check GELib instance nullptr, graph:%s", whole_graph_->GetName().c_str()); + GELOGE(FAILED, "[Get][Instance] of GELib failed. graph:%s", whole_graph_->GetName().c_str()); return FAILED; } @@ -188,7 +188,7 @@ Status StreamAllocator::AssignLogicalStreams(const std::map &m Status status = logical_allocator.Assign(whole_graph_, subgraphs_, stream_num_); if (status != SUCCESS) { - GELOGE(status, "Assign logical streams failed."); + GELOGE(status, "[Assign][LogicalStreams] failed. graph:%s", whole_graph_->GetName().c_str()); return status; } GE_DUMP(whole_graph_, "AfterAssignedLogicalStreams"); @@ -203,62 +203,62 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu Status status = AssignSingleStream(); if (status != SUCCESS) { - GELOGE(status, "AssignSingleStream failed!"); + GELOGE(status, "[Assign][SingleStream] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = SetActiveStreamsByLabel(); if (status != SUCCESS) { - GELOGE(status, "SetActiveStreamsByLabel failed!"); + GELOGE(status, "[Set][ActiveStreams] By Label failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = SetActiveStreamsForSubgraphs(); if (status != SUCCESS) { - GELOGE(status, "SetActiveStreamsForSubgraphs failed."); + GELOGE(status, "[Set][ActiveStreams] For Subgraphs failed. graph:%s", whole_graph_->GetName().c_str()); return status; } status = InsertSyncEvents(); if (status != SUCCESS) { - GELOGE(status, "InsertSyncEventId failed!"); + GELOGE(status, "[Insert][SyncEventId] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = OptimizeSyncEvents(); if (status != SUCCESS) { - GELOGE(status, "OptimizeSyncEventId failed!"); + GELOGE(status, "[Optimize][SyncEventId] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } vector> split_streams(stream_num_); status = SplitStreams(split_streams); if (status != SUCCESS) { - GELOGE(status, "SplitStreams failed!"); + GELOGE(status, "[Split][Streams] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = UpdateActiveStreams(split_streams); if (status != SUCCESS) { - GELOGE(status, "UpdateActiveStreams failed!"); + GELOGE(status, "[Update][ActiveStreams] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = RefreshContinuousEvents(); if (status != SUCCESS) { - GELOGE(status, "RefreshContinuousEvents failed!"); + GELOGE(status, "[Refresh][ContinuousEvents] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = RefreshEventsWithReuse(); if (status != SUCCESS) { - GELOGE(status, "[Refresh][Events]RefreshEventsWithReuse failed!"); + GELOGE(status, "[Refresh][Events] With Reuse failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = InsertSyncEventNodes(); if (status != SUCCESS) { - GELOGE(status, "InsertSyncEventNode failed!"); + GELOGE(status, "[Insert][SyncEventNode] failed! 
graph:%s", whole_graph_->GetName().c_str()); return status; } @@ -293,7 +293,7 @@ Status StreamAllocator::AssignSingleStream() { if (stream_num_ > 1) { REPORT_INNER_ERROR("E19999", "The number of ts streams is %ld, only one is supported", stream_num_); - GELOGE(FAILED, "The number of ts streams is %ld, only one is supported.", stream_num_); + GELOGE(FAILED, "[Check][Param] The number of ts streams is %ld, only one is supported.", stream_num_); return FAILED; } @@ -311,7 +311,7 @@ Status StreamAllocator::AssignSingleStream() { uint32_t max_normal_task_count = 0; Status status = GetMaxStreamAndTask(false, max_normal_stream_count, max_normal_task_count); if (status != SUCCESS) { - GELOGE(status, "Get max task count of normal stream failed."); + GELOGE(status, "[Get][MaxCount] of normal stream and task failed. graph:%s", whole_graph_->GetName().c_str()); return status; } @@ -369,7 +369,8 @@ Status StreamAllocator::SetActiveStreamsByLabel() { REPORT_INNER_ERROR("E19999", "Set Attr:%s for op:%s(%s) failed", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "SetListInt failed."); + GELOGE(FAILED, "[Set][Attr] %s for op:%s(%s) failed", + ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), node->GetName().c_str(), node->GetType().c_str()); return FAILED); } @@ -422,7 +423,7 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() { REPORT_INNER_ERROR("E19999", "Set Attr:%s for op:%s(%s) failed", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), first_active_node->GetName().c_str(), first_active_node->GetType().c_str()); - GELOGE(FAILED, "Set active streams for node %s failed.", first_active_node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] active streams for node %s failed.", first_active_node->GetName().c_str()); return FAILED; } } @@ -438,7 +439,7 @@ Status StreamAllocator::InsertSyncEvents() { NodePtr next_node = peer_in_anchor->GetOwnerNode(); Status status = InsertOneEventInTwoNodes(cur_node, next_node); if (status != SUCCESS) { - GELOGE(status, "InsertOneEventInTwoNodes failed!"); + GELOGE(status, "[Insert][OneEvent] In Two Nodes failed! cur node:%s", cur_node->GetName().c_str()); return status; } } @@ -451,7 +452,7 @@ Status StreamAllocator::InsertSyncEvents() { NodePtr next_node = peer_in_anchor->GetOwnerNode(); Status status = InsertOneEventInTwoNodes(cur_node, next_node); if (status != SUCCESS) { - GELOGE(status, "InsertOneEventInTwoNodes failed!"); + GELOGE(status, "[Insert][OneEvent] In Two Nodes failed! cur node:%s", cur_node->GetName().c_str()); return status; } } @@ -460,7 +461,8 @@ Status StreamAllocator::InsertSyncEvents() { Status status = InsertEventsForSubgraph(); if (status != SUCCESS) { - GELOGE(status, "InsertEventsBetweenSubAndParentGraphNodes failed!"); + GELOGE(status, "[Insert][Events] Between Sub And Parent GraphNodes failed! 
graph:%s", + whole_graph_->GetName().c_str()); return status; } @@ -493,7 +495,8 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const if (next_stream_id == kInvalidStream) { REPORT_INNER_ERROR("E19999", "Stream id of next_node %s(%s) should not be %ld", next_node->GetName().c_str(), next_node->GetType().c_str(), kInvalidStream); - GELOGE(FAILED, "Stream id of next_node %s should not be %ld", next_node->GetName().c_str(), kInvalidStream); + GELOGE(FAILED, "[Check][Param] Stream id of next_node %s should not be %ld", + next_node->GetName().c_str(), kInvalidStream); return FAILED; } @@ -542,7 +545,7 @@ Status StreamAllocator::InsertEventsForSubgraph() { for (const auto &next_node : parent_node->GetOutAllNodes()) { Status status = InsertOneEventInTwoNodes(node, next_node); if (status != SUCCESS) { - GELOGE(status, "InsertOneEventInTwoNodes failed!"); + GELOGE(status, "[Insert][OneEvent] In Two Nodes failed! node:%s", node->GetName().c_str()); return status; } } @@ -566,19 +569,19 @@ Status StreamAllocator::OptimizeSyncEvents() { Status status = OptimizeBySendEvents(stream_nodes); if (status != SUCCESS) { - GELOGE(status, "OptimizeBySendEvents failed!"); + GELOGE(status, "[Optimize][StreamNodes] By Send Events failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = OptimizeByRecvEvents(stream_nodes); if (status != SUCCESS) { - GELOGE(status, "OptimizeByRecvEvents failed!"); + GELOGE(status, "[Optimize][StreamNodes] By Recv Events failed! graph:%s", whole_graph_->GetName().c_str()); return status; } status = OptimizeByStreamActivate(); if (status != SUCCESS) { - GELOGE(status, "OptimizeByStreamActivate failed!"); + GELOGE(status, "[Call][OptimizeByStreamActivate] failed! graph:%s", whole_graph_->GetName().c_str()); return status; } for (auto pair : node_to_send_events_) { @@ -708,11 +711,11 @@ Status StreamAllocator::OptimizeByStreamActivate() { bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const { GE_CHECK_NOTNULL_EXEC(send_node_ptr->GetOpDesc(), REPORT_INNER_ERROR("E19999", "Check param send_node_ptr nullptr"); - GELOGE(FAILED, "op desc is nullptr"); + GELOGE(FAILED, "[Check][Param] op desc is nullptr"); return false); GE_CHECK_NOTNULL_EXEC(recv_node_ptr->GetOpDesc(), REPORT_INNER_ERROR("E19999", "Check param recv_node_ptr nullptr"); - GELOGE(FAILED, "op desc is nullptr"); + GELOGE(FAILED, "[Check][Param] op desc is nullptr"); return false); auto cur_stream_id = send_node_ptr->GetOpDesc()->GetStreamId(); if (AttrUtils::HasAttr(recv_node_ptr->GetOpDesc(), ATTR_NAME_STREAM_LABEL)) { @@ -826,7 +829,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { uint32_t max_stream_count = 0; uint32_t max_task_count = 0; GE_CHK_STATUS_RET(GetMaxStreamAndTask(false, max_stream_count, max_task_count), - "Get max stream and task count failed."); + "[Get][MaxCount] of stream and task failed."); for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { GE_CHECK_NOTNULL(cur_node); @@ -839,7 +842,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { if (stream_id > last_stream_id) { REPORT_INNER_ERROR("E19999", "streamid(%ld) > last_stream_id(%ld), check invalid", stream_id, last_stream_id); - GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); + GELOGE(FAILED, "[Check][Param] SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); return FAILED; } bool 
is_stream_first_node = (stream_node_num_vec[stream_id] == 0); @@ -854,7 +857,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { if (stream_continuous_2_node_num_map[continuous_stream_label] > max_node_num_one_stream) { REPORT_INNER_ERROR("E19999", "Check node[%s] stream_id[%ld] continuous stream label[%s] unsatisfied", op_desc->GetName().c_str(), stream_id, continuous_stream_label.c_str()); - GELOGE(FAILED, "SplitStreams:node[%s] stream_id[%ld] continuous stream label[%s] unsatisfied ", + GELOGE(FAILED, "[Check][Param] SplitStreams:node[%s] stream_id[%ld] continuous stream label[%s] unsatisfied ", op_desc->GetName().c_str(), stream_id, continuous_stream_label.c_str()); return FAILED; } @@ -877,7 +880,8 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { if (HasContinuousStreamLabel(op_desc, cur_continuous_stream_label)) { // get stored nodes auto nodes = stream_continuous_2_nodes_map[cur_continuous_stream_label]; - GE_RETURN_WITH_LOG_IF_FALSE(!nodes.empty(), "split stream with continuous stream label %s failed", + GE_RETURN_WITH_LOG_IF_FALSE(!nodes.empty(), + "[Check][Param] split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); for (const auto &node : nodes) { auto stored_op_desc = node->GetOpDesc(); @@ -893,7 +897,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { auto iter = std::find(stream_2_nodes_map[stream_id].begin(), stream_2_nodes_map[stream_id].end(), not_cur); GE_RETURN_WITH_LOG_IF_FALSE( (iter != stream_2_nodes_map[stream_id].end()) && (iter != stream_2_nodes_map[stream_id].begin()), - "split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); + "[Check][Param] split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); iter--; pre_node = *iter; } @@ -905,7 +909,9 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { // Add the send/recv event to the first and last nodes of the split stream. if (pre_node != nullptr) { - GE_CHK_STATUS_RET(AddEventId(pre_node, not_cur, cur_node, not_use_cur), "AddEventId failed."); + GE_CHK_STATUS_RET(AddEventId(pre_node, not_cur, cur_node, not_use_cur), + "[Add][EventId] failed, pre node:%s, not cur node:%s, cur node:%s.", + pre_node->GetName().c_str(), not_cur->GetName().c_str(), cur_node->GetName().c_str()); } } @@ -943,12 +949,12 @@ Status StreamAllocator::UpdateActiveStreams(const vector> &split_st for (auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { if ((node->GetType() == STREAMSWITCH) || (node->GetType() == STREAMSWITCHN)) { if (UpdateActiveStreamsForSwitchNode(node) != SUCCESS) { - GELOGE(FAILED, "Update active streams for switch node: %s failed.", node->GetName().c_str()); + GELOGE(FAILED, "[Update][ActiveStreams] for switch node: %s failed.", node->GetName().c_str()); return FAILED; } } else { if (UpdateActiveStreamsForActiveNode(split_streams, node) != SUCCESS) { - GELOGE(FAILED, "Update active streams for active node: %s failed.", node->GetName().c_str()); + GELOGE(FAILED, "[Update][ActiveStreams] for active node: %s failed.", node->GetName().c_str()); return FAILED; } } @@ -956,13 +962,13 @@ Status StreamAllocator::UpdateActiveStreams(const vector> &split_st Status status = UpdateActiveStreamsForSubgraphs(); if (status != SUCCESS) { - GELOGE(status, "Update active streams for subgraphs failed!"); + GELOGE(status, "[Update][ActiveStreams] for subgraphs failed! 
graph:%s", whole_graph_->GetName().c_str()); return status; } status = SetActiveStreamsForLoop(); if (status != SUCCESS) { - GELOGE(status, "SetActiveStreamsForLoop failed!"); + GELOGE(status, "[Set][ActiveStreams] For Loop failed! graph:%s", whole_graph_->GetName().c_str()); return status; } @@ -990,7 +996,7 @@ void StreamAllocator::UpdateLabelStreams(const vector> &split_strea Status StreamAllocator::UpdateActiveStreamsForSwitchNode(NodePtr &switch_node) { vector active_nodes; if (InsertActiveNodesAfterSwitch(switch_node, active_nodes) != SUCCESS) { - GELOGE(FAILED, "Insert active nodes after node %s failed.", switch_node->GetName().c_str()); + GELOGE(FAILED, "[Insert][ActiveNodes] after node %s failed.", switch_node->GetName().c_str()); return FAILED; } if (active_nodes.empty()) { @@ -1010,7 +1016,8 @@ Status StreamAllocator::UpdateActiveStreamsForSwitchNode(NodePtr &switch_node) { if (!AttrUtils::SetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, stream_ids)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "SetListInt failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } @@ -1024,21 +1031,21 @@ Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node, vecto vector ori_active_label_list; if (!AttrUtils::GetListStr(switch_desc, ATTR_NAME_ACTIVE_LABEL_LIST, ori_active_label_list) || ori_active_label_list.empty()) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s fail for op:%s(%s)", ATTR_NAME_ACTIVE_LABEL_LIST.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s fail from op:%s(%s)", ATTR_NAME_ACTIVE_LABEL_LIST.c_str(), switch_node->GetName().c_str(), switch_node->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Get active label list of switch %s failed.", switch_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Attr] active label list of switch %s failed.", switch_node->GetName().c_str()); return INTERNAL_ERROR; } vector active_label_list; vector added_active_nodes; if (AddActiveNodes(switch_node, ori_active_label_list, active_label_list, added_active_nodes) != SUCCESS) { - GELOGE(FAILED, "Add active nodes after node %s failed.", switch_node->GetName().c_str()); + GELOGE(FAILED, "[Add][ActiveNodes] after node %s failed.", switch_node->GetName().c_str()); return FAILED; } if (SetActiveLabelList(switch_node, active_label_list) != SUCCESS) { - GELOGE(FAILED, "set active label list failed"); + GELOGE(FAILED, "[Set][ActiveLabelList] failed, node:%s", switch_node->GetName().c_str()); return FAILED; } @@ -1051,7 +1058,8 @@ Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node, vecto if (switch_node->GetOutControlAnchor()->LinkTo(active_node->GetInControlAnchor()) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Link from %s to %s failed", switch_node->GetName().c_str(), active_node->GetName().c_str()); - GELOGE(FAILED, "Link %s to %s failed.", switch_node->GetName().c_str(), active_node->GetName().c_str()); + GELOGE(FAILED, "[Link][Nodes] from %s to %s failed.", + switch_node->GetName().c_str(), active_node->GetName().c_str()); return FAILED; } active_nodes.emplace_back(active_node); @@ -1068,7 +1076,8 @@ Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vector(logical_stream) >= split_streams.size()) { REPORT_INNER_ERROR("E19999", "Check logical stream:%u is out of range:%zu", logical_stream, 
split_streams.size()); - GELOGE(FAILED, "logical stream is out of range."); + GELOGE(FAILED, "[Check][Param] logical stream:%u is out of range(0, %zu).", + logical_stream, split_streams.size()); return FAILED; } const set &new_split_streams = split_streams[logical_stream]; @@ -1088,7 +1097,7 @@ Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vectorGetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, new_active_streams)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Set active streams for node %s failed.", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] active streams for node %s failed.", node->GetName().c_str()); return FAILED; } } @@ -1130,7 +1139,7 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { if (!AttrUtils::SetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), active_op->GetName().c_str(), active_op->GetType().c_str()); - GELOGE(FAILED, "Set active streams for node %s failed.", active_node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] active streams for node %s failed.", active_node->GetName().c_str()); return FAILED; } } @@ -1200,7 +1209,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { if (pre_switch_node == nullptr) { REPORT_INNER_ERROR("E19999", "Find switch node before loop active node %s fail", node->GetName().c_str()); - GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); + GELOGE(FAILED, "[Find][SwitchNode] before loop active node %s failed", node->GetName().c_str()); return FAILED; } @@ -1210,7 +1219,8 @@ Status StreamAllocator::SetActiveStreamsForLoop() { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "SetListInt failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED); for (const auto &stream_id : loop_active_streams) { GELOGI("Active stream %u for node: %s.", stream_id, node->GetName().c_str()); @@ -1258,7 +1268,7 @@ Status StreamAllocator::CheckStreamActived() const { if (iter != active_streams.end()) { REPORT_INNER_ERROR("E19999", "Node:%s(%s) cannot active its own stream %u, check invalid ", node->GetName().c_str(), node->GetType().c_str(), stream_id); - GELOGE(FAILED, "Node %s cannot active its own stream %u.", node->GetName().c_str(), stream_id); + GELOGE(FAILED, "[Check][Param] Node %s cannot active its own stream %u.", node->GetName().c_str(), stream_id); return FAILED; } } @@ -1376,7 +1386,7 @@ Status StreamAllocator::RefreshContinuousEvents() { auto find_it = old_to_new_events.find(send_events[i]); if (find_it == old_to_new_events.end()) { REPORT_INNER_ERROR("E19999", "Check invalid send event %u", send_events[i]); - GELOGE(FAILED, "RefreshContinuousEvents: invalid send event %u", send_events[i]); + GELOGE(FAILED, "[Check][Param] RefreshContinuousEvents: invalid send event %u", send_events[i]); return FAILED; } send_events[i] = find_it->second; @@ -1390,7 +1400,7 @@ Status StreamAllocator::RefreshContinuousEvents() { auto find_it = old_to_new_events.find(recv_events[i]); if (find_it == old_to_new_events.end()) { REPORT_INNER_ERROR("E19999", "Check invalid recv event %u", recv_events[i]); - GELOGE(FAILED, 
"RefreshContinuousEvents: invalid recv event %u", recv_events[i]); + GELOGE(FAILED, "[Check][Param] RefreshContinuousEvents: invalid recv event %u", recv_events[i]); return FAILED; } recv_events[i] = find_it->second; @@ -1430,7 +1440,8 @@ Status StreamAllocator::InsertSyncEventNodes() { REPORT_INNER_ERROR("E19999", "Set Attr:%s for op:%s(%s) failed, event_id:%u,", RECV_ATTR_EVENT_ID.c_str(), node->GetName().c_str(), node->GetType().c_str(), event_id); - GELOGE(FAILED, "SetInt failed."); + GELOGE(FAILED, "[Set][Attr] %s for op:%s(%s) failed, event_id:%u,", + RECV_ATTR_EVENT_ID.c_str(), node->GetName().c_str(), node->GetType().c_str(), event_id); return FAILED); (void)AttrUtils::SetListStr(op_desc_ptr, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())); @@ -1441,7 +1452,7 @@ Status StreamAllocator::InsertSyncEventNodes() { if (status != SUCCESS) { REPORT_INNER_ERROR("E19999", "Add edge from node %s to node %s failed", recv_node->GetName().c_str(), node->GetName().c_str()); - GELOGE(status, "Add edge for node %s and node %s failed.", recv_node->GetName().c_str(), + GELOGE(status, "[Add][Edge] for node %s and node %s failed.", recv_node->GetName().c_str(), node->GetName().c_str()); return status; } @@ -1478,7 +1489,7 @@ Status StreamAllocator::InsertSyncEventNodes() { if (status != SUCCESS) { REPORT_INNER_ERROR("E19999", "Add edge from node %s to node %s failed", node->GetName().c_str(), send_node->GetName().c_str()); - GELOGE(status, "Add edge for node %s and node %s failed.", node->GetName().c_str(), + GELOGE(status, "[Add][Edge] for node %s and node %s failed.", node->GetName().c_str(), send_node->GetName().c_str()); return status; } @@ -1491,7 +1502,8 @@ Status StreamAllocator::InsertSyncEventNodes() { if (status != SUCCESS) { REPORT_CALL_ERROR("E19999", "Insert Graph Events fail, graph:%s,", whole_graph_->GetName().c_str()); - GELOGE(status, "Graph ReorderEventNodes failed"); + GELOGE(status, "[Insert][GraphEvents] Graph ReorderEventNodes failed, graph:%s,", + whole_graph_->GetName().c_str()); return status; } @@ -1544,7 +1556,8 @@ Status StreamAllocator::GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stre if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "call rtGetMaxStreamAndTask fail, ret:%d, stream_type:%u,", static_cast(ret), stream_type); - GELOGE(FAILED, "Get max stream and task count by rts failed."); + GELOGE(FAILED, "[Call][RtGetMaxStreamAndTask] Get max stream and task count by rts failed, " + "ret:%d, stream_type:%u,", static_cast(ret), stream_type); return FAILED; } GELOGD("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count); @@ -1687,7 +1700,7 @@ Status StreamAllocator::AddActiveNodes(NodePtr &switch_node, const vectorGetOutControlAnchor()->Unlink(node->GetInControlAnchor()) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Unlink %s to %s failed", switch_node->GetName().c_str(), node->GetName().c_str()); - GELOGE(FAILED, "Unlink %s to %s failed.", switch_node->GetName().c_str(), node->GetName().c_str()); + GELOGE(FAILED, "[Unlink][Nodes] %s to %s failed.", switch_node->GetName().c_str(), node->GetName().c_str()); return FAILED; } GE_CHECK_NOTNULL(active_node->GetOutControlAnchor()); if (active_node->GetOutControlAnchor()->LinkTo(node->GetInControlAnchor()) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Link %s to %s failed", active_node->GetName().c_str(), node->GetName().c_str()); - GELOGE(FAILED, "Link %s to %s failed.", active_node->GetName().c_str(), node->GetName().c_str()); + GELOGE(FAILED, 
"[Link][Nodes] %s to %s failed.", active_node->GetName().c_str(), node->GetName().c_str()); return FAILED; } } if (SetSwitchBranchNodeLabel(active_node, name) != SUCCESS) { - GELOGE(FAILED, "Set switch branch node label failed."); + GELOGE(FAILED, "[Set][SwitchBranchNodeLabel] failed, node:%s.", active_node->GetName().c_str()); return FAILED; } if (SetStreamLabel(active_node, name) != SUCCESS) { - GELOGE(FAILED, "Set stream label failed."); + GELOGE(FAILED, "[Set][StreamLabel] failed, node:%s.", active_node->GetName().c_str()); return FAILED; } if (SetActiveLabelList(active_node, {active_label}) != SUCCESS) { - GELOGE(FAILED, "Set active label list failed."); + GELOGE(FAILED, "[Set][ActiveLabelList] failed, node:%s.", active_node->GetName().c_str()); return FAILED; } if (SetActiveStreamList(active_node, active_label) != SUCCESS) { - GELOGE(FAILED, "Set active stream list failed."); + GELOGE(FAILED, "[Set][ActiveStreamList] failed, node:%s.", active_node->GetName().c_str()); return FAILED; } @@ -1753,7 +1766,7 @@ Status StreamAllocator::AddActiveNodes(NodePtr &switch_node, const vector &streams = labeled_streams_[active_label]; @@ -1761,7 +1774,8 @@ Status StreamAllocator::SetActiveStreamList(NodePtr &active_node, const string & if (!AttrUtils::SetListInt(active_node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), active_node->GetName().c_str(), active_node->GetType().c_str()); - GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_ACTIVE_STREAM_LIST.c_str()); + GELOGE(FAILED, "[Set][Attr] %s failed for op:%s(%s).", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + active_node->GetName().c_str(), active_node->GetType().c_str()); return FAILED; } return SUCCESS; diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index c71c31be..30142c2b 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -128,8 +128,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com REPORT_INNER_ERROR("E19999", "Check stream_id:%ld in op:%s(%s) is bigger than " "run_context.graphStreamList.size():%zu", stream_id, op_desc->GetName().c_str(), op_desc->GetType().c_str(), run_context.graphStreamList.size()); - GELOGE(FAILED, "stream_id %ld is bigger than run_context.graphStreamList.size() %zu", stream_id, - run_context.graphStreamList.size()); + GELOGE(FAILED, "[Check][Param] stream_id %ld is bigger than run_context.graphStreamList.size() %zu", + stream_id, run_context.graphStreamList.size()); return FAILED; } run_context.stream = run_context.graphStreamList[stream_id]; @@ -145,11 +145,9 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com REPORT_CALL_ERROR("E19999", "Call optimize streamed subgraph failed, subgraph: %s, engine_name: %s, graph " "Optimizer num: %zu, ret: %u", subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); - GELOGE( - ret, - "[optimizeStreamedSubGraph]: optimize streamed subgraph failed, subgraph: %s, engine_name: %s, graph " - "Optimizer num: %zu, ret: %u", - subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); + GELOGE(ret, "[Optimize][StreamGraph] failed, subgraph: %s, engine_name: %s, graph " + "Optimizer num: %zu, ret: %u", + subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); return ret; } GELOGD( diff --git a/ge/graph/build/task_generator.cc 
b/ge/graph/build/task_generator.cc index 633f541c..dabdc5d2 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -72,7 +72,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t // Check params if (graph == nullptr) { REPORT_INNER_ERROR("E19999", "Check param graph is null, session_id:%lu", session_id); - GELOGE(PARAM_INVALID, "GetTaskInfo param graph is null. session_id=%lu", session_id); + GELOGE(PARAM_INVALID, "[Check][Param] GetTaskInfo param graph is null. session_id=%lu", session_id); return PARAM_INVALID; } @@ -83,7 +83,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t GE_DUMP(graph, "GenerateTaskAfter"); if (ret != SUCCESS) { - GELOGE(ret, "GenerateTask failed. session_id=%lu", session_id); + GELOGE(ret, "[Generate][Task] failed. session_id=%lu", session_id); return ret; } @@ -98,7 +98,8 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s", ATTR_MODEL_TASK_INDEX_OP_NAME.c_str(), model.GetName().c_str()); - GELOGE(FAILED, "SetListStr failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for model:%s", + ATTR_MODEL_TASK_INDEX_OP_NAME.c_str(), model.GetName().c_str()); return FAILED); GELOGI("GenerateTask Success, task list:%zu, op map:%zu, logic mem base:%p, logic weight base:%p, logic var base:%p", @@ -113,7 +114,8 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t if (task_def == nullptr) { REPORT_INNER_ERROR("E19999", "Add task_def in ModelTaskDef fail, session_id:%lu, graph:%s, model:%s", session_id, graph->GetName().c_str(), model.GetName().c_str()); - GELOGE(FAILED, "task_def is nullptr."); + GELOGE(FAILED, "[Check][Param] task_def is nullptr, session_id:%lu, graph:%s, model:%s", + session_id, graph->GetName().c_str(), model.GetName().c_str()); return FAILED; } *task_def = task_def_temp; @@ -121,7 +123,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t ret = AddModelTaskToModel(model_task_def, session_id, model, run_context); if (ret != SUCCESS) { - GELOGE(ret, "AddModelTaskToModel failed. session_id=%lu", session_id); + GELOGE(ret, "[Add][ModelTask] To Model failed. 
session_id=%lu", session_id); return ret; } @@ -135,28 +137,33 @@ Status TaskGenerator::AddModelTaskToModel(const ModelTaskDef &model_task_def, ui AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s", MODEL_ATTR_TASK_GEN_BASE_ADDR.c_str(), model.GetName().c_str()); - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for model:%s", + MODEL_ATTR_TASK_GEN_BASE_ADDR.c_str(), model.GetName().c_str()); return FAILED); GE_CHK_BOOL_EXEC( AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s", MODEL_ATTR_TASK_GEN_WEIGHT_ADDR.c_str(), model.GetName().c_str()); - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for model:%s", + MODEL_ATTR_TASK_GEN_WEIGHT_ADDR.c_str(), model.GetName().c_str()); return FAILED); GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_TASK_GEN_VAR_ADDR, reinterpret_cast(var_mem_base_)), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s", ATTR_MODEL_TASK_GEN_VAR_ADDR.c_str(), model.GetName().c_str()); - GELOGE(FAILED, "SetInt ATTR_MODEL_TASK_GEN_VAR_ADDR failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for model:%s", + ATTR_MODEL_TASK_GEN_VAR_ADDR.c_str(), model.GetName().c_str()); return FAILED); GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_VAR_SIZE, var_mem_size_), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s", ATTR_MODEL_VAR_SIZE.c_str(), model.GetName().c_str()); - GELOGE(FAILED, "SetInt ATTR_MODEL_VAR_SIZE failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for model:%s", + ATTR_MODEL_VAR_SIZE.c_str(), model.GetName().c_str()); return FAILED); GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, MODEL_ATTR_SESSION_ID, session_id), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for mode:%s", MODEL_ATTR_SESSION_ID.c_str(), model.GetName().c_str()); - GELOGE(FAILED, "SetInt MODEL_ATTR_SESSION_ID failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for mode:%s", + MODEL_ATTR_SESSION_ID.c_str(), model.GetName().c_str()); return FAILED); size_t task_size = model_task_def.ByteSizeLong(); @@ -164,15 +171,15 @@ Status TaskGenerator::AddModelTaskToModel(const ModelTaskDef &model_task_def, ui if (!model_task_def.SerializePartialToArray(serial_buff.GetData(), static_cast(task_size))) { REPORT_INNER_ERROR("E19999", "model_task_def's serialize failed, model name = %s, task_size=%zu", model.GetName().c_str(), task_size); - GELOGE(FAILED, "model_task_def's serialize failed, model name = %s, task_size=%zu.", model.GetName().c_str(), - task_size); + GELOGE(FAILED, "[Call][SerializePartialToArray] failed, model name = %s, task_size=%zu.", + model.GetName().c_str(), task_size); return FAILED; } if (!AttrUtils::SetZeroCopyBytes(model, MODEL_ATTR_TASKS, std::move(serial_buff))) { REPORT_INNER_ERROR("E19999", "Set model task to model failed, model name = %s, task_size=%zu", model.GetName().c_str(), task_size); - GELOGE(FAILED, "Set model task to model failed, model name = %s, task_size=%zu.", model.GetName().c_str(), - task_size); + GELOGE(FAILED, "[Call][SetZeroCopyBytes] Set model task to model failed, model name = %s, task_size=%zu.", + model.GetName().c_str(), task_size); return FAILED; } @@ -191,7 +198,8 @@ Status TaskGenerator::UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t sessi GE_CHK_BOOL_EXEC(AttrUtils::SetListBool(op_desc, kIsInputVar, 
input_var), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", kIsInputVar, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "SetListBool failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", kIsInputVar, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED); } @@ -204,7 +212,8 @@ Status TaskGenerator::UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t sessi GE_CHK_BOOL_EXEC(AttrUtils::SetListBool(op_desc, kIsOutputVar, output_var), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", kIsOutputVar, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "SetListBool failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", kIsOutputVar, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED); } return SUCCESS; @@ -280,10 +289,11 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra std::shared_ptr ge_lib = GELib::GetInstance(); if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { REPORT_INNER_ERROR("E19999", "Check GELib instance not init before"); - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] GenerateTask failed, because GELib instance not init before."); return GE_CLI_GE_NOT_INITIALIZED; } - GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); + GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), + "[Call][MarkNodeAndSetIndex] failed, graph:%s.", graph->GetName().c_str()); ProfilingPoint profiling_point; vector all_reduce_nodes; GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); @@ -304,12 +314,13 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra rtStream_t stream = nullptr; bool is_unknown_shape = graph->GetGraphUnknownFlag() || GetContext().GetHostExecFlag(); if (is_unknown_shape) { - GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); + GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), + "[Set][UnknownShapeStream] failed, graph:%s.", graph->GetName().c_str()); } std::function callback = [&]() { if (is_unknown_shape) { if (DestroyUnknownShapeStream(run_context, stream) != SUCCESS) { - GELOGE(FAILED, "Destory unknown shape stream failed."); + GELOGE(FAILED, "[Destroy][UnknownShapeStream] failed."); } } }; @@ -335,7 +346,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), - "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); + "[Call][GenerateTaskForFusionNode] node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly if (ge::AttrUtils::GetInt(op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key)) { GELOGI("Fusion node[name:%s, type:%s] do not need generate task again.", name.c_str(), type.c_str()); @@ -349,13 +360,11 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra if (kernel_info_store == nullptr) { REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s", node->GetName().c_str(), node->GetType().c_str(), op_kernel_lib_name.c_str()); - GELOGE(INTERNAL_ERROR, - "No ops kernel store or ops kernel builder found. 
node:%s(%s), op_kernel_lib_name=%s.", - name.c_str(), - type.c_str(), op_kernel_lib_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Call][GetOpsKernelInfoStore] No ops kernel store or ops kernel builder found. " + "node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), type.c_str(), op_kernel_lib_name.c_str()); return INTERNAL_ERROR; } - GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), + GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "[Call][UpdateAnchorStatus] node:%s(%s) failed", name.c_str(), type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); @@ -365,7 +374,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra int64_t stream_id = 0; if (!is_unknown_shape) { stream_id = op_desc->GetStreamId(); - GE_CHK_STATUS_RET(SetKnownShapeStream(run_context, stream_id), "node[name:%s(%s), id:%ld] stream id is invalid.", + GE_CHK_STATUS_RET(SetKnownShapeStream(run_context, stream_id), + "[Set][KnownShapeStream] node[name:%s(%s), id:%ld] stream id is invalid.", name.c_str(), type.c_str(), op_id); } GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), @@ -376,8 +386,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call OpsKernelBuilderManager GenerateTask fail for op:%s(%s)", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(ret, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task failed.", - op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id); + GELOGE(ret, "[Generate][Task] fail for op:%s(%s)", node->GetName().c_str(), node->GetType().c_str()); return ret; } // Profiling task @@ -388,9 +397,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra REPORT_INNER_ERROR("E19999", "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task " "but task num from %zu to %zu, check invalid", op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, task_list_size_before, task_list_size_after); - GELOGE(FAILED, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task. but task num from %zu to %zu.", - op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, task_list_size_before, - task_list_size_after); + GELOGE(FAILED, "[Check][Param] Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task. " + "but task num from %zu to %zu.", op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), + op_id, stream_id, task_list_size_before, task_list_size_after); return FAILED; } @@ -455,15 +464,15 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s", op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_kernel_lib_name.c_str()); - GELOGE(INTERNAL_ERROR, - "Fusion: No ops kernel store or ops kernel builder found. fusion_node:%s(%s), op_kernel_lib_name=%s.", + GELOGE(INTERNAL_ERROR, "[Call][GetOpsKernelInfoStore] Fusion: No ops kernel store or ops kernel builder found." 
+ " fusion_node:%s(%s), op_kernel_lib_name=%s.", fusion_node_name.c_str(), fusion_node_type.c_str(), op_kernel_lib_name.c_str()); return INTERNAL_ERROR; } ret = UpdateAnchorStatus(fusion_node); if (ret != SUCCESS) { - GELOGE(ret, "Fusion: Call UpdateAnchorStatus fusion_node:%s(%s) failed", fusion_node_name.c_str(), + GELOGE(ret, "[Update][AnchorStatus] fusion_node:%s(%s) failed", fusion_node_name.c_str(), fusion_node_type.c_str()); return ret; } @@ -474,8 +483,9 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info REPORT_INNER_ERROR("E19999", "Fusion: fusion_node[name:%s(%s), id:%ld] stream id is invalid, " "stream list size=%zu", fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, run_context.graphStreamList.size()); - GELOGE(INTERNAL_ERROR, "Fusion: fusion_node[name:%s(%s), id:%ld] stream id is invalid, stream list size=%zu", - fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, run_context.graphStreamList.size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Fusion: fusion_node[name:%s(%s), id:%ld] stream id is invalid, " + "stream list size=%zu", fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, + run_context.graphStreamList.size()); return INTERNAL_ERROR; } // profiling task @@ -488,8 +498,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info REPORT_CALL_ERROR("E19999", " Call %s to generate fusion_node:[fusion_node_name:%s(%s), " "id:%ld, stream_id:%ld] task failed", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); - GELOGE(ret, - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), " + GELOGE(ret, "[Generate][Task] Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), " "id:%ld, stream_id:%ld] task failed.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); return ret; @@ -503,8 +512,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info "id:%ld, stream_id:%ld] task, but task num from %zu to %zu, check invalid", fusion_node_name.c_str(), fusion_node_type.c_str(), op_kernel_lib_name.c_str(), op_id, stream_id, task_list_size_before, task_list_size_after); - GELOGE(FAILED, - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), " + GELOGE(FAILED, "[Check][Param] Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), " "id:%ld, stream_id:%ld] task. 
but task num from %zu to %zu.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, task_list_size_before, task_list_size_after); @@ -539,16 +547,18 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) { if (NodeUtils::SetAllAnchorStatus(node) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "SetAllAnchorStatus fail for op:%s(%s)", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "NodeUtils::SetAllAnchorStatus failed."); + GELOGE(INTERNAL_ERROR, "[Set][AllAnchorStatus] failed, op:%s(%s)", + node->GetName().c_str(), node->GetType().c_str()); return INTERNAL_ERROR; } for (auto &anchor : node->GetAllInDataAnchors()) { auto peer_anchor = anchor->GetPeerOutAnchor(); if (peer_anchor == nullptr) { if (AnchorUtils::SetStatus(anchor, ANCHOR_SUSPEND) != GRAPH_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Set in peer anchor status fail for op:%s(%s), anchor_index:%d,", + REPORT_CALL_ERROR("E19999", "Set in peer anchor status fail for op:%s(%s), anchor_index:%d", node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx()); - GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); + GELOGE(INTERNAL_ERROR, "[Set][Status] failed, op:%s(%s), anchor_index:%d", + node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx()); return INTERNAL_ERROR; } continue; @@ -558,16 +568,18 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) { bool is_const = NodeUtils::GetConstOpType(peer_anchor->GetOwnerNode(), const_type); if (is_const && (const_type == CONSTANT)) { if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Set in anchor CONST status fail for op:%s(%s), anchor_index:%d,", + REPORT_CALL_ERROR("E19999", "Set in anchor CONST status fail for op:%s(%s), anchor_index:%d", node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx()); - GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); + GELOGE(INTERNAL_ERROR, "[Set][Status] failed. 
op:%s(%s), anchor_index:%d.", + node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx()); return INTERNAL_ERROR; } } else { if (AnchorUtils::SetStatus(anchor, ANCHOR_DATA) != GRAPH_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Set in anchor DATA status fail for op:%s(%s), anchor_index:%d,", + REPORT_CALL_ERROR("E19999", "Set in anchor DATA status fail for op:%s(%s), anchor_index:%d", node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx()); - GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); + GELOGE(INTERNAL_ERROR, "[Set][Status] failed, op:%s(%s), anchor_index:%d.", + node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx()); return INTERNAL_ERROR; } } @@ -580,7 +592,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { auto ge_lib = GELib::GetInstance(); if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { REPORT_INNER_ERROR("E19999", "Check GELib instance not init before"); - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized or is finalized."); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] GE is not initialized or is finalized."); return GE_CLI_GE_NOT_INITIALIZED; } @@ -588,7 +600,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { if (all_nodes.empty()) { REPORT_INNER_ERROR("E19999", "Check param all_nodes empty in graph:%s", graph->GetName().c_str()); - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Graph's node is empty"); + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[Check][Param] Graph's node is empty, graph:%s", graph->GetName().c_str()); return GE_GRAPH_GRAPH_NODE_NULL; } @@ -612,7 +624,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { for (const auto &stream_ops : all_stream_ops) { Status status = MarkFirstAndLastOps(stream_ops.second, is_single_stream); if (status != SUCCESS) { - GELOGE(status, "Mark first and last nodes failed."); + GELOGE(status, "[Mark][FirstAndLastOps] failed, graph:%s.", graph->GetName().c_str()); return status; } } @@ -644,9 +656,8 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector &ops, bool is_ string op_kernel_lib_name = op_desc->GetOpKernelLibName(); if (op_kernel_lib_name.empty()) { REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - op_kernel_lib_name.c_str()); - GELOGE(INTERNAL_ERROR, "node:%s(%s) get op kernel lib failed.", op_desc->GetName().c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_kernel_lib_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] node:%s(%s) get op kernel lib failed.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } @@ -664,12 +675,14 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector &ops, bool is_ GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_pair.first, kIsFirstNode, true), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", kIsFirstNode, op_pair.first->GetName().c_str(), op_pair.first->GetType().c_str()); - GELOGE(FAILED, "SetBool failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", kIsFirstNode, + op_pair.first->GetName().c_str(), op_pair.first->GetType().c_str()); return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_pair.second, kIsLastNode, true), REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", kIsLastNode, op_pair.second->GetName().c_str(), op_pair.second->GetType().c_str()); - GELOGE(FAILED, "SetBool failed."); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", kIsLastNode, + op_pair.second->GetName().c_str(), 
op_pair.second->GetType().c_str()); return FAILED); } } @@ -980,7 +993,8 @@ Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std: GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), REPORT_INNER_ERROR("E19999", "Multiply result is out of range when calc profiling ar log id " "for node:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Multiply result is out of range."); + GELOGE(FAILED, "[Check][Param] Multiply result is out of range. node:%s(%s)", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED); ar_log_id = i * kProfilingArStep + kProfilingArStartLogid; is_insert_all_reduce_task = true; @@ -1074,7 +1088,8 @@ Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std:: GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), REPORT_INNER_ERROR("E19999", "Multiply result is out of range when calc profiling ar log id " "for node:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Multiply result is out of range."); + GELOGE(FAILED, "[Check][Param] Multiply result is out of range. node:%s(%s)", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED); ar_log_id = i * kProfilingArStep + kProfilingArEndLogid; is_insert_all_reduce_task = true; @@ -1184,7 +1199,7 @@ Status TaskGenerator::SetUnknownShapeStream(RunContext &run_context, rtStream_t rtError_t rt_ret = rtModelBindStream(run_context.model, stream, 0); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtModelBindStream failed, ret:0x%X", rt_ret); - GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(FAILED, "[Call][RtModelBindStream] failed, ret: 0x%X", rt_ret); GE_CHK_RT_RET(rtStreamDestroy(stream)); return FAILED; } @@ -1199,7 +1214,7 @@ Status TaskGenerator::DestroyUnknownShapeStream(RunContext &run_context, rtStrea Status TaskGenerator::SetKnownShapeStream(RunContext &run_context, int64_t stream_id) { if (stream_id < 0 || stream_id >= static_cast(run_context.graphStreamList.size())) { - GELOGE(INTERNAL_ERROR, "Stream id[%ld] is invalid, stream list size=%zu", stream_id, + GELOGE(INTERNAL_ERROR, "[Check][Param] Stream id[%ld] is invalid, stream list size=%zu", stream_id, run_context.graphStreamList.size()); return INTERNAL_ERROR; } diff --git a/ge/graph/common/bcast.cc b/ge/graph/common/bcast.cc index 95a93897..b36b50b2 100644 --- a/ge/graph/common/bcast.cc +++ b/ge/graph/common/bcast.cc @@ -37,7 +37,7 @@ Status BCast::GenerateBcastInfo(const kVecInt &sx, const kVecInt &sy) { Reverse(x); Reverse(y); ExtendTensorDim(x, y); - GE_RETURN_WITH_LOG_IF_ERROR(SetShapeDifferentInfo(x, y), "GenerateBcastInfo failed."); + GE_RETURN_WITH_LOG_IF_ERROR(SetShapeDifferentInfo(x, y), "[Set][ShapeDifferentInfo] GenerateBcastInfo failed."); } ReverseAllIntermediateShapes(); return domi::SUCCESS; @@ -76,7 +76,7 @@ Status BCast::SetShapeDifferentInfo(const kVecInt &x, const kVecInt &y) { REPORT_INNER_ERROR("E19999", "SetShapeDifferentInfo failed. Two tensor shapes are not compatible " "according to the broadcasting rule."); GELOGE(domi::PARAM_INVALID, - "SetShapeDifferentInfo failed. Two tensor shapes are not compatible " + "[Check][Param] SetShapeDifferentInfo failed. 
Two tensor shapes are not compatible " "according to the broadcasting rule."); return domi::PARAM_INVALID; } diff --git a/ge/graph/common/omg_util.cc b/ge/graph/common/omg_util.cc index 7fe11f23..1dba8c51 100644 --- a/ge/graph/common/omg_util.cc +++ b/ge/graph/common/omg_util.cc @@ -16,9 +16,6 @@ #include "graph/common/omg_util.h" -#include - -#include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" @@ -38,9 +35,10 @@ Status GetOriginalType(const ge::NodePtr &node, string &type) { GE_CHECK_NOTNULL(node->GetOpDesc()); bool ret = ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); if (!ret) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s fail for op:%s(%s)", ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s fail from op:%s(%s)", ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Get FrameWorkOp original type [%s]", type.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s fail from op:%s(%s)", ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return INTERNAL_ERROR; } GELOGD("Get FrameWorkOp original type [%s]", type.c_str()); @@ -61,7 +59,8 @@ Status SetStreamLabel(const ge::NodePtr &node, const std::string &label) { if (!AttrUtils::SetStr(tmp_desc, ge::ATTR_NAME_STREAM_LABEL, label)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_STREAM_LABEL.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_STREAM_LABEL failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_STREAM_LABEL.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -80,7 +79,8 @@ Status SetCycleEvent(const ge::NodePtr &node) { if (!AttrUtils::SetBool(tmp_desc, ge::ATTR_NAME_STREAM_CYCLE_EVENT_FLAG, true)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_STREAM_CYCLE_EVENT_FLAG.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_STREAM_CYCLE_EVENT_FLAG failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_STREAM_CYCLE_EVENT_FLAG.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -100,7 +100,8 @@ Status SetActiveLabelList(const ge::NodePtr &node, const std::vectorGetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_ACTIVE_LABEL_LIST failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_ACTIVE_LABEL_LIST.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -120,7 +121,8 @@ Status SetSwitchBranchNodeLabel(const ge::NodePtr &node, const std::string &bran if (!AttrUtils::SetStr(tmp_desc, ge::ATTR_NAME_SWITCH_BRANCH_NODE_LABEL, branch_label)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_SWITCH_BRANCH_NODE_LABEL.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_SWITCH_BRANCH_NODE_LABEL failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_SWITCH_BRANCH_NODE_LABEL.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -140,7 +142,8 @@ Status SetSwitchTrueBranchFlag(const ge::NodePtr &node, bool value) { if 
(!AttrUtils::SetBool(tmp_desc, ge::ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, value)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -160,7 +163,8 @@ Status SetOriginalNodeName(const ge::NodePtr &node, const std::string &orig_name if (!AttrUtils::SetStr(tmp_desc, ge::ATTR_NAME_ORIG_NODE_NAME, orig_name)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_ORIG_NODE_NAME.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_ORIG_NODE_NAME failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_ORIG_NODE_NAME.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -179,7 +183,8 @@ Status SetCyclicDependenceFlag(const ge::NodePtr &node) { if (!AttrUtils::SetBool(tmp_desc, ge::ATTR_NAME_CYCLIC_DEPENDENCE_FLAG, true)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_CYCLIC_DEPENDENCE_FLAG.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_CYCLIC_DEPENDENCE_FLAG failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_CYCLIC_DEPENDENCE_FLAG.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -200,7 +205,8 @@ Status SetNextIteration(const ge::NodePtr &node, const std::string &next) { if (!AttrUtils::SetStr(tmp_desc, ge::ATTR_NAME_NEXT_ITERATION, next)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_NEXT_ITERATION.c_str(), node->GetName().c_str(), node->GetType().c_str()); - GELOGE(FAILED, "Op: %s set ATTR_NAME_NEXT_ITERATION failed", node->GetName().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_NEXT_ITERATION.c_str(), + node->GetName().c_str(), node->GetType().c_str()); return FAILED; } @@ -244,4 +250,42 @@ Status GetMemorySize(const NodePtr &node, int64_t &output_size) { output_size = kBufferPoolMemAlignSize + size + kBufferPoolMemAlignSize; return SUCCESS; } + +/// +/// @brief Check Is Unknown shape Tensor +/// @param [in] tensor_desc +/// @return true: Unknown / false: Known +/// +bool IsUnknownShapeTensor(const GeTensorDesc &tensor_desc) { + const static int kUnknowShape = -1; + const static int kUnknowRank = -2; + for (auto dim_size : tensor_desc.GetShape().GetDims()) { + if (dim_size == kUnknowShape || dim_size == kUnknowRank) { + return true; + } + } + + return false; +} + +/// +/// @brief Set Op _force_unknown_shape flag +/// @param [in] node +/// @param [in] force_unknown, set attribute if true +/// @return +/// +void MarkForceUnknownShape(const NodePtr &node, bool force_unknown) { + GE_RT_VOID_CHECK_NOTNULL(node); + if (!force_unknown) { + return; + } + + GELOGD("[%s] mark as force unknown shape node", node->GetName().c_str()); + if (!AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, force_unknown)) { + REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_FORCE_UNKNOWN_SHAPE.c_str(), + node->GetName().c_str(), node->GetType().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_FORCE_UNKNOWN_SHAPE.c_str(), + 
node->GetName().c_str(), node->GetType().c_str()); + } +} } // namespace ge diff --git a/ge/graph/common/omg_util.h b/ge/graph/common/omg_util.h index 561a12e0..c84da7f8 100644 --- a/ge/graph/common/omg_util.h +++ b/ge/graph/common/omg_util.h @@ -117,6 +117,21 @@ void AlignMemSize(int64_t &mem_size, int64_t align_size); /// @return Status /// Status GetMemorySize(const NodePtr &node, int64_t &output_size); + +/// +/// @brief Check Is Unknown shape Tensor +/// @param [in] tensor_desc +/// @return true: Unknown / false: Known +/// +bool IsUnknownShapeTensor(const GeTensorDesc &tensor_desc); + +/// +/// @brief Set Op _force_unknown_shape flag +/// @param [in] node +/// @param [in] force_unknown, set attribute if true +/// @return +/// +void MarkForceUnknownShape(const NodePtr &node, bool force_unknown); } // namespace ge #endif // GE_GRAPH_COMMON_OMG_UTIL_H_ diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 1d22016e..87dadb41 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -43,7 +43,7 @@ GraphExecutor::~GraphExecutor() { rt_ret = rtFreeHost(buffer_addr); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "[GraphManager] subgraph free buffer failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtFreeHost] subgraph free buffer failed, ret: 0x%X", rt_ret); } } } @@ -55,17 +55,17 @@ Status GraphExecutor::SetCondition(std::mutex *mutex, std::condition_variable *c std::shared_ptr listener) { if (mutex == nullptr) { REPORT_INNER_ERROR("E19999", "Check param mutex nullptr"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[SetCondition] input param mutex is nullptr."); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param mutex is nullptr."); return GE_GRAPH_PARAM_NULLPTR; } if (cond == nullptr) { REPORT_INNER_ERROR("E19999", "Check param cond nullptr"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[SetCondition] input param cond is nullptr."); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param cond is nullptr."); return GE_GRAPH_PARAM_NULLPTR; } if (listener == nullptr) { REPORT_INNER_ERROR("E19999", "Check param listener nullptr"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[SetCondition] input param listener is nullptr."); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param listener is nullptr."); return GE_GRAPH_PARAM_NULLPTR; } @@ -82,7 +82,7 @@ Status GraphExecutor::SetCondition(std::mutex *mutex, std::condition_variable *c Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { if (graph_context_ptr == nullptr) { REPORT_INNER_ERROR("E19999", "Check param graph_context_ptr nullptr"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[SetGraphContext] input param graph_context_ptr is nullptr"); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param graph_context_ptr is nullptr"); return GE_GRAPH_PARAM_NULLPTR; } graph_context_ = graph_context_ptr; @@ -94,7 +94,7 @@ Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vectorSetDynamicSize(model_id, batch_num, dynamic_type); if (ret != SUCCESS) { - GELOGE(ret, "SetDynamicSize failed"); + GELOGE(ret, "[Set][DynamicSize] failed, model_id:%u", model_id); return ret; } return SUCCESS; @@ -109,7 +109,7 @@ Status GraphExecutor::FreeInOutBuffer() { rt_ret = rtFreeHost(*iter); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "[GraphManager] subgraph free buffer failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, 
"[Call][RtFreeHost] subgraph free buffer failed, ret: 0x%X", rt_ret); (void)buffer_addr_.erase(buffer_addr_.begin(), iter); return GE_GRAPH_FREE_FAILED; } @@ -144,7 +144,7 @@ Status GraphExecutor::MallocInOutBuffer(const std::vector &buffer_size buffer_size_.clear(); auto rt_ret = FreeInOutBuffer(); if (rt_ret != SUCCESS) { - GELOGE(RT_FAILED, "[SubGraphInfo] MallocInOutBuffer free buffer failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Free][Buffer] failed, ret: 0x%X", rt_ret); return RT_FAILED; } } @@ -154,9 +154,8 @@ Status GraphExecutor::MallocInOutBuffer(const std::vector &buffer_size void *tmp_buf = nullptr; rt_ret = rtMallocHost(&tmp_buf, buffer_size[i]); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%lu, ret:0x%X", - buffer_size[i], rt_ret); - GELOGE(RT_FAILED, "[GraphManager] subgraph malloc buffer failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%lu, ret:0x%X", buffer_size[i], rt_ret); + GELOGE(RT_FAILED, "[Malloc][Buffer] failed, size:%lu, ret:0x%X", buffer_size[i], rt_ret); return GE_GRAPH_MALLOC_FAILED; } malloc_flag_ = true; @@ -190,7 +189,7 @@ Status GraphExecutor::PrepareInputData(const std::vector &input_tensor Status ret = MallocInOutBuffer(bufferSizeVec, addrVec); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_MALLOC_FAILED, "[GraphExecutor] Malloc mem failed"); + GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][Mem] failed"); return GE_GRAPH_MALLOC_FAILED; } @@ -203,7 +202,8 @@ Status GraphExecutor::PrepareInputData(const std::vector &input_tensor if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, dst_size:%lu, src_size:%zu, ret:0x%X", bufferSizeVec[i], in_tensor->GetData().size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, dst_size:%lu, src_size:%zu, ret:0x%X", + bufferSizeVec[i], in_tensor->GetData().size(), rt_ret); return RT_FAILED; } } @@ -247,7 +247,7 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vectorResetResult() != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call graph_run_listener_.ResetResult fail, model_id:%u", - model_id); - GELOGE(GE_GRAPH_EXECUTE_FAILED, "Reset result failed"); + REPORT_CALL_ERROR("E19999", "Call graph_run_listener_.ResetResult fail, model_id:%u", model_id); + GELOGE(GE_GRAPH_EXECUTE_FAILED, "[Reset][Result] failed, model_id:%u", model_id); return GE_GRAPH_EXECUTE_FAILED; } @@ -272,7 +271,7 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vectorGetResultCode(); if (result_code != SUCCESS && result_code != END_OF_SEQUENCE) { - REPORT_CALL_ERROR("E19999", "Graph_run_listener_ run fail, result:%u, model_id:%u", - result_code, model_id); - GELOGE(GE_GRAPH_EXECUTE_FAILED, "[GraphExecutor] execute model failed, ret=%u, modelId=%u.", result_code, - model_id); + REPORT_CALL_ERROR("E19999", "Graph_run_listener_ run fail, result:%u, model_id:%u", result_code, model_id); + GELOGE(GE_GRAPH_EXECUTE_FAILED, "[Execute][Model] failed, ret=%u, modelId=%u.", result_code, model_id); return GE_GRAPH_EXECUTE_FAILED; } } @@ -299,13 +296,13 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector outBufTmp(new (std::nothrow) uint8_t[outputDataTmp.length]); if (outBufTmp == nullptr) { - REPORT_CALL_ERROR("E19999", "New output buffer fail, length:%lu, model:%u", - outputDataTmp.length, model_id); - GELOGE(FAILED, "Failed to allocate memory."); + REPORT_CALL_ERROR("E19999", "New output buffer fail, length:%lu, 
model:%u", outputDataTmp.length, model_id); + GELOGE(FAILED, "[Allocate][Memory] failed, length:%lu, model:%u", outputDataTmp.length, model_id); return FAILED; } GE_PRINT_DYNAMIC_MEMORY(new, "the output memory of data on training.", sizeof(uint8_t) * outputDataTmp.length) @@ -314,7 +311,8 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector shapeDims; @@ -348,7 +346,7 @@ void GraphExecutor::InitModelIdInfo(std::vector &out_model_id_info, Status GraphExecutor::FreeExecuteMemory() { auto ret = FreeInOutBuffer(); if (ret != SUCCESS) { - GELOGE(ret, "[FreeExecuteMemory] FreeInOutBuffer Error!"); + GELOGE(ret, "[Free][InOutBuffer] Error!"); return ret; } @@ -368,13 +366,14 @@ Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_ro if (!init_flag_) { REPORT_INNER_ERROR("E19999", "No SetCondition called before, graph:%u, check invalid", graph_id); - GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[GraphExecutor] AI Core Engine without calling SetCondition!"); + GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[Check][Param] AI Core Engine without calling SetCondition! graph id:%u", + graph_id); return GE_GRAPH_EXECUTE_NOT_INIT; } GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED); Status ret = SyncExecuteModel(ge_root_model->GetModelId(), input_tensor, output_tensor); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[GraphExecutor] SyncExecuteModel Error!"); + GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[SyncExecute][Model] Error! graph id:%u", graph_id); return GE_GRAPH_SYNC_MODEL_FAILED; } @@ -382,7 +381,7 @@ Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_ro } Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model, - const std::vector &input_tensor, + const std::vector &input_tensor, const RunAsyncCallback& callback) { GELOGI("[GraphExecutor] Start to async execute graph, graph_id=%u", graph_id); if (graph_id != last_graph_id_) { @@ -395,7 +394,7 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr & GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED); Status ret = AsyncExecuteModel(ge_root_model, input_tensor, callback); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[GraphExecutor] AsyncExecuteModel Error!"); + GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[AsyncExecute][Model] Error! 
graph id:%u", graph_id); return GE_GRAPH_SYNC_MODEL_FAILED; } @@ -403,6 +402,73 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr & return SUCCESS; } +Status GraphExecutor::GetExecuteData(const std::vector &input_tensor, std::vector &blobs, + std::vector &tensor_desc) { + for (const auto &tensor : input_tensor) { + DataBuffer in_data_buf; + // check placement + in_data_buf.data = const_cast(tensor.GetData().data()); + in_data_buf.length = tensor.GetData().size(); + in_data_buf.isDataSupportMemShare = false; + blobs.emplace_back(in_data_buf); + tensor_desc.emplace_back(tensor.GetTensorDesc()); + } + return SUCCESS; +} + +Status GraphExecutor::ExecuteGraphWithStream(GraphId graph_id, + rtStream_t stream, + const GeRootModelPtr &ge_root_model, + const std::vector &input_tensor, + std::vector &output_tensor) { + GELOGI("[GraphExecutor] Start to execute graph with stream, graph id = %u, stream = %p.", graph_id, stream); + if (!init_flag_) { + REPORT_INNER_ERROR("E19999", "No SetCondition called before, graph id = %u, stream = %p, check invalid.", + graph_id, stream); + GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[Check][Param] AI Core Engine without calling SetCondition! graph id = %u", + graph_id); + return GE_GRAPH_EXECUTE_NOT_INIT; + } + + if (graph_id != last_graph_id_) { + auto ret = FreeExecuteMemory(); + if (ret != SUCCESS) { + return ret; + } + } + last_graph_id_ = graph_id; + + GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED); + auto model_id = ge_root_model->GetModelId(); + InputData input_data; + input_data.index = 0; + input_data.model_id = model_id; + std::vector input_desc; + auto ret = GetExecuteData(input_tensor, input_data.blobs, input_desc); + if (ret != SUCCESS) { + return ret; + } + OutputData output_data; + output_data.index = 0; + output_data.model_id = model_id; + std::vector output_desc; + ret = GetExecuteData(output_tensor, output_data.blobs, output_desc); + if (ret != SUCCESS) { + return ret; + } + + auto async_mode = true; + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); + if (ret != SUCCESS) { + return ret; + } + + GELOGI("[GraphExecutor] Async execute graph with stream success graph id = %u, stream = %p.", graph_id, stream); + return SUCCESS; +} + bool CompareByLoad(const Uint32Pair &lhs, const Uint32Pair &rhs) { return lhs.second < rhs.second; } @@ -449,21 +515,21 @@ Status GraphExecutor::SetCallback(uint32_t model_id, const GeRootModelPtr &ge_ro auto model = model_manager->GetHybridModel(model_id); GE_CHECK_NOTNULL(model); if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) { - GELOGE(FAILED, "SetRunAsyncListenerCallback failed."); + GELOGE(FAILED, "[Set][RunAsyncListenerCallback] failed, model_id %u", model_id); return FAILED; } } else { auto model = model_manager->GetModel(model_id); GE_CHECK_NOTNULL(model); if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) { - GELOGE(FAILED, "SetRunAsyncListenerCallback failed."); + GELOGE(FAILED, "[Set][RunAsyncListenerCallback] failed, model_id %u", model_id); return FAILED; } } return SUCCESS; } -Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector &inputs, +Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector &inputs, const RunAsyncCallback &callback) { uint32_t model_id = GetExecuteModelId(ge_root_model); if (model_id == kInvalidModelId) { 
@@ -475,24 +541,24 @@ Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, con GE_CHECK_NOTNULL(model_manager); GELOGI("RunAsync begin.model_id %u", model_id); if (SetCallback(model_id, ge_root_model, callback) != SUCCESS) { - GELOGE(FAILED, "RunAsync: SetCallBack for model fail"); + GELOGE(FAILED, "[Set][CallBack] for model fail, model_id %u", model_id); return FAILED; } Status ret = model_manager->DataInputTensor(model_id, inputs); if (ret != SUCCESS) { - GELOGE(ret, "RunAsync: DataInput fail"); + GELOGE(ret, "[Call][DataInputTensor] RunAsync: DataInput fail, model_id %u", model_id); return ret; } GELOGI("RunAsync success."); } catch (std::bad_alloc &) { - REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); - GELOGE(MEMALLOC_FAILED, "RunAsync failed, bad memory allocation occur !"); + REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed, model_id %u", model_id); + GELOGE(MEMALLOC_FAILED, "[Run][Async] failed, bad memory allocation occur, model_id %u", model_id); return MEMALLOC_FAILED; } catch (...) { - REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); - GELOGE(FAILED, "RunAsync failed, some exceptions occur !"); + REPORT_INNER_ERROR("E19999", "Some exceptions occur failed, model_id %u", model_id); + GELOGE(FAILED, "[Run][Async] failed, some exceptions occur, model_id %u", model_id); return FAILED; } @@ -505,16 +571,16 @@ Status GraphExecutor::DataInput(const InputData &input_data, OutputData &output_ GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->DataInput(input_data, output_data); if (ret != SUCCESS) { - GELOGE(ret, "DataInput: DataInput failed."); + GELOGE(ret, "[Call][DataInput] failed."); return ret; } } catch (std::bad_alloc &) { REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); - GELOGE(MEMALLOC_FAILED, "DataInput failed, bad memory allocation occur !"); + GELOGE(MEMALLOC_FAILED, "[Call][DataInput] failed, bad memory allocation occur !"); return MEMALLOC_FAILED; } catch (...) { REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); - GELOGE(FAILED, "DataInput failed, some exceptions occur !"); + GELOGE(FAILED, "[Call][DataInput] failed, some exceptions occur !"); return FAILED; } @@ -528,16 +594,16 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vectorGetInputOutputDescInfo(model_id, input_desc, output_desc); if (ret != SUCCESS) { - GELOGE(ret, "GetInputOutputDescInfo failed."); + GELOGE(ret, "[Get][InputOutputDescInfo] failed, model_id:%u.", model_id); return ret; } } catch (std::bad_alloc &) { - REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); - GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfo failed, bad memory allocation occur !"); + REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed, model_id:%u.", model_id); + GELOGE(MEMALLOC_FAILED, "[Get][InputOutputDescInfo] failed, bad memory allocation occur, model_id:%u.", model_id); return MEMALLOC_FAILED; } catch (...) 
{ - REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); - GELOGE(FAILED, "GetInputOutputDescInfo failed, some exceptions occur !"); + REPORT_INNER_ERROR("E19999", "Some exceptions occur failed, model_id:%u.", model_id); + GELOGE(FAILED, "[Get][InputOutputDescInfo] failed, some exceptions occur, model_id:%u.", model_id); return FAILED; } @@ -554,16 +620,16 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vectorGetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats, new_model_desc); if (ret != SUCCESS) { - GELOGE(ret, "GetInputOutputDescInfo failed."); + GELOGE(ret, "[Get][InputOutputDescInfo] failed, model_id:%u.", model_id); return ret; } } catch (std::bad_alloc &) { - REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); - GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfo failed, bad memory allocation occur !"); + REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed, model_id:%u.", model_id); + GELOGE(MEMALLOC_FAILED, "[Get][InputOutputDescInfo] failed, bad memory allocation occur, model_id:%u.", model_id); return MEMALLOC_FAILED; } catch (...) { - REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); - GELOGE(FAILED, "GetInputOutputDescInfo failed, some exceptions occur !"); + REPORT_INNER_ERROR("E19999", "Some exceptions occur failed, model_id:%u.", model_id); + GELOGE(FAILED, "[Get][InputOutputDescInfo] failed, some exceptions occur, model_id:%u.", model_id); return FAILED; } @@ -583,7 +649,7 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vectorGetDynamicBatchInfo(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(ret, "GetDynamicBatchInfo failed."); + GELOGE(ret, "[Get][DynamicBatchInfo] failed, model_id:%u.", model_id); return ret; } return SUCCESS; @@ -601,7 +667,7 @@ Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vectorGetCombinedDynamicDims(model_id, batch_info); if (ret != SUCCESS) { - GELOGE(ret, "GetCombinedDynamicDims failed."); + GELOGE(ret, "[Call][GetCombinedDynamicDims] failed, model_id:%u.", model_id); return ret; } return SUCCESS; @@ -620,7 +686,7 @@ ge::Status GraphExecutor::GetUserDesignateShapeOrder(uint32_t model_id, GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetUserDesignateShapeOrder(model_id, user_input_shape_order); if (ret != SUCCESS) { - GELOGE(ret, "GetUserDesignateShapeOrder failed."); + GELOGE(ret, "[Get][UserDesignateShapeOrder] failed, model_id:%u.", model_id); return ret; } return SUCCESS; @@ -631,7 +697,20 @@ Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetCurShape(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(ret, "GetCurShape failed"); + GELOGE(ret, "[Get][CurShape] failed, model_id:%u", model_id); + return ret; + } + return SUCCESS; +} + +Status GraphExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetOpAttr(model_id, op_name, attr_name, attr_value); + if (ret != SUCCESS) { + GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); + REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); return ret; } return SUCCESS; @@ -642,7 +721,7 @@ Status GraphExecutor::GetModelAttr(uint32_t 
model_id, std::vector &dynam GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetModelAttr(model_id, dynamic_output_shape_info); if (ret != SUCCESS) { - GELOGE(FAILED, "GetModelAttr failed"); + GELOGE(FAILED, "[Get][ModelAttr] failed, model_id:%u", model_id); return ret; } return SUCCESS; @@ -675,7 +754,7 @@ Status GraphExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, Origin GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetOrigInputInfo(model_id, index, orig_input_info); if (ret != SUCCESS) { - GELOGE(ret, "GetOrigInputInfo failed."); + GELOGE(ret, "[Get][OrigInputInfo] failed, model_id:%u, index:%u.", model_id, index); return ret; } @@ -689,7 +768,7 @@ Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t inde GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); if (ret != SUCCESS) { - GELOGE(ret, "GetAllAippInputOutputDims failed."); + GELOGE(ret, "[Get][AllAippInputOutputDims] failed, model_id:%u, index:%u.", model_id, index); return ret; } @@ -702,7 +781,8 @@ Status GraphExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); if (ret != SUCCESS) { - GELOGE(ret, "GetOpDescInfo failed."); + GELOGE(ret, "[Get][OpDescInfo] failed, device_id:%u, stream_id:%u, task_id:%u.", + device_id, stream_id, task_id); return ret; } return SUCCESS; diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h index 2add453f..aa791c9b 100755 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -50,7 +50,13 @@ class GraphExecutor { std::vector &output_tensor); ge::Status ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model, - const std::vector &input_tensor, const RunAsyncCallback &callback); + const std::vector &input_tensor, const RunAsyncCallback &callback); + + Status ExecuteGraphWithStream(GraphId graph_id, + rtStream_t stream, + const GeRootModelPtr &ge_root_model, + const std::vector &input_tensor, + std::vector &output_tensor); Status SetCondition(std::mutex *mutex, std::condition_variable *cond, std::shared_ptr listener); @@ -108,6 +114,9 @@ class GraphExecutor { static Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); + static Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value); + static Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); @@ -122,10 +131,13 @@ class GraphExecutor { Status PrepareInputData(const std::vector &input_tensor, InputData &graph_input_data, OutputData &graph_output_data, std::vector &output_desc); + Status GetExecuteData(const std::vector &input_tensor, std::vector &blobs, + std::vector &tensor_desc); + Status SyncExecuteModel(uint32_t model_id, const std::vector &input_tensor, std::vector &output_tensor); - Status AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector &input_tensor, + Status AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector &input_tensor, const RunAsyncCallback &callback); void InitModelIdInfo(std::vector &out_model_id_info, std::vector &sub_graph_vec, diff --git a/ge/graph/label/case_label_maker.cc b/ge/graph/label/case_label_maker.cc index 
8bf5de71..3fdb1783 100644 --- a/ge/graph/label/case_label_maker.cc +++ b/ge/graph/label/case_label_maker.cc @@ -44,8 +44,8 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (graph_names.empty() || graph_names.size() > kMaxCaseBranch) { REPORT_INNER_ERROR("E19999", "Node:%s(%s) subgraph size: %zu, check invalid", case_desc->GetName().c_str(), case_desc->GetType().c_str(), graph_names.size()); - GELOGE(INTERNAL_ERROR, "Node: %s has invalid subgraph, graph size: %zu.", case_desc->GetName().c_str(), - graph_names.size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Node: %s has invalid subgraph, graph size: %zu.", + case_desc->GetName().c_str(), graph_names.size()); return FAILED; } @@ -71,7 +71,7 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (stream_active == nullptr) { REPORT_CALL_ERROR("E19999", "Add StreamActive node in graph:%s fail", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active failed.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][StreamActive] in Subgraph: %s failed.", graph->GetName().c_str()); return FAILED; } @@ -81,7 +81,7 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (label == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetEnter node in graph:%s fail", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Call][AddLabelSetEnter] Subgraph: %s add label set failed.", graph->GetName().c_str()); return FAILED; } switch_labels.emplace_back(curr_label_index); @@ -96,7 +96,8 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (AddLabelGotoLeave(graph, label_goto_name, last_label_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelGotoLeave node in graph:%s fail", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label goto failed.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Call][AddLabelGotoLeave] Subgraph: %s add label goto failed.", + graph->GetName().c_str()); return FAILED; } } else { @@ -105,7 +106,8 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (AddLabelSetLeave(graph, last_label_name, last_label_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetLeave node in graph:%s fail", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Call][AddLabelSetLeave] Subgraph: %s add label set failed.", + graph->GetName().c_str()); return FAILED; } } @@ -122,7 +124,8 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (switch_node == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSwitchEnter node in graph:%s fail", first_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", first_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Call][AddLabelSwitchEnter] Subgraph: %s add label switch failed.", + first_graph->GetName().c_str()); return FAILED; } @@ -130,7 +133,7 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), first_label->GetInControlAnchor()) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", switch_node->GetName().c_str(), first_label->GetName().c_str(), first_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add ctrl edge to %s failed.", first_label->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] to %s failed.", first_label->GetName().c_str()); return FAILED; 
} @@ -139,7 +142,8 @@ Status CaseOpLabelMaker::Run(uint32_t &label_index) { if (AddLabelSwitchIndex(first_graph, data_name, pred_desc, switch_node, parent_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSwitchIndex node in graph:%s fail", first_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add switch input failed.", first_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Call][AddLabelSwitchIndex] Subgraph: %s add switch input failed.", + first_graph->GetName().c_str()); return FAILED; } diff --git a/ge/graph/label/if_label_maker.cc b/ge/graph/label/if_label_maker.cc index cf4cdd39..72b33015 100644 --- a/ge/graph/label/if_label_maker.cc +++ b/ge/graph/label/if_label_maker.cc @@ -47,7 +47,7 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { "then branch graph: %s, else branch graph: %s", if_desc->GetName().c_str(), if_desc->GetType().c_str(), then_branch_name.c_str(), else_branch_name.c_str()); - GELOGE(INTERNAL_ERROR, "Node: %s has invalid subgraph, then branch: %s, else branch: %s.", + GELOGE(INTERNAL_ERROR, "[Check][Param] Node: %s has invalid subgraph, then branch: %s, else branch: %s.", if_desc->GetName().c_str(), then_branch_name.c_str(), else_branch_name.c_str()); return FAILED; } @@ -72,7 +72,7 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { if (then_stream_active == nullptr) { REPORT_CALL_ERROR("E19999", "Add StreamActive node in graph:%s fail", then_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active failed.", then_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][StreamActive] in Subgraph:%s failed.", then_sub_graph->GetName().c_str()); return FAILED; } @@ -80,14 +80,14 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { if (then_enter_label == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetEnter node in graph:%s fail", then_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", then_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSetEnter] in Subgraph:%s failed.", then_sub_graph->GetName().c_str()); return FAILED; } if (AddLabelGotoLeave(then_sub_graph, then_leave_name, else_leave_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelGotoLeave node in graph:%s fail", then_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label goto failed.", then_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelGotoLeave] in Subgraph:%s failed.", then_sub_graph->GetName().c_str()); return FAILED; } @@ -95,20 +95,20 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { if (else_stream_active == nullptr) { REPORT_CALL_ERROR("E19999", "Add StreamActive node in graph:%s fail", else_stream_active->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active failed.", else_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][StreamActive] in Subgraph:%s failed.", else_sub_graph->GetName().c_str()); return FAILED; } if (AddLabelSetEnter(else_sub_graph, else_enter_name, else_enter_index, else_stream_active) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetEnter node in graph:%s fail", else_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", else_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSetEnter] in Subgraph:%s failed.", else_sub_graph->GetName().c_str()); return FAILED; } if (AddLabelSetLeave(else_sub_graph, else_leave_name, else_leave_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add 
LabelSetLeave node in graph:%s fail", else_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", else_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSetLeave] in Subgraph:%s failed.", else_sub_graph->GetName().c_str()); return FAILED; } @@ -121,7 +121,7 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { if (switch_node == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSwitchEnter node in graph:%s fail", then_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", then_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSwitchEnter] in Subgraph:%s failed.", then_sub_graph->GetName().c_str()); return FAILED; } @@ -129,7 +129,7 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { if (GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), then_enter_label->GetInControlAnchor()) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", switch_node->GetName().c_str(), then_enter_label->GetName().c_str(), then_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add ctrl edge to %s failed.", then_enter_label->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] to %s failed.", then_enter_label->GetName().c_str()); return FAILED; } @@ -138,7 +138,7 @@ Status IfOpLabelMaker::Run(uint32_t &label_index) { if (AddLabelSwitchIndex(then_sub_graph, data_name, pred_desc, switch_node, parent_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSwitchIndex node in graph:%s fail", then_sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add switch input failed.", then_sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSwitchIndex] in Subgraph:%s failed.", then_sub_graph->GetName().c_str()); return FAILED; } diff --git a/ge/graph/label/label_maker.cc b/ge/graph/label/label_maker.cc index 156748e8..638cbbae 100644 --- a/ge/graph/label/label_maker.cc +++ b/ge/graph/label/label_maker.cc @@ -58,7 +58,7 @@ void LabelMaker::LinkToGraphHead(const ComputeGraphPtr &graph, const NodePtr &no if (GraphUtils::AddEdge(node->GetOutControlAnchor(), n->GetInControlAnchor()) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", node->GetName().c_str(), n->GetName().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Add ctrl edge from %s to %s failed.", node->GetName().c_str(), n->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] from %s to %s failed.", node->GetName().c_str(), n->GetName().c_str()); } } } @@ -82,7 +82,7 @@ void LabelMaker::LinkToGraphTail(const ComputeGraphPtr &graph, const NodePtr &no if (GraphUtils::AddEdge(tail->GetOutControlAnchor(), node->GetInControlAnchor()) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", tail->GetName().c_str(), node->GetName().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Add ctrl edge from %s to %s failed.", tail->GetName().c_str(), node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] from %s to %s failed.", tail->GetName().c_str(), node->GetName().c_str()); } return; } @@ -101,7 +101,7 @@ NodePtr LabelMaker::AddStreamActive(const ComputeGraphPtr &graph, const std::str const auto &node_list = graph->GetDirectNode(); if (node_list.empty()) { REPORT_INNER_ERROR("E19999", "Check param graph:%s has no node", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSet: Graph %s node is empty.", graph->GetName().c_str()); + 
GELOGE(INTERNAL_ERROR, "[Check][Param] LabelSet: Graph %s node is empty.", graph->GetName().c_str()); return nullptr; } @@ -137,7 +137,7 @@ NodePtr LabelMaker::AddLabelSetEnter(const ComputeGraphPtr &graph, const std::st const auto &node_list = graph->GetDirectNode(); if (node_list.empty()) { REPORT_INNER_ERROR("E19999", "Check param graph:%s has no node", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSet: Graph %s node is empty.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] LabelSet: Graph %s node is empty.", graph->GetName().c_str()); return nullptr; } @@ -153,7 +153,7 @@ NodePtr LabelMaker::AddLabelSetEnter(const ComputeGraphPtr &graph, const std::st if (GraphUtils::AddEdge(label_set->GetOutControlAnchor(), stream_active->GetInControlAnchor()) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", label_set->GetName().c_str(), stream_active->GetName().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Add ctrl edge from %s to %s failed.", label_set->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] from %s to %s failed.", label_set->GetName().c_str(), stream_active->GetName().c_str()); return nullptr; } @@ -202,7 +202,7 @@ NodePtr LabelMaker::AddLabelGotoEnter(const ComputeGraphPtr &graph, const std::s auto it = node_list.begin(); if (it == node_list.end()) { REPORT_INNER_ERROR("E19999", "Check param graph:%s has no node", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelGoto: Graph %s node is empty.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] LabelGoto: Graph %s node is empty.", graph->GetName().c_str()); return nullptr; } @@ -216,7 +216,7 @@ NodePtr LabelMaker::AddLabelGotoEnter(const ComputeGraphPtr &graph, const std::s if (label_goto == nullptr) { REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s fail", op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelGoto: Add to graph %s failed.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][Node] to graph %s failed.", graph->GetName().c_str()); return nullptr; } @@ -265,7 +265,7 @@ NodePtr LabelMaker::AddLabelSwitchEnter(const ComputeGraphPtr &graph, const std: auto it = node_list.begin(); if (it == node_list.end()) { REPORT_INNER_ERROR("E19999", "Check param graph:%s has no node", graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Graph %s node is empty.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] LabelSwitchByIndex: Graph %s node is empty.", graph->GetName().c_str()); return nullptr; } @@ -277,14 +277,14 @@ NodePtr LabelMaker::AddLabelSwitchEnter(const ComputeGraphPtr &graph, const std: if (op_desc->AddInputDesc(desc) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Add input desc into node:%s(%s) in graph:%s fail", op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add input desc failed."); + GELOGE(INTERNAL_ERROR, "[Add][InputDesc] failed."); return nullptr; } if (!AttrUtils::SetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, labels)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_LABEL_SWITCH_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add %s failed.", ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "[Set][Attr] %s failed.", ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return nullptr; } @@ -292,7 
+292,7 @@ NodePtr LabelMaker::AddLabelSwitchEnter(const ComputeGraphPtr &graph, const std: if (label_switch == nullptr) { REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s ahead fail", op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add to graph %s failed.", graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][Node] to graph %s failed.", graph->GetName().c_str()); return nullptr; } @@ -320,14 +320,15 @@ NodePtr LabelMaker::AddLabelSwitchLeave(const ComputeGraphPtr &graph, const std: if (op_desc->AddInputDesc(desc) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Add input desc into node:%s(%s) in graph:%s fail", op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add input desc failed."); + GELOGE(INTERNAL_ERROR, "[Add][InputDesc] into node:%s(%s) in graph:%s fail", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); return nullptr; } if (!AttrUtils::SetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, labels)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_LABEL_SWITCH_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add %s failed.", ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "[Set][Attr] %s failed.", ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return nullptr; } @@ -360,20 +361,23 @@ NodePtr LabelMaker::AddLabelSwitchIndex(const ComputeGraphPtr &graph, const std: if (op_desc->AddInputDesc(desc) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Add input desc into node:%s(%s) in graph:%s fail", op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add data input desc failed."); + GELOGE(INTERNAL_ERROR, "[Add][InputDesc] into node:%s(%s) in graph:%s fail", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); return nullptr; } if (op_desc->AddOutputDesc(desc) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Add output desc into node:%s(%s) in graph:%s fail", op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add data output desc failed."); + GELOGE(INTERNAL_ERROR, "[Add][OutputDesc] into node:%s(%s) in graph:%s fail", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str()); return nullptr; } if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_PARENT_NODE_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add %s failed.", ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_PARENT_NODE_INDEX.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return nullptr; } NodePtr op_data = graph->AddNodeFront(op_desc); @@ -384,7 +388,7 @@ NodePtr LabelMaker::AddLabelSwitchIndex(const ComputeGraphPtr &graph, const std: if (GraphUtils::AddEdge(op_data->GetOutDataAnchor(0), sw_node->GetInDataAnchor(0)) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", op_data->GetName().c_str(), sw_node->GetName().c_str(), graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add input edge to %s failed.", 
op_data->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] to %s failed.", op_data->GetName().c_str()); return nullptr; } diff --git a/ge/graph/label/partitioned_call_label_maker.cc b/ge/graph/label/partitioned_call_label_maker.cc index d9a89ef2..7b4bcbd8 100644 --- a/ge/graph/label/partitioned_call_label_maker.cc +++ b/ge/graph/label/partitioned_call_label_maker.cc @@ -41,7 +41,7 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { if (sub_graph_name.empty()) { REPORT_INNER_ERROR("E19999", "Node:%s(%s) subgraph_index:%d name is empty, check invalid", call_desc->GetName().c_str(), call_desc->GetType().c_str(), kSubGraphIndex); - GELOGE(INTERNAL_ERROR, "Node: %s has no subgraph name.", sub_graph_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Node:%s has no subgraph name.", sub_graph_name.c_str()); return FAILED; } @@ -50,7 +50,7 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { REPORT_INNER_ERROR("E19999", "Node:%s(%s) subgraph_name:%s is not exist in parent_graph, check invalid", call_desc->GetName().c_str(), call_desc->GetType().c_str(), sub_graph_name.c_str()); - GELOGE(INTERNAL_ERROR, "Node: %s has no subgraph.", sub_graph_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][SubGraph] Node:%s has no subgraph.", sub_graph_name.c_str()); return FAILED; } @@ -59,7 +59,7 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { if (stream_active == nullptr) { REPORT_CALL_ERROR("E19999", "Add StreamActive node in graph:%s fail", sub_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active node failed.", sub_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][StreamActive] in Subgraph:%s failed.", sub_graph->GetName().c_str()); return FAILED; } diff --git a/ge/graph/label/while_label_maker.cc b/ge/graph/label/while_label_maker.cc index 22e783e3..cd6b3743 100644 --- a/ge/graph/label/while_label_maker.cc +++ b/ge/graph/label/while_label_maker.cc @@ -47,7 +47,7 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { REPORT_INNER_ERROR("E19999", "Node:%s(%s) cond subgraph index:%d or body subgraph index:%d name is empty, " "check invalid", while_desc->GetName().c_str(), while_desc->GetType().c_str(), kCondBranchIndex, kBodyBranchIndex); - GELOGE(INTERNAL_ERROR, "Node: %s has invalid subgraph, cond branch: %s, body branch: %s.", + GELOGE(INTERNAL_ERROR, "[Check][Param] Node: %s has invalid subgraph, cond branch: %s, body branch: %s.", while_desc->GetName().c_str(), cond_name.c_str(), body_name.c_str()); return FAILED; } @@ -72,14 +72,14 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { if (cond_stream_active == nullptr) { REPORT_CALL_ERROR("E19999", "Add StreamActive node in graph:%s fail", cond_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active failed.", cond_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][StreamActive] in Subgraph:%s failed.", cond_graph->GetName().c_str()); return FAILED; } if (AddLabelSetEnter(cond_graph, cond_enter_name, cond_enter_index, cond_stream_active) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetEnter node in graph:%s fail", cond_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", cond_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSetEnter] in Subgraph:%s failed.", cond_graph->GetName().c_str()); return FAILED; } @@ -87,28 +87,28 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { if (body_stream_active == nullptr) { REPORT_CALL_ERROR("E19999", "Add 
StreamActive node in graph:%s fail", body_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active failed.", body_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][StreamActive] in Subgraph:%s failed.", body_graph->GetName().c_str()); return FAILED; } if (AddLabelSetEnter(body_graph, body_enter_name, body_enter_index, body_stream_active) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetEnter node in graph:%s fail", body_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", body_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSetEnter] in Subgraph:%s failed.", body_graph->GetName().c_str()); return FAILED; } if (AddLabelGotoLeave(body_graph, goto_leave_name, cond_enter_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelGotoLeave node in graph:%s fail", body_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label goto failed.", body_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelGotoLeave] in Subgraph:%s failed.", body_graph->GetName().c_str()); return FAILED; } if (AddLabelSetLeave(body_graph, body_leave_name, body_leave_index) == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSetLeave node in graph:%s fail", body_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label set failed.", body_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSetLeave] in Subgraph:%s failed.", body_graph->GetName().c_str()); return FAILED; } @@ -126,14 +126,14 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { if (switch_node == nullptr) { REPORT_CALL_ERROR("E19999", "Add LabelSwitchLeave node in graph:%s fail", cond_graph->GetName().c_str()); - GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", cond_graph->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Add][LabelSwitchLeave] in Subgraph:%s failed.", cond_graph->GetName().c_str()); return FAILED; } // link Data input. 
const auto &all_in_data = cond_out_node->GetAllInDataAnchors(); if (all_in_data.size() != kCondOutputNum) { - GELOGE(FAILED, "Node: %s Cond sbugraph output size:%zu should equal size:%u.", + GELOGE(FAILED, "[Check][Param] Node: %s Cond sbugraph output size:%zu should equal size:%u.", switch_node->GetName().c_str(), all_in_data.size(), kCondOutputNum); return FAILED; } @@ -144,7 +144,7 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { REPORT_CALL_ERROR("E19999", "Add ctrl edge from %s to %s in graph:%s fail", in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetName().c_str(), switch_node->GetName().c_str(), cond_graph->GetName().c_str()); - GELOGE(FAILED, "Node: %s Add pred data input failed.", switch_node->GetName().c_str()); + GELOGE(FAILED, "[Add][PredDataInput] to Node:%s failed.", switch_node->GetName().c_str()); return FAILED; } diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index ff1b2178..e4904614 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -75,6 +75,16 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptrIsSpecificStream()) { + GELOGI("No need to start a new thread to run model in specific scene."); + rt_ret = rtDeviceReset(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", + GetContext().DeviceId(), rt_ret); + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + } + return SUCCESS; + } ret = model_manager->Start(model_id); if (ret != SUCCESS) { if (model_manager->Unload(model_id) != SUCCESS) { diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc index c92e2a04..9821aa73 100644 --- a/ge/graph/load/model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -51,18 +51,16 @@ CpuTaskInfo::~CpuTaskInfo() { /// Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { if ((args_ != nullptr) || (args_size_ > 0)) { - REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0," - "check invalid", args_size_); - GELOGE(FAILED, "Task already initialized, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); + GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); return FAILED; } args_size_ = sizeof(MbufQueueInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save in_mbuf. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } in_mbuf = reinterpret_cast(args_) + sizeof(MbufQueueInfo); @@ -73,9 +71,8 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { queue_info.in_mbuf = in_mbuf; // Placeholder, input mbuf addr will save to this place. 
status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } @@ -86,15 +83,14 @@ Status CpuTaskModelDequeue::Distribute() { if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," "check invalid", args_size_); - GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_); + GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); return FAILED; } rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", - status); - GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status); + REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); + GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); return RT_ERROR_TO_GE_STATUS(status); } @@ -111,9 +107,8 @@ Status CpuTaskModelDequeue::Distribute() { /// Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, const map &outside_addrs) { if ((args_ != nullptr) || (args_size_ > 0)) { - REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0," - "check invalid", args_size_); - GELOGE(FAILED, "Task already initialized, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); + GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); return FAILED; } @@ -127,7 +122,7 @@ Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, const map dst_addrs; for (const auto &addrs : outside_addrs) { const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); - GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); + GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "[Check][Param] not set outside_addrs"); std::map> virtual_args_addrs = addrs_mapping_list[0]; for (const auto &virtual_args_addr : virtual_args_addrs) { addr_map_info.addr_num += virtual_args_addr.second.size(); @@ -143,13 +138,21 @@ Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, const map &mbuf_list, const map 0)) { - REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0," - "check invalid", args_size_); - GELOGE(FAILED, "Task already initialized, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); + GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); return FAILED; } args_size_ = sizeof(PrepareOutputInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save out_mbuf. 
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } out_mbuf = reinterpret_cast(args_) + sizeof(PrepareOutputInfo); @@ -240,9 +242,8 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb prepare.out_mbuf = out_mbuf; // Placeholder, output mbuf addr will save to this place. status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } @@ -253,15 +254,14 @@ Status CpuTaskPrepareOutput::Distribute() { if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," "check invalid", args_size_); - GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_); + GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); return FAILED; } rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", - status); - GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status); + REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); + GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); return RT_ERROR_TO_GE_STATUS(status); } @@ -278,9 +278,8 @@ Status CpuTaskPrepareOutput::Distribute() { /// Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { if ((args_ != nullptr) || (args_size_ > 0)) { - REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0," - "check invalid", args_size_); - GELOGE(FAILED, "Task already initialized, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); + GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); return FAILED; } @@ -288,9 +287,8 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { args_size_ = sizeof(MbufQueueInfo); rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) @@ -300,9 +298,8 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, 
uintptr_t out_mbuf) { queue_info.in_mbuf = out_mbuf; status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } @@ -313,15 +310,14 @@ Status CpuTaskModelEnqueue::Distribute() { if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_ is 0 or stream_ is nullptr, arg_size:%u," "check invalid", args_size_); - GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_); + GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); return FAILED; } rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", - status); - GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status); + REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); + GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); return RT_ERROR_TO_GE_STATUS(status); } @@ -338,7 +334,7 @@ Status CpuTaskModelEnqueue::Distribute() { Status CpuTaskActiveEntry::Init(rtStream_t stream) { if (stream == nullptr) { REPORT_INNER_ERROR("E19999", "Param stream is nullptr, check invalid"); - GELOGE(FAILED, "Task active stream not valid"); + GELOGE(FAILED, "[Check][Param] Task active stream not valid"); return FAILED; } @@ -348,17 +344,15 @@ Status CpuTaskActiveEntry::Init(rtStream_t stream) { Status CpuTaskActiveEntry::Distribute() { if ((active_stream_ == nullptr) || (stream_ == nullptr)) { - REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, " - "check invalid"); - GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, check invalid"); + GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); return FAILED; } rtError_t ret = rtStreamActive(active_stream_, stream_); if (ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", - ret); - GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", ret); + GELOGE(RT_FAILED, "[Call][RtStreamActive] failed, ret:0x%X", ret); return RT_ERROR_TO_GE_STATUS(ret); } @@ -374,27 +368,24 @@ Status CpuTaskActiveEntry::Distribute() { /// Status CpuTaskWaitEndGraph::Init(uint32_t model_id) { if ((args_ != nullptr) || (args_size_ > 0)) { - REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0," - "check invalid", args_size_); - GELOGE(FAILED, "Task already initialized, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); + GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); return FAILED; } args_size_ = sizeof(model_id); rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status 
!= RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } @@ -405,15 +396,14 @@ Status CpuTaskWaitEndGraph::Distribute() { if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," "check invalid", args_size_); - GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_); + GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); return FAILED; } rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", - status); - GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status); + REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); + GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); return RT_ERROR_TO_GE_STATUS(status); } @@ -429,27 +419,24 @@ Status CpuTaskWaitEndGraph::Distribute() { /// Status CpuTaskModelRepeat::Init(uint32_t model_id) { if ((args_ != nullptr) || (args_size_ > 0)) { - REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0," - "check invalid", args_size_); - GELOGE(FAILED, "Task already initialized, size: %u", args_size_); + REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); + GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); return FAILED; } args_size_ = sizeof(model_id); rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); return RT_ERROR_TO_GE_STATUS(status); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, status); - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); return 
RT_ERROR_TO_GE_STATUS(status); } @@ -460,15 +447,14 @@ Status CpuTaskModelRepeat::Distribute() { if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," "check invalid", args_size_); - GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_); + GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); return FAILED; } rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", - status); - GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); + GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); return RT_ERROR_TO_GE_STATUS(status); } diff --git a/ge/graph/load/model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc index ac256526..c96b3885 100644 --- a/ge/graph/load/model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -127,7 +127,7 @@ void DataDumper::ReleaseDevMem(void **ptr) noexcept { if (*ptr != nullptr) { rtError_t rt_ret = rtFree(*ptr); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtFree] failed, ret:0x%X", rt_ret); } *ptr = nullptr; @@ -144,7 +144,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr &node) { if (node != nullptr) { auto input_op_desc = node->GetOpDesc(); if (input_op_desc == nullptr) { - GELOGE(PARAM_INVALID, "input op desc is null."); + GELOGE(PARAM_INVALID, "[Get][OpDesc] input op desc is null."); return; } @@ -153,7 +153,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr &node) { ge::NodePtr dst_node = dst_in_data_anchor->GetOwnerNode(); auto op_desc = dst_node->GetOpDesc(); if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "input op desc is null."); + GELOGE(PARAM_INVALID, "[Get][OpDesc] input op desc is null."); return; } @@ -179,7 +179,7 @@ void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_de void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr &op_desc, uintptr_t args) { if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "Opdesc is nullptr"); + GELOGE(PARAM_INVALID, "[Check][Param] Opdesc is nullptr"); return; } @@ -190,14 +190,14 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s InnerInputMapping &inner_input_mapping = iter.first->second; auto &data_op = inner_input_mapping.data_op; if (data_op == nullptr) { - GELOGE(PARAM_INVALID, "data_op is null."); + GELOGE(PARAM_INVALID, "[Check][Param] data_op is null."); return; } auto input_tensor = op_desc->GetInputDescPtr(inner_input_mapping.input_anchor_index); if (input_tensor == nullptr) { - GELOGE(PARAM_INVALID, "input_tensor is null, index: %d, size: %zu.", inner_input_mapping.input_anchor_index, - op_desc->GetInputsSize()); + GELOGE(PARAM_INVALID, "[Get][InputDescPtr] input_tensor in op:%s is null, index:%d, size:%zu.", + op_desc->GetName().c_str(), inner_input_mapping.input_anchor_index, op_desc->GetInputsSize()); return; } @@ -205,7 +205,8 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) { GELOGI("Get aipp 
data size according to attr is %ld", data_size); } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) { - GELOGE(PARAM_INVALID, "Get input size filed"); + GELOGE(PARAM_INVALID, "[Get][InputSize] failed in %s, index:%u", + op_desc->GetName().c_str(), inner_input_mapping.input_anchor_index); return; } @@ -249,7 +250,7 @@ Status DataDumper::GenerateOutput(toolkit::aicpu::dump::Output &output, int64_t output_size = 0; if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Get tensor size fail"); - GELOGE(PARAM_INVALID, "Get output size filed"); + GELOGE(PARAM_INVALID, "[Get][OutputSize] failed"); return PARAM_INVALID; } GELOGD("Get output size in dump is %ld", output_size); @@ -274,34 +275,37 @@ Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_inf size_t index; // parser and find which node's input or output tensor desc is chosen for dump info if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) { - GELOGE(PARAM_INVALID, "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].", + GELOGE(PARAM_INVALID, "[Check][Param] Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].", inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str()); return PARAM_INVALID; } GE_CHECK_NOTNULL(compute_graph_); auto replace_node = compute_graph_->FindNode(dump_op_name); GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr, - "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s]," - " cannot find redirect node[%s].", + "[Check][Param] Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF " + "attr[%s], cannot find redirect node[%s].", inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(), dump_op_name.c_str()); auto replace_opdesc = replace_node->GetOpDesc(); GE_CHECK_NOTNULL(replace_opdesc); auto iter = ref_info_.find(replace_opdesc); GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(), - "Op [%s] output desc[%zu] cannot find any saved redirect node[%s]'s info.", + "[Check][Param] Op [%s] output desc[%zu] cannot find " + "any saved redirect node[%s]'s info.", inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str()); GE_CHECK_NOTNULL(iter->second); auto addr = reinterpret_cast(iter->second); if (input_or_output == kDumpInput) { const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc(); addr += kAddrLen * index; - GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index), "Generate output failed"); + GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index), + "[Generate][Output] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index); } else if (input_or_output == kDumpOutput) { const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc(); const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size(); addr += (index + replace_input_size) * kAddrLen; - GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index), "Generate output failed"); + GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index), + "[Generate][Output] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index); } GELOGD("Op [%s] output desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]", inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index); @@ -314,9 +318,9 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo 
&inner_dump_info, tool std::vector v_memory_type; bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type); GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != output_descs.size()), - "DumpOutputWithTask[%s], output size[%zu], output memory type size[%zu]", - inner_dump_info.op->GetName().c_str(), output_descs.size(), - v_memory_type.size()); + "[Check][Param] DumpOutputWithTask[%s], output size[%zu], " + "output memory type size[%zu]", inner_dump_info.op->GetName().c_str(), + output_descs.size(), v_memory_type.size()); size_t no_need_dump_output_num = 0; for (size_t i = 0; i < output_descs.size(); ++i) { @@ -338,16 +342,16 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, tool "output which is need to dump.", inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), no_need_dump_output_num, output_descs.size(), output_addrs.size()); - GELOGE(PARAM_INVALID, "The number of output does not match in op:%s(%s). The size[%zu] of output which is no need" - " to dump should not greater than the size[%zu] of output descs minus the size[%zu] of output which is " - "need to dump.", inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), - no_need_dump_output_num, output_descs.size(), output_addrs.size()); + GELOGE(PARAM_INVALID, "[Check][Param] The number of output does not match in op:%s(%s). The size[%zu] of output " + "which is no need to dump should not greater than the size[%zu] of output descs minus the size[%zu] " + "of output which is need to dump.", inner_dump_info.op->GetName().c_str(), + inner_dump_info.op->GetType().c_str(), no_need_dump_output_num, output_descs.size(), output_addrs.size()); return PARAM_INVALID; } // check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) { - GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed"); + GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "[Dump][RefOutput] failed"); task.mutable_output()->Add(std::move(output)); } else { if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) { @@ -356,7 +360,7 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, tool if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Get output tensor size fail in op:%s(%s), index:%zu", inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i); - GELOGE(PARAM_INVALID, "Get output size failed."); + GELOGE(PARAM_INVALID, "[Get][OutputSize] failed in %s, index:%zu", inner_dump_info.op->GetName().c_str(), i); return PARAM_INVALID; } GELOGI("Get output size of l1_fusion_dump is %ld", output_size); @@ -364,7 +368,8 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, tool } else { const auto input_size = inner_dump_info.op->GetInputsSize(); auto addr = inner_dump_info.args + (i + input_size) * kAddrLen; - GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed"); + GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), + "[Generate][Output] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), i); task.mutable_output()->Add(std::move(output)); } } @@ -383,11 +388,11 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, toolkit::aic 
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index); const std::vector output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op); if (output_tensor == nullptr) { - REPORT_INNER_ERROR("E19999", "output_desc tensor is nullptr in op:%s(%s), index:%u, " - "check invalid", + REPORT_INNER_ERROR("E19999", "output_desc tensor is nullptr in op:%s(%s), index:%u, check invalid", inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), inner_dump_info.output_anchor_index); - GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index, + GELOGE(PARAM_INVALID, "[Get][OutputDescPtr] output_tensor is null in op:%s, index:%d, size:%zu.", + inner_dump_info.op->GetName().c_str(), inner_dump_info.output_anchor_index, inner_dump_info.op->GetOutputsSize()); return PARAM_INVALID; } @@ -413,7 +418,9 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, toolkit::aic REPORT_INNER_ERROR("E19999", "output_anchor_index:%u >= output addr size:%zu in op:%s(%s), " "check invalid", inner_dump_info.output_anchor_index, output_addrs.size(), inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str()); - GELOGE(FAILED, "Index is out of range."); + GELOGE(FAILED, "[Check][Param] output_anchor_index:%u >= output addr size:%zu in op:%s(%s)", + inner_dump_info.output_anchor_index, output_addrs.size(), + inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str()); return FAILED; } auto data_addr = inner_dump_info.args + kAddrLen * static_cast(inner_dump_info.input_anchor_index); @@ -440,7 +447,7 @@ Status DataDumper::GenerateInput(toolkit::aicpu::dump::Input &input, const OpDes GELOGI("Get aipp input size according to attr is %ld", input_size); } else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Get tensor size fail"); - GELOGE(PARAM_INVALID, "Get input size filed"); + GELOGE(PARAM_INVALID, "[Get][TensorSize] failed"); return PARAM_INVALID; } GELOGD("Get input size in dump is %ld", input_size); @@ -456,34 +463,37 @@ Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info size_t index; // parser and find which node's input or output tensor desc is chosen for dump info if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) { - GELOGE(PARAM_INVALID, "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].", + GELOGE(PARAM_INVALID, "[Call][ParseNameIndex] Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].", inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str()); return PARAM_INVALID; } GE_CHECK_NOTNULL(compute_graph_); auto replace_node = compute_graph_->FindNode(dump_op_name); GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr, - "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s]," - " cannot find redirect node[%s].", + "[Check][Param] Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF " + "attr[%s], cannot find redirect node[%s].", inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(), dump_op_name.c_str()); auto replace_opdesc = replace_node->GetOpDesc(); GE_CHECK_NOTNULL(replace_opdesc); auto iter = ref_info_.find(replace_opdesc); GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(), - "Op [%s] input desc[%zu] cannot find any saved redirect node[%s]'s info.", + "[Check][Param] Op [%s] input desc[%zu] cannot find " + "any saved redirect 
node[%s]'s info.", inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str()); GE_CHECK_NOTNULL(iter->second); auto addr = reinterpret_cast(iter->second); if (input_or_output == kDumpInput) { const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc(); addr += kAddrLen * index; - GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index), "Generate input failed"); + GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index), + "[Generate][Input] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index); } else if (input_or_output == kDumpOutput) { const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc(); const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size(); addr += (index + replace_input_size) * kAddrLen; - GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index), "Generate input failed"); + GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index), + "[Generate][Input] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index); } GELOGD("Op [%s] input desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]", inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index); @@ -498,14 +508,14 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, toolkit::aicp REPORT_INNER_ERROR("E19999", "input_desc size:%zu != input addr size:%zu in op:%s(%s)", input_descs.size(), input_addrs.size(), inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str()); - GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(), - inner_dump_info.op->GetName().c_str(), input_descs.size()); + GELOGE(PARAM_INVALID, "[Check][Param] Invalid input desc addrs size %zu, op %s has %zu input desc.", + input_addrs.size(), inner_dump_info.op->GetName().c_str(), input_descs.size()); return PARAM_INVALID; } std::vector v_memory_type; bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type); GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != input_descs.size()), - "DumpInput[%s], input size[%zu], input memory type size[%zu]", + "[Check][Param] DumpInput[%s], input size[%zu], input memory type size[%zu]", inner_dump_info.op->GetName().c_str(), input_descs.size(), v_memory_type.size()); for (size_t i = 0; i < input_descs.size(); ++i) { @@ -513,7 +523,8 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, toolkit::aicp std::string node_name_index; // check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) { - GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed"); + GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), + "[Dump][RefInput] failed, node name index:%s", node_name_index.c_str()); task.mutable_input()->Add(std::move(input)); // normal dump without attr } else { @@ -525,14 +536,16 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, toolkit::aicp } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { REPORT_CALL_ERROR("E19999", "Get input tensor size fail in op:%s(%s), index:%zu", inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i); - GELOGE(PARAM_INVALID, "Get input size failed."); + GELOGE(PARAM_INVALID, 
"[Get][InputTensorSize] fail in op:%s(%s), index:%zu", + inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i); return PARAM_INVALID; } GELOGI("Get input size of l1_fusion_dump is %ld", input_size); GenerateOpBuffer(input_size, task); } else { auto addr = inner_dump_info.args + kAddrLen * i; - GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed"); + GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), + "[Generate][Input] failed for op:%s, index:%zu", inner_dump_info.op->GetName().c_str(), i); task.mutable_input()->Add(std::move(input)); } } @@ -554,7 +567,7 @@ Status DataDumper::ExecuteLoadDumpInfo(toolkit::aicpu::dump::OpMappingInfo &op_m bool ret = op_mapping_info.SerializeToString(&proto_str); if (!ret || proto_size == 0) { REPORT_INNER_ERROR("E19999", "Serialize proto to string fail"); - GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); + GELOGE(PARAM_INVALID, "[Call][SerializeToString] failed, proto size %zu.", proto_size); return PARAM_INVALID; } @@ -565,25 +578,23 @@ Status DataDumper::ExecuteLoadDumpInfo(toolkit::aicpu::dump::OpMappingInfo &op_m rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - proto_size, rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", proto_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", proto_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size) rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - proto_size, rt_ret); - GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", proto_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", proto_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, length:%zu, ret:0x%X", proto_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtDatadumpInfoLoad] failed, length:%zu, ret:0x%X", proto_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -598,7 +609,7 @@ Status DataDumper::ExecuteUnLoadDumpInfo(toolkit::aicpu::dump::OpMappingInfo &op bool ret = op_mapping_info.SerializeToString(&proto_str); if (!ret || proto_size == 0) { REPORT_INNER_ERROR("E19999", "Serialize proto to string fail"); - GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); + GELOGE(PARAM_INVALID, "[Call][SerializeToString] failed, proto size %zu.", proto_size); return PARAM_INVALID; } @@ -609,25 +620,23 @@ Status DataDumper::ExecuteUnLoadDumpInfo(toolkit::aicpu::dump::OpMappingInfo &op rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - proto_size, rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, 
ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", proto_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", proto_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size) rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - proto_size, rt_ret); - GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", proto_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", proto_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, length:%zu, ret:0x%X", proto_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtDatadumpInfoLoad] failed, length:%zu, ret:0x%X", proto_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } load_flag_ = false; @@ -654,7 +663,7 @@ Status DataDumper::LoadDumpInfo() { SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); auto ret = BuildTaskInfo(op_mapping_info); if (ret != SUCCESS) { - GELOGE(ret, "Build task info failed"); + GELOGE(ret, "[Build][TaskInfo] failed, ret:%u, path:%s", ret, dump_path.c_str()); return ret; } @@ -663,9 +672,9 @@ Status DataDumper::LoadDumpInfo() { SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { - auto ret = ExecuteLoadDumpInfo(op_mapping_info); + ret = ExecuteLoadDumpInfo(op_mapping_info); if (ret != SUCCESS) { - GELOGE(ret, "Execute load dump info failed"); + GELOGE(ret, "[Execute][LoadDumpInfo] failed, ret:%u", ret); return ret; } } @@ -686,7 +695,7 @@ Status DataDumper::BuildTaskInfo(toolkit::aicpu::dump::OpMappingInfo &op_mapping if (dump_properties_.GetDumpMode() == kDumpOutput) { Status ret = DumpOutput(op_iter, task); if (ret != SUCCESS) { - GELOGE(ret, "Dump output failed"); + GELOGE(ret, "[Dump][Output] failed, ret:%u, op:%s", ret, op_desc->GetName().c_str()); return ret; } op_mapping_info.mutable_task()->Add(std::move(task)); @@ -696,7 +705,7 @@ Status DataDumper::BuildTaskInfo(toolkit::aicpu::dump::OpMappingInfo &op_mapping if (op_iter.is_task) { Status ret = DumpInput(op_iter, task); if (ret != SUCCESS) { - GELOGE(ret, "Dump input failed"); + GELOGE(ret, "[Dump][Input] failed, ret:%u, op:%s", ret, op_desc->GetName().c_str()); return ret; } } @@ -706,13 +715,13 @@ Status DataDumper::BuildTaskInfo(toolkit::aicpu::dump::OpMappingInfo &op_mapping if (dump_properties_.GetDumpMode() == kDumpAll || is_op_debug_) { auto ret = DumpOutput(op_iter, task); if (ret != SUCCESS) { - GELOGE(ret, "Dump output failed when in dumping all"); + GELOGE(ret, "[Dump][Output] failed when in dumping all, ret:%u, op:%s", ret, op_desc->GetName().c_str()); return ret; } if (op_iter.is_task) { ret = DumpInput(op_iter, task); if (ret != SUCCESS) { - GELOGE(ret, "Dump input failed when in dumping all"); + GELOGE(ret, "[Dump][Input] failed when in dumping all, ret:%u, op:%s", ret, op_desc->GetName().c_str()); return ret; } } @@ -795,7 
+804,7 @@ Status DataDumper::UnloadDumpInfo() { } auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); if (ret != SUCCESS) { - GELOGE(ret, "Execute unload dump info failed"); + GELOGE(ret, "[Execute][UnLoadDumpInfo] failed, ret:%d", ret); return ret; } return SUCCESS; diff --git a/ge/graph/load/model_manager/data_inputer.cc b/ge/graph/load/model_manager/data_inputer.cc index 0fe75465..d286b9b4 100755 --- a/ge/graph/load/model_manager/data_inputer.cc +++ b/ge/graph/load/model_manager/data_inputer.cc @@ -24,7 +24,7 @@ namespace ge { domi::Status InputDataWrapper::Init(const InputData &input, const OutputData &output) { - GE_CHK_BOOL_RET_STATUS(!is_init, domi::INTERNAL_ERROR, "InputDataWrapper is re-initialized"); + GE_CHK_BOOL_RET_STATUS(!is_init, domi::INTERNAL_ERROR, "[Check][Param] InputDataWrapper is re-initialized"); input_ = input; output_ = output; diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 81edd40b..b52796c8 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -40,7 +40,7 @@ #include "graph/load/model_manager/cpu_queue_schedule.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/tbe_handle_store.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/manager/util/debug.h" @@ -60,6 +60,8 @@ #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" #include "graph/common/omg_util.h" +#include "graph/build/memory/block_mem_assigner.h" +#include "graph/manager/session_scope_mem_allocator.h" // create std::thread, catch exceptions using try/catch #define CREATE_STD_THREAD(thread_id, func, args) \ @@ -69,8 +71,9 @@ } catch (const std::system_error &e) { \ REPORT_CALL_ERROR("E19999", "Create thread fail, ecode:%d, emsg:%s", \ e.code().value(), e.what()); \ - GELOGE(FAILED, "Caught system_error with code:%d, meaning:%s", e.code().value(), e.what()); \ - GELOGE(FAILED, "Thread creat FAIL, Please check the left resource!"); \ + GELOGE(FAILED, "[Create][Thread] Caught system_error with code:%d, meaning:%s", \ + e.code().value(), e.what()); \ + GELOGE(FAILED, "[Create][Thread] FAIL, Please check the left resource!"); \ return FAILED; \ } \ } while (0) @@ -122,6 +125,8 @@ const char* const kInferEndTime = "infer_end_time"; const char* const kOutputBeginTime = "output_start_time"; const char* const kOutputEndTime = "output_end_time"; const uint32_t kStringHeadElems = 2; +const uint32_t kPlacementHostData = 0; +const size_t kAlignment = 64; inline bool IsDataOp(const std::string &node_type) { return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); @@ -165,7 +170,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrRelease(), "Release task failed."); + GE_CHK_STATUS(task->Release(), "[Release][Task] failed, model id:%u.", model_id_); } } cpu_task_list_.clear(); for (const auto &task : task_list_) { if (task != nullptr) { - GE_CHK_STATUS(task->Release(), "Release task failed."); + GE_CHK_STATUS(task->Release(), "[Release][Task] failed, model id:%u.", model_id_); } } @@ -331,9 +335,8 @@ void DavinciModel::Shrink() { Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) { if (is_weight_mem_has_inited_) { - REPORT_INNER_ERROR("E19999", "Call InitWeightMem more than 
once, model_id:%u, check invalid", - model_id_); - GELOGE(FAILED, "call InitWeightMem more than once."); + REPORT_INNER_ERROR("E19999", "Call InitWeightMem more than once, model_id:%u, check invalid", model_id_); + GELOGE(FAILED, "[Check][Param] call InitWeightMem more than once, model id:%u.", model_id_); return FAILED; } is_weight_mem_has_inited_ = true; @@ -345,7 +348,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh if ((weight_ptr != nullptr) && (weight_size < weights_size)) { REPORT_INNER_ERROR("E19999", "Param weight_ptr is nullptr or ge_model.weight.size:%zu < param weights_size:%zu, " "model_id:%u, check invalid", weight_size, weights_size, model_id_); - GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); + GELOGE(FAILED, "[Check][Param] Invalid mem param: weight_size=%zu totalsize=%zu, model_id:%u.", + weight_size, weights_size, model_id_); return FAILED; } @@ -360,7 +364,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh if (weights_mem_base_ == nullptr) { REPORT_CALL_ERROR("E19999", "MallocWeightsMem fail, weights_size:%zu, model_id:%u, check invalid", weights_size, model_id_); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Alloc][Memory] for weight failed. size:%zu, model_id:%u", + weights_size, model_id_); return ACL_ERROR_GE_MEMORY_ALLOCATION; } is_inner_weight_base_ = true; @@ -378,25 +383,23 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (is_feature_map_mem_has_inited_) { - REPORT_INNER_ERROR("E19999", "Call InitFeatureMapMem more than once, model_id:%u, check invalid", - model_id_); - GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once"); + REPORT_INNER_ERROR("E19999", "Call InitFeatureMapMem more than once, model_id:%u, check invalid", model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] call InitFeatureMapMem more than once, model_id:%u", model_id_); return PARAM_INVALID; } is_feature_map_mem_has_inited_ = true; std::size_t data_size = TotalMemSize(); - std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { REPORT_INNER_ERROR("E19999", "Param dev_ptr is nullptr or mem_size:%zu < ge_model.mem_size:%zu, " "model_id:%u, check invalid", mem_size, TotalMemSize(), model_id_); - GELOGE(PARAM_INVALID, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); + GELOGE(PARAM_INVALID, "[Check][Param] Invalid mem param: mem_size=%zu totalsize=%zu, model_id:%u.", + mem_size, TotalMemSize(), model_id_); return PARAM_INVALID; } mem_base_ = static_cast(dev_ptr); - p2p_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { @@ -404,7 +407,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (mem_base_ == nullptr) { REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, data_size:%zu, model_id:%u, check invalid", data_size, model_id_); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Alloc][Memory] for feature map failed. 
size:%zu, model_id:%u", + data_size, model_id_); return ACL_ERROR_GE_MEMORY_ALLOCATION; } GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", @@ -417,23 +421,13 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { is_inner_mem_base_ = true; } - if (p2p_data_size != 0) { - p2p_mem_base_ = MallocP2PMem(p2p_data_size); - if (p2p_mem_base_ == nullptr) { - REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, p2p_data_size:%zu, model_id:%u, check invalid", - p2p_data_size, model_id_); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - p2p_mem_base_, p2p_data_size); - is_inner_p2p_mem_base_ = true; + if (!runtime_param_.memory_infos.empty()) { + GE_CHK_STATUS_RET(MallocExMem(), "MallocExMem failed."); } - GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); + GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; - runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; return SUCCESS; } @@ -445,7 +439,7 @@ Status DavinciModel::InitVariableMem() { if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "MallocVarMemory fail, var_size:%zu, model_id:%u, check invalid", TotalVarMemSize(), model_id_); - GELOGE(ret, "Malloc variable memory failed."); + GELOGE(ret, "[Malloc][VarMemory] failed, var_size:%zu, model_id:%u", TotalVarMemSize(), model_id_); return ret; } var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); @@ -459,7 +453,6 @@ Status DavinciModel::InitVariableMem() { void DavinciModel::InitRuntimeParams() { int64_t value = 0; bool ret; - MemInfo p2p_mem_info; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); runtime_param_.mem_size = ret ? (uint64_t)value : 0; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); @@ -484,16 +477,18 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.var_size = ret ? (uint64_t)value : 0; session_id_ = runtime_param_.session_id; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); - p2p_mem_info.memory_size = ret ? (uint64_t)value : 0; + MemInfo p2p_mem_info; + p2p_mem_info.memory_size = static_cast(ret ? value : 0); + p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR; + p2p_mem_info.memory_key = "_p"; runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); - GELOGI( - "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " - "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " - "memory_size:%lu, weight_size:%lu, var_size:%lu", - runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, - runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, - runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); + ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, value); + MemInfo session_scope_mem_info; + session_scope_mem_info.memory_size = static_cast(ret ? 
value : 0); + runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info); + + GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str()); } void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { @@ -551,25 +546,27 @@ Status DavinciModel::DoTaskSink() { } GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); - GELOGI("Do task_sink. AiCpu deploy type is: %x.", deploy_type_); + GELOGI("Do task sink. AiCpu deploy type is: %x.", deploy_type_); - GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); + GE_CHK_STATUS_RET(BindModelStream(), "[Bind][ModelStream] failed, model_id:%u.", model_id_); if (known_node_) { - GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node's args failed"); + GE_CHK_STATUS_RET(MallocKnownArgs(), "[Malloc][KnownArgs] failed, model_id:%u.", model_id_); } - GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed"); + GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "[Init][TaskInfo] failed, model_id:%u.", model_id_); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), + "[Launch][CustAicpuSo] failed, model_id:%u.", model_id_); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), + "[Check][AicpuOpList] failed, model_id:%u.", model_id_); - GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed"); + GE_CHK_STATUS_RET(InitEntryTask(), "[Init][EntryTask] failed, model_id:%u.", model_id_); - GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed"); + GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "[Init][L1DataDumperArgs] failed, model_id:%u.", model_id_); - GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed"); + GE_CHK_STATUS_RET(DistributeTask(), "[Distribute][Task] failed, model_id:%u.", model_id_); GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); @@ -582,12 +579,11 @@ Status DavinciModel::SetTSDevice() { int64_t value = 0; bool ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_CORE_TYPE, value); uint32_t core_type = ret ? 
static_cast(value) : 0; - GELOGD("SetTSDevice: %u.", core_type); + GELOGD("Set TSDevice: %u.", core_type); rtError_t rt_ret = rtSetTSDevice(core_type); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtSetTSDevice failed, core_type:%u, model_id:%u", - core_type, model_id_); - GELOGE(RT_FAILED, "SetTSDevice failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtSetTSDevice failed, core_type:%u, model_id:%u", core_type, model_id_); + GELOGE(RT_FAILED, "[Set][TSDevice] failed, core_type:%u, model_id:%u, ret: 0x%X", core_type, model_id_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; @@ -598,7 +594,7 @@ Status DavinciModel::OpDebugRegister() { uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode(); auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_); if (ret != SUCCESS) { - GELOGE(ret,"Register known shape op debug failed, ret: 0x%X",ret); + GELOGE(ret,"[Call][RegisterDebugForModel] Register known shape op debug failed, ret: 0x%X", ret); return ret; } is_op_debug_reg_ = true; @@ -619,17 +615,17 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size // validating params GELOGI("Priority is %d.", priority_); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, - "Priority must between 0-7, now is %d.", priority_); - GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); + "[Check][Param] Priority must between 0-7, now is %d.", priority_); + GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "[Check][Param] GeModel is null."); Graph graph = ge_model_->GetGraph(); ComputeGraphPtr compute_graph = GraphUtils::GetComputeGraph(graph); - GE_CHK_BOOL_RET_STATUS(compute_graph != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr."); + GE_CHK_BOOL_RET_STATUS(compute_graph != nullptr, INTERNAL_ERROR, "[Get][ComputeGraph] failed, ret is nullptr."); // Initializing runtime_param_ InitRuntimeParams(); // RTS set aicore or vectorcore - GE_CHK_STATUS_RET(SetTSDevice(), "SetTSDevice failed."); + GE_CHK_STATUS_RET(SetTSDevice(), "[Set][TSDevice] failed, graph:%s.", compute_graph->GetName().c_str()); version_ = ge_model_->GetVersion(); name_ = ge_model_->GetName(); @@ -683,12 +679,16 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size runtime_param_.graph_id = compute_graph->GetGraphID(); // op debug register - GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed"); + GE_CHK_STATUS_RET(OpDebugRegister(), "[Call][OpDebugRegister] failed, model_id:%u.", model_id_); GE_TIMESTAMP_START(TransAllVarData); - GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed"); + GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), + "[Call][TransAllVarData] failed, graph:%s, graph_id:%u.", + compute_graph->GetName().c_str(), runtime_param_.graph_id); GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData"); - GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed"); + GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), + "[Copy][VarData] failed, graph:%s, session_id:%lu, device_id:%u", + compute_graph->GetName().c_str(), session_id_, device_id_); GE_TIMESTAMP_START(InitModelMem); GELOGD("Known node is %d.", known_node_); @@ -696,7 +696,8 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size if 
(!known_node_) { GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); data_inputer_ = new (std::nothrow) DataInputer(); - GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr"); + GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, + "[Create][DataInputer] data_inputer_ is nullptr"); } fixed_mem_base_ = reinterpret_cast(mem_base_); GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem"); @@ -709,10 +710,10 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); } - GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed."); + GE_CHK_STATUS_RET(InitNodes(compute_graph), "[Init][Nodes] failed, graph:%s.", compute_graph->GetName().c_str()); GE_TIMESTAMP_START(DoTaskSink); - GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed."); + GE_CHK_STATUS_RET(DoTaskSink(), "[Call][DoTaskSink] failed, model_id:%u.", model_id_); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); /// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution, @@ -731,10 +732,10 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size // collect profiling for ge auto &profiling_manager = ProfilingManager::Instance(); if (profiling_manager.ProfilingModelLoadOn()) { - GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed"); + GE_CHK_STATUS_RET(InitModelProfile(), "[Init][ModelProfile] failed, model_id:%u.", model_id_); Status p_ret = ReportProfilingData(); if (p_ret != SUCCESS) { - GELOGE(p_ret, "Report profiling data failed."); + GELOGE(p_ret, "[Report][ProfilingData] failed, ret:%d, model_id:%u.", p_ret, model_id_); return p_ret; } } @@ -743,9 +744,22 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size return SUCCESS; } +// save specify attr values of op, such as ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES +// it will save more attr values in the future +void DavinciModel::SaveSpecifyAttrValues(const OpDescPtr &op_desc) { + std::vector value; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, value)) { + std::map> attr_name_to_value; + attr_name_to_value[ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES]= value; + op_name_to_attrs_[op_desc->GetName()] = attr_name_to_value; + GELOGD("Get op:%s attr:%s success.", op_desc->GetName().c_str(), ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES.c_str()); + } + return; +} + Status DavinciModel::ReportProfilingData() { ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); - GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); + GE_CHK_STATUS(SinkModelProfile(), "[Sink][ModelProfile] failed, model_id:%u.", model_id_); return SUCCESS; } @@ -846,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { const auto &node = nodes.at(i); const auto &op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - + SaveSpecifyAttrValues(op_desc); op_list_[op_desc->GetId()] = op_desc; GE_TIMESTAMP_RESTART(LoadTBEKernelBinToOpDesc); @@ -855,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { if (IsDataOp(op_desc->GetType())) { if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) { - GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Init][DataOp] failed, Name:%s", op_desc->GetName().c_str()); return 
PARAM_INVALID; } data_dumper_.SaveDumpInput(node); @@ -864,11 +878,11 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { if (op_desc->GetType() == NETOUTPUT) { if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) { - GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Init][NetOutput] failed, Name:%s", op_desc->GetName().c_str()); return PARAM_INVALID; } if (InitRealSizeAndShapeInfo(compute_graph, node) != SUCCESS) { - GELOGE(PARAM_INVALID, "Init real size and shape failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Init][RealSizeAndShapeInfo] failed, Name:%s", op_desc->GetName().c_str()); return PARAM_INVALID; } continue; @@ -876,7 +890,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { if (op_desc->GetType() == VARIABLE) { if (InitVariable(op_desc, variable_by_name) != SUCCESS) { - GELOGE(PARAM_INVALID, "Variable init failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Init][Variable] failed, Name:%s", op_desc->GetName().c_str()); return PARAM_INVALID; } continue; @@ -887,7 +901,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { auto it = op_desc_handle.find(op_desc->GetType()); if (it != op_desc_handle.end()) { if ((this->*it->second)(op_desc) != SUCCESS) { - GELOGE(PARAM_INVALID, "Node init failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Init][Node] failed, Name:%s", op_desc->GetName().c_str()); return PARAM_INVALID; } continue; @@ -909,9 +923,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { rtError_t rt_ret = rtMemcpy(addr, size, tensor_device_addrs.data(), size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret: 0x%X", - size, rt_ret); - GELOGE(RT_FAILED, "rtMemcpy error, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", size, rt_ret); GE_CHK_RT(rtFree(addr)); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -922,7 +935,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { if (IsTbeTask(op_desc)) { Status status = InitTbeHandle(op_desc); if (status != SUCCESS) { - GELOGE(status, "TBE init failed. %s", op_desc->GetName().c_str()); + GELOGE(status, "[Init][TbeHandle] failed. 
op:%s", op_desc->GetName().c_str()); return status; } } @@ -969,7 +982,7 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod return SUCCESS; } - GELOGI("Init Data node: %s.", op_desc->GetName().c_str()); + GELOGI("Init data node: %s.", op_desc->GetName().c_str()); auto data_index = data_op_index++; if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); @@ -991,8 +1004,9 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod "not equal or has empty, model_id:%u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size(), model_id_); - GELOGE(PARAM_INVALID, "Data[%s] init failed: output size is %zu, virtual_addr size is %zu, offset size is %zu.", - op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); + GELOGE(PARAM_INVALID, "[Check][Param] Data[%s] init failed: output size is %zu, " + "virtual_addr size is %zu, offset size is %zu.", op_desc->GetName().c_str(), output_size_list.size(), + virtual_addr_list.size(), output_offset_list.size()); return PARAM_INVALID; } @@ -1002,7 +1016,7 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod void *virtual_addr = virtual_addr_list[kDataIndex]; Status ret = zero_copy_offset.InitInputDataInfo(data_size, virtual_addr, op_desc, fusion_flag); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Init][DataInfo] of input_info %s failed.", op_desc->GetName().c_str()); return PARAM_INVALID; } if (input_outside_addrs.count(virtual_addr) == 0) { @@ -1027,14 +1041,19 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); for (auto &item : data_by_index) { const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); - GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); + GELOGD("Data node is: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); input_addrs_list_.emplace_back(output_addrs); - GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), "Init AIPP Info failed"); - GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); - GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); - GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); - GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); + GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), + "[Init][AippInfo] failed, node:%s", item.second->GetName().c_str()); + GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), + "[Init][AippType] failed, node:%s", item.second->GetName().c_str()); + GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), + "[Init][OrigInputInfo] failed, node:%s", item.second->GetName().c_str()); + GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), + "[Init][AippInputOutputDims] failed, node:%s", item.second->GetName().c_str()); + GE_CHK_STATUS_RET(InitInputDescInfo(item.second), + "[Init][InputDescInfo] failed, node:%s", item.second->GetName().c_str()); if (item.second->GetType() == AIPP_DATA_TYPE) { 
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); is_dynamic_aipp_ = true; @@ -1043,10 +1062,10 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ vector out_node_name; (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); - GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); + GELOGD("Output node size: %zu, out nodes name is: %zu", output_op_list.size(), out_node_name.size()); for (const auto &op_desc : output_op_list) { const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); - GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); + GELOGD("NetOutput node is: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); output_addrs_list_.emplace_back(input_addrs); bool getnext_sink_dynamic = false; @@ -1064,7 +1083,8 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ return INTERNAL_ERROR; } - GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); + GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), + "[Init][OutputDescInfo] failed, node:%s", op_desc->GetName().c_str()); } return SUCCESS; @@ -1108,24 +1128,24 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & const vector virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); const vector input_offset_list = op_desc->GetInputOffset(); GE_IF_BOOL_EXEC(input_offset_list.size() != virtual_addr_list.size(), - REPORT_INNER_ERROR( - "E19999", "Check data fail in op:%s(%s), input addr size:%zu input offset size:%zu " - "not equal, model_id:%u", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - virtual_addr_list.size(), input_offset_list.size(), model_id_); - GELOGE(PARAM_INVALID, "virtual_addr size should be equal to offset size."); + REPORT_INNER_ERROR("E19999", "Check data fail in op:%s(%s), input addr size:%zu " + "input offset size:%zu not equal, model_id:%u", op_desc->GetName().c_str(), + op_desc->GetType().c_str(), virtual_addr_list.size(), input_offset_list.size(), + model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] virtual_addr size:%zu should be equal to offset size:%zu, " + "op:%s(%s), model id:%u", virtual_addr_list.size(), input_offset_list.size(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return PARAM_INVALID;); if (input_size_list.empty() && virtual_addr_list.empty()) { GELOGI("NetOutput[%s] is empty.", op_desc->GetName().c_str()); return SUCCESS; } if (input_size_list.empty() || input_size_list.size() != virtual_addr_list.size()) { - REPORT_INNER_ERROR( - "E19999", "Check data fail in op:%s(%s), input_desc size:%zu input addr size:%zu not equal or has empty, " - "model_id:%u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), - input_size_list.size(), virtual_addr_list.size(), model_id_); - GELOGE(PARAM_INVALID, "NetOutput[%s] init failed: Input size is %zu, Input addr is %zu", op_desc->GetName().c_str(), - input_size_list.size(), virtual_addr_list.size()); + REPORT_INNER_ERROR("E19999", "Check data fail in op:%s(%s), input_desc size:%zu input addr size:%zu " + "not equal or has empty, model_id:%u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), + input_size_list.size(), virtual_addr_list.size(), model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] NetOutput[%s] init failed: Input size is %zu, Input addr is %zu", + op_desc->GetName().c_str(), 
input_size_list.size(), virtual_addr_list.size()); return PARAM_INVALID; } @@ -1141,8 +1161,9 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & for (size_t idx = 0; idx < input_count; ++idx) { ZeroCopyOffset zero_copy_offset; Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", - op_desc->GetName().c_str()); return PARAM_INVALID;); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(PARAM_INVALID, "[Init][DataInfo] of input_info %s failed.", op_desc->GetName().c_str()); + return PARAM_INVALID;); void *addr = virtual_addr_list.at(idx); int64_t input_offset = input_offset_list.at(idx); if (output_outside_addrs.count(addr) == 0) { @@ -1172,13 +1193,18 @@ Status DavinciModel::InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_gra GetAllGearsInfo(node); if (is_getnext_sink_dynamic_) { GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, - GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node."); return PARAM_INVALID;); + GELOGE(PARAM_INVALID, "[Get][Info] of getdynamicdims node:%s failed.", node->GetName().c_str()); + return PARAM_INVALID;); } if (is_online_infer_dynamic_) { GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(compute_graph, node) != SUCCESS, - GELOGE(PARAM_INVALID, "Failed to get gear and real out size info."); return PARAM_INVALID;); + GELOGE(PARAM_INVALID, "[Call][GetGearAndRealOutSizeInfo] failed, node:%s.", + node->GetName().c_str()); + return PARAM_INVALID;); GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(compute_graph, node) != SUCCESS, - GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); + GELOGE(PARAM_INVALID, "[Call][GetGearAndRealOutShapeInfo] failed, node:%s.", + node->GetName().c_str()); + return PARAM_INVALID;); } return SUCCESS; @@ -1222,9 +1248,9 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); if (peer_out_anchor == nullptr) { REPORT_INNER_ERROR("E19999", "In anchor index:%zu in op:%s(%s) peer anchor is nullptr, model_id:%u, check invalid", - get_dynamic_dims_index, - node->GetName().c_str(), node->GetType().c_str(), model_id_); - GELOGE(PARAM_INVALID, "Out anchor of getdynmaicdims node should not be nullptr."); + get_dynamic_dims_index, node->GetName().c_str(), node->GetType().c_str(), model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] In anchor index:%zu in op:%s(%s) peer anchor is nullptr, model_id:%u.", + get_dynamic_dims_index, node->GetName().c_str(), node->GetType().c_str(), model_id_); return PARAM_INVALID; } auto peer_node = peer_out_anchor->GetOwnerNode(); @@ -1238,16 +1264,18 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { REPORT_INNER_ERROR("E19999", "input_addr size:%zu or input_length size:%zu in op:%s(%s) has empty, model_id:%u " "check invalid", input_addr.size(), input_size.size(), node->GetName().c_str(), node->GetType().c_str(), model_id_); - GELOGE(PARAM_INVALID, "Not set output of %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] input_addr size:%zu or input_length size:%zu in op:%s(%s) is empty, " + "model_id:%u", input_addr.size(), input_size.size(), + node->GetName().c_str(), node->GetType().c_str(), model_id_); return PARAM_INVALID; } auto input_desc = node->GetOpDesc()->GetInputDescPtr(get_dynamic_dims_index); GE_CHECK_NOTNULL(input_desc); if 
(input_desc->GetShape().GetDims().empty()) { REPORT_INNER_ERROR("E19999", "input_desc_index:%zu in op:%s(%s) shape dim is empty, model_id:%u, check invalid", - get_dynamic_dims_index, - node->GetName().c_str(), node->GetType().c_str(), model_id_); - GELOGE(PARAM_INVALID, "Not set output desc shape of %s.", op_desc->GetName().c_str()); + get_dynamic_dims_index, node->GetName().c_str(), node->GetType().c_str(), model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] input_desc_index:%zu in op:%s(%s) shape dim is empty, model_id:%u", + get_dynamic_dims_index, node->GetName().c_str(), node->GetType().c_str(), model_id_); return PARAM_INVALID; } netoutput_last_input_addr_ = input_addr[get_dynamic_dims_index]; @@ -1273,7 +1301,7 @@ Status DavinciModel::GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, con GE_CHECK_NOTNULL(op_desc); if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { if (GetRealOutputSizeOfCase(graph, idx, peer_node) != SUCCESS) { - GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str()); + GELOGE(PARAM_INVALID, "[Get][RealOutputSizeOfCase] %s failed.", peer_node->GetName().c_str()); return PARAM_INVALID; } } @@ -1293,7 +1321,8 @@ Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_ if (subgraph == nullptr) { REPORT_INNER_ERROR("E19999", "Get name:%s subgraph in graph:%s fail, model_id:%u, check invalid", name.c_str(), graph->GetName().c_str(), model_id_); - GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s.", name.c_str()); + GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "[Get][Subgraph] %s in graph:%s failed, model_id:%u.", + name.c_str(), graph->GetName().c_str(), model_id_); return GE_GRAPH_EMPTY_SUBGRAPH; } for (auto &node : subgraph->GetDirectNode()) { @@ -1309,7 +1338,9 @@ Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_ "check invalid", batch_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), all_gears_info_.size(), model_id_); - GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); + GELOGE(PARAM_INVALID, "[Check][Param] Batch_index:%zu in op:%s(%s) > all_gears_info.size:%zu, " + "model_id:%u.", batch_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), + all_gears_info_.size(), model_id_); return PARAM_INVALID; } @@ -1321,7 +1352,8 @@ Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_ REPORT_INNER_ERROR("E19999", "Get input TensorSize in op:%s(%s) failed, input_index:%zu, model_id:%u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), input_index, model_id_); - GELOGE(FAILED, "Get tensor size in bytes failed."); + GELOGE(FAILED, "[Get][TensorSize] in op:%s(%s) failed, input_index:%zu, model_id:%u", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), input_index, model_id_); return FAILED; } gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; @@ -1365,7 +1397,8 @@ Status DavinciModel::GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, co REPORT_INNER_ERROR("E19999", "gear index:%zu in op:%s(%s) > all_gears_info.size:%zu in model:%u " "check invalid", gear_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), all_gears_info_.size(), model_id_); - GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast(it[0])); + GELOGE(PARAM_INVALID, "[Check][Param] gear index:%zu in op:%s(%s) > all_gears_info.size:%zu in model:%u.", + gear_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), all_gears_info_.size(), 
model_id_); return PARAM_INVALID; } @@ -1415,7 +1448,8 @@ Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type if (label_index >= label_list_.size()) { REPORT_INNER_ERROR("E19999", "Param label index:%u >= label_list_.size:%zu in model:%u, check invalid", label_index, label_list_.size(), model_id_); - GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Param label index:%u >= label_list_.size:%zu in model:%u", + label_index, label_list_.size(), model_id_); return INTERNAL_ERROR; } GE_CHECK_NOTNULL(label_list_[label_index]); @@ -1424,17 +1458,16 @@ Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type arg_size = label_used.size() * sizeof(rtLabelDevInfo); rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", - arg_size, rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", arg_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", arg_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } label_goto_args_[label_index] = { arg_addr, arg_size }; rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtLabelListCpy failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtLabelListCpy failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtLabelListCpy] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1451,22 +1484,24 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail, model_id:%u, check invalid", ATTR_NAME_LABEL_SWITCH_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(INTERNAL_ERROR, "InitLabelSet: %s attr [%s] not exist.", op_desc->GetName().c_str(), - ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in op:%s(%s) fail, model_id:%u", + ATTR_NAME_LABEL_SWITCH_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return INTERNAL_ERROR; } if (label_index >= LabelNum()) { REPORT_INNER_ERROR("E19999", "label_switch_index:%u in op:%s(%s) >= label_num:%u in model:%u, check invalid", label_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), LabelNum(), model_id_); - GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %u.", label_index, LabelNum()); + GELOGE(INTERNAL_ERROR, "[Check][Param] label_switch_index:%u in op:%s(%s) >= label_num:%u in model:%u", + label_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), LabelNum(), model_id_); return INTERNAL_ERROR; } if (label_id_indication_.count(label_index) > 0) { REPORT_INNER_ERROR("E19999", "label_switch_index:%u in op:%s(%s) is already used in model:%u, check invalid", label_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(INTERNAL_ERROR, "InitLabelSet: %s label index: %u already used.", op_desc->GetName().c_str(), label_index); + GELOGE(INTERNAL_ERROR, "[Check][Param] label_switch_index:%u in op:%s(%s) is already used in model:%u", + label_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return INTERNAL_ERROR; } @@ 
-1480,16 +1515,17 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { REPORT_INNER_ERROR("E19999", "stream_id:%u in op:%s(%s) >= stream size:%zu in model:%u, check invalid", stream_id, op_desc->GetName().c_str(), op_desc->GetType().c_str(), stream_list_.size(), model_id_); - GELOGE(INTERNAL_ERROR, "InitLabelSet: stream index: %u >= stream size: %zu.", stream_id, stream_list_.size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] stream_id:%u in op:%s(%s) >= stream size:%zu in model:%u", + stream_id, op_desc->GetName().c_str(), op_desc->GetType().c_str(), stream_list_.size(), model_id_); return INTERNAL_ERROR; } rtLabel_t rt_label = nullptr; rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream); if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { - REPORT_CALL_ERROR("E19999", "Call rtLabelCreateExV2 failed, ret: 0x%X", - rt_error); - GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); + REPORT_CALL_ERROR("E19999", "Call rtLabelCreateExV2 failed, ret:0x%X", rt_error); + GELOGE(INTERNAL_ERROR, "[Call][RtLabelCreateExV2] InitLabelSet: %s create label failed, ret:0x%x.", + op_desc->GetName().c_str(), rt_error); return INTERNAL_ERROR; } @@ -1527,10 +1563,10 @@ Status DavinciModel::InitVariable(const OpDescPtr &op_desc, map &input_queue_ids, const std::vector &output_queue_ids) { if (input_queue_ids.empty() && output_queue_ids.empty()) { - REPORT_INNER_ERROR("E19999", "Param input_queue_ids.size:%zu or output_queue_ids.size:%zu is empty, model_id:%u," + REPORT_INNER_ERROR("E19999", "Param input_queue_ids.size:%zu and output_queue_ids.size:%zu is empty, model_id:%u," "check invalid", input_queue_ids.size(), output_queue_ids.size(), model_id_); - GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Param is empty"); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "[Check][Param] Param is empty, model_id:%u", model_id_); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } @@ -1555,34 +1591,35 @@ Status DavinciModel::LoadWithQueue() { REPORT_INNER_ERROR("E19999", "Param input_queue_ids_.size:%zu != input_data_info_.size:%zu, model_id:%u," "check invalid", input_queue_ids_.size(), input_data_info_.size(), model_id_); - GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", - input_queue_ids_.size(), input_data_info_.size()); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "[Check][Param] Input queue ids not match model: " + "input_queue=%zu input_data=%zu, model_id:%u", input_queue_ids_.size(), input_data_info_.size(), model_id_); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } if (output_queue_ids_.size() != output_data_info_.size()) { REPORT_INNER_ERROR("E19999", "Param output_queue_ids_.size:%zu != output_data_info_.size:%zu, model_id:%u," - "check invalid", output_queue_ids_.size(), output_data_info_.size(), - model_id_); + "check invalid", output_queue_ids_.size(), output_data_info_.size(), model_id_); GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, - "Output queue ids not match model: output_queue=%zu output_data=%zu", - output_queue_ids_.size(), output_data_info_.size()); + "[Check][Param] Output queue ids not match model: output_queue=%zu output_data=%zu, model_id:%u", + output_queue_ids_.size(), output_data_info_.size(), model_id_); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } - GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); + GE_CHK_STATUS_RET(AddHeadStream(), "[Add][HeadStream] failed, model_id:%u", 
model_id_); // Binding input_queue and Data Op. - GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(BindInputQueue(), "[Bind][InputQueue] failed, model_id:%u", model_id_); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), + "[Call][CpuTaskModelZeroCopy] failed, model_id:%u", model_id_); // Binding output_queue and NetOutput Op. - GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(BindOutputQueue(), "[Bind][OutputQueue] failed, model_id:%u", model_id_); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), + "[Call][CpuTaskModelZeroCopy] failed, model_id:%u", model_id_); - GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); - GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); - GE_CHK_STATUS_RET(BindEnqueue(), "Launch enqueue failed."); - GE_CHK_STATUS_RET(CpuModelRepeat(), "Launch model repeat failed."); + GE_CHK_STATUS_RET(CpuActiveStream(), "[Call][CpuActiveStream] failed, model_id:%u", model_id_); + GE_CHK_STATUS_RET(CpuWaitEndGraph(), "[Call][CpuWaitEndGraph] failed, model_id:%u", model_id_); + GE_CHK_STATUS_RET(BindEnqueue(), "[Call][BindEnqueue] failed, model_id:%u", model_id_); + GE_CHK_STATUS_RET(CpuModelRepeat(), "[Call][CpuModelRepeat] failed, model_id:%u", model_id_); return SUCCESS; } @@ -1595,14 +1632,13 @@ Status DavinciModel::BindInputQueue() { for (size_t i = 0; i < input_queue_ids_.size(); ++i) { auto it = input_data_info_.find(i); if (it == input_data_info_.end()) { - - GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); + GELOGE(FAILED, "[Check][Param] Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); return FAILED; } uint32_t queue_id = input_queue_ids_[i]; if (it->second.GetDataInfo().empty()) { - GELOGE(INTERNAL_ERROR, "the %zu input_queue not set data_info.", i); + GELOGE(INTERNAL_ERROR, "[Check][Param] the %zu input_queue not set data_info.", i); return INTERNAL_ERROR; } uint32_t data_size = static_cast(it->second.GetDataInfo().at(0).first); @@ -1613,7 +1649,7 @@ Status DavinciModel::BindInputQueue() { rtError_t rt_ret = rtModelBindQueue(rt_model_handle_, queue_id, RT_MODEL_INPUT_QUEUE); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtModelBindQueue failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rtModelBindQueue failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelBindQueue] failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1633,9 +1669,8 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { GELOGI("Set CpuKernel model dequeue task enter."); std::shared_ptr dequeue_task = MakeShared(rt_entry_stream_); if (dequeue_task == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskModelDequeue failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskModelDequeue task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskModelDequeue failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskModelDequeue] task failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1657,9 +1692,8 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, GELOGI("Set CpuKernel model zero_copy task enter."); 
std::shared_ptr zero_copy = MakeShared(rt_entry_stream_); if (zero_copy == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskZeroCopy failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskZeroCopy task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskZeroCopy failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskZeroCopy] failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1683,15 +1717,16 @@ Status DavinciModel::BindOutputQueue() { if (it == output_data_info_.end()) { REPORT_INNER_ERROR("E19999", "Index:%zu can't find in output_data_info_ size:%zu in model_id:%u, check invalid", i, output_data_info_.size(), model_id_); - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); + GELOGE(FAILED, "[Check][Param] Index:%zu can't find in output_data_info_ size:%zu in model_id:%u", + i, output_data_info_.size(), model_id_); return FAILED; } uint32_t queue_id = output_queue_ids_[i]; if (it->second.GetDataInfo().empty()) { - REPORT_INNER_ERROR("E19999", "Index:%zu out_data_info in model:%u is empty, check invalid", - i, model_id_); - GELOGE(INTERNAL_ERROR, "the %zu output_queue not set data_info.", i); + REPORT_INNER_ERROR("E19999", "Index:%zu out_data_info in model:%u is empty, check invalid", i, model_id_); + GELOGE(INTERNAL_ERROR, "[Check][Param] Index:%zu out_data_info in model:%u is empty, check invalid", + i, model_id_); return INTERNAL_ERROR; } uint32_t data_size = static_cast(it->second.GetDataInfo().at(0).first); @@ -1701,9 +1736,8 @@ Status DavinciModel::BindOutputQueue() { rtError_t rt_ret = rtModelBindQueue(rt_model_handle_, queue_id, RT_MODEL_OUTPUT_QUEUE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtModelBindQueue failed, queue_id:%u, ret: 0x%X", - queue_id, rt_ret); - GELOGE(RT_FAILED, "Call rtModelBindQueue failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtModelBindQueue failed, queue_id:%u, ret:0x%X", queue_id, rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelBindQueue] failed, queue_id:%u, ret:0x%X", queue_id, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1724,17 +1758,15 @@ Status DavinciModel::BindOutputQueue() { Status DavinciModel::CpuModelPrepareOutput(uintptr_t addr, uint32_t size) { GELOGI("Set CpuKernel model enqueue task enter."); if (input_mbuf_list_.empty()) { - REPORT_INNER_ERROR("E19999", "input_mbuf_list_ is empty, model_id:%u, check invalid", - model_id_); - GELOGE(FAILED, "Need input mbuf for fill output mbuf head info."); + REPORT_INNER_ERROR("E19999", "input_mbuf_list_ is empty, model_id:%u, check invalid", model_id_); + GELOGE(FAILED, "[Check][Param] input_mbuf_list_ is empty, model_id:%u", model_id_); return FAILED; } std::shared_ptr prepare_output = MakeShared(rt_entry_stream_); if (prepare_output == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskPrepareOutput failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskPrepareOutput task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskPrepareOutput failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskPrepareOutput] failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1758,9 +1790,8 @@ Status DavinciModel::CpuActiveStream() { GELOGI("Set CpuKernel active stream task enter."); std::shared_ptr active_entry = MakeShared(rt_entry_stream_); if (active_entry == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskActiveEntry failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskActiveEntry 
task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskActiveEntry failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskActiveEntry] failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1781,9 +1812,8 @@ Status DavinciModel::CpuWaitEndGraph() { GELOGI("Set CpuKernel wait end graph task enter."); std::shared_ptr wait_endgraph = MakeShared(rt_entry_stream_); if (wait_endgraph == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskWaitEndGraph failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskWaitEndGraph task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskWaitEndGraph failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskWaitEndGraph] failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1803,7 +1833,8 @@ Status DavinciModel::BindEnqueue() { if (it == output_data_info_.end()) { REPORT_INNER_ERROR("E19999", "Index:%zu can't find in output_data_info_ size:%zu in model_id:%u, check invalid", i, output_data_info_.size(), model_id_); - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); + GELOGE(FAILED, "Index:%zu can't find in output_data_info_ size:%zu in model_id:%u", + i, output_data_info_.size(), model_id_); return FAILED; } @@ -1819,9 +1850,8 @@ Status DavinciModel::CpuModelEnqueue(uint32_t queue_id, uintptr_t out_mbuf) { GELOGI("Set CpuKernel model enqueue task enter."); std::shared_ptr model_enqueue = MakeShared(rt_entry_stream_); if (model_enqueue == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskModelEnqueue failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskModelEnqueue task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskModelEnqueue failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskModelEnqueue] failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1841,9 +1871,8 @@ Status DavinciModel::CpuModelRepeat() { GELOGI("Set CpuKernel repeat task enter."); std::shared_ptr model_repeat = MakeShared(rt_entry_stream_); if (model_repeat == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskModelRepeat failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskModelRepeat task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskModelRepeat failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskModelRepeat] failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -1863,11 +1892,13 @@ Status DavinciModel::GetInputOutputDescInfo(vector &input_d GELOGI("data_op_list_ is empty or input_desc size is not 1."); } else { vector input_formats; - GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats, false), "get input desc info failed."); + GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats, false), + "[Get][InputDescInfo] failed, model_id:%u", model_id_); } vector output_formats; - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), + "[Get][OutputDescInfo] failed, model_id:%u", model_id_); return SUCCESS; } @@ -1878,13 +1909,15 @@ Status DavinciModel::GetInputOutputDescInfo(vector &input_d if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { REPORT_INNER_ERROR("E19999", "input_addrs_list_ is empty or first member size != 1, model_id:%u, " "check invalid", model_id_); - GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); + GELOGE(FAILED, "[Check][Param] input_addrs_list_ is empty or 
first member size != 1, model_id:%u", model_id_); return FAILED; } - GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats, by_dims), "get input desc info failed"); + GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats, by_dims), + "[Get][InputDescInfo] failed, model_id:%u", model_id_); - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), + "[Get][OutputDescInfo] failed, model_id:%u", model_id_); return SUCCESS; } @@ -1940,14 +1973,15 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { domi::AippOpParams aipp_params; GeAttrValue::NAMED_ATTRS aipp_attr; GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, - "Data node do not contain param aipp!"); - GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); + "[Get][NamedAttrs] Data node:%s do not contain param aipp!", op_desc->GetName().c_str()); + GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), + "[Convert][AippParams] get aipp params failed, op:%s", op_desc->GetName().c_str()); GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); AippConfigInfo aipp_info; GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(&aipp_params, aipp_info), - "convert aipp params to aipp config info failed"); + "[Call][ConvertAippParams2AippInfo] failed, op:%s", op_desc->GetName().c_str()); aipp_info_list_[index] = aipp_info; return SUCCESS; @@ -1991,8 +2025,9 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons REPORT_INNER_ERROR("E19999", "Attr:%s data_mode:%s in op:%s(%s), model_id:%u, check invalid", ATTR_DATA_RELATED_AIPP_MODE.c_str(), data_mode.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, - "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "[Get][Attr] %s data_mode:%s in op:%s(%s), model_id:%u, check invalid", + ATTR_DATA_RELATED_AIPP_MODE.c_str(), data_mode.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return ACL_ERROR_GE_AIPP_MODE_INVALID; } @@ -2018,7 +2053,8 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons } Status DavinciModel::GetAippType(uint32_t index, InputAippType &aipp_type, size_t &aipp_index) const { - GE_CHK_BOOL_RET_STATUS(index < input_addrs_list_.size(), PARAM_INVALID, "Index %u is invalid", index); + GE_CHK_BOOL_RET_STATUS(index < input_addrs_list_.size(), PARAM_INVALID, + "[Check][Param] Index %u is invalid", index); const auto it = aipp_type_list_.find(index); if (it == aipp_type_list_.end()) { GELOGW("There is no aipp releated info with index %u", index); @@ -2056,6 +2092,25 @@ void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynami dynamic_type = dynamic_type_; } +Status DavinciModel::GetOpAttr(const std::string &op_name, const std::string &attr_name, + std::string &attr_value) const { + auto itr = op_name_to_attrs_.find(op_name); + if (itr == op_name_to_attrs_.end()) { + GELOGW("Did not save op:%s attr", op_name.c_str()); + return SUCCESS; + } + auto attr_itr = itr->second.find(attr_name); + if (attr_itr == itr->second.end()) { + GELOGW("Did not save 
attr:%s of op:%s", attr_name.c_str(), op_name.c_str()); + return SUCCESS; + } + for (const auto &name : attr_itr->second) { + attr_value += "[" + std::to_string(name.size()) + "]" + name; + } + GELOGD("Get attr:%s of op:%s success, attr value:%s", attr_name.c_str(), op_name.c_str(), attr_value.c_str()); + return SUCCESS; +} + void DavinciModel::GetModelAttr(vector &out_shape_info) const { out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); } @@ -2118,7 +2173,8 @@ Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); input.name = op_desc->GetName(); int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), + "[Get][InputSize] failed in op:%s.", op_desc->GetName().c_str()); input.size = input_size; input_formats_.push_back(format); input_descs_.push_back(input); @@ -2142,9 +2198,10 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, REPORT_INNER_ERROR("E19999", "input_desc index:%u in op:%s(%s) not exist, model_id:%u, " - "check invalid", index, - op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); + "check invalid", index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), + model_id_); + GELOGE(FAILED, "[Get][InputDescPtr] input_desc index:%u in op:%s(%s) not exist, model_id:%u", + index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return); Format format = op_desc->GetInputDescPtr(index)->GetFormat(); GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); @@ -2205,7 +2262,9 @@ Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector src_name = op_desc->GetSrcName(); std::vector src_index = op_desc->GetSrcIndex(); GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, - "construct output_name failed."); + "[Check][Param] construct output failed, as index:%u >= src name size:%zu, " + "or index >= src index size:%zu, op:%s.", + i, src_name.size(), src_index.size(), op_desc->GetName().c_str()); // forward compatbility, if old om has no out_node_name, need to return output follow origin way if (out_size == out_node_name.size()) { // neweast plan, the index will add to name during generate model. @@ -2229,28 +2288,29 @@ Status DavinciModel::GetOutputDescInfo(vector &output_descs return SUCCESS; } -Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { - rtMemcpyKind_t kind = device_data ? 
RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; +Status DavinciModel::CopyInputData(const InputData &input_data) { const std::vector &blobs = input_data.blobs; for (const auto &data : input_data_info_) { if (data.first >= blobs.size()) { REPORT_INNER_ERROR("E19999", "index:%u in input_data_info_ >= input_data.blobs.size:%zu, model_id:%u, " "check invalid", data.first, blobs.size(), model_id_); - GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), - input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, + GELOGE(FAILED, "[Check][Param] Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", + blobs.size(), input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, data.second.GetOpName().c_str()); return FAILED; } const DataBuffer &data_buf = blobs[data.first]; + rtMemcpyKind_t kind = + data_buf.placement == kPlacementHostData ? RT_MEMCPY_HOST_TO_DEVICE : RT_MEMCPY_DEVICE_TO_DEVICE; if (data_buf.length == 0) { GELOGW("No data need to memcpy!"); return SUCCESS; } uint64_t data_size = data.second.GetDataSize(); GE_CHK_BOOL_RET_STATUS(data_size >= data_buf.length, PARAM_INVALID, - "input data size(%lu) does not match model required size(%lu), op_name(%s) ret failed.", - data_buf.length, data_size, data.second.GetOpName().c_str()); + "[Check][Param] input data size(%lu) does not match model required size(%lu), " + "op_name(%s), ret failed.", data_buf.length, data_size, data.second.GetOpName().c_str()); void *mem_addr = data.second.GetBasicAddr(); void *data_buf_addr = reinterpret_cast(reinterpret_cast(data_buf.data)); uint64_t data_buf_length = data_buf.length; @@ -2303,7 +2363,11 @@ Status DavinciModel::InitModelProfile() { } const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); - GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index); + GE_CHK_BOOL_EXEC(op_desc != nullptr, + REPORT_INNER_ERROR("E19999", "Get op by index failed, as index:%u out of range", + fusion_op_info->op_index); + return FAILED, + "[Get][Op] failed, as index:%u out of range", fusion_op_info->op_index); ProfileInfo profile; profile.fusion_info = *fusion_op_info; @@ -2387,11 +2451,10 @@ Status DavinciModel::SinkModelProfile() { } catch (std::exception &e) { REPORT_INNER_ERROR("E19999", "Convert model_load_info JSON to string failed, model_id:%u, reason:%s", model_id_, e.what()); - GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + GELOGE(FAILED, "[Convert][JSON] to string failed, model_id:%u, reason:%s.", model_id_, e.what()); } catch (...) { - REPORT_INNER_ERROR("E19999", "Convert model_load_info JSON to string failed, model_id:%u", - model_id_); - GELOGE(FAILED, "Failed to convert JSON to string."); + REPORT_INNER_ERROR("E19999", "Convert model_load_info JSON to string failed, model_id:%u", model_id_); + GELOGE(FAILED, "[Convert][JSON] to string failed, model_id:%u.", model_id_); } reported_data.append(",") .append("\n"); @@ -2427,11 +2490,10 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { } catch (std::exception &e) { REPORT_INNER_ERROR("E19999", "Convert model_time_info JSON to string failed, model_id:%u, reason:%s", model_id_, e.what()); - GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + GELOGE(FAILED, "[Convert][JSON] to string failed, model_id:%u, reason:%s.", model_id_, e.what()); } catch (...) 
{ - REPORT_INNER_ERROR("E19999", "Convert model_time_info JSON to string failed, model_id:%u", - model_id_); - GELOGE(FAILED, "Failed to convert JSON to string."); + REPORT_INNER_ERROR("E19999", "Convert model_time_info JSON to string failed, model_id:%u", model_id_); + GELOGE(FAILED, "[Convert][JSON] to string failed, model_id:%u.", model_id_); } reported_data.append(",") .append("\n"); @@ -2498,10 +2560,9 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r output_data.model_id = model_id_; if (output_data.blobs.size() != output_data_info_.size()) { REPORT_INNER_ERROR("E19999", "output_data.blobs.size:%zu != output_data_info.size:%zu, model_id:%u, " - "check invalid", - output_data.blobs.size(), output_data_info_.size(), model_id_); - GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), - output_data_info_.size()); + "check invalid", output_data.blobs.size(), output_data_info_.size(), model_id_); + GELOGE(FAILED, "[Check][Param] output_data.blobs.size:%zu != output_data_info.size:%zu, model_id:%u", + output_data.blobs.size(), output_data_info_.size(), model_id_); return FAILED; } @@ -2511,8 +2572,8 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r if (output.first >= blobs.size()) { REPORT_INNER_ERROR("E19999", "index:%u in output_data_info_ >= output_data.blobs.size:%zu, model_id:%u, " "check invalid", output.first, blobs.size(), model_id_); - GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); + GELOGE(FAILED, "[Check][Param] index:%u in output_data_info_ >= output_data.blobs.size:%zu, model_id:%u", + output.first, blobs.size(), model_id_); return FAILED; } @@ -2530,9 +2591,9 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r GELOGI("No need to check output data size."); } else if (buffer.length < mem_size) { REPORT_INNER_ERROR("E19999", "Buffer.length:%lu in output blob < mem_size:%lu in output_data_info, index:%u, " - "model_id:%u, check invalid", buffer.length, mem_size, output.first, - model_id_); - GELOGE(FAILED, "Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length); + "model_id:%u, check invalid", buffer.length, mem_size, output.first, model_id_); + GELOGE(FAILED, "[Check][Param] Buffer.length:%lu in output blob < mem_size:%lu in output_data_info, index:%u, " + "model_id:%u", buffer.length, mem_size, output.first, model_id_); return FAILED; } else if (buffer.length > mem_size) { GELOGW("Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length); @@ -2569,10 +2630,10 @@ Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { auto ret = TensorUtils::GetTensorSizeInBytes(*input_desc, size); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, REPORT_INNER_ERROR("E19999", "Get input TensorSize in op:%s(%s) failed, input_index:%zu, " - "model_id:%u", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), i, + "model_id:%u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), i, model_id_); - GELOGE(ret, "Get size from TensorDesc failed, op:%s, input id:%zu", op_desc->GetName().c_str(), i); + GELOGE(ret, "[Get][InputTensorSize] in op:%s(%s) failed, input_index:%zu, model_id:%u", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), i, model_id_); return ret); const GeShape &shape = input_desc->GetShape(); GELOGI("Output size is %ld, output shape is %s.", size, 
formats::JoinToString(shape.GetDims()).c_str()); @@ -2583,7 +2644,7 @@ Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { return SUCCESS; } -Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector &outputs) { +Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector &outputs) { GE_CHECK_NOTNULL(output_data); if (!output_data->blobs.empty()) { GELOGI("No need to generate output tensor info, model id:%u", model_id_); @@ -2612,26 +2673,25 @@ Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector data_buf(new (std::nothrow) uint8_t[output_buffer_size[i]]); - if (data_buf == nullptr) { - REPORT_CALL_ERROR("E19999", "New buffer failed, size:%ld, model_id:%u", - output_buffer_size[i], model_id_); - GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); - return GE_GRAPH_MALLOC_FAILED; - } - output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size[i]), false}); - OutputTensorInfo output; - output.dims = output_shape_info[i]; - output.data = std::move(data_buf); - output.length = output_buffer_size[i]; - outputs.emplace_back(std::move(output)); + auto aligned_ptr = MakeShared(output_buffer_size[i], kAlignment); + GE_CHECK_NOTNULL(aligned_ptr); + GeShape ge_shape(output_shape_info[i]); + GeTensorDesc tensor_desc; + tensor_desc.SetShape(ge_shape); + GeTensor ge_tensor(tensor_desc); + ge_tensor.SetData(aligned_ptr, output_buffer_size[i]); + ge::Tensor output_tensor = TensorAdapter::AsTensor(ge_tensor); + + auto data_ptr = aligned_ptr->MutableGet(); + output_data->blobs.push_back( + {reinterpret_cast(data_ptr), static_cast(output_buffer_size[i]), false}); + outputs.emplace_back(std::move(output_tensor)); GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, - formats::JoinToString(output.dims).c_str(), output.length); + formats::JoinToString(output_shape_info[i]).c_str(), output_buffer_size[i]); } return SUCCESS; } - /// /// @ingroup ge /// @brief send Output Op result to upper layer @@ -2645,8 +2705,10 @@ Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector outputs; + GE_CHK_BOOL_EXEC(listener_ != nullptr, + REPORT_INNER_ERROR("E19999", "listener_ is nullptr, check invalid."); + return PARAM_INVALID, "[Check][Param] listener_ is null."); + std::vector outputs; // return result is not required if (!rslt_flg && !seq_end_flag) { @@ -2655,17 +2717,20 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b GE_CHECK_NOTNULL(model_manager); auto exception_infos = model_manager->GetExceptionInfos(); if (exception_infos.size() > 0) { - GE_CHK_STATUS_RET(DumpExceptionInfo(exception_infos), "[Dump][Exception] Dump exception info failed."); + GE_CHK_STATUS_RET(DumpExceptionInfo(exception_infos), + "[Dump][Exception] Dump exception info failed, model_id:%u.", model_id_); } else { GELOGI("[Dump][Exception] Exception info is null."); } - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), + "[Call][OnComputeDone] failed, model_id:%u, data_id:%u.", model_id_, data_id); return INTERNAL_ERROR; } if (!has_output_node_) { GELOGW("Output tensor list is empty, model id: %u", model_id_); - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), + "[Call][OnComputeDone] failed, model_id:%u, 
data_id:%u.", model_id_, data_id); return INTERNAL_ERROR; } @@ -2688,16 +2753,19 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b } if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) { - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed"); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), + "[Call][OnComputeDone] failed, model_id:%u, data_id:%u.", model_id_, data_id); return INTERNAL_ERROR; } if (seq_end_flag) { GELOGW("End of sequence, model id: %u", model_id_); - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, END_OF_SEQUENCE, outputs), "OnCompute Done failed."); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, END_OF_SEQUENCE, outputs), + "[Call][OnComputeDone] failed, model_id:%u, data_id:%u.", model_id_, data_id); return END_OF_SEQUENCE; } - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, SUCCESS, outputs), "OnComputeDone failed"); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, SUCCESS, outputs), + "[Call][OnComputeDone] failed, model_id:%u, data_id:%u.", model_id_, data_id); return SUCCESS; } /// @@ -2709,15 +2777,18 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u.", model_id_); - GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); - std::vector outputs; - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, SUCCESS, outputs), "OnComputeDone failed."); + GE_CHK_BOOL_EXEC(listener_ != nullptr, + REPORT_INNER_ERROR("E19999", "listener_ is nullptr, check invalid."); + return PARAM_INVALID, "[Check][Param] listener_ is null!"); + std::vector outputs; + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, SUCCESS, outputs), + "[Call][OnComputeDone] failed, model_id:%u, data_id:%u.", model_id_, data_id); return SUCCESS; } void *DavinciModel::Run(DavinciModel *model) { GE_CHK_BOOL_EXEC(model != nullptr, - return nullptr, "model_pointer is null!") + return nullptr, "[Check][Param] model_pointer is null!") bool seq_end_flag = false; uint32_t model_id = model->Id(); uint32_t device_id = model->GetDeviceId(); @@ -2727,7 +2798,7 @@ void *DavinciModel::Run(DavinciModel *model) { rtError_t rt_ret = rtSetDevice(static_cast(device_id)); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Model run rtsetdevice failed."); + GELOGE(FAILED, "[Run][Rtsetdevice] failed, model_id:%u, device_id:%u.", model_id, device_id); return nullptr; } // DeviceReset before thread run finished! 
@@ -2760,16 +2831,18 @@ void *DavinciModel::Run(DavinciModel *model) { ret = model->SyncVarData(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - continue, "Copy input data to model failed."); // [No need to check value] + continue, + "[Call][SyncVarData] Copy input data to model failed, model_id:%u.", model_id); // [No need to check value] GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); GELOGI("Copy input data, model id:%u", model_id); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_START)); - ret = model->CopyInputData(current_data, false); + ret = model->CopyInputData(current_data); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - continue, "Copy input data to model failed."); // [No need to check value] + continue, + "[Call][CopyInputData] Copy input data to model failed, model_id:%u.", model_id); // [No need to check value] if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { model->cur_dynamic_dims_.clear(); GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); @@ -2868,12 +2941,13 @@ Status DavinciModel::DestroyThread() { /// @author /// Status DavinciModel::ModelRunStart() { - GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, INTERNAL_ERROR, "data_inputer_ is nullptr."); + GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, INTERNAL_ERROR, + "[Check][Param] data_inputer_ is nullptr, model id:%u.", model_id_); LockRunFlg(); GE_MAKE_GUARD(tmp_lock, [&] { UnlockRunFlg(); }); - GE_CHK_BOOL_RET_STATUS(!run_flg_, INTERNAL_ERROR, "Model already started."); + GE_CHK_BOOL_RET_STATUS(!run_flg_, INTERNAL_ERROR, "[Check][Param] Model already started, model id:%u.", model_id_); run_flg_ = true; @@ -2903,7 +2977,7 @@ Status DavinciModel::ModelRunStop() { LockRunFlg(); GE_MAKE_GUARD(tmp_lock, [&] { UnlockRunFlg(); }); - GE_CHK_STATUS_RET(DestroyThread(), "DestoyThead failed."); + GE_CHK_STATUS_RET(DestroyThread(), "[Destoy][Thead] failed, model id:%u.", model_id_); return SUCCESS; } @@ -2957,7 +3031,10 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const GELOGI("in, inputs size: %zu, input addr size: %zu, outputs size: %zu, output addr size: %zu", inputs.size(), input_addrs_list_.size(), outputs.size(), output_addrs_list_.size()); if (inputs.size() > input_addrs_list_.size()) { - GELOGE(FAILED, "input data addr %zu should less than input op num %zu.", inputs.size(), input_addrs_list_.size()); + REPORT_INNER_ERROR("E19999", "input data addr %zu should less than input op num %zu.", + inputs.size(), input_addrs_list_.size()); + GELOGE(FAILED, "[Check][Param] input data addr %zu should less than input op num %zu.", + inputs.size(), input_addrs_list_.size()); return FAILED; } // remove zero copy addr in last iteration @@ -3023,19 +3100,21 @@ Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs, boo Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vector &outputs) { GELOGI("DavinciModel::UpdateKnownNodeArgs in"); GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs), - "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy."); + "[Call][CreateKnownZeroCopyMap] failed, model_id:%u.", model_id_); total_io_addrs_.clear(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task = 
task_list_[task_index]; if (task != nullptr) { Status ret = task->UpdateArgs(); if (ret != SUCCESS) { - GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index); + REPORT_CALL_ERROR("E19999", "task %zu update args failed, model_id:%u", task_index, model_id_); + GELOGE(FAILED, "[Update][Args] to task %zu failed, model_id:%u.", task_index, model_id_); return FAILED; } } } - GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), + "[Call][UpdateKnownZeroCopyAddr] failed, model_id:%u.", model_id_); if (total_args_size_ == 0) { GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); @@ -3046,7 +3125,10 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec Status rt_ret = rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", total_args_size_ , rt_ret); + GELOGE(rt_ret, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", total_args_size_ , rt_ret); + return FAILED;) } GELOGI("DavinciModel::UpdateKnownNodeArgs success"); @@ -3065,7 +3147,8 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { GE_CHECK_NOTNULL(task_list_[i]); Status ret = task_list_[i]->Init(task, this); if (ret != SUCCESS) { - GELOGE(ret, "Task index %d init failed.", i); + REPORT_CALL_ERROR("E19999", "Task index:%d init failed, ret:%d.", i, ret); + GELOGE(ret, "[Init][Task] index:%d failed, ret:%d.", i, ret); return ret; } } @@ -3076,7 +3159,7 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { Status DavinciModel::CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const { int64_t value = RT_CAPABILITY_SUPPORT; auto rt_ret = rtGetRtCapability(featureType, featureInfo, &value); - GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "call rtGetRtCapability failed!"); + GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "[Call][RtGetRtCapability] failed, ret:0x%X", rt_ret); is_support = (value == RT_CAPABILITY_SUPPORT) ? 
true : false; return SUCCESS; } @@ -3095,7 +3178,8 @@ Status DavinciModel::MallocKnownArgs() { GE_CHECK_NOTNULL(task_list_[i]); Status ret = task_list_[i]->CalculateArgs(taskdef, this); if (ret != SUCCESS) { - GELOGE(ret, "TaskInfo CalculateArgs failed."); + REPORT_CALL_ERROR("E19999", "task index:%d CalculateArgs failed, ret:%d", i, ret); + GELOGE(ret, "[Calculate][Args] for taskdef index:%d failed, ret:%d", i, ret); return ret; } } @@ -3107,9 +3191,8 @@ Status DavinciModel::MallocKnownArgs() { if (total_args_size_ != 0) { rt_ret = rtMalloc(&args_, total_args_size_, mem_type); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", - total_args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", total_args_size_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret: 0x%X", total_args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -3117,9 +3200,8 @@ Status DavinciModel::MallocKnownArgs() { if (total_hybrid_args_size_ != 0) { rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, mem_type); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", - total_hybrid_args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", total_hybrid_args_size_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret: 0x%X", total_hybrid_args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -3128,9 +3210,8 @@ Status DavinciModel::MallocKnownArgs() { GELOGI("Begin to allocate fixed addr."); rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, mem_type); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", - total_hybrid_args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", total_hybrid_args_size_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret: 0x%X", total_hybrid_args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -3215,7 +3296,7 @@ Status DavinciModel::DistributeTask() { auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); GE_CHECK_NOTNULL(task); - GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); + GE_CHK_STATUS_RET(task->Distribute(), "[Call][Distribute] for Task[%zu] fail", task_index); // for data dump auto op_index = std::max(task_def.kernel().context().op_index(), task_def.kernel_ex().op_index()); @@ -3238,7 +3319,7 @@ Status DavinciModel::DistributeTask() { SaveProfilingTaskDescInfo(op, task, task_def, task_index); } // launch dump kernel to aicpu - GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); + GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "[Load][DumpInfo] failed, model_id:%u.", model_id_); return SUCCESS; } @@ -3381,7 +3462,8 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 "check invalid", input_size, kDataMemAlignSizeCompare, op_size, model_id_); GELOGE(ACL_ERROR_GE_PARAM_INVALID, - "Input size [%ld] can not be smaller than op size [%ld] after 64-byte alignment", input_size, op_size); + "[Check][Param] input size:%ld from user add align:%u > input_op_size:%ld in model, model_id:%u", + input_size, kDataMemAlignSizeCompare, 
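// Editor's illustrative sketch (not part of the patch): the MallocKnownArgs hunks above are one
// instance of the error-reporting convention rolled out across this file. Every failure path now
// pairs a REPORT_*_ERROR("E19999", ...) record for the error manager with a GELOGE line tagged as
// "[Action][Target]" that carries the same context (size, return code, model id). Assuming the GE
// logging macros and runtime headers already included by davinci_model.cc, the pattern is:
rtError_t rt_ret = rtMalloc(&args_, total_args_size_, mem_type);
if (rt_ret != RT_ERROR_NONE) {
  // structured record consumed by the error manager
  REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret: 0x%X", total_args_size_, rt_ret);
  // searchable log line: the "[Call][RtMalloc]" tag names the action and the callee
  GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret: 0x%X", total_args_size_, rt_ret);
  return RT_ERROR_TO_GE_STATUS(rt_ret);
}
// "[Check][Param]" is used the same way for argument validation failures.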
op_size, model_id_); return false; } return true; @@ -3397,18 +3479,21 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 /// Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Call][UpdateIoTaskArgs] [ZCPY] Update input data to model:%u failed.", + model_id_); return ACL_ERROR_GE_PARAM_INVALID; } if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Call][UpdateIoTaskArgs] [ZCPY] Update output data to model:%u failed.", + model_id_); return ACL_ERROR_GE_PARAM_INVALID; } for (ZeroCopyTask &task : zero_copy_tasks_) { - GE_CHK_STATUS_RET(task.DistributeParam(is_async_mode_, rt_model_stream_), "[ZCPY] Update args failed."); + GE_CHK_STATUS_RET(task.DistributeParam(is_async_mode_, rt_model_stream_), + "[Call][DistributeParam] [ZCPY] Update args failed, model_id:%u.", model_id_); } output_data.index = input_data.index; @@ -3430,36 +3515,35 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & const vector &blobs, bool is_dynamic, const string &batch_label) { if (blobs.size() != data_info.size()) { REPORT_INNER_ERROR("E19999", "is_input:%d blob size:%ld from user != op_size:%ld in model, mode_id:%u" - "check invalid", is_input, - blobs.size(), data_info.size(), model_id_); - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", - is_input ? "input" : "output", data_info.size(), blobs.size()); + "check invalid", is_input, blobs.size(), data_info.size(), model_id_); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] is_input:%d blob size:%ld " + "from user != op_size:%ld in model, mode_id:%u", + is_input, blobs.size(), data_info.size(), model_id_); return ACL_ERROR_GE_PARAM_INVALID; } for (const auto &data : data_info) { if (data.first >= blobs.size()) { // check data index. REPORT_INNER_ERROR("E19999", "is_input:%d, data index:%u from model >= blobs.size:%zu from user, mode_id:%u" - "check invalid", is_input, - data.first, blobs.size(), model_id_); + "check invalid", is_input, data.first, blobs.size(), model_id_); GELOGE(ACL_ERROR_GE_PARAM_INVALID, - "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", - is_input ? "input" : "output", data.first, blobs.size()); + "[Check][Param] is_input:%d, data index:%u from model >= blobs.size:%zu from user, mode_id:%u", + is_input, data.first, blobs.size(), model_id_); return ACL_ERROR_GE_PARAM_INVALID; } const DataBuffer &buffer = blobs[data.first]; // index of data. 
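// Editor's illustrative sketch (hypothetical, simplified types; not the GE implementation): the
// UpdateIoTaskArgs checks around this point boil down to three guards before any zero-copy address
// is patched -- the blob count matches the model's io map, every mapped index exists in the user
// blobs, and the user buffer is non-null and large enough (GE additionally allows a 64-byte
// alignment slack via CheckInputAndModelSize, omitted here). A standalone version:
#include <cstdint>
#include <map>
#include <vector>
struct IoBlob { void *data; uint64_t length; };  // stands in for ge::DataBuffer
struct IoSlot { uint64_t size; };                // stands in for ge::ZeroCopyOffset
bool ValidateIoBlobs(const std::map<uint32_t, IoSlot> &data_info, const std::vector<IoBlob> &blobs) {
  if (blobs.size() != data_info.size()) {
    return false;  // user fed a different number of inputs/outputs than the model declares
  }
  for (const auto &item : data_info) {
    if (item.first >= blobs.size()) {
      return false;  // model references an index the user never supplied
    }
    const IoBlob &buffer = blobs[item.first];
    if (buffer.data == nullptr || buffer.length < item.second.size) {
      return false;  // null or undersized user buffer
    }
  }
  return true;
}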
if (buffer.data == nullptr) { REPORT_INNER_ERROR("E19999", "is_input:%d buffer from user is nullptr, index:%u, mode_id:%u" - "check invalid", is_input, - data.first, model_id_); - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "data_buf.data is nullptr, index=%u", data.first); + "check invalid", is_input, data.first, model_id_); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] data_buf.data is nullptr, " + "index=%u, mode_id:%u", data.first, model_id_); return ACL_ERROR_GE_PARAM_INVALID; } if (!CheckInputAndModelSize(buffer.length, data.second.GetDataSize(), is_dynamic)) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, - "Check input size and model size failed, op[%s]", data.second.GetOpName().c_str()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Call][CheckInputAndModelSize] failed, op[%s]", + data.second.GetOpName().c_str()); return ACL_ERROR_GE_PARAM_INVALID; } @@ -3470,9 +3554,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & GELOGI("[IMAS] Find addr %p need direct copy from user malloc input %p", basic_addr, buffer.data); rtError_t rt_ret = rtMemcpy(basic_addr, data_size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, model_id:%u", - data_size, model_id_); - GELOGE(rt_ret, "Non-zero copy data node copy failed"); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, model_id:%u", data_size, model_id_); + GELOGE(rt_ret, "[Call][RtMemcpy] failed, size:%lu, model_id:%u", data_size, model_id_); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -3564,9 +3647,10 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { GE_IF_BOOL_EXEC(static_cast(v_output_size[0]) < tensor->GetData().size(), REPORT_INNER_ERROR("E19999", "Output size:%zu < weight size:%zu in op:%s(%s) model_id:%u, " "check invalid", v_output_size[0], tensor->GetData().size(), - op_desc->GetName().c_str(), op_desc->GetType().c_str() ,model_id_); - GELOGE(PARAM_INVALID, "output size:%ld less than weight data size:%zu", v_output_size[0], - tensor->GetData().size()); + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] Output size:%zu < weight size:%zu in op:%s(%s), model_id:%u", + v_output_size[0], tensor->GetData().size(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return PARAM_INVALID;); GE_IF_BOOL_EXEC(tensor->GetData().size() == 0, GELOGW("const op:%s has no weight data.", op_desc->GetName().c_str()); @@ -3585,7 +3669,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { } uint64_t *buff = reinterpret_cast(tensor->MutableData().data()); if (ge::CheckInt64Uint32MulOverflow(elem_num, kBytes * kStringHeadElems) != SUCCESS) { - GELOGE(FAILED, "Shape size is invalid"); + GELOGE(FAILED, "[Call][CheckInt64Uint32MulOverflow] Shape size:%ld is invalid", elem_num); return FAILED; } uint64_t offset = elem_num * kBytes * kStringHeadElems; @@ -3615,8 +3699,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { auto tbe_kernel = (kernel != nullptr) ? 
kernel : op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { REPORT_INNER_ERROR("E19999", "Get tbe_kernel for op:%s(%s) fail, model_id:%u", - op_desc->GetName().c_str(), op_desc->GetType().c_str() ,model_id_); - GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc->GetName().c_str()); + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); + GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find tvm bin file!", op_desc->GetName().c_str()); return INTERNAL_ERROR; } @@ -3644,8 +3728,10 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { } else { REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str() ,model_id_); - GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", + TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return PARAM_INVALID; } @@ -3718,7 +3804,12 @@ Status DavinciModel::InitStreamActive(const OpDescPtr &op_desc) { if (op_desc->HasAttr(ATTR_NAME_SWITCH_BRANCH_NODE_LABEL)) { std::vector active_stream_list; GE_CHK_BOOL_EXEC(AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list), - return INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM failed."); + REPORT_INNER_ERROR("E19999", "[Get][Attr] %s in op:%s(%s) failed, model_id:%u.", + ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); + return INTERNAL_ERROR, + "[Get][Attr] %s in op:%s(%s) failed, model_id:%u.", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); for (size_t j = 0; j < active_stream_list.size(); ++j) { active_stream_indication_.insert(active_stream_list[j]); @@ -3739,7 +3830,9 @@ Status DavinciModel::InitStreamSwitch(const OpDescPtr &op_desc) { ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), active_stream_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), kTrueBranchStreamNum, model_id_); - GELOGE(INTERNAL_ERROR, "Stream num of switch true branch must be %u.", kTrueBranchStreamNum); + GELOGE(INTERNAL_ERROR, "[Check][Param] Attr:%s active_stream_list.size:%zu in op:%s(%s) != %u, model_id:%u", + ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), active_stream_list.size(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), kTrueBranchStreamNum, model_id_); return INTERNAL_ERROR; } @@ -3753,10 +3846,10 @@ Status DavinciModel::InitStreamSwitch(const OpDescPtr &op_desc) { Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { std::vector active_stream_list; if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail, model_id:%u", - ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail, model_id:%u", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(INTERNAL_ERROR, "StreamSwitchNOp get attr ACTIVE_STREAM failed."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s from op:%s(%s) fail, model_id:%u", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + op_desc->GetName().c_str(), 
op_desc->GetType().c_str(), model_id_); return INTERNAL_ERROR; } @@ -3767,10 +3860,10 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { uint32_t batch_num = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, batch_num)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail, model_id:%u", - ATTR_NAME_BATCH_NUM.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail, model_id:%u", ATTR_NAME_BATCH_NUM.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(FAILED, "Failed to get attr ATTR_NAME_BATCH_NUM, StreamSwitchN: %s.", op_desc->GetName().c_str()); + GELOGE(FAILED, "[Get][Attr] %s from op:%s(%s) fail, model_id:%u", ATTR_NAME_BATCH_NUM.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); return FAILED; } @@ -3787,10 +3880,10 @@ Status DavinciModel::SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batc std::vector batch_shape; const std::string attr_name = ATTR_NAME_PRED_VALUE + "_" + std::to_string(i); if (!AttrUtils::GetListInt(op_desc, attr_name, batch_shape)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail, model_id:%u", - attr_name.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail, model_id:%u", attr_name.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(FAILED, "Get attr ATTR_NAME_PRED_VALUE failed, Node: %s", op_desc->GetName().c_str()); + GELOGE(FAILED, "[Get][Attr] %s from op:%s(%s) fail, model_id:%u", attr_name.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); batch_info_.clear(); return FAILED; } @@ -3839,7 +3932,7 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { Status DavinciModel::InitModelStream(rtStream_t stream) { ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION; GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR, - "NnExecute not support mix execute."); + "[Check][Param] NnExecute not support mix execute."); last_execute_mode_ = curr_mode; // asynchronize mode, use user input stream. @@ -3880,15 +3973,15 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa OutputData &output_data) { is_async_mode_ = async_mode; GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); - GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); + GE_CHK_STATUS_RET(InitModelStream(stream), "[Init][ModelStream] failed, model_id:%u.", model_id_); is_dynamic_ = input_data.is_dynamic_batch; bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn(); bool profiling_model_load_on = ProfilingManager::Instance().ProfilingModelLoadOn(); GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START)); Status ret = CopyModelData(input_data, output_data, is_dynamic_); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", - model_id_); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, + "[Copy][ModelData] failed. 
model id: %u", model_id_); GELOGD("current_data.index=%u", input_data.index); GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END)); @@ -3921,7 +4014,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, - "Copy Output data to user failed."); + "[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); } @@ -3934,10 +4027,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa // Add active entry stream for special env. Status DavinciModel::AddHeadStream() { if (active_stream_list_.empty()) { - REPORT_INNER_ERROR("E19999", "active_stream_list is empty in model:%u, check invalid", - model_id_); - GELOGE(INTERNAL_ERROR, "Active stream is empty, stream list size: %zu, stream indication size: %zu.", - stream_list_.size(), active_stream_indication_.size()); + REPORT_INNER_ERROR("E19999", "active_stream_list is empty in model:%u, check invalid", model_id_); + GELOGE(INTERNAL_ERROR, "[Check][Param] active_stream_list is empty in model:%u, check invalid", model_id_); return INTERNAL_ERROR; } @@ -3955,9 +4046,8 @@ Status DavinciModel::AddHeadStream() { for (auto s : active_stream_list_) { std::shared_ptr active_entry = MakeShared(rt_head_stream_); if (active_entry == nullptr) { - REPORT_CALL_ERROR("E19999", "New CpuTaskActiveEntry failed, model_id:%u", - model_id_); - GELOGE(MEMALLOC_FAILED, "Make CpuTaskActiveEntry task failed."); + REPORT_CALL_ERROR("E19999", "New CpuTaskActiveEntry failed, model_id:%u", model_id_); + GELOGE(MEMALLOC_FAILED, "[New][CpuTaskActiveEntry] task failed, model_id:%u", model_id_); return MEMALLOC_FAILED; } @@ -3978,7 +4068,7 @@ Status DavinciModel::AddHeadStream() { Status DavinciModel::InitEntryTask() { if (deploy_type_ == AICPU_DEPLOY_CROSS_THREAD) { - GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); + GE_CHK_STATUS_RET(AddHeadStream(), "[Add][HeadStream] failed."); return CpuActiveStream(); } else { return LoadWithQueue(); @@ -3988,14 +4078,15 @@ Status DavinciModel::InitEntryTask() { uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { uint8_t *mem_base = nullptr; const string purpose("feature map,used for op input and output."); - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { data_size = static_cast(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); string memory_key = std::to_string(0) + "_f"; - mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId()); + mem_base = + MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, data_size, GetDeviceId()); } else { - mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, data_size, GetDeviceId()); + mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, data_size, GetDeviceId()); } if (mem_base != nullptr) { @@ -4004,83 +4095,119 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { return mem_base; } -uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { - uint8_t 
*p2p_mem_base = nullptr; - const string purpose("p2p memory, used for some op related to hcom"); - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - string p2p_memory_key = std::to_string(0) + "_p"; - p2p_mem_base = - MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId()); - } else { - p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId()); +Status DavinciModel::MallocExMem() { + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; + INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); + for (auto it : runtime_param_.memory_infos) { + auto mem_size = it.second.memory_size; + if (mem_size == 0) { + continue; + } + bool sessoion_scope = ((kSessionScopeMemory & it.first) == kSessionScopeMemory); + auto mem_type = it.first & kMemoryTypeMask; + uint8_t *mem_base = nullptr; + const string purpose("p2p memory, used for some op related to hcom or session scope memory"); + if (sessoion_scope) { + mem_base = MemManager::Instance().SessionScopeMemInstance(mem_type).Malloc(mem_size, runtime_param_.session_id); + } else if (res_static_memory == EN_OK) { + string memory_key = std::to_string(0) + it.second.memory_key; + mem_base = + MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, memory_key, mem_size, GetDeviceId()); + } else { + mem_base = MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, mem_size, GetDeviceId()); + } + + if (mem_base == nullptr) { + REPORT_CALL_ERROR("E19999", "MallocExMem fail, type:%ld size:%zu, model_id:%u, check invalid", + mem_type, mem_size, model_id_); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc ex memory failed, type:%ld size: %zu", mem_type, mem_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + it.second.memory_base = mem_base; + GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] mem_type[%ld] mem_addr[%p] mem_size[%zu]", + runtime_param_.graph_id, mem_type, mem_base, mem_size); } - return p2p_mem_base; + return SUCCESS; } uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { uint8_t *weights_mem_base = nullptr; const string purpose("weights memory in inference network."); - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string weight_memory_key = std::to_string(0) + "_w"; - weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); + weights_mem_base = MemManager::Instance() + .MemInstance(RT_MEMORY_HBM) + .MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); } else { - weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); + weights_mem_base = + MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, weights_size, GetDeviceId()); } return weights_mem_base; } void DavinciModel::FreeFeatureMapMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK && is_inner_mem_base_) { string weight_memory_key = std::to_string(0) + "_f"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), + if 
(MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(weight_memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weight_memory_key, GetDeviceId()), "failed to free weight memory"); } mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(mem_base_, GetDeviceId()), - "failed to free feature_map memory"); - mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + mem_base_ != nullptr && is_inner_mem_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(mem_base_, GetDeviceId()), + "failed to free feature_map memory"); + mem_base_ = nullptr); } } -void DavinciModel::FreeP2PMem() { - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - std::string p2p_memory_key = std::to_string(0) + "_p"; - if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), - "failed to free p2p memory"); +void DavinciModel::FreeExMem() { + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; + INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); + for (auto it : runtime_param_.memory_infos) { + // free when session destory + if ((kSessionScopeMemory & it.first) == kSessionScopeMemory) { + continue; + } + auto mem_type = it.first & kMemoryTypeMask; + if (res_static_memory == EN_OK) { + std::string memory_key = std::to_string(0) + it.second.memory_key; + if (MemManager::Instance().MemInstance(mem_type).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(memory_key, GetDeviceId()), + "failed to free memory"); + } + it.second.memory_base = nullptr; + } else { + GE_IF_BOOL_EXEC( + it.second.memory_base != nullptr, + GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(it.second.memory_base, GetDeviceId()), + "failed to free memory"); + it.second.memory_base = nullptr); } - p2p_mem_base_ = nullptr; - } else { - GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), - "failed to free p2p memory"); - p2p_mem_base_ = nullptr); } } void DavinciModel::FreeWeightsMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string memory_key = std::to_string(0) + "_w"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), + if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key, GetDeviceId()), "failed to free feature_map memory"); } weights_mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weights_mem_base_, GetDeviceId()), - "failed to free weight memory"); - weights_mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, + 
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weights_mem_base_, GetDeviceId()), + "failed to free weight memory"); + weights_mem_base_ = nullptr); } } @@ -4088,9 +4215,8 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) rtContext_t ctx = nullptr; rtError_t rt_ret = rtCtxGetCurrent(&ctx); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, model_id:%u", - model_id_); - GELOGE(RT_FAILED, "Failed to get current context, error_code is: 0x%X.", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, model_id:%u", model_id_); + GELOGE(RT_FAILED, "[Call][RtCtxGetCurrent] failed, ret:0x%X, model_id:%u.", rt_ret, model_id_); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -4124,7 +4250,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const mapGetName().c_str(), op_desc->GetType().c_str(), infos.size(), kAippInfoNum, model_id_); - GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "[Check][Param] Attr:%s in op:%s(%s), " + "aipp input size:%zu != kAippInfoNum:%u, model_id:%u", ATTR_NAME_AIPP_INPUTS.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), infos.size(), kAippInfoNum, model_id_); return ACL_ERROR_GE_AIPP_MODE_INVALID; } @@ -4210,9 +4338,9 @@ Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const { const auto it = orig_input_info_.find(index); if (it == orig_input_info_.end()) { - REPORT_INNER_ERROR("E19999", "Get index:%u from orig_input_info_ fail, model_id:%u", - index, model_id_); - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); + REPORT_INNER_ERROR("E19999", "Get index:%u from orig_input_info_ fail, model_id:%u", index, model_id_); + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "[Check][Param] Get index:%u from orig_input_info_ fail, model_id:%u", + index, model_id_); return ACL_ERROR_GE_AIPP_NOT_EXIST; } @@ -4231,7 +4359,8 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ REPORT_INNER_ERROR("E19999", "in_out_info:%s size:%zu != kAippInfoNum:%u, model_id:%u, " "check invalid", in_out_info.c_str(), infos.size(), kAippInfoNum, model_id_); - GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "[Check][Param] in_out_info:%s size:%zu != kAippInfoNum:%u, model_id:%u", + in_out_info.c_str(), infos.size(), kAippInfoNum, model_id_); return; } dims_info.name = infos[kAippInfoTensorName]; @@ -4293,9 +4422,9 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector &output_dims) const { const auto it = aipp_dims_info_.find(index); if (it == aipp_dims_info_.end()) { - REPORT_INNER_ERROR("E19999", "Get index:%u from aipp_dims_info_ fail, model_id:%u", - index, model_id_); - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); + REPORT_INNER_ERROR("E19999", "Get index:%u from aipp_dims_info_ fail, model_id:%u", index, model_id_); + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "[Check][Param] Get index:%u from aipp_dims_info_ fail, model_id:%u", + index, model_id_); return ACL_ERROR_GE_AIPP_NOT_EXIST; } @@ -4326,9 +4455,8 @@ Status DavinciModel::InitL1DataDumperArgs() { if (rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, 
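// Editor's illustrative sketch (simplified, placeholder identifiers; not the GE implementation):
// the MallocExMem/FreeExMem pair above replaces the dedicated P2P path with a generic walk over
// runtime_param_.memory_infos, where the map key encodes both the RT memory type and an extra
// session-scope flag bit. Reduced to its decision tree, the per-entry allocation looks like this
// (mem_info, use_static_memory, base, size, purpose, memory_key, session_id and device_id are
// stand-ins for the values taken from the entry and the environment):
auto key = mem_info.first;                                        // e.g. kSessionScopeMemory | RT_MEMORY_HBM
bool session_scope = ((key & kSessionScopeMemory) == kSessionScopeMemory);
auto rt_mem_type = key & kMemoryTypeMask;                         // strip the scope bit, keep RT_MEMORY_*
if (session_scope) {
  // freed only when the session is destroyed, so FreeExMem() skips these entries
  base = MemManager::Instance().SessionScopeMemInstance(rt_mem_type).Malloc(size, session_id);
} else if (use_static_memory) {                                   // kEnvGeuseStaticMemory is set
  base = MemManager::Instance().MemInstance(rt_mem_type).MallocMemory(purpose, memory_key, size, device_id);
} else {
  base = MemManager::Instance().MemInstance(rt_mem_type).MallocMemory(purpose, size, device_id);
}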
kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion) != RT_ERROR_NONE) { // l1_fusion_addr_ will be free when DavinciModel destruct - REPORT_CALL_ERROR("E19999", "Call rtDumpAddrSet failed, model_id:%u", - model_id_); - GELOGE(FAILED, "Call rtDumpAddrSet failed"); + REPORT_CALL_ERROR("E19999", "Call rtDumpAddrSet failed, model_id:%u", model_id_); + GELOGE(FAILED, "[Call][RtDumpAddrSet] failed, model_id:%u", model_id_); return FAILED; } diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 736272f7..e4898dec 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -248,8 +248,6 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const map &P2PMemInfos() const { return runtime_param_.memory_infos; } - // model name string Name() const { return name_; } @@ -361,6 +359,8 @@ class DavinciModel { void GetCurShape(vector &batch_info, int32_t &dynamic_type) const; + Status GetOpAttr(const std::string &op_name, const std::string &attr_name, std::string &attr_value) const; + void GetModelAttr(vector &dynamic_output_shape_info) const; /// @@ -474,6 +474,8 @@ class DavinciModel { int64_t GetLoadEndTime() { return load_end_time_; } + void SaveSpecifyAttrValues(const OpDescPtr &op_desc); + Status ReportProfilingData(); void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { @@ -582,10 +584,8 @@ class DavinciModel { // memory address of model uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever. uint8_t *mem_base_; - uint8_t *p2p_mem_base_; bool is_inner_mem_base_; bool is_inner_weight_base_; - bool is_inner_p2p_mem_base_; // input data manager DataInputer *data_inputer_; int64_t load_begin_time_; @@ -635,7 +635,7 @@ class DavinciModel { Status UpdateIoTaskArgs(const map &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label); - Status CopyInputData(const InputData &input_data, bool device_data = false); + Status CopyInputData(const InputData &input_data); Status CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind); @@ -664,13 +664,13 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t *MallocP2PMem(size_t p2p_data_size); + Status MallocExMem(); void FreeFeatureMapMem(); void FreeWeightsMem(); - void FreeP2PMem(); + void FreeExMem(); void ReleaseTask(); @@ -880,7 +880,7 @@ class DavinciModel { Status SinkTimeProfile(const InputData ¤t_data); Status InitOutputTensorInfo(const OpDescPtr &op_desc); - Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); + Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); Status InitInputDescInfo(const OpDescPtr &op_desc); Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name); @@ -1096,6 +1096,9 @@ class DavinciModel { // known shape node for dump void *known_shape_global_step_; + + // op name to attrs mapping + std::map>> op_name_to_attrs_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index a288e14e..47d104f4 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -27,6 +27,7 @@ #include "graph/load/model_manager/davinci_model.h" #include "model/ge_root_model.h" #include 
"common/formats/utils/formats_trans_utils.h" +#include "toolchain/adx_datadump_server.h" namespace ge { thread_local uint32_t device_count = 0; @@ -48,6 +49,7 @@ const int kTimeSpecNano = 1000000000; const int kTimeSpecMiro = 1000000; const int kOpNameMaxSize = 100; const uint64_t kInferSessionId = 0; +const int32_t kDumpStatus = 0; #pragma pack(push, 1) struct CustAicpuSoBuf { uint64_t kernelSoBuf; @@ -321,6 +323,58 @@ bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) { (void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph); return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag(); } + +bool ModelManager::IsDumpSeverInited(uint64_t session_id) { + auto it = session_id_to_dump_server_init_flag_.find(session_id); + return it != session_id_to_dump_server_init_flag_.end() && it->second; +} + +Status ModelManager::AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties) { + if (!IsDumpSeverInited(session_id)) { + if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { + GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, + GELOGE(PARAM_INVALID, "[Init][AdxDataDumpServer] failed, session_id:%lu.", session_id); + return PARAM_INVALID) + GELOGI("Init adx data dump server success"); + session_id_to_dump_server_init_flag_[session_id] = true; + } + } + DumpManager::GetInstance().AddDumpProperties(session_id, dump_properties); + return SUCCESS; +} + +Status ModelManager::InitDumPropertiesWithNewSessionId(uint64_t session_id) { + DumpProperties dump_properties; + dump_properties.InitByOptions(); + GE_CHK_STATUS_RET(AddDumpProperties(session_id, dump_properties), "[Add][DumpProperties] failed."); + return SUCCESS; +} + +Status ModelManager::UpdateSessionId(uint32_t model_id, GeModelPtr ge_model, + std::shared_ptr &davinci_model, uint64_t &session_id) { + uint64_t new_session_id; + Status ret = GenSessionId(new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); + ret = davinci_model->UpdateSessionId(new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); + ge_model->InsertSessionMap(model_id, new_session_id); + GELOGD("Update new session id: %lu.", new_session_id); + session_id = new_session_id; + return SUCCESS; +} + +bool ModelManager::HasVarNode(ComputeGraphPtr &compute_graph) const { + for (ge::NodePtr &node : compute_graph->GetAllNodes()) { + if (node == nullptr) { + continue; + } + if (node->GetType() == VARIABLE) { + return true; + } + } + return false; +} + /// /// @ingroup domi_ome /// @brief load model online @@ -347,10 +401,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetId(model_id); davinci_model->SetDeviceId(GetContext().DeviceId()); - const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(GetContext().SessionId()); - davinci_model->SetDumpProperties(dump_properties); - dump_properties_ = dump_properties; - auto root_graph = ge_root_model->GetRootGraph(); GE_CHECK_NOTNULL(root_graph); string root_model_name = root_graph->GetName(); @@ -364,15 +414,23 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrGetTrainFlag()) { - uint64_t new_session_id; - ret = GenSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); - ret = davinci_model->UpdateSessionId(new_session_id); - 
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); - ge_model->InsertSessionMap(model_id, new_session_id); - GELOGD("Update new session id: %lu.", new_session_id); + uint64_t session_id = GetContext().SessionId(); + // Inference graph with variable node is not support for multi-threads scenario + if (!ge_root_model->GetTrainFlag() && !HasVarNode(root_graph)) { + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(UpdateSessionId(model_id, ge_model, davinci_model, session_id) != SUCCESS, + return ret, + "UpdateSessionId failed."); + GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(InitDumPropertiesWithNewSessionId(session_id) != SUCCESS, + GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + return ret, + "Init DumProperties with new session_id failed."); } + + const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(session_id); + davinci_model->SetDumpProperties(dump_properties); + dump_properties_ = dump_properties; + GE_TIMESTAMP_START(Init); GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;); GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit"); @@ -398,26 +456,32 @@ void ModelManager::InsertModel(uint32_t model_id, shared_ptr lock(map_mutex_); + // These two pointers are used to unbind erase() and model destruction process. + std::shared_ptr tmp_model; + std::shared_ptr tmp_hybrid_model; + { + std::lock_guard lock(map_mutex_); - auto it = model_map_.find(id); - auto hybrid_model_it = hybrid_model_map_.find(id); - if (it != model_map_.end()) { - uint64_t session_id = it->second->GetSessionId(); - std::string model_key = std::to_string(session_id) + "_" + std::to_string(id) + "_" + - std::to_string(it->second->SubModelId()); - auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key); - if (iter_aicpu_kernel != model_aicpu_kernel_.end()) { - (void)model_aicpu_kernel_.erase(iter_aicpu_kernel); + auto it = model_map_.find(id); + auto hybrid_model_it = hybrid_model_map_.find(id); + if (it != model_map_.end()) { + uint64_t session_id = it->second->GetSessionId(); + std::string model_key = std::to_string(session_id) + "_" + std::to_string(id) + "_" + + std::to_string(it->second->SubModelId()); + auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key); + if (iter_aicpu_kernel != model_aicpu_kernel_.end()) { + (void)model_aicpu_kernel_.erase(iter_aicpu_kernel); + } + tmp_model = it->second; + (void)model_map_.erase(it); + } else if (hybrid_model_it != hybrid_model_map_.end()) { + tmp_hybrid_model = hybrid_model_it->second; + (void)hybrid_model_map_.erase(hybrid_model_it); + } else { + REPORT_INNER_ERROR("E19999", "model_id:%u not exist in model_map, check invalid", id); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); + return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; } - (void)model_map_.erase(it); - } else if (hybrid_model_it != hybrid_model_map_.end()) { - (void)hybrid_model_map_.erase(hybrid_model_it); - } else { - REPORT_INNER_ERROR("E19999", "model_id:%u not exist in model_map, check invalid", - id); - GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); - return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; } return SUCCESS; @@ -542,7 +606,7 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ /// @brief load Input and output TensorInfo for Model /// @return Status run result /// -Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector &inputs) { +Status 
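// Editor's sketch (schematic, using the names from the DeleteModel() hunk above; lock and pointer
// types simplified): the rewrite narrows the critical section -- the shared_ptr is copied out and
// the map entry erased while map_mutex_ is held, but the potentially slow DavinciModel destructor
// only runs after the guard goes out of scope, so other threads are not blocked behind teardown:
std::shared_ptr<DavinciModel> tmp_model;
{
  std::lock_guard lock(map_mutex_);
  auto it = model_map_.find(id);
  if (it == model_map_.end()) {
    return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
  }
  tmp_model = it->second;      // keep the model alive past erase()
  (void)model_map_.erase(it);
}                               // mutex released here; tmp_model may destroy the model afterwards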
ModelManager::DataInputTensor(uint32_t model_id, const std::vector &inputs) { std::shared_ptr model = GetModel(model_id); auto hybrid_model = GetHybridModel(model_id); if (hybrid_model == nullptr) { @@ -556,9 +620,11 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(const_cast(inputs[i].GetData())); + data.length = inputs[i].GetSize(); + data.placement = static_cast(tensor_desc.GetPlacement()); + input_data.shapes.emplace_back(tensor_desc.GetShape().GetDims()); input_data.blobs.push_back(data); } if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { @@ -608,7 +674,6 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector & return SUCCESS; } +Status ModelManager::GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value) { + auto davinci_model = GetModel(model_id); + if (davinci_model != nullptr) { + return davinci_model->GetOpAttr(op_name, attr_name, attr_value); + } + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + return hybrid_davinci_model->GetOpAttr(op_name, attr_name, attr_value); + } + GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "[Get][Model]Get model failed, invalid model id:%u.", model_id); + REPORT_INNER_ERROR("E19999", "Get model failed, invalid model id:%u.", model_id); + return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; +} + Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { diff --git a/ge/graph/load/model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h index bf804d32..c0f14934 100755 --- a/ge/graph/load/model_manager/model_manager.h +++ b/ge/graph/load/model_manager/model_manager.h @@ -122,7 +122,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// ge::Status DataInput(const InputData &input_data, OutputData &output_data); - ge::Status DataInputTensor(uint32_t model_id, const std::vector &inputs); + ge::Status DataInputTensor(uint32_t model_id, const std::vector &inputs); /// /// @ingroup domi_ome @@ -246,6 +246,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); + ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value); + ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); ge::Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type); @@ -342,6 +345,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { void GenModelId(uint32_t *id); + Status InitDumPropertiesWithNewSessionId(uint64_t session_id); + + bool IsDumpSeverInited(uint64_t session_id); + + Status AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties); + + Status UpdateSessionId(uint32_t model_id, GeModelPtr ge_model, + std::shared_ptr &davinci_model, uint64_t &session_id); + + bool HasVarNode(ComputeGraphPtr &compute_graph) const; std::map> model_map_; std::map> hybrid_model_map_; @@ -358,6 +371,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { static DumpProperties dump_properties_; bool dump_exception_flag_ = false; + std::map session_id_to_dump_server_init_flag_; }; } // namespace ge diff --git 
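// Editor's usage sketch (hypothetical caller; assumes the usual ModelManager singleton accessor):
// the GetOpAttr entry point added in the hunk above resolves the loaded model by id -- known-shape
// DavinciModel first, then the hybrid model -- and returns the attribute value presumably recorded
// at load time via SaveSpecifyAttrValues()/op_name_to_attrs_ from the header hunk. A caller would
// look roughly like this, with op_name and attr_name supplied by the caller:
std::string attr_value;
Status ret = ModelManager::GetInstance()->GetOpAttr(model_id, op_name, attr_name, attr_value);
if (ret != SUCCESS) {
  GELOGE(ret, "[Get][OpAttr] failed, model_id:%u, op:%s.", model_id, op_name.c_str());
}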
a/ge/graph/load/model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc index 058a538f..f6ff591a 100755 --- a/ge/graph/load/model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -21,6 +21,7 @@ #include "graph/utils/tensor_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/types.h" +#include "graph/build/memory/block_mem_assigner.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -514,10 +515,16 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); bool has_mem_type_workspace = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { // Temporary solution, the aicpu workspace of multiple images cannot be shared. - if (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && - !model_param.is_single_op) { + bool aicpu_work_space = (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && + !model_param.is_single_op); + if (aicpu_work_space) { void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); v_workspace_data_addr.push_back(mem_addr); GELOGI( @@ -548,7 +555,13 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); - uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; + uint8_t *mem_addr = nullptr; + bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()); + if (session_scope_memory) { + mem_addr = model_param.memory_infos.at(kSessionScopeMemory | RT_MEMORY_HBM).memory_base + v_workspace_offset[i]; + } else { + mem_addr = model_param.mem_base + v_workspace_offset[i]; + } v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], diff --git a/ge/graph/load/model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc index d3c98684..c714d586 100644 --- a/ge/graph/load/model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -28,13 +28,13 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GELOGI("InitEndGraphTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } davinci_model_ = davinci_model; Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); if (ret != SUCCESS) { - GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); + GELOGE(ret, "[Set][Stream] fail, stream_id:%u", task_def.stream_id()); return ret; } @@ -51,7 +51,7 @@ Status EndGraphTaskInfo::Distribute() { rtError_t rt_ret = 
rtEndGraphEx(model_, stream_, kDumpFlag); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtEndGraphEx failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret); + GELOGE(RT_FAILED, "[Call][RtEndGraphEx] failed, ret:0x%x", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } else { @@ -59,7 +59,7 @@ Status EndGraphTaskInfo::Distribute() { rtError_t rt_ret = rtEndGraph(model_, stream_); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtEndGraph failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret); + GELOGE(RT_FAILED, "[Call][RtEndGraph] failed, ret:0x%x", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -68,9 +68,8 @@ Status EndGraphTaskInfo::Distribute() { uint32_t stream_id = 0; rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelGetTaskId] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } task_id_ = task_id; diff --git a/ge/graph/load/model_manager/task_info/event_record_task_info.cc b/ge/graph/load/model_manager/task_info/event_record_task_info.cc index 13dae9ee..6af7e20d 100755 --- a/ge/graph/load/model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.cc @@ -24,7 +24,7 @@ Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da GELOGI("EventRecordTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -37,7 +37,7 @@ Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da if (task_def.event_id() >= eventList.size()) { REPORT_INNER_ERROR("E19999", "Task event_id:%u > model event size:%zu, check invalid", task_def.event_id(), eventList.size()); - GELOGE(INTERNAL_ERROR, "event list size:%zu, cur:%u!", eventList.size(), task_def.event_id()); + GELOGE(INTERNAL_ERROR, "[Check][Param] event list size:%zu, cur:%u!", eventList.size(), task_def.event_id()); return INTERNAL_ERROR; } @@ -50,9 +50,8 @@ Status EventRecordTaskInfo::Distribute() { GELOGI("EventRecordTaskInfo Distribute Start."); rtError_t rt_ret = rtEventRecord(event_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtEventRecord failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtEventRecord failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtEventRecord] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc index 8fae9225..52dc660d 100755 --- a/ge/graph/load/model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc @@ -24,7 +24,7 @@ Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davi GELOGI("EventWaitTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model 
nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -37,7 +37,7 @@ Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davi if (task_def.event_id() >= eventList.size()) { REPORT_INNER_ERROR("E19999", "Task event_id:%u > model event size:%zu, check invalid", task_def.event_id(), eventList.size()); - GELOGE(INTERNAL_ERROR, "event list size:%zu, cur:%u!", eventList.size(), task_def.event_id()); + GELOGE(INTERNAL_ERROR, "[Check][Param] event list size:%zu, cur:%u!", eventList.size(), task_def.event_id()); return INTERNAL_ERROR; } @@ -51,17 +51,15 @@ Status EventWaitTaskInfo::Distribute() { GELOGI("EventWaitTaskInfo Distribute Start."); rtError_t rt_ret = rtStreamWaitEvent(stream_, event_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamWaitEvent] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtEventReset(event_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtEventReset] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc index b47ac097..c9fc5424 100755 --- a/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc @@ -24,7 +24,7 @@ Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da GELOGI("FusionStartTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -40,9 +40,8 @@ Status FusionStartTaskInfo::Distribute() { GELOGI("FusionStartTaskInfo Distribute Start."); rtError_t rt_ret = rtKernelFusionStart(stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtKernelFusionStart failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtKernelFusionStart failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelFusionStart] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc index 6188cfc8..efce62ad 100755 --- a/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc @@ -24,7 +24,7 @@ Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *dav GELOGI("FusionStopTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -41,7 +41,7 @@ Status 
FusionStopTaskInfo::Distribute() { rtError_t rt_ret = rtKernelFusionEnd(stream_); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtKernelFusionEnd failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelFusionEnd] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/hccl_task_info.cc b/ge/graph/load/model_manager/task_info/hccl_task_info.cc index 7a435f91..c3c5c8b7 100644 --- a/ge/graph/load/model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.cc @@ -31,7 +31,7 @@ HcclTaskInfo::~HcclTaskInfo() { rtError_t ret = rtFreeHost(private_def_); if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X", ret); - GELOGE(RT_FAILED, "Call rtFree Fail, ret = 0x%X.", ret); + GELOGE(RT_FAILED, "[Call][RtFree] Fail, ret = 0x%X.", ret); } private_def_ = nullptr; } @@ -43,7 +43,7 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m GELOGI("HcclTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } davinci_model_ = davinci_model; @@ -71,21 +71,21 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call GetHorovodInputs fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(ret, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); + GELOGE(ret, "[Get][HorovodInputs] fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ret; } Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc, kernel_hccl_infos_); if (dmrt != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call GetHcclDataType fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(dmrt, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); + GELOGE(dmrt, "[Get][HcomDataType] fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return dmrt; } dmrt = HcomOmeUtil::GetHcclCount(op_desc, kernel_hccl_infos_); if (dmrt != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call GetHcclCount fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(dmrt, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); + GELOGE(dmrt, "[Get][HcomCount] fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return dmrt; } // Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId @@ -93,14 +93,14 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m if (dmrt != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call GetAllRootId fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(dmrt, "davinci_model: Get rootId fail! 
domi error: %u", dmrt); + GELOGE(dmrt, "[Get][RootId] fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return dmrt; } // GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl ret = SetFollowStream(op_desc, davinci_model); if (ret != SUCCESS) { - GELOGE(ret, "SetStream Fail."); + GELOGE(ret, "[Set][Stream] Fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ret; } @@ -111,13 +111,13 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m ret = SetAddrs(op_desc, kernel_hccl_infos_); if (ret != SUCCESS) { - GELOGE(ret, "Setaddrs Fail."); + GELOGE(ret, "[Set][Addrs] Fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ret; } // GE's new process: hccl declares the need for Workspace size, and GE allocates Workspace ret = SetWorkspace(op_desc, kernel_hccl_infos_); if (ret != SUCCESS) { - GELOGE(ret, "SetWorkspace Fail."); + GELOGE(ret, "[Set][Workspace] Fail for op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ret; } @@ -156,7 +156,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM } ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); if (ret != SUCCESS) { - GELOGE(RT_FAILED, "Create hccl stream failed."); + GELOGE(RT_FAILED, "[Create][Stream] for %s failed, stream id:%ld, stream num:%ld.", + op_desc->GetName().c_str(), main_stream_id, hccl_stream_num - created_stream_num); return RT_ERROR_TO_GE_STATUS(ret); } } @@ -165,7 +166,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM GELOGI("need to create follow stream for %s with new mainstream %ld.", op_desc->GetName().c_str(), main_stream_id); ret = CreateStream(hccl_stream_num, davinci_model, main_stream_id); if (ret != SUCCESS) { - GELOGE(RT_FAILED, "Create hccl stream failed."); + GELOGE(RT_FAILED, "[Create][Stream] for %s failed, stream id:%ld, stream num:%ld.", + op_desc->GetName().c_str(), main_stream_id, hccl_stream_num); return RT_ERROR_TO_GE_STATUS(ret); } } @@ -181,7 +183,8 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtStreamCreateWithFlags failed, ret:0x%X, stream_idx:%ld, stream_num:%ld", rt_ret, i, stream_num); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamCreateWithFlags] failed, ret:0x%X, stream_idx:%ld, stream_num:%ld", + rt_ret, i, stream_num); return RT_ERROR_TO_GE_STATUS(rt_ret); } // Create slave stream, inactive by default, activated by hccl @@ -189,7 +192,8 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtModelBindStream failed, ret:0x%X, stream_idx:%ld, stream_num:%ld", rt_ret, i, stream_num); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelBindStream] failed, ret:0x%X, stream_idx:%ld, stream_num:%ld", + rt_ret, i, stream_num); (void)rtStreamDestroy(stream); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -207,7 +211,7 @@ Status HcclTaskInfo::Distribute() { GELOGI("HcclTaskInfo Distribute Start. 
begin to call function LoadTask in hccl."); if (ops_kernel_store_ == nullptr) { REPORT_INNER_ERROR("E19999", "Check param ops_kernel_store_ nullptr"); - GELOGE(INTERNAL_ERROR, "ops kernel store is null."); + GELOGE(INTERNAL_ERROR, "[Check][Param] ops kernel store is null."); return INTERNAL_ERROR; } OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast(ops_kernel_store_); @@ -217,7 +221,7 @@ Status HcclTaskInfo::Distribute() { auto result = ops_kernel_info_store->LoadTask(ge_task); if (result != HCCL_SUCCESS) { REPORT_CALL_ERROR("E19999", "Call ops_kernel_info_store LoadTask fail"); - GELOGE(INTERNAL_ERROR, "davinci_model : load task fail, return ret: %u", result); + GELOGE(INTERNAL_ERROR, "[Load][Task] fail, return ret:%u", result); return INTERNAL_ERROR; } GELOGI("HcclTaskInfo Distribute Success."); @@ -265,8 +269,9 @@ Status HcclTaskInfo::SetAddrs(const std::shared_ptr &op_desc, std::vector &kernel_hccl_infos) { GE_CHECK_NOTNULL(op_desc); GE_CHK_STATUS_RET(HcomOmeUtil::CheckKernelHcclInfo(op_desc, kernel_hccl_infos), - "HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid."); - GELOGI("Set hccl task input output address, node[%s}, type[%s] kernel_hccl_infos.size[%zu].", + "[Check][Param] HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid, node:%s(%s).", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); + GELOGI("Set hccl task input output address, node[%s], type[%s] kernel_hccl_infos.size[%zu].", op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size()); if (op_desc->GetType() == HVDWAIT) { return SUCCESS; @@ -300,7 +305,7 @@ Status HcclTaskInfo::SetAddrs(const std::shared_ptr &op_desc, } else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE || hccl_type == HCOMREDUCE) { GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type), - "davinci_model: GetHcomOperationType fail!"); + "[Get][HcomOperationType] fail! 
op:%s", op_desc->GetName().c_str()); kernel_hccl_infos[i].outputDataAddr = output_data_addr; kernel_hccl_infos[i].opType = op_type; } @@ -332,18 +337,16 @@ void HcclTaskInfo::GetPrivateDefByTaskDef(const domi::TaskDef &task) { private_def_len_ = private_def_temp.size(); rtError_t ret = rtMallocHost(&private_def_, private_def_len_); if (ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, ret:0x%X, size:%u", - ret, private_def_len_); - GELOGE(RT_FAILED, "Call rtMallocHost Fail, ret = 0x%X.", ret); + REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, ret:0x%X, size:%u", ret, private_def_len_); + GELOGE(RT_FAILED, "[Call][RtMallocHost] Fail, ret:0x%X, size:%u", ret, private_def_len_); return; } ret = rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, RT_MEMCPY_HOST_TO_HOST); if (ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%u", - ret, private_def_len_); - GELOGE(RT_FAILED, "Call rtMemcpy Fail, ret = 0x%X.", ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%u", ret, private_def_len_); + GELOGE(RT_FAILED, "[Call][RtMemcpy] Fail, ret:0x%X, size:%u", ret, private_def_len_); return; } GELOGI("The first address of the custom info, privateDef=%p.", private_def_); diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index e2f600b3..7c26d23d 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -48,11 +48,12 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe num_inputs, num_outputs, unknown_type)); - GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); + GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "[Malloc][Memory] for aicpu_ext_handle failed!"); GE_CHK_STATUS_RET(ext_handle->Parse(ext_info), - "Parse kernel ext info failed, kernel_ext_info_size=%zu.", ext_info.size()); - GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "UpdateExecuteMode failed."); + "[Parse][KernelExtInfo] failed, kernel_ext_info_size=%zu.", ext_info.size()); + GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "[Update][ExecuteMode] failed."); GELOGD("Update aicpu_task ext_info bit_map execute mode to 1."); + topic_type_flag_ = ext_handle->GetTopicTypeFlag(); bool all_shape = false; (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); @@ -62,29 +63,30 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe auto input_desc = op_desc->MutableInputDesc(i); GE_CHECK_NOTNULL(input_desc); GE_CHK_STATUS_RET(ext_handle->UpdateInputShapeAndType(i, *input_desc), - "Input[%u] update input shape failed.", i); + "[Call][UpdateInputShapeAndType] Input[%u] update input shape failed, op:%s.", + i, op_desc->GetName().c_str()); } if (unknown_type != DEPEND_COMPUTE) { for (uint32_t j = 0; j < num_outputs; j++) { auto output_desc = op_desc->MutableOutputDesc(j); GE_CHECK_NOTNULL(output_desc); GE_CHK_STATUS_RET(ext_handle->UpdateOutputShapeAndType(j, *output_desc), - "Output[%u] update output shape failed.", j); + "[Call][UpdateOutputShapeAndType] Output[%u] update output shape failed, op:%s.", + j, op_desc->GetName().c_str()); } } } auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, 
size:%zu, ret:0x%X", - ext_info.size(), rt_ret); - GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", ext_info.size(), rt_ret); + GELOGE(RT_FAILED, "[RtMalloc][ExtInfo] error:0x%X, size=%zu", rt_ret, ext_info.size()); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle->GetExtInfo(), ext_handle->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", ext_handle->GetExtInfoLen(), rt_ret); - GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + GELOGE(RT_FAILED, "[RtMemcpy][ExtInfo] error:0x%X, size=%zu", rt_ret, ext_info.size()); return RT_ERROR_TO_GE_STATUS(rt_ret);) return SUCCESS; } @@ -105,9 +107,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin uint32_t op_index = kernel_ex_def.op_index(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - op_index); - GELOGE(INTERNAL_ERROR, "Init aicpu task info error, index is out of range!"); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); + GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); return INTERNAL_ERROR; } @@ -116,22 +117,22 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (sizeof(STR_FWK_OP_KERNEL) < kernel_ex_def.args_size()) { REPORT_INNER_ERROR("E19999", "Param kernel_ex_def.args_size():%u > sizeof(STR_FWK_OP_KERNEL):%zu, " "check invalid", kernel_ex_def.args_size(), sizeof(STR_FWK_OP_KERNEL)); - GELOGE(FAILED, "sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", sizeof(STR_FWK_OP_KERNEL), + GELOGE(FAILED, "[Check][Param] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size()); return FAILED; } errno_t sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); if (sec_ret != EOK) { - REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%zu, ret:0x%X", - sizeof(STR_FWK_OP_KERNEL), sec_ret); - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%zu, ret:0x%X", sizeof(STR_FWK_OP_KERNEL), sec_ret); + GELOGE(FAILED, "[Call][Memcpy] failed, size:%zu, ret: %d", sizeof(STR_FWK_OP_KERNEL), sec_ret); return FAILED; } const auto &ext_info = kernel_ex_def.kernel_ext_info(); GE_CHK_STATUS_RET(InitTaskExtInfo(ext_info, op_desc), - "Init aicpu tf_task ext info failed, ext_info size=%zu", ext_info.size()); + "[Init][TaskExtInfo] failed, ext_info size=%zu, op:%s", + ext_info.size(), op_desc->GetName().c_str()); GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(), op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); @@ -148,15 +149,15 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin davinci_model->SubModelId(), kernel_id) != SUCCESS, REPORT_CALL_ERROR("E19999", "CreateAicpuKernel fail, session_id:%lu, model_id:%u, kernel_id:%lu", session_id, davinci_model->Id(), kernel_id); - GELOGE(FAILED, "CreateAicpuKernel error."); + GELOGE(FAILED, "[Create][AicpuKernel] fail, session_id:%lu, model_id:%u, kernel_id:%lu", + 
session_id, davinci_model->Id(), kernel_id); return FAILED;) // 2.3 Create session GE_CHECK_NOTNULL(ModelManager::GetInstance()); ret = ModelManager::GetInstance()->CreateAicpuSession(session_id); GE_IF_BOOL_EXEC(ret != SUCCESS, - REPORT_CALL_ERROR("E19999", "CreateAicpuSession fail, session_id:%lu", - session_id); - GELOGE(ret, "CreateAicpuSession error. session id: %lu", session_id); + REPORT_CALL_ERROR("E19999", "CreateAicpuSession fail, session_id:%lu", session_id); + GELOGE(ret, "[Create][AicpuSession] error. session id:%lu", session_id); return ret;) kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL); @@ -169,7 +170,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", kernel_ex_def.task_info_size(), rt_ret); - GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", + kernel_ex_def.task_info_size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);); rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); @@ -181,17 +183,15 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%u", - rt_ret, kernel_buf_size_); - GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%u", rt_ret, kernel_buf_size_); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, ret:0x%X, size:%u", rt_ret, kernel_buf_size_); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast(&fwk_op_kernel), kernel_buf_size_, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%u", - rt_ret, kernel_buf_size_); - GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%u", rt_ret, kernel_buf_size_); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, ret:0x%X, size:%u", rt_ret, kernel_buf_size_); return RT_ERROR_TO_GE_STATUS(rt_ret);) SetIoAddrs(op_desc); @@ -203,7 +203,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin // 3. 
Set workspaceaddr, inputOutputDataAddr Status ge_ret = CopyTaskInfo(kernel_ex_def, rts_param, op_desc); if (ge_ret != SUCCESS) { - GELOGE(ge_ret, "copy task info to workspace failed."); + GELOGE(ge_ret, "[Copy][TaskInfo] to workspace failed, op:%s.", op_desc->GetName().c_str()); return ge_ret; } @@ -211,7 +211,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (workspace_data_addrs.empty()) { REPORT_CALL_ERROR("E19999", "workspace_data_addrs is empty in op:%s(%s), check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "workspace_data_addrs is empty."); + GELOGE(FAILED, "[Check][Param] workspace_data_addrs is empty in op:%s(%s).", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } @@ -226,23 +227,17 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (addrs_size > 0) { rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%lu", - rt_ret, addrs_size); - GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%lu", rt_ret, addrs_size); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, ret:0x%X, size:%lu", rt_ret, addrs_size); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%lu", - rt_ret, addrs_size); - GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%lu", rt_ret, addrs_size); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, ret:0x%X, size:%lu", rt_ret, addrs_size); return RT_ERROR_TO_GE_STATUS(rt_ret);) InitDumpTask(input_output_addr_, op_desc); - if (davinci_model_->GetOpDugReg()) { - GELOGI("Op debug is open in kernel ex task info"); - dump_args_ = input_output_addr_; - } } uint64_t input_output_addr = static_cast(reinterpret_cast(input_output_addr_)); @@ -257,7 +252,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%zu", rt_ret, sizeof(STR_FWK_OP_KERNEL)); - GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, ret:0x%X, size:%zu", rt_ret, sizeof(STR_FWK_OP_KERNEL)); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast(&fwk_op_kernel), @@ -265,7 +260,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%zu", rt_ret, sizeof(STR_FWK_OP_KERNEL)); - GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, ret:0x%X, size:%zu", rt_ret, sizeof(STR_FWK_OP_KERNEL)); return RT_ERROR_TO_GE_STATUS(rt_ret);) davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); @@ -275,10 +270,15 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin } void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { - if (davinci_model_->OpNeedDump(op_desc->GetName())) { + if 
(davinci_model_->OpNeedDump(op_desc->GetName()) || davinci_model_->GetOpDugReg()) { + GELOGD("Op %s need dump in kernel ex task info", op_desc->GetName().c_str()); dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = addr; } + if (davinci_model_->GetOpDugReg()) { + GELOGD("Op debug is open in kernel ex task info"); + dump_args_ = addr; + } } Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { @@ -286,9 +286,8 @@ Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciMod uint32_t op_index = kernel_ex_def.op_index(); OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - op_index); - GELOGE(INTERNAL_ERROR, "Init aicpu task info error, index is out of range!"); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); + GELOGE(INTERNAL_ERROR, "[Get][Op] By Index, index:%u is out of range!", op_index); return INTERNAL_ERROR; } args_offset_ = davinci_model->GetTotalArgsSize(); @@ -308,7 +307,8 @@ Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciMod REPORT_INNER_ERROR("E19999", "The output size[%zu] and output index[%u] in op:%s(%s) are inconsistent, " "check invalid", outputs_size, output_index, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", outputs_size, output_index); + GELOGE(FAILED, "[Check][Param] The output size[%zu] and output index[%u] in op:%s(%s) are inconsistent.", + outputs_size, output_index, op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); @@ -334,11 +334,12 @@ void KernelExTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) { if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); if (output_index > output_data_addrs.size()) { - REPORT_INNER_ERROR("E19999", "The output data addr size[%zu] and output index[%u] in op:%s(%s) are inconsistent" - ", check invalid", output_data_addrs.size(), output_index, + REPORT_INNER_ERROR("E19999", "The output data addr size[%zu] and output index[%u] in op:%s(%s) " + "are inconsistent, check invalid", output_data_addrs.size(), output_index, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", - output_data_addrs.size(), output_index); + GELOGE(FAILED, "[Check][Param] The output data addr size[%zu] and output index[%u] in op:%s(%s) " + "are inconsistent.", output_data_addrs.size(), output_index, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return; } io_addrs_.insert(io_addrs_.end(), input_data_addrs.begin(), input_data_addrs.end()); @@ -371,7 +372,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const REPORT_INNER_ERROR("E19999", "Node:%s(%s) workspace addr:%zu or size:%zu empty, check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_data_addrs.size(), workspace_data_sizes.size()); - GELOGE(FAILED, "Node:%s invalid workspace, addrs is %zu, size is %zu.", op_desc->GetName().c_str(), + GELOGE(FAILED, "[Check][Param] Node:%s invalid workspace, addrs is %zu, size is %zu.", op_desc->GetName().c_str(), workspace_data_addrs.size(), workspace_data_sizes.size()); return 
FAILED; } @@ -379,7 +380,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const if (workspace_data_addrs[0] == nullptr) { REPORT_INNER_ERROR("E19999", "Node:%s(%s) workspace addr is nullptr, check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Node:%s workspace addrs is null.", op_desc->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] Node:%s workspace addrs is null.", op_desc->GetName().c_str()); return FAILED; } @@ -387,7 +388,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const REPORT_INNER_ERROR("E19999", "Node:%s(%s) workspace size:%ld < task info size:%d, check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_data_sizes[0], kernel_def.task_info_size()); - GELOGE(FAILED, "Node:%s workspace size is %ld, task info size is %d.", op_desc->GetName().c_str(), + GELOGE(FAILED, "[Check][Param] Node:%s workspace size is %ld, task info size is %d.", op_desc->GetName().c_str(), workspace_data_sizes[0], kernel_def.task_info_size()); return FAILED; } @@ -395,9 +396,8 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%d", - rt_ret, kernel_def.task_info_size()); - GELOGE(RT_FAILED, "rtMemcpy error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%d", rt_ret, kernel_def.task_info_size()); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, ret:0x%X, size:%d", rt_ret, kernel_def.task_info_size()); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -406,17 +406,24 @@ Status KernelExTaskInfo::Distribute() { GELOGI("KernelExTaskInfo Distribute Start."); + // Use the fifth and sixth bits of dump_flag_ to indicate the value of topic_type.
+ // xxxxxxxx xxxxxxxx xxxxxxxx xx00xxxx: DEVICE_ONLY + // xxxxxxxx xxxxxxxx xxxxxxxx xx01xxxx: DEVICE_FIRST + // xxxxxxxx xxxxxxxx xxxxxxxx xx10xxxx: HOST_ONLY + // xxxxxxxx xxxxxxxx xxxxxxxx xx11xxxx: HOST_FIRST + if (topic_type_flag_ > 0) { + dump_flag_ = dump_flag_ | topic_type_flag_; + } rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } if (davinci_model_ == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model_ is null."); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model_ is null."); return PARAM_INVALID; } @@ -424,9 +431,8 @@ Status KernelExTaskInfo::Distribute() { uint32_t stream_id = 0; // for profiling rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelGetTaskId] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } task_id_ = task_id; diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index 71153c31..bcc17168 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -76,6 +76,7 @@ class KernelExTaskInfo : public TaskInfo { vector io_addrs_; uint32_t args_offset_ = 0; int64_t fixed_addr_offset_ = 0; + int32_t topic_type_flag_ = -1; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index 82c3e286..c72bfeef 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -95,11 +95,10 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci rtError_t rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName failed for op:%s(%s), " - "bin_file_key:%s, ret:0x%X", - op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), - kernel_def.stub_func().c_str(), rt_ret); - GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", - kernel_def.stub_func().c_str()); + "bin_file_key:%s, ret:0x%X", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str(), kernel_def.stub_func().c_str(), rt_ret); + GELOGE(RT_FAILED, "[Execute][RtGetFunctionByName] failed for op:%s(%s). 
stub_func:%s", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), kernel_def.stub_func().c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);); } else if (kernel_type_ == ccKernelType::TE) { // get bin_file_key @@ -109,18 +108,20 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName failed for op:%s(%s), " - "bin_file_key:%s, ret:0x%X", - op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), - bin_file_key, rt_ret); - GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); + "bin_file_key:%s, ret:0x%X", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str(), bin_file_key, rt_ret); + GELOGE(RT_FAILED, "[Execute][RtGetFunctionByName] failed for op:%s(%s), bin_file_key:%s", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), bin_file_key); return RT_ERROR_TO_GE_STATUS(rt_ret);); } if (context.origin_op_index_size() > CC_FUSION_OP_MAX) { - REPORT_INNER_ERROR("E19999", "context.origin_op_index_size():%d is more than CC_FUSION_OP_MAX(%d), op:%s(%s) ," + REPORT_INNER_ERROR("E19999", "context.origin_op_index_size():%d is more than CC_FUSION_OP_MAX(%d), op:%s(%s), " "check invalid", context.origin_op_index_size(), CC_FUSION_OP_MAX, op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); - GELOGE(PARAM_INVALID, "context.origin_op_index_size() is more than CC_FUSION_OP_MAX(%d)", CC_FUSION_OP_MAX); + GELOGE(PARAM_INVALID, "[Check][Param] context.origin_op_index_size():%d is more than CC_FUSION_OP_MAX(%d), " + "op:%s(%s)", context.origin_op_index_size(), CC_FUSION_OP_MAX, + op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); return PARAM_INVALID; } @@ -132,10 +133,11 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci ctx_.opIndex = context.op_index(); uint16_t *args_offset_tmp = reinterpret_cast(const_cast(context.args_offset().data())); if (context.args_offset().size() / sizeof(uint16_t) < 1) { - REPORT_INNER_ERROR("E19999", "context.args_offset().size():%zu / sizeof(uint16_t) less than 1, op:%s(%s) ," + REPORT_INNER_ERROR("E19999", "context.args_offset().size():%zu / sizeof(uint16_t) less than 1, op:%s(%s), " "check invalid", context.args_offset().size(), op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); - GELOGE(FAILED, "context.args_offset().size() / sizeof(uint16_t) less than 1"); + GELOGE(FAILED, "[Check][Param] context.args_offset().size() / sizeof(uint16_t) less than 1, op:%s(%s)", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); return FAILED; } @@ -149,7 +151,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci if (kernel_def.args().empty() || args_size_ == 0) { REPORT_INNER_ERROR("E19999", "kernel_def.args() is empty, op:%s(%s), check invalid", op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); - GELOGE(FAILED, "args is null."); + GELOGE(FAILED, "[Check][Param] args is empty, op:%s(%s)", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); return FAILED; } ret = InitCceTask(kernel_def); @@ -181,9 +184,8 @@ void KernelTaskInfo::UpdateSKTTaskId() { if (davinci_model_ != nullptr) { rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", 
rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelGetTaskId] failed, ret:0x%X", rt_ret); return; } SuperKernelTaskInfo &skt_info = davinci_model_->GetSuperKernelTaskInfo(); @@ -201,9 +203,8 @@ void KernelTaskInfo::UpdateTaskId() { if (davinci_model_ != nullptr) { rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelGetTaskId] failed, ret:0x%X", rt_ret); return; } task_id_ = task_id; @@ -214,7 +215,7 @@ void KernelTaskInfo::UpdateTaskId() { Status KernelTaskInfo::SKTFinalize() { UpdateSKTTaskId(); - GE_CHK_STATUS_RET(SaveSKTDumpInfo(), "skt save dump info failed"); + GE_CHK_STATUS_RET(SaveSKTDumpInfo(), "[Save][SKTDumpInfo] failed"); GELOGI("SuperKernel Distribute [skt_id:%u]", skt_id_); SuperKernelTaskInfo &skt_info = davinci_model_->GetSuperKernelTaskInfo(); skt_info.kernel_list.clear(); @@ -258,13 +259,12 @@ Status KernelTaskInfo::SuperKernelLaunch() { static_cast(skt_info.last_sm_desc), skt_info.last_stream, skt_info.last_dump_flag); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelLaunchWithFlag] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } call_save_dump_ = true; - GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); + GE_CHK_STATUS_RET(SKTFinalize(), "[Call][SKTFinalize] failed"); return SUCCESS; } // Create super kernel factory @@ -272,27 +272,24 @@ Status KernelTaskInfo::SuperKernelLaunch() { // Init super kernel factory Status ge_ret = factory->Init(); if (ge_ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call SuperKernelFactory init fail, ret:0x%X", - ge_ret); - GELOGE(ge_ret, "SuperKernelLaunch: SuperKernelFactory init failed"); + REPORT_CALL_ERROR("E19999", "Call SuperKernelFactory init fail, ret:0x%X", ge_ret); + GELOGE(ge_ret, "[Init][SuperKernelFactory] failed, ret:0x%X", ge_ret); return ge_ret; } // Call the fuse API std::unique_ptr superKernel = nullptr; ge_ret = factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info.last_block_dim, superKernel); if (ge_ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call SuperKernelFactory FuseKernels fail, ret:0x%X", - ge_ret); - GELOGE(ge_ret, "SuperKernelLaunch: fuse call failed"); + REPORT_CALL_ERROR("E19999", "Call SuperKernelFactory FuseKernels fail, ret:0x%X", ge_ret); + GELOGE(ge_ret, "[Call][FuseKernels] failed, ret:0x%X", ge_ret); return ge_ret; } // Launch a super kernel skt_dump_flag_ = GetDumpFlag(); ge_ret = superKernel->Launch(skt_info.last_stream, skt_dump_flag_); if (ge_ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call SuperKernelFactory Launch fail, ret:0x%X", - ge_ret); - GELOGE(ge_ret, "SuperKernelLaunch: launch failed"); + REPORT_CALL_ERROR("E19999", "Call SuperKernelFactory Launch fail, ret:0x%X", ge_ret); + GELOGE(ge_ret, "[Call][Launch] failed, ret:0x%X", ge_ret); return ge_ret; } GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), @@ -300,7 +297,7 @@ 
Status KernelTaskInfo::SuperKernelLaunch() { // record skt addr for release superkernel_dev_nav_table_ = superKernel->GetNavTablePtr(); superkernel_device_args_addr_ = superKernel->GetDeviceArgsPtr(); - GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); + GE_CHK_STATUS_RET(SKTFinalize(), "[Call][SKTFinalize] failed"); return SUCCESS; } @@ -331,14 +328,13 @@ Status KernelTaskInfo::SaveSuperKernelInfo() { bool KernelTaskInfo::IsMarkedLastNode() { if (davinci_model_ == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return false; } OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - ctx_.opIndex); - GELOGE(INTERNAL_ERROR, "InitTVMTaskInfo error, index is out of range!"); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", ctx_.opIndex); + GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", ctx_.opIndex); return false; } bool is_last_node = false; @@ -349,14 +345,13 @@ bool KernelTaskInfo::IsMarkedLastNode() { bool KernelTaskInfo::IsMarkedFirstNode() { if (davinci_model_ == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return false; } OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - ctx_.opIndex); - GELOGE(INTERNAL_ERROR, "InitTVMTaskInfo error, index is out of range!"); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", ctx_.opIndex); + GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", ctx_.opIndex); return false; } bool is_first_node = false; @@ -381,7 +376,7 @@ Status KernelTaskInfo::SuperKernelDistribute() { if (FirstCallSKTLaunchCheck()) { ret = SuperKernelLaunch(); if (ret != SUCCESS) { - GELOGE(FAILED, "Call SuperKernelLaunch failed!"); + GELOGE(FAILED, "[Call][SuperKernelLaunch] failed, taskid:%u", task_id_); return FAILED; } } @@ -389,16 +384,15 @@ Status KernelTaskInfo::SuperKernelDistribute() { // 1.launch before ret = SuperKernelLaunch(); if (ret != SUCCESS) { - GELOGE(ret, "Call SuperKernelLaunch failed!"); + GELOGE(ret, "[Call][SuperKernelLaunch] failed, taskid:%u", task_id_); return ret; } // 2.launch current rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, static_cast(sm_desc_), stream_, dump_flag_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelLaunchWithFlag] failed, ret:0x%X", rt_ret); return rt_ret; } call_save_dump_ = true; @@ -407,15 +401,14 @@ Status KernelTaskInfo::SuperKernelDistribute() { } else { ret = SaveSuperKernelInfo(); if (ret != SUCCESS) { - GELOGE(ret, "Call SuperKernelLaunch failed!"); + GELOGE(ret, "[Call][SaveSuperKernelInfo] failed, taskid:%u", task_id_); return ret; } } return SUCCESS; } -Status KernelTaskInfo::Distribute() { - GELOGD("KernelTaskInfo Distribute Start."); +void KernelTaskInfo::SetArgs() { if 
(davinci_model_->IsKnownNode()) { if (kernel_type_ == ccKernelType::TE) { args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) @@ -425,12 +418,25 @@ Status KernelTaskInfo::Distribute() { } GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); } +} + +Status KernelTaskInfo::Distribute() { + GELOGD("KernelTaskInfo Distribute Start."); + SetArgs(); rtError_t rt_ret = RT_ERROR_NONE; char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, kBaseInt) : kStrtolFail; bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { + if (topic_type_flag_ > 0) { + // Use the fifth and sixth bits of dump_flag_ to indicate the value of topic_type. + // xxxxxxxx xxxxxxxx xxxxxxxx xx00xxxx: DEVICE_ONLY + // xxxxxxxx xxxxxxxx xxxxxxxx xx01xxxx: DEVICE_FIRST + // xxxxxxxx xxxxxxxx xxxxxxxx xx10xxxx: HOST_ONLY + // xxxxxxxx xxxxxxxx xxxxxxxx xx11xxxx: HOST_FIRST + dump_flag_ = dump_flag_ | topic_type_flag_; + } GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); // blockDim is reserved parameter, set to 1 rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), @@ -461,7 +467,7 @@ Status KernelTaskInfo::Distribute() { if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag or rtCpuKernelLaunchWithFlag failed, " "ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } // set for task_id_ @@ -497,18 +503,16 @@ Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) { // copy io addr errno_t sec_ret = memcpy_s(args_addr.get() + offset, addr_size, io_addrs.data(), addr_size); if (sec_ret != EOK) { - REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%zu, ret:0x%X", - addr_size, sec_ret); - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%zu, ret:0x%X", addr_size, sec_ret); + GELOGE(FAILED, "[Call][Memcpy] failed, size:%zu, ret:%d", addr_size, sec_ret); return FAILED; } // copy args to device rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_); @@ -558,7 +562,7 @@ Status KernelTaskInfo::Release() { ret = (sm_desc_ != nullptr) ?
rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemFreeManaged failed, ret:0x%X", ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast(ret)); + GELOGE(RT_FAILED, "[Call][RtMemFreeManaged] failed, ret:0x%X", static_cast(ret)); return RT_ERROR_TO_GE_STATUS(ret); } sm_desc_ = nullptr; @@ -588,17 +592,15 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemAllocManaged] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - sm_desc.size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", sm_desc.size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", sm_desc.size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -644,9 +646,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne args_addr = std::unique_ptr(new (std::nothrow) uint8_t[args_size_]); errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); if (sec_ret != EOK) { - REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", - args_size_, sec_ret); - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", args_size_, sec_ret); + GELOGE(FAILED, "[Call][Memcpy] failed, size:%u, ret:0x%X", args_size_, sec_ret); return FAILED; } @@ -688,26 +689,24 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne // malloc args memory rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } // copy orign args rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) { REPORT_INNER_ERROR("E19999", "offset:%u >= kernelInfo.argsSize:%u or copy content:%zu beyond applied memory:%u, " - "check invalid", - offset, args_size_, kAddrLen * tensor_device_addrs.size(), args_size_ - offset); - GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy 
content beyond applied memory."); + "check invalid", offset, args_size_, kAddrLen * tensor_device_addrs.size(), args_size_ - offset); + GELOGE(FAILED, "[Check][Param] offset:%u >= kernelInfo.argsSize:%u or copy content:%zu beyond applied memory:%u, " + "check invalid", offset, args_size_, kAddrLen * tensor_device_addrs.size(), args_size_ - offset); return FAILED; } @@ -715,25 +714,20 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne rt_ret = rtMemcpy(static_cast(args_) + offset, args_size_ - offset, tensor_device_addrs.data(), kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - args_size_ - offset, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_ - offset, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_ - offset, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(), kAddrLen * tensor_device_addrs.size()); if (sec_ret != EOK) { - REPORT_CALL_ERROR("E19999", "Call memcpy_s failed, size:%u, ret:0x%X", - args_size_ - offset, sec_ret); - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + REPORT_CALL_ERROR("E19999", "Call memcpy_s failed, size:%u, ret:0x%X", args_size_ - offset, sec_ret); + GELOGE(FAILED, "[Call][Memcpy] failed, size:%u, ret:0x%X", args_size_ - offset, sec_ret); return FAILED; } skt_dump_args_ = static_cast(args_) + offset; InitDumpTask(offset); - GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast(args_) + offset, - "Op debug is open in TVM task info"); - vector virtual_io_addrs; // use virtual address for zero copy key. 
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); @@ -769,9 +763,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel GELOGI("Do InitAICPUCustomTask"); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - op_index); - GELOGE(INTERNAL_ERROR, "index is out of range, index: %u", op_index); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); + GELOGE(INTERNAL_ERROR, "[Get][Op] index is out of range, index:%u", op_index); return INTERNAL_ERROR; } @@ -783,16 +776,18 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel if (ctx_.argsOffset == nullptr) { REPORT_CALL_ERROR("E19999", "New ctx_.argsOffset fail, size:%u, op:%s(%s)", kCustomAicpuArgsLen, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "ctx_.argsOffset is null!"); + GELOGE(PARAM_INVALID, "[Malloc][Memory] ctx_.argsOffset is null, size:%u, op:%s(%s)", + kCustomAicpuArgsLen, op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } if (context.args_offset().size() / sizeof(uint16_t) < kCustomAicpuArgsLen) { REPORT_INNER_ERROR("E19999", "context.args_offset().size():%zu / sizeof(uint16_t) is less than " - "kCustomAicpuArgsLen:%u, op:%s(%s), check invalid", - context.args_offset().size(), kCustomAicpuArgsLen, - op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "context.args_offset().size() / sizeof(uint16_t) is less than kCustomAicpuArgsLen"); + "kCustomAicpuArgsLen:%u, op:%s(%s), check invalid", context.args_offset().size(), + kCustomAicpuArgsLen, op_desc->GetName().c_str(), op_desc->GetType().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] context.args_offset().size():%zu / sizeof(uint16_t) is less than " + "kCustomAicpuArgsLen:%u, op:%s(%s)", context.args_offset().size(), kCustomAicpuArgsLen, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } @@ -805,7 +800,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel Status ret = StoreInputOutputTensor(input_data_addrs, output_data_addrs, ModelUtils::GetInputDescs(op_desc), ModelUtils::GetOutputDescs(op_desc)); if (ret != SUCCESS) { - GELOGE(ret, "StoreInputOutputTensor Failed"); + GELOGE(ret, "[Store][InputOutputTensor] Failed, op:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ret; } @@ -814,7 +809,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel if (!AttrUtils::GetBytes(op_desc, ATTR_NAME_OPATTR, buffer)) { REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_OPATTR.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "can't find opattr bytes!."); + GELOGE(FAILED, "[Get][Attr] %s in op:%s(%s) fail", ATTR_NAME_OPATTR.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } @@ -822,7 +818,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel if (op_attr_size == 0) { REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s) size is 0, check invalid", ATTR_NAME_OPATTR.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "param op_attr_size is out of range"); + GELOGE(PARAM_INVALID, 
"[Check][Param] param op_attr_size is out of range, op:%s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -830,7 +826,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_attr_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_attr_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -838,7 +835,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%u, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_attr_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_attr_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -851,7 +849,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel "op:%s(%s) check invalid", i, (uint32_t)ctx_.argsOffset[i], sizeof(uint64_t), kernel_def.args().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "ctx.argsOffset[%u]: %u + sizeof(uint64_t): %zu >= kernelDef.args().size():%zu", i, + GELOGE(FAILED, "[Check][Param] ctx.argsOffset[%u]:%u + sizeof(uint64_t):%zu >= kernelDef.args().size():%zu", i, (uint32_t)ctx_.argsOffset[i], sizeof(uint64_t), kernel_def.args().size()); return FAILED; } @@ -871,7 +869,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -879,9 +878,9 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%u, ret:0x%X", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - kernel_def.args_size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_def.args_size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_def.args_size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -896,12 +895,12 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { GELOGI("Do InitCCETask"); if (davinci_model_ == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } Status ret = SetContext(kernel_def); if (ret != SUCCESS) { - GELOGE(ret, "SetContext Fail."); + GELOGE(ret, "[Set][Context] Fail."); 
return ret; } @@ -911,7 +910,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { if (context.is_flowtable()) { if (flowtable.empty()) { REPORT_INNER_ERROR("E19999", "kernel_def.flowtable is empty, check invalid"); - GELOGE(FAILED, "flowtable is null."); + GELOGE(FAILED, "[Check][Param] flowtable is null."); return FAILED; } } @@ -931,23 +930,22 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { ret = UpdateCceArgs(sm_desc, flowtable, kernel_def); if (ret != SUCCESS) { - GELOGE(ret, "update cce args fail"); + GELOGE(ret, "[Update][CceArgs] fail"); return ret; } // flowtable ret = SetFlowtable(flowtable, kernel_def); if (ret != SUCCESS) { - GELOGE(ret, "SetFlowtable Fail"); + GELOGE(ret, "[Set][Flowtable] Fail"); return ret; } // args rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", - kernel_def.args_size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", kernel_def.args_size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", kernel_def.args_size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) @@ -955,9 +953,8 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", - kernel_def.args_size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", kernel_def.args_size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", kernel_def.args_size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -965,17 +962,15 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { if (!sm_desc.empty()) { rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemAllocManaged] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - sm_desc.size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", sm_desc.size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", sm_desc.size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -989,9 +984,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - op_index); - GELOGE(INTERNAL_ERROR, "index is out of range, index: %u", op_index); + REPORT_INNER_ERROR("E19999", "Can't get 
op_desc from davinci_model by index:%u", op_index); + GELOGE(INTERNAL_ERROR, "[Get][Op] index is out of range, index:%u", op_index); return INTERNAL_ERROR; } GELOGI("node[%s] test so name %s, kernel name %s", op_desc->GetName().c_str(), so_name_.c_str(), @@ -1000,7 +994,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (kernel_type_ == ccKernelType::CUST_AI_CPU) { bool loaded = false; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), - "launch cust aicpu so failed"); + "[Launch][CustAicpuSo] failed"); } // copy args to new host memory @@ -1008,9 +1002,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); if (sec_ret != EOK) { - REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", - args_size_, sec_ret); - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", args_size_, sec_ret); + GELOGE(FAILED, "[Call][Memcpy] failed, size:%u, ret:0x%X", args_size_, sec_ret); return FAILED; } @@ -1018,7 +1011,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k const auto &ext_info = kernel_def.kernel_ext_info(); auto init_ret = InitAicpuTaskExtInfo(ext_info); if (init_ret != SUCCESS) { - GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); + GELOGE(init_ret, "[Init][AicpuTaskExtInfo] failed, ext_info size=%zu", ext_info.size()); return init_ret; } GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc->GetName().c_str(), @@ -1044,9 +1037,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k auto addrs_size = sizeof(uint64_t) * io_addrs.size(); sec_ret = memcpy_s(reinterpret_cast(io_addr), addrs_size, io_addrs.data(), addrs_size); if (sec_ret != EOK) { - REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%lu, ret:0x%X", - addrs_size, sec_ret); - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%lu, ret:0x%X", addrs_size, sec_ret); + GELOGE(FAILED, "[Call][Memcpy] failed, size:%lu, ret:0x%X", addrs_size, sec_ret); return FAILED; } } @@ -1056,7 +1048,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", args_size_) @@ -1066,14 +1059,12 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%u, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), 
op_desc->GetType().c_str(), args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } InitDumpTask(sizeof(aicpu::AicpuParamHead)); - if (davinci_model_->GetOpDugReg()) { - GELOGI("Op debug is open in aicpu task info"); - dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); - } + if (kernel_type_ == ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; } @@ -1085,6 +1076,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k void KernelTaskInfo::InitDumpTask(uint32_t offset) { if (davinci_model_->OpNeedDump(op_desc_->GetName())) { + GELOGD("Op %s need dump in task info", op_desc_->GetName().c_str()); if (IsL1FusionOp(op_desc_)) { dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; } else { @@ -1092,6 +1084,10 @@ void KernelTaskInfo::InitDumpTask(uint32_t offset) { } dump_args_ = static_cast(args_) + offset; } + if (davinci_model_->GetOpDugReg()) { + GELOGD("Op debug is open in kernel task info"); + dump_args_ = static_cast(args_) + offset; + } } Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { @@ -1109,14 +1105,17 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { num_inputs, num_outputs, unknown_type)); - GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); + GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "[Malloc][Memory] for aicpu_ext_handle failed!"); GE_CHK_STATUS_RET(ext_handle->Parse(ext_info), - "Parse kernel ext info failed, kernel_ext_info_size=%zu.", ext_info.size()); + "[Parse][KernelExtInfo] failed, kernel_ext_info_size=%zu, op:%s.", + ext_info.size(), op_desc_->GetName().c_str()); GE_CHK_STATUS_RET(ext_handle->UpdateSessionInfoSessionId(davinci_model_->GetSessionId()), - "Update session info session id failed."); + "[Update][SessionInfoSessionId] failed, op:%s", op_desc_->GetName().c_str()); GELOGD("Update aicpu_task ext_info session_info session_id is %lu", davinci_model_->GetSessionId()); - GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "UpdateExecuteMode failed."); + GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), + "[Update][ExecuteMode] failed, op:%s", op_desc_->GetName().c_str()); GELOGD("Update aicpu_task ext_info bit_map execute mode to 1."); + topic_type_flag_ = ext_handle->GetTopicTypeFlag(); bool all_shape = false; (void)AttrUtils::GetBool(op_desc_, kAicpuAllshape, all_shape); @@ -1126,13 +1125,15 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { auto input_desc = op_desc_->MutableInputDesc(i); GE_CHECK_NOTNULL(input_desc); GE_CHK_STATUS_RET(ext_handle->UpdateInputShapeAndType(i, *input_desc), - "Input[%u] update input shape failed.", i); + "[Call][UpdateInputShapeAndType] Input[%u] update input shape failed, op:%s.", + i, op_desc_->GetName().c_str()); } for (uint32_t j = 0; j < num_outputs; j++) { auto output_desc = op_desc_->MutableOutputDesc(j); GE_CHECK_NOTNULL(output_desc); GE_CHK_STATUS_RET(ext_handle->UpdateOutputShapeAndType(j, *output_desc), - "Output[%u] update output shape failed.", j); + "[Call][UpdateOutputShapeAndType] Output[%u] update output shape failed, op:%s.", + j, op_desc_->GetName().c_str()); } } auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); @@ -1140,7 +1141,8 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X", op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), ext_handle->GetExtInfoLen(), 
rt_ret); - GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%zu, ret:0x%X", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), ext_handle->GetExtInfoLen(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle->GetExtInfo(), @@ -1149,7 +1151,8 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%zu, ret:0x%X", op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), ext_handle->GetExtInfoLen(), rt_ret); - GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed for op:%s(%s), size:%zu, ret:0x%X", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), ext_handle->GetExtInfoLen(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1166,9 +1169,8 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d // inputDescs rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - sizeof(opTensor_t) * input_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * input_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * input_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1176,9 +1178,8 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rt_ret = rtMemcpy(static_cast(custom_info_.input_descs) + i, sizeof(opTensor_t), const_cast(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - sizeof(opTensor_t), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", sizeof(opTensor_t), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", sizeof(opTensor_t), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -1186,9 +1187,8 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d // inputAddrs rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - sizeof(opTensor_t) * input_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * input_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * input_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1196,9 +1196,8 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rt_ret = rtMemcpy(custom_info_.input_addrs, kAddrLen * input_size, &input_data_addrs[0], kAddrLen * input_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - kAddrLen * input_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, 
size:%zu, ret:0x%X", kAddrLen * input_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", kAddrLen * input_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -1206,18 +1205,16 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d // outputDescs rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - sizeof(opTensor_t) * output_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * output_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * output_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } for (std::size_t i = 0; i < output_size; ++i) { rt_ret = rtMemcpy(static_cast(custom_info_.output_descs) + i, sizeof(opTensor_t), const_cast(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - sizeof(opTensor_t), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", sizeof(opTensor_t), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", sizeof(opTensor_t), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -1225,9 +1222,8 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d // outputAddrs rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - sizeof(opTensor_t) * output_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * output_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", sizeof(opTensor_t) * output_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1235,9 +1231,8 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rt_ret = rtMemcpy(custom_info_.output_addrs, kAddrLen * output_size, &output_data_addrs[0], kAddrLen * output_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - kAddrLen * output_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", kAddrLen * output_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", kAddrLen * output_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -1254,7 +1249,7 @@ Status KernelTaskInfo::SetContext(const domi::KernelDef &kernel_def) { ctx_.argsCount = context.args_count(); if (ctx_.argsCount == 0) { REPORT_INNER_ERROR("E19999", "kernel_def.context.args_count is 0, check invalid"); - GELOGE(INTERNAL_ERROR, "check argsCount fail:%u.", ctx_.argsCount); + GELOGE(INTERNAL_ERROR, "[Check][Param] argsCount is %u.", ctx_.argsCount); return INTERNAL_ERROR; } @@ -1262,16 +1257,16 @@ Status KernelTaskInfo::SetContext(const domi::KernelDef &kernel_def) { REPORT_INNER_ERROR("E19999", "param [context.args_offset().size():%zu / sizeof(uint16_t)] " "is less than [ctx_.argsCount:%u], check invalid", 
context.args_offset().size(), ctx_.argsCount); - GELOGE(PARAM_INVALID, "param [context.args_offset().size() / sizeof(uint16_t)] is less than [ctx_.argsCount]"); + GELOGE(PARAM_INVALID, "[Check][Param] [context.args_offset().size():%zu / sizeof(uint16_t)] " + "is less than [ctx_.argsCount:%u], check invalid", context.args_offset().size(), ctx_.argsCount); return PARAM_INVALID; } // ctx_.argsOffset stores the offset of the internal information of agrs_, equal to the ctx_.argsCount ctx_.argsOffset = new (std::nothrow) uint16_t[ctx_.argsCount](); if (ctx_.argsOffset == nullptr) { - REPORT_CALL_ERROR("E19999", "New ctx_.argsOffset fail, size:%u", - ctx_.argsCount); - GELOGE(PARAM_INVALID, "(param [ctx_.argsOffset] must not be null."); + REPORT_CALL_ERROR("E19999", "New ctx_.argsOffset fail, size:%u", ctx_.argsCount); + GELOGE(PARAM_INVALID, "[Malloc][Memory] failed, ctx_.argsOffset must not be null, size:%u", ctx_.argsCount); return PARAM_INVALID; } @@ -1289,7 +1284,7 @@ void KernelTaskInfo::FreeRtMem(void **ptr) { rtError_t ret = rtFree(*ptr); if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret:0x%X", ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); + GELOGE(RT_FAILED, "[Call][RtFree] failed, ret:0x%X", ret); } *ptr = nullptr; @@ -1307,7 +1302,7 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl Status status = CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); if (status != SUCCESS) { - GELOGE(status, "Call cce api failed"); + GELOGE(status, "[Call][CceUpdateKernelArgs] failed, ret:%d", status); return status; } return SUCCESS; @@ -1336,9 +1331,8 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u if (handle == nullptr) { error = mmDlerror(); GE_IF_BOOL_EXEC(error == nullptr, error = ""); - REPORT_INNER_ERROR("E19999", "Failed in dlopen:%s, dlerror:%s", - canonicalPath.c_str(), error); - GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", error); + REPORT_INNER_ERROR("E19999", "Failed in dlopen:%s, dlerror:%s", canonicalPath.c_str(), error); + GELOGE(GE_PLGMGR_SO_NOT_EXIST, "[Open][File] %s failed, reason:%s! 
", canonicalPath.c_str(), error); return FAILED; } ccStatus_t cc_ret; @@ -1348,7 +1342,7 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u if (cceUpdateKernelArgs == nullptr) { REPORT_INNER_ERROR("E19999", "No symbol:%s in %s, check invalid", update_kernel_args.c_str(), canonicalPath.c_str()); - GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); + GELOGE(FAILED, "[Invoke][Function] ccUpdateKernelArgs failed."); if (mmDlclose(handle) != 0) { error = mmDlerror(); GE_IF_BOOL_EXEC(error == nullptr, error = ""); @@ -1372,9 +1366,8 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u return FAILED; } if (cc_ret != CC_STATUS_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call cceUpdateKernelArgs fail, ret:0x%X", - cc_ret); - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); + REPORT_CALL_ERROR("E19999", "Call cceUpdateKernelArgs fail, ret:0x%X", cc_ret); + GELOGE(CCE_FAILED, "[Call][CceUpdateKernelArgs] failed, ret:0x%X", cc_ret); return CCE_FAILED; } @@ -1387,18 +1380,16 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe if (context.is_flowtable()) { rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", - flowtable.size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", flowtable.size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", flowtable.size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "flowtable refresh of cce scence.", flowtable.size()) rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", - flowtable.size(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", flowtable.size(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", flowtable.size(), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -1412,7 +1403,8 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe "kernelDef.args().size():%zu, check invalid", (uint32_t)((reinterpret_cast(const_cast(context.args_offset().data())))[0]), sizeof(uint64_t), kernel_def.args().size()); - GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", + GELOGE(FAILED, "[Check][Param] (context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > " + "kernelDef.args().size():%zu", (uint32_t)((reinterpret_cast(const_cast(context.args_offset().data())))[0]), sizeof(uint64_t), kernel_def.args().size()); return FAILED; diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h index 4156c511..79347255 100644 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -145,6 +145,7 @@ class KernelTaskInfo : public TaskInfo { bool IsMarkedFirstNode(); bool FirstCallSKTLaunchCheck(); bool DoubleCallSKTSaveCheck(); + void SetArgs(); void *stub_func_; void *args_; @@ -169,6 +170,7 @@ class KernelTaskInfo : public TaskInfo { uint16_t io_addr_offset_ = 0; bool 
l2_buffer_on_ = false; bool call_save_dump_ = false; + int32_t topic_type_flag_ = -1; // aicpu ext_info device mem void *aicpu_ext_info_addr_ = nullptr; diff --git a/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc index b858259e..861056b3 100755 --- a/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc @@ -40,7 +40,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da if (op_desc == nullptr) { REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", label_goto.op_index()); - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range!", label_goto.op_index()); + GELOGE(INTERNAL_ERROR, "[Get][Op] Task op index:%u out of range!", label_goto.op_index()); return INTERNAL_ERROR; } @@ -49,8 +49,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_LABEL_SWITCH_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelGotoExTaskInfo: %s attr [%s] not exist.", - op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in op:%s(%s) fail.", + ATTR_NAME_LABEL_SWITCH_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } @@ -63,7 +63,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%lu, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), sizeof(uint64_t), rt_ret); - GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%lu, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), sizeof(uint64_t), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -72,7 +73,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%lu, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), sizeof(uint64_t), rt_ret); - GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed for op:%s(%s), size:%lu, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), sizeof(uint64_t), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -86,15 +88,14 @@ Status LabelGotoExTaskInfo::Distribute() { GE_CHECK_NOTNULL(index_value_); if (args_size_ == 0) { REPORT_INNER_ERROR("E19999", "Param args_size_ is 0, check fail"); - GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_); + GELOGE(PARAM_INVALID, "[Check][Param] branch max:%u, args size:%u invalid.", kGotoBranchMax, args_size_); return PARAM_INVALID; } rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtLabelSwitchByIndex failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtLabelSwitchByIndex failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtLabelSwitchByIndex] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git 
a/ge/graph/load/model_manager/task_info/label_set_task_info.cc b/ge/graph/load/model_manager/task_info/label_set_task_info.cc index c8cb7975..df88b3c8 100644 --- a/ge/graph/load/model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.cc @@ -32,9 +32,8 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin const domi::LabelSetDef &label_set = task_def.label_set(); OpDescPtr op_desc = davinci_model->GetOpByIndex(label_set.op_index()); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - label_set.op_index()); - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range!", label_set.op_index()); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", label_set.op_index()); + GELOGE(INTERNAL_ERROR, "[Get][Op] Task op index:%u out of range!", label_set.op_index()); return INTERNAL_ERROR; } @@ -43,7 +42,7 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_LABEL_SWITCH_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: %s attr [%s] not exist.", + GELOGE(INTERNAL_ERROR, "[Get][Attr] LabelSetTaskInfo:%s attr [%s] not exist.", op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return INTERNAL_ERROR; } @@ -53,7 +52,8 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin REPORT_INNER_ERROR("E19999", "lable_index:%u >= label_list.size():%zu in model, op:%s(%s), " "check invalid", label_index, label_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] LabelSetTaskInfo: Invalid label id:%u, label size:%zu, op:%s(%s)", + label_index, label_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } label_ = label_list[label_index]; @@ -66,9 +66,8 @@ Status LabelSetTaskInfo::Distribute() { GELOGI("LabelSetTaskInfo Distribute Start."); rtError_t rt_ret = rtLabelSet(label_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtLabelSet failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtLabelSet failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtLabelSet] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc index b7ffdb84..cd001092 100644 --- a/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc @@ -39,9 +39,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index(); OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index()); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - label_switch.op_index()); - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range!", label_switch.op_index()); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", 
label_switch.op_index()); + GELOGE(INTERNAL_ERROR, "[Get][Op] Task op index:%u out of range!", label_switch.op_index()); return INTERNAL_ERROR; } @@ -52,7 +51,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo REPORT_INNER_ERROR("E19999", "input_data_addr size:%zu != kLabelSwitchIndexNum:%u, op:%s(%s), " "check invalid", input_data_addr.size(), kLabelSwitchIndexNum, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s invalid addr size: %zu, num: %u!", + GELOGE(INTERNAL_ERROR, "[Check][Param] %s invalid addr size:%zu, num:%u!", op_desc->GetName().c_str(), input_data_addr.size(), kLabelSwitchIndexNum); return INTERNAL_ERROR; } @@ -70,17 +69,16 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_LABEL_SWITCH_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(), - ATTR_NAME_LABEL_SWITCH_LIST.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in op:%s(%s) failed.", ATTR_NAME_LABEL_SWITCH_LIST.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } if (label_idx_list.empty() || label_idx_list.size() != branch_max_) { REPORT_INNER_ERROR("E19999", "label_idx_list in op:%s(%s) is empty, or size:%zu != branch_max_:%u" - "check invalid", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), + "check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), label_idx_list.size(), branch_max_); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s label index size: %zu, task branch max: %u.", + GELOGE(INTERNAL_ERROR, "[Check][Param] %s label index size:%zu, task branch max:%u.", op_desc->GetName().c_str(), label_idx_list.size(), branch_max_); return INTERNAL_ERROR; } @@ -93,7 +91,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo REPORT_INNER_ERROR("E19999", "label_id:%u in op:%s(%s) >= label_list.size():%zu in model" "check invalid", label_id, op_desc->GetName().c_str(), op_desc->GetType().c_str(), label_list.size()); - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s index: %zu, label index: %u, model label size: %zu.", + GELOGE(INTERNAL_ERROR, "[Check][Param] %s index:%zu, label index:%u, model label size:%zu.", op_desc->GetName().c_str(), idx, label_id, label_list.size()); return INTERNAL_ERROR; } @@ -108,15 +106,15 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%u, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtLabelListCpy failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtLabelListCpy failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtLabelListCpy] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ 
-129,17 +127,15 @@ Status LabelSwitchByIndexTaskInfo::Distribute() { GE_CHECK_NOTNULL(args_); GE_CHECK_NOTNULL(index_value_); if (branch_max_ == 0 || args_size_ == 0) { - REPORT_INNER_ERROR("E19999", "branch_max_:%u or args_size_:%u is 0" - "check invalid", branch_max_, args_size_); - GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", branch_max_, args_size_); + REPORT_INNER_ERROR("E19999", "branch_max_:%u or args_size_:%u is 0, check invalid", branch_max_, args_size_); + GELOGE(PARAM_INVALID, "[Check][Param] branch max:%u, args size:%u invalid.", branch_max_, args_size_); return PARAM_INVALID; } rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtLabelSwitchByIndex failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtLabelSwitchByIndex failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtLabelSwitchByIndex] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -159,7 +155,8 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != kLabelSwitchIndexNum" "check invalid", op_desc->GetInputsSize(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); + GELOGE(FAILED, "[Check][Param] Label switch op:%s(%s) only have one data input. Now input size is %zu", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_desc->GetInputsSize()); return FAILED; } string input_tensor_name = op_desc->GetName(); diff --git a/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc index 960862b4..56ddbaf5 100755 --- a/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc @@ -36,9 +36,8 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel const auto &memcpy_async = task_def.memcpy_async(); OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index()); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - memcpy_async.op_index()); - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", memcpy_async.op_index()); + GELOGE(INTERNAL_ERROR, "[Get][Op] Task op index:%u out of range", memcpy_async.op_index()); return INTERNAL_ERROR; } @@ -66,7 +65,8 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%lu, ret:0x%X", op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size + kAlignBytes, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed for op:%s(%s), size:%lu, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size + kAlignBytes, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -78,7 +78,8 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%zu, ret:0x%X", op_desc->GetName().c_str(), 
op_desc->GetType().c_str(), args_size, rt_ret); - GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] for src failed for op:%s(%s), size:%zu, ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -98,9 +99,8 @@ Status MemcpyAddrAsyncTaskInfo::Distribute() { rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast(reinterpret_cast(args_align_) + sizeof(void *)), dst_max_, args_align_, count_, static_cast(kind_), stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync failed, size:%lu, ret:0x%X", - dst_max_, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync failed, size:%lu, ret:0x%X", dst_max_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpyAsync] failed, size:%lu, ret:0x%X", dst_max_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc index 0bc8fb8d..5c017544 100755 --- a/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc @@ -36,9 +36,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da dst_max_ = memcpy_async.dst_max(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async.op_index()); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - memcpy_async.op_index()); - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", memcpy_async.op_index()); + GELOGE(INTERNAL_ERROR, "[Get][Op] Task op index:%u out of range", memcpy_async.op_index()); return INTERNAL_ERROR; } @@ -47,7 +46,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da dst_ = reinterpret_cast(reinterpret_cast(src_) + sizeof(void *)); // for zero copy kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE; - GE_CHK_STATUS_RET(SetIoAddrs(op_desc, memcpy_async), "Set addrs failed"); + GE_CHK_STATUS_RET(SetIoAddrs(op_desc, memcpy_async), "[Set][Addrs] failed, op:%s", op_desc->GetName().c_str()); GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.", op_desc->GetName().c_str(), src_, dst_, args_offset_); return SUCCESS; @@ -77,7 +76,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da davinci_model_->DisableZeroCopy(src_); davinci_model_->DisableZeroCopy(dst_); - GE_CHK_STATUS_RET(SetIoAddrs(op_desc, memcpy_async), "Set addrs failed"); + GE_CHK_STATUS_RET(SetIoAddrs(op_desc, memcpy_async), "[Set][Addrs] failed, op:%s", op_desc->GetName().c_str()); GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_); return SUCCESS; @@ -88,9 +87,8 @@ Status MemcpyAsyncTaskInfo::Distribute() { rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast(kind_), stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync failed, size:%lu, ret:0x%X", - dst_max_, rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync failed, size:%lu, ret:0x%X", dst_max_, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpyAsync] failed, 
size:%lu, ret:0x%X", dst_max_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc index f0e3dfb7..fddb142a 100644 --- a/ge/graph/load/model_manager/task_info/model_exit_task_info.cc +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc @@ -25,13 +25,13 @@ Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davi GELOGI("InitModelExitTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); if (ret != SUCCESS) { - GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); + GELOGE(ret, "[Set][Stream] fail, stream_id:%u", task_def.stream_id()); return ret; } @@ -44,9 +44,8 @@ Status ModelExitTaskInfo::Distribute() { GELOGI("ModelExitTaskInfo Distribute Start."); rtError_t rt_ret = rtModelExit(model_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtModelExit failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rtModelExit failed, ret: 0x%x", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtModelExit failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtModelExit] failed, ret:0x%x", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("ModelExitTaskInfo Distribute Success."); diff --git a/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc index 4e829182..ce696978 100755 --- a/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc @@ -24,7 +24,7 @@ Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * GELOGI("ProfilerTraceTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -47,9 +47,9 @@ Status ProfilerTraceTaskInfo::Distribute() { rtError_t rt_ret = rtProfilerTrace(log_id_, notify_, flat_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtProfilerTrace failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtProfilerTrace failed, ret:0x%X, logid:%lu. notify:%d", + rt_ret, log_id_, notify_); + GELOGE(RT_FAILED, "[Call][RtProfilerTrace] failed, ret:0x%X, logid:%lu. 
notify:%d", rt_ret, log_id_, notify_); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc index 4ab4951d..4767913c 100755 --- a/ge/graph/load/model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc @@ -27,7 +27,7 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d GELOGI("StreamActiveTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -46,10 +46,10 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d GE_CHECK_NOTNULL(op_desc); std::vector active_stream_index_list; if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_index_list)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM fail, node name:%s.", op_desc->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in op:%s(%s) fail", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } @@ -57,8 +57,8 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d REPORT_INNER_ERROR("E19999", "flowctrl index:%u >= active_stream_list size:%zu in op:%s(%s), " "check invalid", internal_index, active_stream_index_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "InitStreamSwitchTaskInfo stream id index invalid. index:%u, list size:%zu.", internal_index, - active_stream_index_list.size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] stream id index invalid. index:%u, list size:%zu, op:%s(%s).", + internal_index, active_stream_index_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } @@ -66,8 +66,9 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d REPORT_INNER_ERROR("E19999", "active_stream_index:%u in op:%s(%s) >= stream size:%zu in model, " "check invalid", active_stream_index_list[internal_index], op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size()); - GELOGE(INTERNAL_ERROR, "InitStreamSwitchTaskInfo stream index invalid. 
index:%u, stream list size:%zu.", - active_stream_index_list[internal_index], davinci_model->GetStreamList().size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] active_stream_index:%u in op:%s(%s) >= stream size:%zu in model", + active_stream_index_list[internal_index], op_desc->GetName().c_str(), op_desc->GetType().c_str(), + davinci_model->GetStreamList().size()); return INTERNAL_ERROR; } @@ -83,9 +84,8 @@ Status StreamActiveTaskInfo::Distribute() { GELOGI("StreamActiveTaskInfo Distribute Start."); rtError_t rt_ret = rtStreamActive(active_stream_, stream_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamActive] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } diff --git a/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc index 33dfacf7..dcab4d2f 100644 --- a/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc @@ -32,7 +32,7 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d GELOGI("StreamSwitchTaskInfo Init Start."); if (davinci_model == nullptr) { REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr"); - GELOGE(PARAM_INVALID, "davinci_model is null!"); + GELOGE(PARAM_INVALID, "[Check][Param] davinci_model is null!"); return PARAM_INVALID; } @@ -50,10 +50,10 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d SetInputAndValuePtr(davinci_model, input_data_addr); uint32_t cond = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_STREAM_SWITCH_COND, cond)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - ATTR_NAME_STREAM_SWITCH_COND.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_STREAM_SWITCH_COND.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr STREAM_SWITCH_COND fail."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in op:%s(%s) fail", + ATTR_NAME_STREAM_SWITCH_COND.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } cond_ = static_cast(cond); @@ -63,17 +63,18 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d REPORT_INNER_ERROR("E19999", "input_data_addr.size():%zu or input size:%zu != STREAM_SWITCH_INPUT_NUM:%u " "in op:%s(%s), check invalid", input_data_addr.size(), input_size, STREAM_SWITCH_INPUT_NUM, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Input num should be %u. inputAddr size:%zu, inputDesc size:%zu.", - STREAM_SWITCH_INPUT_NUM, input_data_addr.size(), input_size); + GELOGE(INTERNAL_ERROR, "[Check][Param] Input num should be %u. 
inputAddr size:%zu, inputDesc size:%zu, op:%s(%s).", + STREAM_SWITCH_INPUT_NUM, input_data_addr.size(), input_size, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } vector active_stream_list; if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr ACTIVE_STREAM_LIST fail."); + GELOGE(INTERNAL_ERROR, "[Get][Attr] %s in op:%s(%s) fail", + ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return INTERNAL_ERROR; } @@ -81,17 +82,19 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d REPORT_INNER_ERROR("E19999", "active_stream_list.size():%zu in op:%s(%s) != kTrueBranchStreamNum:%u, " "check invalid", active_stream_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), kTrueBranchStreamNum); - GELOGE(FAILED, "Stream num of switch true branch must be %u.", kTrueBranchStreamNum); + GELOGE(FAILED, "[Check][Param] active_stream_list.size():%zu in op:%s(%s) must be equal %u", + active_stream_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), kTrueBranchStreamNum); return FAILED; } size_t true_stream_index = active_stream_list.front(); if (true_stream_index >= davinci_model->GetStreamList().size()) { REPORT_INNER_ERROR("E19999", "active_stream_index:%zu in op:%s(%s) >= stream list size:%zu in model," - "check invalid", true_stream_index, - op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size()); - GELOGE(INTERNAL_ERROR, "InitStreamSwitchTaskInfo stream index invalid. 
index:%zu, stream list size:%zu.", - true_stream_index, davinci_model->GetStreamList().size()); + "check invalid", true_stream_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), + davinci_model->GetStreamList().size()); + GELOGE(INTERNAL_ERROR, "[Check][Param] active_stream_index:%zu in op:%s(%s) >= stream list size:%zu in model", + true_stream_index, op_desc->GetName().c_str(), op_desc->GetType().c_str(), + davinci_model->GetStreamList().size()); return INTERNAL_ERROR; } @@ -103,10 +106,10 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d if (op_desc->HasAttr(ATTR_NAME_SWITCH_DATA_TYPE)) { int64_t data_type = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_SWITCH_DATA_TYPE, data_type)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - ATTR_NAME_SWITCH_DATA_TYPE.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_SWITCH_DATA_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "StreamSwitchOp[node:%s] get attr SWITCH_DATA_TYPE fail.", op_desc->GetName().c_str()); + GELOGE(FAILED, "[Get][Attr] %s in op:%s(%s) fail", + ATTR_NAME_SWITCH_DATA_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } data_type_ = static_cast(data_type); @@ -122,9 +125,8 @@ Status StreamSwitchTaskInfo::Distribute() { GELOGI("StreamSwitchTaskInfo Distribute Start."); rtError_t rt_ret = rtStreamSwitchEx(input_ptr_, cond_, value_ptr_, true_stream_, stream_, data_type_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamSwitchEx fail, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamSwitchEx fail, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamSwitchEx] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -143,7 +145,8 @@ Status StreamSwitchTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davinc REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != STREAM_SWITCH_INPUT_NUM:%u," "check invalid", op_desc->GetInputsSize(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), STREAM_SWITCH_INPUT_NUM); - GELOGE(FAILED, "Stream switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); + GELOGE(FAILED, "[Check][Param] Stream switch op:%s only have one data input. 
Now input size is %zu", + op_desc->GetName().c_str(), op_desc->GetInputsSize()); return FAILED; } for (uint32_t i = 0; i < STREAM_SWITCH_INPUT_NUM; ++i) { diff --git a/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc index 40bbff02..0daf4626 100755 --- a/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc @@ -36,9 +36,8 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * auto stream_switchn_def = task_def.stream_switch_n(); OpDescPtr op_desc = davinci_model->GetOpByIndex(stream_switchn_def.op_index()); if (op_desc == nullptr) { - REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", - stream_switchn_def.op_index()); - GELOGE(FAILED, "Index is out of range, index: %u", stream_switchn_def.op_index()); + REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", stream_switchn_def.op_index()); + GELOGE(FAILED, "[Get][Op] failed, as Index is out of range, index:%u", stream_switchn_def.op_index()); return FAILED; } @@ -51,7 +50,8 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * REPORT_INNER_ERROR("E19999", "task_Def.stream_switch_n.target_value:%d in op:%s(%s) is 0," "check invalid", value.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "The number of gears in dynamic batch scenario can not be 0."); + GELOGE(FAILED, "[Check][Param] The number of gears in dynamic batch scenario can not be 0, op:%s.", + op_desc->GetName().c_str()); return FAILED; } for (int i = 0; i < value.size(); ++i) { @@ -62,15 +62,15 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * // set element_size_ if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, element_size_)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - ATTR_NAME_BATCH_NUM.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_BATCH_NUM.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "Get ATTR_NAME_BATCH_NUM of switchN op failed."); + GELOGE(FAILED, "[Get][Attr] %s in op:%s(%s) fail", + ATTR_NAME_BATCH_NUM.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } if (GetTrueStreamPtr(op_desc, davinci_model) != SUCCESS) { - GELOGE(FAILED, "Get true stream ptr of switchN op failed."); + GELOGE(FAILED, "[Get][TrueStreamPtr] of switchN op:%s failed.", op_desc->GetName().c_str()); return FAILED; } @@ -92,9 +92,8 @@ Status StreamSwitchNTaskInfo::Distribute() { rtError_t rt_ret = rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamSwitchN failed, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamSwitchN failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamSwitchN] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -106,10 +105,10 @@ Status StreamSwitchNTaskInfo::Distribute() { Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model) { vector true_stream_id_list; if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, true_stream_id_list)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - 
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "StreamSwitchNOp get attr ACTIVE_STREAM_LIST fail."); + GELOGE(FAILED, "[Get][Attr] %s in op:%s(%s) fail", + ATTR_NAME_ACTIVE_STREAM_LIST.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } @@ -118,9 +117,8 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci "check invalid", true_stream_id_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size()); GELOGE(FAILED, - "InitStreamSwitchNTaskInfo get true stream id list failed. true stream size:%zu, " - "stream list size:%zu.", - true_stream_id_list.size(), davinci_model->GetStreamList().size()); + "[Check][Param] InitStreamSwitchNTaskInfo get true stream id list failed. true stream size:%zu, " + "stream list size:%zu.", true_stream_id_list.size(), davinci_model->GetStreamList().size()); return FAILED; } @@ -131,8 +129,8 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci REPORT_INNER_ERROR("E19999", "active_stream_id:%u in op:%s(%s) >= stream list size:%zu in model," "check invalid", true_stream_id, op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size()); - GELOGE(FAILED, "InitStreamSwitchNTaskInfo stream id invalid. id:%u, stream list size:%zu.", true_stream_id, - davinci_model->GetStreamList().size()); + GELOGE(FAILED, " [Check][Param] stream id:%u in op:%s invalid, stream list size:%zu.", + true_stream_id, op_desc->GetName().c_str(), davinci_model->GetStreamList().size()); return FAILED; } rtStream_t true_stream = davinci_model->GetStreamList()[true_stream_id]; @@ -144,7 +142,7 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci REPORT_INNER_ERROR("E19999", "active_stream_list.size():%zu in op:%s(%s) is empty, " "check invalid", true_stream_id_list.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "true stream list is null."); + GELOGE(FAILED, "[Check][Param] true stream list is null, op:%s.", op_desc->GetName().c_str()); return FAILED; } true_stream_ptr_ = &true_stream_list_[0]; @@ -160,10 +158,11 @@ Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davin GE_CHECK_NOTNULL(op_desc); GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); if (op_desc->GetInputsSize() != kStreamSwitchnInputNum) { - REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != kStreamSwitchnInputNum:%u ," + REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != kStreamSwitchnInputNum:%u, " "check invalid", op_desc->GetInputsSize(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), kStreamSwitchnInputNum); - GELOGE(FAILED, "Stream switchn op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); + GELOGE(FAILED, "[Check][Param] Stream switchn op:%s only have one data input. 
Now input size is %zu", + op_desc->GetName().c_str(), op_desc->GetInputsSize()); return FAILED; } string input_tensor_name = op_desc->GetInputNameByIndex(0); @@ -187,7 +186,8 @@ Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciMo REPORT_INNER_ERROR("E19999", "input_offset size:%zu or input_length.size:%zu in op:%s(%s) is empty," "check invalid", input_offset.size(), input_legnth.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "input offset size %zu, input legnth size: %zu", input_offset.size(), input_legnth.size()); + GELOGE(FAILED, "[Check][Param] op:%s input offset size %zu, input legnth size:%zu", + op_desc->GetName().c_str(), input_offset.size(), input_legnth.size()); return FAILED; } const RuntimeParam &rts_param = davinci_model->GetRuntimeParam(); @@ -201,7 +201,7 @@ Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciMo REPORT_INNER_ERROR("E19999", "input_data_addr size:%zu in op:%s(%s) is empty," "check invalid", input_data_addr.size(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "input data addr is empty"); + GELOGE(FAILED, "[Check][Param] input data addr is empty in op:%s", op_desc->GetName().c_str()); return FAILED; } input_ptr_ = input_data_addr[0]; diff --git a/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc index 66bf5ab7..44aac465 100644 --- a/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc @@ -27,23 +27,21 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%lu, ret:0x%X", - sizeof(args), rt_ret); - GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%lu, ret:0x%X", sizeof(args), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failied, size:%lu, ret:0x%X", sizeof(args), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), reinterpret_cast(args), sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", - sizeof(args), rt_ret); - GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", sizeof(args), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failied, size:%lu, ret:0x%X", sizeof(args), rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, dump_flag); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, dump_flag:%u, ret:0x%X", dump_flag, rt_ret); - GELOGE(RT_FAILED, "rtKernelLaunchWithFlag failied. error: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelLaunchWithFlag] failied. 
error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) return SUCCESS; } diff --git a/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc index 2a3e3a17..07dc5d19 100644 --- a/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -33,20 +33,20 @@ Status SuperKernelFactory::Init() { if (handle_ == nullptr) { const char* error = mmDlerror(); GE_IF_BOOL_EXEC(error == nullptr, error = ""); - GELOGE(FAILED, "SKT: open skt lib failed, please check LD_LIBRARY_PATH. errmsg:%s", error); + GELOGE(FAILED, "[Open][SktLib] failed, please check LD_LIBRARY_PATH. errmsg:%s", error); } rtError_t rt_ret; rt_ret = rtGetFunctionByName(this->sk_stub_name_.c_str(), &this->func_stub_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName failed, stub_func:%s, ret:0x%X", this->sk_stub_name_.c_str(), rt_ret); - GELOGE(RT_FAILED, "rtGetFunctionByName failed. stub_func: %s, please export LD_LIBRARY_PATH for " - "libcce_aicore.so", this->sk_stub_name_.c_str()); + GELOGE(RT_FAILED, "[Call][RtGetFunctionByName] failed. stub_func:%s, " + "please export LD_LIBRARY_PATH for libcce_aicore.so", this->sk_stub_name_.c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtGetAddrByFun(this->func_stub_, &this->func_ptr_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtGetAddrByFun failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "rtGetAddrByFun failed. error: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtGetAddrByFun] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) GELOGD( "SKT: fuseKernels super_kernel_template subFunc %p, device func " @@ -104,7 +104,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtGetAddrByFun failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "rtGetAddrByFun failed. error: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtGetAddrByFun] failed, ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); // store two uint64_t address @@ -116,16 +116,14 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list } rt_ret = rtMalloc(reinterpret_cast(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%lu, ret:0x%X", - nav_table_size, rt_ret); - GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%lu, ret:0x%X", nav_table_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%lu, ret:0x%X", nav_table_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(reinterpret_cast(hbm_nav_table_addr), nav_table_size, reinterpret_cast(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", - nav_table_size, rt_ret); - GELOGE(RT_FAILED, "rtMemcpy failed. 
error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", nav_table_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", nav_table_size, rt_ret); GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel h = diff --git a/ge/graph/load/model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc index fb446bf7..fad18d37 100755 --- a/ge/graph/load/model_manager/task_info/task_info.cc +++ b/ge/graph/load/model_manager/task_info/task_info.cc @@ -27,7 +27,7 @@ Status TaskInfo::SetStream(uint32_t stream_id, const std::vector &st } else { REPORT_INNER_ERROR("E19999", "stream_id:%u >= stream_list.size(): %zu, check invalid", stream_id, stream_list.size()); - GELOGE(FAILED, "index: %u >= stream_list.size(): %zu.", stream_id, stream_list.size()); + GELOGE(FAILED, "[Check][Param] index:%u >= stream_list.size():%zu.", stream_id, stream_list.size()); return FAILED; } diff --git a/ge/graph/load/model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h index 99ec3c4e..5657f003 100644 --- a/ge/graph/load/model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -18,6 +18,7 @@ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_ #include +#include #include "cce/customize.h" #include "framework/common/taskdown_common.h" @@ -28,9 +29,11 @@ namespace ge { struct MemInfo { - uint64_t memory_size = 0; + size_t memory_size = 0; uint64_t logic_memory_base = 0; uint8_t *memory_base = nullptr; + uint32_t memory_type = RT_MEMORY_HBM; + std::string memory_key = ""; }; struct RuntimeParam { @@ -40,6 +43,19 @@ struct RuntimeParam { } ~RuntimeParam() = default; + std::string ToString() { + std::stringstream ss; + ss << "session_id:" << session_id << ", stream_num:" << stream_num << ", event_num:" << event_num + << ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base + << ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base + << ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size + << ", ex_memory_info:"; + for (auto it : memory_infos) { + ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]"; + } + return ss.str(); + } + uint64_t mem_size = 0; uint64_t logic_mem_base = 0; uint8_t *mem_base = nullptr; @@ -49,7 +65,7 @@ struct RuntimeParam { uint64_t var_size = 0; uint64_t logic_var_base = 0; uint8_t *var_base = nullptr; - std::map memory_infos; + std::map memory_infos; uint32_t batch_num = 0; uint32_t stream_num = 0; uint32_t event_num = 0; diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index bfa1cdc7..75aa5c01 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -21,7 +21,7 @@ #include #include "framework/common/debug/ge_log.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, @@ -117,7 +117,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) { } free_block_bins_[i] = bin_ptr; } - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } @@ -168,7 +168,7 @@ Status 
CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { if (it == allocated_blocks_.end()) { REPORT_INNER_ERROR("E19999", "Param ptr not allocated before, device_id:%u, check invalid", device_id); - GELOGE(PARAM_INVALID, "Invalid memory pointer"); + GELOGE(PARAM_INVALID, "Invalid memory pointer: %p", ptr); return ge::PARAM_INVALID; } Block *block = it->second; diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index a9c3202a..2db00ff2 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -88,8 +88,8 @@ class CachingAllocator { /// /// @ingroup ge_graph /// @brief free memory + /// @param [in] memory_ptr memory address ptr /// @param [in] device_id device id - /// @param [out] memory_ptr memory address ptr /// @return Status result of function /// Status Free(uint8_t *memory_addr, uint32_t device_id = 0); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index a1a7034b..465ae749 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -65,6 +65,7 @@ #include "graph/passes/merge_pass.h" #include "graph/passes/merge_input_memcpy_pass.h" #include "graph/passes/merge_to_stream_merge_pass.h" +#include "graph/passes/mark_force_unknown_for_cond_pass.h" #include "graph/passes/multi_batch_pass.h" #include "graph/passes/next_iteration_pass.h" #include "graph/passes/permute_pass.h" @@ -106,6 +107,8 @@ #include "graph/common/omg_util.h" #include "common/formats/utils/formats_trans_utils.h" #include "register/custom_pass_helper.h" +#include "external/graph/types.h" +#include "common/util/error_manager/error_manager.h" namespace { const char *const kSummary = "Summary"; @@ -126,6 +129,7 @@ const uint32_t kNotAdded = 0; const uint32_t kStartAdd = 1; const uint32_t kDoneAdded = 2; const uint32_t kNeverLoaded = 0; +const size_t kAlignment = 64; bool IsTailingOptimization() { string is_tailing_optimization_option; @@ -368,9 +372,9 @@ void GraphManager::RemoveAddGraphCondition(GraphId graph_id) { auto it = graph_id_to_add_graph_cond_.find(graph_id); if (it != graph_id_to_add_graph_cond_.end()) { graph_id_to_add_graph_cond_.erase(it); - GELOGD("Successfully removed add_graph_cond of graph [id:%u].", graph_id); + GELOGD("Successfully remove add_graph_cond of graph [id:%u].", graph_id); } else { - GELOGD("Graph [id:%u] has not been added. no need to remove.", graph_id); + GELOGD("Graph [id:%u] has not been added, no need to be removed.", graph_id); } } @@ -463,6 +467,48 @@ Status GraphManager::SetStagesOptions(uint32_t graph_id, const GraphManagerOptio return SUCCESS; } +Status GraphManager::ModifyDataIndex(const Graph &graph, const std::map &graph_option) { + vector data_desc; + set indexes; + auto compute_graph = GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (auto &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + auto op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + int64_t index = 0; + (void) AttrUtils::GetInt(op, ATTR_NAME_INDEX, index); + indexes.insert(index); + data_desc.emplace_back(op); + } + } + if (!indexes.empty()) { + auto first_iter = indexes.begin(); + auto end_iter = indexes.end(); + --end_iter; + auto data_size = static_cast(data_desc.size()); + // The valid index starts with 0 and increases by 1, and num is equal to data_node. 
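+      // Editor's note, illustrative only (not part of this patch): with three Data nodes whose
+      // ATTR_NAME_INDEX values are {0, 1, 3}, indexes.size() == data_desc.size() == 3 and
+      // *first_iter == 0, but *end_iter == 3 while data_size - 1 == 2, so the check below fails;
+      // the indices are then rewritten to 0, 1, 2 in topological order, unless the data shape
+      // range option is set, in which case the mismatch is reported as a hard error.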
+ if (indexes.size() != data_desc.size() || *first_iter != 0 || *end_iter != data_size - 1) { + auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); + if (iter != graph_option.end() && !iter->second.empty()) { + // If data inputs shape range is set, user must set valid data index. + std::string failed_reason = "Data index must be set continuous from 0 when data shape range enabled!"; + REPORT_INPUT_ERROR("E10003", std::vector({"parameter", "value", "reason"}), + std::vector({"--data_index", "-", failed_reason})); + GELOGE(GRAPH_PARAM_INVALID, "[COMP][AddGraph]Input data index is invalid when data shape range enabled."); + return GRAPH_PARAM_INVALID; + } + GELOGI("Graph[%s] input data index is invalid, set data index by topo order.", compute_graph->GetName().c_str()); + int64_t index = 0; + for (auto &op : data_desc) { + (void) AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); + } + } + } + return SUCCESS; +} + Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options, const OmgContext &omg_context) { @@ -492,9 +538,13 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, } // Do add graph SetAddGraphCondition(graph_id, kStartAdd); + if (CheckGraphAdded(graph_id, graph) != SUCCESS) { + GELOGE(FAILED, "AddGraph failed."); + return FAILED; + } + GE_CHK_STATUS_RET(ModifyDataIndex(graph, options)); auto compute_graph = GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); - compute_graph->SetGraphID(graph_id); (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); SetSessionGraphId(compute_graph, graph_id); @@ -534,7 +584,7 @@ Status GraphManager::CheckGraphAdded(const GraphId &graph_id, const Graph &graph bool graph_has_been_added = false; if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) && graph_has_been_added) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail", + REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail.", ATTR_NAME_GRAPH_HAS_BEEN_ADDED.c_str(), graph_id); GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id); @@ -552,6 +602,10 @@ Status GraphManager::CheckGraphAdded(const GraphId &graph_id, const Graph &graph Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &graph, const std::map &options, const OmgContext &omg_context) { + if (HasGraphNode(graph_id)) { + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u", graph_id); + return GE_GRAPH_GRAPH_ALREADY_EXIST; + } if (CheckGraphAdded(graph_id, graph) != SUCCESS) { GELOGE(FAILED, "AddGraphWithCopy failed."); return FAILED; @@ -889,7 +943,7 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, } Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { - GELOGD("set rt_context: session id: %lu, graph id: %u, mode %d, device id:%u.", + GELOGD("Set rt_context: session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, static_cast(mode), ge::GetContext().DeviceId()); rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); @@ -935,7 +989,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorBuildJsonObject(session_id, compute_graph->GetGraphID()), "BuildJsonObject Failed") - GEEVENT("PreRun start: graph node size %zu, session id %lu, graph id %u, graph name %s", + 
GEEVENT("PreRun start: graph node size %zu, session id %lu, graph id %u, graph name %s.", compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(), compute_graph->GetName().c_str()); GE_DUMP(compute_graph, "PreRunBegin"); @@ -956,7 +1010,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); + GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s.", compute_graph->GetName().c_str()); return ret; } } @@ -1051,7 +1105,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: // release rts generate context RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId()); if (ret != SUCCESS) { - GELOGE(ret, "PreRun Failed. graph_id:%u.", graph_node->GetGraphId()); + GELOGE(ret, "PreRun Failed, graph_id:%u.", graph_node->GetGraphId()); return ret; } } @@ -1102,6 +1156,7 @@ Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphN GE_CHK_STATUS_RET(CheckAndReleaseMemory(ge_model, graph_node)); } } + ge_root_model->SetIsSpecificStream(graph_node->IsSpecificStream()); GE_TIMESTAMP_START(LoadGraph); Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, model_listener); GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraph"); @@ -1225,6 +1280,78 @@ Status GraphManager::InnerRunGraph(GraphNodePtr &graph_node, const GraphId &grap return SUCCESS; } +Status GraphManager::InnerRunGraphWithStream(GraphNodePtr &graph_node, const GraphId &graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs) { + auto ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); + if (ret != SUCCESS) { + GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Run][GraphWithStreamAsync] set condition failed, " + "graph id = %u, stream = %p.", graph_id, stream); + graph_node->SetRunFlag(false); + return GE_GRAPH_RUNGRAPH_FAILED; + } + + ret = graph_executor_.ExecuteGraphWithStream(graph_id, stream, graph_node->GetGeRootModel(), inputs, outputs); + graph_node->SetRunFlag(false); + graph_node->SetIsSpecificStream(false); + if (ret != SUCCESS) { + GELOGE(ret, "[Run][GraphWithStreamAsync] execute graph failed, graph id = %u, stream = %p.", graph_id, stream); + return ret; + } + GELOGI("[Run][GraphWithStreamAsync] run graph success, graph id = %u, stream = %p.", graph_id, stream); + return SUCCESS; +} + +Status GraphManager::RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t stream, uint64_t session_id, + const std::vector &inputs, std::vector &outputs) { + ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); + std::lock_guard lock(run_mutex_); + GELOGI("Start to run graph with stream async, graph id = %u, stream = %p.", graph_id, stream); + + if (inputs.empty()) { + GELOGI("Run graph with stream async, initialize sub graph has no inputs."); + } + + // find graph + GraphNodePtr graph_node = nullptr; + Status ret = GetGraphNode(graph_id, graph_node); + if (ret != SUCCESS) { + REPORT_INNER_ERROR("E19999", "graph id = %u not exist in graph_map, check invalid.", graph_id); + GELOGE(ret, "Run graph with stream async graph not exist, graph id = %u.", graph_id); + return ret; + } + if (graph_node == nullptr) { + REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph id = %u, check invalid.", graph_id); + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Run graph with stream async graph node is NULL, graph id = %u.", graph_id); + return GE_GRAPH_GRAPH_NODE_NULL; + } + if 
(graph_node->GetRunFlag()) { + REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph id = %u, " + "check invalid.", graph_id); + GELOGE(GE_GRAPH_ALREADY_RUNNING, "Run graph with stream async graph already running, graph id = %u.", graph_id); + return GE_GRAPH_ALREADY_RUNNING; + } + + UpdateLocalOmgContext(graph_id); + // set graph's run flag + graph_node->SetRunFlag(true); + graph_node->SetIsSpecificStream(true); + ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); + + // when set incre build, add cache helper map + AddModelCacheHelperToMap(graph_id, session_id, compute_graph_tmp); + if (options_.local_fmk_op_flag) { + GetCompilerStages(graph_id).optimizer.TranFrameOp(compute_graph_tmp); + } + GeRootModelPtr ge_root_model = nullptr; + ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); + if (ret != SUCCESS) { + GELOGE(ret, "[Run][GraphWithStreamAsync] StartForRunGraph failed!"); + graph_node->SetRunFlag(false); + return ret; + } + return InnerRunGraphWithStream(graph_node, graph_id, stream, inputs, outputs); +} + Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector &inputs, std::vector &outputs, uint64_t session_id) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); @@ -1664,7 +1791,8 @@ Status GraphManager::ParseOptions(const std::map &opti return GE_GRAPH_OPTIONS_INVALID); // ge.graphType - ret = ParseTrainGraphFlag(options_.run_graph_flag, options_.train_graph_flag); + ret = + ParseTrainGraphFlag(options_.run_graph_flag, options_.train_graph_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.runFlag value is invalid"); return GE_GRAPH_OPTIONS_INVALID); @@ -1706,20 +1834,18 @@ Status GraphManager::ParseOptions(const std::map &opti return SUCCESS; } -Status GraphManager::ParseTrainGraphFlag(bool &options, bool &option) { +Status GraphManager::ParseTrainGraphFlag(const bool &run_flag, bool &train_flag) { std::shared_ptr ge_instance_ptr = ge::GELib::GetInstance(); if (ge_instance_ptr == nullptr) { GELOGW("[Initialize] set train_graph_flag to 0 when GE is not initialized or finalized"); - option = false; + train_flag = false; } else if (!ge_instance_ptr->isTrainMode()) { - option = false; + train_flag = false; } else { // ge_instance_ptr->isTrainMode() is true - if (!options) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, - "Key:ge.runFlag, its value %d is invalid, it must be 1 when GElib::is_train_mode_ flag is 1", options); - return GE_GRAPH_OPTIONS_INVALID; + train_flag = true; + if (!run_flag) { + GELOGW("Key:ge.runFlag, its value %d is invalid, it must be 1 when GElib::is_train_mode_ flag is 1", run_flag); } - option = true; } return SUCCESS; } @@ -2455,7 +2581,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { // the prune pass should between SwitchPass and SwitchToStreamSwitchPass GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::Migration", new (std::nothrow) SubgraphConstMigrationPass)); GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ArgsClean", new (std::nothrow) UnusedArgsCleanPass)); - GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)) + GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)); + auto mark_force_unknown_pass = new (std::nothrow) MarkForceUnknownForCondPass; + GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::MarkForceUnknownForCondPass", 
mark_force_unknown_pass)); GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass)) GE_CHK_STATUS_RET( @@ -2863,7 +2991,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager } // run graph async on session -Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, +Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, uint64_t session_id, RunAsyncCallback callback) { ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); GELOGI("[GraphManager] Start to run graph async, graph_id=%u, inputsSize=%zu.", graph_id, inputs.size()); @@ -2935,14 +3063,6 @@ Status GraphManager::IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_m return FAILED; } -void GraphManager::ConstructGeInput(const vector &inputs, vector &ge_inputs) { - for (auto const &input : inputs) { - GeTensorDesc input_tensor_desc(GeShape(input.dims)); - input_tensor_desc.SetDataType(static_cast(input.data_type)); - ge_inputs.emplace_back(input_tensor_desc); - } -} - Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model) { if (!graph_manager->IsGraphNeedBuild(graph_node)) { @@ -2961,7 +3081,9 @@ Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const GeModelPtr ge_model = nullptr; if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { std::vector ge_inputs; - ConstructGeInput(args.input_tensor, ge_inputs); + for (const auto &item: args.input_tensor) { + ge_inputs.emplace_back(TensorAdapter::AsGeTensor(item)); + } Status ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); // release rts generate context RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId()); @@ -3073,20 +3195,19 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { } } -void GraphManager::ParseInputsDimsForData(const std::vector &input_tensor) { +void GraphManager::ParseInputsDimsForData(const std::vector &input_tensor) { GELOGD("Start parse input dims from data."); for (size_t i = 0; i < input_tensor.size(); ++i) { - std::vector dynamic_dim; - for (size_t j = 0; j < input_tensor[i].dims.size(); ++j) { - dynamic_dim.emplace_back(input_tensor[i].dims[j]); - } - GELOGD("Input tensor dims is %s.", formats::JoinToString(dynamic_dim).c_str()); - GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor[i].dims); + const TensorDesc &tensor_desc = input_tensor[i].GetTensorDesc(); + const Shape &shape = tensor_desc.GetShape(); + const auto &shape_dims = shape.GetDims(); + GELOGD("Input tensor dims is %s.", formats::JoinToString(shape_dims).c_str()); + GetLocalOmgContext().user_real_input_dims.emplace_back(shape_dims); } } Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, - const std::vector &input_tensor) { + const std::vector &input_tensor) { GELOGD("Start parse inputs dims when coexist data and getnext sink."); for (size_t i = 0; i < dynamic_nodes.size(); ++i) { auto op_desc = dynamic_nodes.at(i)->GetOpDesc(); @@ -3109,13 +3230,16 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector return PARAM_INVALID; } - 
GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor.at(index).dims); - GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str()); + const TensorDesc &tensor_desc = input_tensor[i].GetTensorDesc(); + const Shape &shape = tensor_desc.GetShape(); + const auto &shape_dims = shape.GetDims(); + GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(shape_dims).c_str()); + GetLocalOmgContext().user_real_input_dims.emplace_back(std::move(shape_dims)); } return SUCCESS; } -Status GraphManager::ParseInputsDims(const std::vector &input_tensor) { +Status GraphManager::ParseInputsDims(const std::vector &input_tensor) { GELOGI("Start parse input dims of %zu input tensor.", input_tensor.size()); GetLocalOmgContext().user_real_input_dims.clear(); if (!GetLocalOmgContext().dynamic_node_type.empty()) { @@ -3246,13 +3370,13 @@ void GraphManager::ReturnError(GraphManager *graph_manager, RunAsyncCallback cal } StopQueue(graph_manager); GELOGE(ret, "%s.", log.c_str()); - std::vector outputs; + std::vector outputs; callback(ret, outputs); } -void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, - RunAsyncCallback callback, Status ret, const string &log) { - std::vector outputs; +void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, + Status ret, const string &log) { + std::vector outputs; auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); if (graph_manager == nullptr || compute_graph == nullptr) { REPORT_INNER_ERROR("E19999", "Param graph_manager or compute_graph in graph_node is nullptr, " @@ -3268,9 +3392,10 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_ } for (size_t i = 0; i < node->GetAllInDataAnchorsSize(); i++) { auto input_desc = node->GetOpDesc()->MutableInputDesc(i); - ge::OutputTensorInfo tensor; - tensor.dims = input_desc->GetShape().GetDims(); - tensor.data_type = static_cast(input_desc->GetDataType()); + GeShape ge_shape(input_desc->GetShape().GetDims()); + GeTensorDesc ge_tensor_desc; + ge_tensor_desc.SetShape(ge_shape); + GeTensor ge_tensor(ge_tensor_desc); int64_t len = 1; if (input_desc->GetShape().GetDims() != std::vector({})) { len = input_desc->GetShape().GetShapeSize(); @@ -3286,30 +3411,19 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_ GELOGI("getted shape size is 0.Do process as empty tensor!"); len = 1; } - auto size = GetSizeByDataType(input_desc->GetDataType()); - if (size <= 0) { - REPORT_INNER_ERROR("E19999", "data_type:%s of op:%s(%s) is not support, input_index:%zu check invalid", - ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), - node->GetName().c_str(), node->GetType().c_str(), i); - GELOGE(PARAM_INVALID, "Failed to get cube size, the data type %s is invalid", - ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); - callback(GRAPH_FAILED, outputs); + auto length = GetSizeInBytes(len, input_desc->GetDataType()); + auto aligned_ptr = MakeShared(length, kAlignment); + if (aligned_ptr == nullptr) { + REPORT_INNER_ERROR("E19999", "Aligned_ptr is nullptr"); + GELOGE(GRAPH_FAILED, "[Analyze Mode] Aligned_ptr is nullptr"); return; } - if (CheckInt64MulOverflow(len, static_cast(size)) != true) { - REPORT_INNER_ERROR("E19999", "shape_size:%ld of op:%s(%s) will overflow after multiply by " - "size:%u of data_type:%s, input_index:%zu, check invalid", len, - 
node->GetName().c_str(), node->GetType().c_str(), size, - ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), i); - GELOGE(MEMALLOC_FAILED, "int64 multiply happens overflow! a:%ld b:%d", len, size); - callback(GRAPH_FAILED, outputs); - return; - } - tensor.length = len * size; - tensor.data.reset(new(std::nothrow) uint8_t[tensor.length]); + ge_tensor.SetData(aligned_ptr, length); + ge::Tensor tensor = TensorAdapter::AsTensor(ge_tensor); // To avoid global step too small and can not stop, totally set a bigger value - for (int64_t i = 0; i < tensor.length; i++) { - tensor.data[i] = 0x7F; // here stands for a positive max value + auto ptr = aligned_ptr->MutableGet(); + for (int64_t i = 0; i < length; i++) { + ptr[i] = 0x7F; // here stands for a positive max value } outputs.emplace_back(std::move(tensor)); } @@ -3657,7 +3771,7 @@ void GraphManager::UpdateLocalOmgContext(GraphId graph_id) { if (iter != omg_contexts_.end()) { SetLocalOmgContext(iter->second); } else { - GELOGW("OmgContext of graph %u not found.", graph_id); + GELOGW("OmgContext of graph %u is not found.", graph_id); } } @@ -3687,9 +3801,9 @@ void GraphManager::RemoveGraphCount(GraphId graph_id) { std::lock_guard lock(graph_count_mutex_); auto it = graph_count_.find(graph_id); if (it == graph_count_.end()) { - GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id); + GELOGW("Graph of id: %u has not been added, count cannot be decreased", graph_id); } else { - GELOGD("RemoveGraphCount success, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); + GELOGD("RemoveGraphCount success, graph count of id[%u] is %u", graph_id, graph_count_[graph_id]); graph_count_.erase(it); } } diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 960c253c..945a5e5d 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -105,6 +105,19 @@ class GraphManager { /// /// @ingroup ge_graph + /// @brief run specific graph with specific session id and stream + /// @param [in] graph_id graph id + /// @param [in] stream specific stream + /// @param [in] session_id session id + /// @param [in] inputs input data + /// @param [out] outputs output data + /// @return Status result of function + /// + Status RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t stream, uint64_t session_id, + const std::vector &inputs, std::vector &outputs); + + /// + /// @ingroup ge_graph /// @brief build specific graph /// @param [in] graph_id graph id /// @param [in] inputs input data @@ -149,9 +162,8 @@ class GraphManager { /// @param [out] callback: callback while run graph async finish /// @return Status result of function /// - Status RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, + Status RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, uint64_t session_id, RunAsyncCallback callback); - /// /// @ingroup ge_graph /// @brief me register the callback function to get the result of summary or checkpoin @@ -208,7 +220,7 @@ class GraphManager { struct PreRunArgs { GraphId graph_id; - std::vector input_tensor; + std::vector input_tensor; uint64_t session_id; struct error_message::Context error_context; GEThreadLocalContext context; @@ -220,7 +232,7 @@ class GraphManager { GraphId graph_id; uint64_t session_id; struct error_message::Context error_context; - std::vector input_tensor; + std::vector input_tensor; GeRootModelPtr ge_root_model; GEThreadLocalContext context; RunAsyncCallback callback; @@ -239,10 +251,10 @@ 
class GraphManager { uint64_t session_id, const struct error_message::Context &error_context, const GEThreadLocalContext &ge_context); - Status ParseInputsDims(const std::vector &input_tensor); - void ParseInputsDimsForData(const std::vector &input_tensor); + Status ParseInputsDims(const std::vector &input_tensor); + void ParseInputsDimsForData(const std::vector &input_tensor); Status ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, - const std::vector &input_tensor); + const std::vector &input_tensor); Status RunCustomPass(const GraphNodePtr &graph_node); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); @@ -258,6 +270,9 @@ class GraphManager { Status InnerRunGraph(GraphNodePtr &graph_node, const GraphId &graph_id, const std::vector &inputs, std::vector &outputs); + Status InnerRunGraphWithStream(GraphNodePtr &graph_node, const GraphId &graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs); + Status ParseOptions(const std::map &options); static void ParseOption(const std::map &options, const std::string &key, @@ -277,7 +292,7 @@ class GraphManager { static Status ParseParallelNum(const std::string ¶llel_num, const std::string &key, int &num); - static Status ParseTrainGraphFlag(bool &options, bool &option); + static Status ParseTrainGraphFlag(const bool &run_flag, bool &train_flag); static bool IsPerfLevelInvalid(int32_t perf_level); @@ -353,7 +368,6 @@ class GraphManager { void RemoveModelCacheHelper(const GraphId &graph_id); ModelCacheHelperPtr FindModelCacheHelper(GraphId graph_id); - static void ConstructGeInput(const std::vector &inputs, std::vector &ge_inputs); static void PreRunThread(GraphManager *graph_manager); static void RunThread(GraphManager *graph_manager); static void StopQueue(GraphManager *graph_manager); @@ -413,6 +427,8 @@ class GraphManager { void SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id); + Status ModifyDataIndex(const Graph &graph, const std::map &graph_option); + static Status CheckGraphAdded(const GraphId &graph_id, const Graph &graph); std::atomic_bool thread_run_flag_; diff --git a/ge/graph/manager/graph_manager_utils.cc b/ge/graph/manager/graph_manager_utils.cc index e9d72bd8..0f93654c 100644 --- a/ge/graph/manager/graph_manager_utils.cc +++ b/ge/graph/manager/graph_manager_utils.cc @@ -41,6 +41,7 @@ GraphNode::GraphNode(GraphId graph_id) build_flag_(false), load_flag_(false), async_(false), + is_specific_stream_(false), ge_model_(nullptr), sem_(1) { graph_run_async_listener_ = MakeShared(); @@ -113,7 +114,7 @@ GraphModelListener::GraphModelListener(std::mutex &mutex, std::condition_variabl : result_code_(0), is_finished_(false), mutex_(mutex), condition_(cond) {} Status GraphModelListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, - std::vector &outputs) { + std::vector &outputs) { GELOGI( "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " "resultCode:%u.", @@ -150,7 +151,7 @@ void RunAsyncListener::SetCallback(const RunAsyncCallback &callback) { } Status RunAsyncListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, - std::vector &outputs) { + std::vector &outputs) { GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", model_id, task_id, result); GE_CHECK_NOTNULL(callback_); diff --git a/ge/graph/manager/graph_manager_utils.h b/ge/graph/manager/graph_manager_utils.h index bebba93e..d38b4321 
100644 --- a/ge/graph/manager/graph_manager_utils.h +++ b/ge/graph/manager/graph_manager_utils.h @@ -130,7 +130,7 @@ class RunAsyncListener : public ge::ModelListener { // callback Status OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, - std::vector &outputs) override; + std::vector &outputs) override; private: RunAsyncCallback callback_; @@ -167,6 +167,8 @@ class GraphNode { void UpdateLoadFlag() { load_flag_ = load_count_ == 0 || load_record_ >= kMaxLoadNum; } void SetLoadFlag(bool load_flag) { load_flag_ = load_flag; } void SetGeModel(const GeModelPtr &ge_model) { ge_model_ = ge_model; } + void SetIsSpecificStream(bool specific_stream) { is_specific_stream_ = specific_stream; } + bool IsSpecificStream() const { return is_specific_stream_; } GeModelPtr GetGeModel() const { return ge_model_; } void SetGeRootModel(const GeRootModelPtr &ge_root_model) { ge_root_model_ = ge_root_model; } GeRootModelPtr GetGeRootModel() const { return ge_root_model_; } @@ -200,6 +202,7 @@ class GraphNode { // load_flag_ is true if more than 1 model were loaded bool load_flag_; bool async_; + bool is_specific_stream_; GeModelPtr ge_model_; GeRootModelPtr ge_root_model_; BlockingQueue sem_; @@ -221,7 +224,7 @@ class GraphModelListener : public ge::ModelListener { // callback Status OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, - std::vector &outputs) override; + std::vector &outputs) override; Status ResetResult(); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 24e75356..0cccaf99 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -17,11 +17,9 @@ #include "graph/manager/graph_mem_allocator.h" #include -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" + namespace ge { -void MemoryAllocator::Initialize(uint32_t device_id) { +Status MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); // when redo Initialize free memory @@ -31,6 +29,7 @@ void MemoryAllocator::Initialize(uint32_t device_id) { } } memory_base_map_.clear(); + return SUCCESS; } void MemoryAllocator::Finalize(uint32_t device_id) { @@ -152,113 +151,4 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic return it->second.memory_addr_; } - -MemManager::MemManager() {} - -MemManager::~MemManager() { Finalize(); } - -MemManager &MemManager::Instance() { - static MemManager mem_manager; - return mem_manager; -} - -MemoryAllocator *MemManager::Instance(rtMemType_t memory_type) { return Instance().GetMemoryAllocator(memory_type); } - -Status MemManager::Initialize(const std::vector &memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = memory_allocator_map_.find(index); - if (it == memory_allocator_map_.end()) { - memory_allocator = new (std::nothrow) MemoryAllocator(index); - - if (memory_allocator != nullptr) { - memory_allocator_map_[index] = memory_allocator; - GELOGI("Create MemoryAllocator memory type[%u] success.", index); - } else { - REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); - } - } else { - memory_allocator = it->second; - } - - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); 
- return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - memory_allocator->Initialize(0); - } - } - - auto ret = InitAllocator(memory_type, caching_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create CachingAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, rdma_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create RdmaAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, host_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create HostMemAllocator failed."); - return ret; - } - return SUCCESS; -} - -template -void FinalizeAllocatorMap(std::map &allocate_map) { - for (auto &allocator : allocate_map) { - if (allocator.second != nullptr) { - allocator.second->Finalize(); - delete allocator.second; - allocator.second = nullptr; - } - } - allocate_map.clear(); -} - -void MemManager::Finalize() noexcept { - GELOGI("Finalize."); - std::lock_guard lock(allocator_mutex_); - // caching and rdma allocator use memory allocator, so finalize them first - FinalizeAllocatorMap(caching_allocator_map_); - FinalizeAllocatorMap(rdma_allocator_map_); - FinalizeAllocatorMap(host_allocator_map_); - FinalizeAllocatorMap(memory_allocator_map_); -} - -MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - auto it = memory_allocator_map_.find(memory_type); - if (it != memory_allocator_map_.end()) { - memory_allocator = it->second; - } - - // Usually impossible - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); - static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); - return &default_memory_allocator; - } - - return memory_allocator; -} - -CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, caching_allocator_map_); -} - -RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, rdma_allocator_map_); -} -HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, host_allocator_map_); -} } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 9f8b86b2..b6d73f0a 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -26,7 +26,6 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/manager/host_mem_allocator.h" #include "graph/node.h" #include "runtime/mem.h" @@ -71,9 +70,9 @@ class MemoryAllocator { /// @ingroup ge_graph /// @brief memory allocator init /// @param [in] options user config params - /// @return void + /// @return Status of init /// - void Initialize(uint32_t device_id = 0); + Status Initialize(uint32_t device_id = 0); /// /// @ingroup ge_graph @@ -136,109 +135,6 @@ class MemoryAllocator { bool mem_malloced_; map memory_base_map_; }; - -using MemoryAllocatorPtr = std::shared_ptr; -class CachingAllocator; -class RdmaPoolAllocator; -class MemManager { - public: - MemManager(); - virtual ~MemManager(); - static MemManager &Instance(); - static MemoryAllocator *Instance(rtMemType_t memory_type); - CachingAllocator &CachingInstance(rtMemType_t memory_type); - RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); - HostMemAllocator &HostMemInstance(rtMemType_t memory_type); - 
MemManager(const MemManager &) = delete; - MemManager &operator=(const MemManager &) = delete; - /// - /// @ingroup ge_graph - /// @brief memory allocator manager init - /// @param [in] options user config params - /// @return Status result of function - /// - Status Initialize(const std::vector &memory_type); - - /// - /// @ingroup ge_graph - /// @brief memory allocator finalize - /// @return void - /// - void Finalize() noexcept; - - private: - /// - /// @ingroup ge_graph - /// @brief ge memory allocator - /// @param [in] memory_type memory type - /// @return MemoryAllocator ptr - /// - MemoryAllocator *GetMemoryAllocator(rtMemType_t memory_type); - - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Status result of function - /// - template - Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { - T *allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = allocate_map.find(index); - if (it == allocate_map.end()) { - allocator = new (std::nothrow) T(index); - if (allocator != nullptr) { - allocate_map[index] = allocator; - GELOGI("Create Allocator memory type[%u] success.", index); - } else { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); - } - } else { - allocator = it->second; - } - - if (allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - if (allocator->Initialize() != SUCCESS) { - return ACL_ERROR_GE_INTERNAL_ERROR; - } - } - } - return SUCCESS; - } - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Allocator ptr - /// - template - T &GetAllocator(rtMemType_t memory_type, std::map allocate_map) { - std::lock_guard lock(allocator_mutex_); - T *allocator = nullptr; - auto it = allocate_map.find(memory_type); - if (it != allocate_map.end()) { - allocator = it->second; - } - - // Usually impossible - if (allocator == nullptr) { - GELOGW("Get allocator failed, memory type is %u.", memory_type); - static T default_allocator(RT_MEMORY_RESERVED); - return default_allocator; - } - return *allocator; - } - - std::map memory_allocator_map_; - std::map caching_allocator_map_; - std::map rdma_allocator_map_; - std::map host_allocator_map_; - std::recursive_mutex allocator_mutex_; -}; } // namespace ge #endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ diff --git a/ge/graph/manager/graph_mem_manager.cc b/ge/graph/manager/graph_mem_manager.cc new file mode 100644 index 00000000..21eaf302 --- /dev/null +++ b/ge/graph/manager/graph_mem_manager.cc @@ -0,0 +1,116 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/manager/graph_mem_manager.h" + +#include + +namespace ge { +MemManager::MemManager() {} + +MemManager::~MemManager() { Finalize(); } + +MemManager &MemManager::Instance() { + static MemManager mem_manager; + return mem_manager; +} + +Status MemManager::Initialize(const std::vector &memory_type) { + std::lock_guard lock(allocator_mutex_); + if (init_) { + GELOGW("MemManager has already been initialized."); + return SUCCESS; + } + + auto ret = InitAllocator(memory_type, memory_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create MemoryAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, caching_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create CachingAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, rdma_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create RdmaAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, host_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create HostMemAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, session_scope_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create SessionScopeMemAllocator failed."); + return ret; + } + init_ = true; + memory_type_ = memory_type; + return SUCCESS; +} + +template +void FinalizeAllocatorMap(std::map &allocate_map) { + for (auto &allocator : allocate_map) { + if (allocator.second != nullptr) { + allocator.second->Finalize(); + delete allocator.second; + allocator.second = nullptr; + } + } + allocate_map.clear(); +} + +void MemManager::Finalize() noexcept { + GELOGI("Finalize."); + std::lock_guard lock(allocator_mutex_); + // caching and rdma allocator use memory allocator, so finalize them first + FinalizeAllocatorMap(session_scope_allocator_map_); + FinalizeAllocatorMap(caching_allocator_map_); + FinalizeAllocatorMap(rdma_allocator_map_); + FinalizeAllocatorMap(host_allocator_map_); + FinalizeAllocatorMap(memory_allocator_map_); + init_ = false; + memory_type_.clear(); +} + +MemoryAllocator &MemManager::MemInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, memory_allocator_map_); +} + +CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, caching_allocator_map_); +} + +RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, rdma_allocator_map_); +} + +HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, host_allocator_map_); +} + +SessionScopeMemAllocator &MemManager::SessionScopeMemInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, session_scope_allocator_map_); +} +} // namespace ge diff --git a/ge/graph/manager/graph_mem_manager.h b/ge/graph/manager/graph_mem_manager.h new file mode 100644 index 00000000..d7993ed4 --- /dev/null +++ b/ge/graph/manager/graph_mem_manager.h @@ -0,0 +1,141 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ +#define GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ + +#include +#include +#include +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/session_scope_mem_allocator.h" +#include "graph/node.h" +#include "runtime/mem.h" + +namespace ge { +using MemoryAllocatorPtr = std::shared_ptr; + +class MemManager { + public: + MemManager(); + virtual ~MemManager(); + static MemManager &Instance(); + MemoryAllocator &MemInstance(rtMemType_t memory_type); + CachingAllocator &CachingInstance(rtMemType_t memory_type); + RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); + HostMemAllocator &HostMemInstance(rtMemType_t memory_type); + SessionScopeMemAllocator &SessionScopeMemInstance(rtMemType_t memory_type); + MemManager(const MemManager &) = delete; + MemManager &operator=(const MemManager &) = delete; + /// + /// @ingroup ge_graph + /// @brief memory allocator manager init + /// @param [in] options user config params + /// @return Status result of function + /// + Status Initialize(const std::vector &memory_type); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize + /// @return void + /// + void Finalize() noexcept; + + const std::vector &GetAllMemoryType() const { return memory_type_; } + + private: + /// + /// @ingroup ge_graph + /// @param [in] memory_type memory type + /// @param [in] allocate_map memory allocator map + /// @return Status result of function + /// + template + Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { + T *allocator = nullptr; + for (unsigned int index : memory_type) { + auto it = allocate_map.find(index); + if (it == allocate_map.end()) { + allocator = new (std::nothrow) T(index); + if (allocator != nullptr) { + allocate_map[index] = allocator; + GELOGI("Create Allocator memory type[%u] success.", index); + } else { + REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); + } + } else { + allocator = it->second; + } + + if (allocator == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } else { + if (allocator->Initialize() != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + } + } + return SUCCESS; + } + /// + /// @ingroup ge_graph + /// @param [in] memory_type memory type + /// @param [in] allocate_map memory allocator map + /// @return Allocator ptr + /// + template + T &GetAllocator(rtMemType_t memory_type, std::map allocate_map) { + std::lock_guard lock(allocator_mutex_); + T *allocator = nullptr; + auto it = allocate_map.find(memory_type); + if (it != allocate_map.end()) { + allocator = it->second; + } + + // Usually impossible + if (allocator == nullptr) { + GELOGW("Get allocator failed, memory type is %u.", memory_type); + static T default_allocator(RT_MEMORY_RESERVED); + return default_allocator; + } + return *allocator; + } + + std::map memory_allocator_map_; + std::map caching_allocator_map_; + std::map rdma_allocator_map_; + std::map host_allocator_map_; 
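+  // Editor's note, illustrative only (not part of this patch): the map below keeps one
+  // SessionScopeMemAllocator per memory type; callers reach every allocator family through the
+  // singleton, following the access pattern used elsewhere in this change, e.g.
+  //   MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, size);
+  //   MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM);  // new session-scope allocator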
+ std::map session_scope_allocator_map_; + std::recursive_mutex allocator_mutex_; + std::vector memory_type_; + bool init_ = false; +}; +} // namespace ge + +#endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 5d440f00..5f7586da 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -17,8 +17,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/debug/ge_attr_define.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/type_utils.h" @@ -542,11 +541,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen GELOGE(ge::INTERNAL_ERROR, "MemResource is invalid, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; } - result = mem_resource->AssignVarMem(var_name, tensor_desc_size, session_id_, mem_offset); - if (result != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); - return ge::INTERNAL_ERROR; - } + if (var_resource_ == nullptr) { REPORT_INNER_ERROR("E19999", "VarManager has not been init, memory_type:%d, session_id:%lu, " "check invalid", memory_type, session_id_); @@ -554,31 +549,46 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen return ge::INTERNAL_ERROR; } - result = var_resource_->SaveVarAddr( - var_name, tensor_desc, reinterpret_cast(static_cast(mem_offset)), memory_type); - if (result != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); - return ge::INTERNAL_ERROR; + ge::GeTensorDesc cur_tensor_desc; + int64_t cur_tensor_desc_size = 0; + result = var_resource_->GetCurVarDesc(var_name, cur_tensor_desc); + // reuse old format variable memory + if (result == SUCCESS) { + result = var_resource_->GetVarAddr( + var_name, cur_tensor_desc, reinterpret_cast(reinterpret_cast(&mem_offset)), memory_type); + if (result == SUCCESS) { + result = TensorUtils::GetSize(cur_tensor_desc, cur_tensor_desc_size); + GELOGD("tensor_desc_size is %ld, cur_tensor_desc_size is %ld, memoffset is %zu", tensor_desc_size, + cur_tensor_desc_size, mem_offset); + } } - result = var_resource_->GetVarAddr( - var_name, tensor_desc, reinterpret_cast(reinterpret_cast(&mem_offset)), memory_type); - if (result != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "GetVarAddr by offset failed."); - return ge::INTERNAL_ERROR; - } + bool can_not_reuse_old_memory = (result != SUCCESS) || (tensor_desc_size > cur_tensor_desc_size); + if (can_not_reuse_old_memory) { + result = mem_resource->AssignVarMem(var_name, tensor_desc_size, session_id_, mem_offset); + if (result != SUCCESS) { + GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); + return ge::INTERNAL_ERROR; + } - ge::GeTensorDesc cur_tensor_desc; + result = var_resource_->SaveVarAddr( + var_name, tensor_desc, reinterpret_cast(static_cast(mem_offset)), memory_type); + if (result != SUCCESS) { + GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); + return ge::INTERNAL_ERROR; + } + } + // old not exist only save new tensor result = var_resource_->GetCurVarDesc(var_name, cur_tensor_desc); if (result != SUCCESS) { var_resource_->SetVarAddr(var_name, tensor_desc, reinterpret_cast(static_cast(mem_offset)), memory_type); return SUCCESS; } - - if (cur_tensor_desc.GetFormat() != tensor_desc.GetFormat() || - cur_tensor_desc.GetDataType() != 
tensor_desc.GetDataType() || - cur_tensor_desc.GetShape().GetDims() != tensor_desc.GetShape().GetDims()) { + bool format_changed = cur_tensor_desc.GetFormat() != tensor_desc.GetFormat() || + cur_tensor_desc.GetDataType() != tensor_desc.GetDataType() || + cur_tensor_desc.GetShape().GetDims() != tensor_desc.GetShape().GetDims(); + if (format_changed) { GELOGI("var %s assigned new memory (format, data type, shape) (%s, %s, %zu) from (%s, %s, %zu)", var_name.c_str(), ge::TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str(), ge::TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), @@ -717,7 +727,7 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; const string purpose("variables and constant op memory in training network."); - var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size); + var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size); if (var_mem_base == nullptr) { GELOGE(ge::INTERNAL_ERROR, "VarManager::MallocVarMemory failed " @@ -734,7 +744,7 @@ uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); } string memory_key = std::to_string(session_id_); - return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); + return MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { @@ -743,7 +753,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty return logic_addr; } string mem_key = std::to_string(session_id_); - uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); + uint8_t *mem_base = MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(mem_key); if (mem_base == nullptr) { return nullptr; } @@ -755,7 +765,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty ge::Status VarManager::FreeVarMemory() { std::lock_guard lock(mutex_); string memory_key = std::to_string(SessionId()); - return MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key); + return MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key); } ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index 40a0d1b9..2908df39 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -45,12 +45,11 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { return GE_GRAPH_MEMORY_ALLOC_FAILED; } mem_info.fd = output_para.fd; - mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr &ptr) { - ptr.reset(reinterpret_cast(output_para.ptr)); - }, - [](uint8_t *ptr) { - ptr = nullptr; - }); + mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc( + [&output_para](std::unique_ptr &ptr) { + ptr.reset(reinterpret_cast(output_para.ptr)); + }, + [](uint8_t *ptr) { ptr = nullptr; }); mem_info.device_address = reinterpret_cast(output_para.devPtr); return SUCCESS; } diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 415f8088..8e737021 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -19,7 
+19,7 @@ #include #include "common/ge/plugin_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/rdma_pool_allocator.h" #include "graph/utils/type_utils.h" diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index c19a2159..58829adb 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -20,6 +20,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" #include "runtime/dev.h" +#include "graph/manager/graph_mem_manager.h" namespace { const size_t kAlignedSize = 512; @@ -49,7 +50,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) })) {} Status RdmaPoolAllocator::Initialize() { - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/ge/graph/manager/session_scope_mem_allocator.cc b/ge/graph/manager/session_scope_mem_allocator.cc new file mode 100644 index 00000000..aedc2e92 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.cc @@ -0,0 +1,83 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
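In the hunks above and below (graph_var_manager.cc, memory_api.cc, rdma_pool_allocator.cc), call sites move from the old per-type singleton to the new MemManager accessor chain. A minimal before/after sketch, using the variable names that appear in those hunks and assuming the surrounding GE context:

// Before: per-type singleton returning a pointer.
//   var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size);
// After: one MemManager singleton with typed accessors returning references.
var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size);
// Code that caches a MemoryAllocator* (e.g. RdmaPoolAllocator::Initialize) now stores the address of the returned reference:
memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_);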
+ */ + +#include "graph/manager/session_scope_mem_allocator.h" + +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "graph/manager/graph_mem_manager.h" + +namespace ge { + +SessionScopeMemAllocator::SessionScopeMemAllocator(rtMemType_t memory_type) + : memory_type_(memory_type), memory_allocator_(nullptr) {} + +Status SessionScopeMemAllocator::Initialize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + // when redo Initialize free old memory + FreeAllMemory(); + std::lock_guard lock(mutex_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); + if (memory_allocator_ == nullptr) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::Finalize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + FreeAllMemory(); +} + +uint8_t *SessionScopeMemAllocator::Malloc(size_t size, uint64_t session_id, uint32_t device_id) { + GELOGI("Start malloc memory, size:%zu, session id:%lu device id:%u", size, session_id, device_id); + const std::string purpose = "Memory for session scope."; + auto ptr = memory_allocator_->MallocMemory(purpose, size, device_id); + if (ptr == nullptr) { + GELOGE(ge::FAILED, "Malloc failed, no enough memory for size:%zu, session_id:%lu device_id:%u", size, + session_id, device_id); + return nullptr; + } + std::lock_guard lock(mutex_); + std::shared_ptr mem_ptr(ptr, [&](uint8_t *p) { (void)memory_allocator_->FreeMemory(p); }); + allocated_memory_[session_id].emplace_back(size, mem_ptr); + return ptr; +} + +Status SessionScopeMemAllocator::Free(uint64_t session_id, uint32_t device_id) { + GELOGI("Free session:%lu memory, device id:%u.", session_id, device_id); + std::lock_guard lock(mutex_); + auto it = allocated_memory_.find(session_id); + if (it == allocated_memory_.end()) { + GELOGW("Invalid session_id"); + return ge::PARAM_INVALID; + } + allocated_memory_.erase(it); + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::FreeAllMemory() { + GELOGI("Free all memory"); + std::lock_guard lock(mutex_); + for (auto &session_mem : allocated_memory_) { + session_mem.second.clear(); + } + allocated_memory_.clear(); +} +} // namespace ge diff --git a/ge/graph/manager/session_scope_mem_allocator.h b/ge/graph/manager/session_scope_mem_allocator.h new file mode 100644 index 00000000..3dbf3cb0 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.h @@ -0,0 +1,124 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
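SessionScopeMemAllocator above records one block list per session id and releases a whole session in a single call; each block's shared_ptr deleter hands the memory back to the underlying MemoryAllocator. A minimal usage sketch, assuming MemManager has already been initialized for RT_MEMORY_HBM; kWorkspaceSize and session_id are placeholder names:

auto &allocator = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM);
uint8_t *workspace = allocator.Malloc(kWorkspaceSize, session_id);  // recorded under session_id
// ... use the buffer for the lifetime of the session ...
(void)allocator.Free(session_id);  // drops every block recorded for this session at once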
+ */ + +#ifndef GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ +#define GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "graph/node.h" +#include "graph/manager/block_memory.h" +#include "runtime/mem.h" +#include "graph/manager/graph_mem_allocator.h" + +namespace ge { +class SessionScopeMemoryInfo { + public: + SessionScopeMemoryInfo(size_t size, const std::shared_ptr &ptr) : size(size), ptr(ptr) {} + SessionScopeMemoryInfo() = delete; + virtual ~SessionScopeMemoryInfo() = default; + + SessionScopeMemoryInfo(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return; + } + size = other.size; + ptr = other.ptr; + }; + + SessionScopeMemoryInfo &operator=(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return *this; + } + size = other.size; + ptr = other.ptr; + return *this; + }; + + private: + size_t size = 0; + std::shared_ptr ptr = nullptr; +}; + +class SessionScopeMemAllocator { + public: + explicit SessionScopeMemAllocator(rtMemType_t memory_type); + + SessionScopeMemAllocator(const SessionScopeMemAllocator &) = delete; + + SessionScopeMemAllocator &operator=(const SessionScopeMemAllocator &) = delete; + + virtual ~SessionScopeMemAllocator() = default; + + /// + /// @ingroup ge_graph + /// @brief caching allocator init + /// @param [in] device id + /// @return Status of init + /// + Status Initialize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize, release all memory + /// @return void + /// + void Finalize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief malloc memory + /// @param [in] size memory size + /// @param [in] session_id session id + /// @param [in] device id + /// @return memory address + /// + uint8_t *Malloc(size_t size, uint64_t session_id, uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief free memory + /// @param [in] session_id session id + /// @param [in] device_id device id + /// @return Status result of function + /// + Status Free(uint64_t session_id, uint32_t device_id = 0); + + private: + void FreeAllMemory(); + + private: + rtMemType_t memory_type_; + + // device memory allocator + MemoryAllocator *memory_allocator_; + + // lock around all operations + mutable std::recursive_mutex mutex_; + + // allocated blocks by memory pointer + std::unordered_map> allocated_memory_; +}; +} // namespace ge +#endif // GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index bd95d0c5..516d06d1 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -31,6 +31,7 @@ #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" +#include "graph/common/omg_util.h" #define REQUIRE(cond, ...) \ do { \ @@ -45,6 +46,11 @@ #define REQUIRE_GRAPH_SUCCESS(cond, ...) 
REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) namespace ge { +namespace { +const std::set kControlFlowOps{ + STREAMACTIVE, STREAMSWITCH, STREAMMERGE, ENTER, REFENTER, LOOPCOND, NEXTITERATION, REFNEXTITERATION, EXIT, REFEXIT +}; +} using Cluster = DynamicShapePartitioner::Cluster; using ClusterPtr = std::shared_ptr; @@ -273,7 +279,7 @@ Status DynamicShapePartitioner::InitClusters() { auto cluster = MakeShared(rank++, type, node, this); REQUIRE_NOT_NULL(cluster, "Failed new memory for cluster."); node_2_cluster_[node] = cluster; - if (cluster->IsUnknownShape()) { + if (cluster->IsUnknownShape() && !cluster->IsControlFlow()) { ordered_cluster_.push_back(cluster); } // Already sorted topologically, so access to the parent cluster is safe @@ -347,11 +353,11 @@ static std::string ToString(const std::vector &clusters) { void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters for (const auto &cluster : ordered_cluster_) { - if (cluster->IsIndependent()) { + if (cluster->IsIndependent() || cluster->IsControlFlow()) { continue; } for (const auto &in_cluster : cluster->Inputs()) { - if (!in_cluster->IsUnknownShape()) { + if (!in_cluster->IsUnknownShape() || in_cluster->IsControlFlow()) { continue; } auto merged_clusters = cluster->MergeAllPathFrom(in_cluster); @@ -545,17 +551,6 @@ Status DynamicShapePartitioner::IsUnknownShapeGraph(ComputeGraphPtr graph, bool return SUCCESS; } -bool DynamicShapePartitioner::IsUnknownShapeTensor(const GeTensorDesc &tensor) { - const static int kUnknowShape = -1; - const static int kUnknowRank = -2; - for (auto dim_size : tensor.GetShape().GetDims()) { - if (dim_size == kUnknowShape || dim_size == kUnknowRank) { - return true; - } - } - return false; -} - std::string Cluster::DebugString() const { std::stringstream ss; switch (type_) { @@ -612,6 +607,14 @@ bool Cluster::IsRefVariable() const { } return false; } + +bool Cluster::IsControlFlow() const { + const auto &op_desc = nodes_[0]->GetOpDesc(); + bool is_ctrl_flow = kControlFlowOps.count(op_desc->GetType()) > 0 && op_desc->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); + GELOGD("[%s] %s rts control flow Op ", op_desc->GetName().c_str(), is_ctrl_flow ? 
"Is" : "Not"); + return is_ctrl_flow; +} + void Cluster::AddInput(ClusterPtr in) { if (std::find(in_clusters_.begin(), in_clusters_.end(), in) != in_clusters_.end()) return; in_clusters_.insert(in_clusters_.end(), in); @@ -732,29 +735,33 @@ std::vector Cluster::Outputs() const { return out_clusters_; }; std::vector Cluster::Nodes() const { return nodes_; }; void Cluster::AddFrameInput(InDataAnchorPtr anchor) { - inputs_index_[anchor] = inputs_.size(); - inputs_.push_back(anchor); -}; + if (anchor != nullptr && anchor->GetPeerOutAnchor() != nullptr) { + inputs_index_[anchor] = inputs_.size(); + inputs_.push_back(anchor); + } +} void Cluster::AddFrameOutput(OutDataAnchorPtr anchor) { - outputs_index_[anchor] = outputs_.size(); - outputs_.push_back(anchor); -}; + if (anchor != nullptr) { + outputs_index_[anchor] = outputs_.size(); + outputs_.push_back(anchor); + } +} InDataAnchorPtr Cluster::GetFrameInDataAnchor(InDataAnchorPtr anchor) { return partition_node_->GetInDataAnchor(static_cast(inputs_index_[anchor])); -}; +} OutDataAnchorPtr Cluster::GetFrameOutDataAnchor(OutDataAnchorPtr anchor) { return partition_node_->GetOutDataAnchor(static_cast(outputs_index_[anchor])); -}; +} InControlAnchorPtr Cluster::GetFrameInControlAnchor() { return partition_node_->GetInControlAnchor(); }; OutControlAnchorPtr Cluster::GetFrameOutControlAnchor() { return partition_node_->GetOutControlAnchor(); }; Status Cluster::BuildFrame() { - if (IsUnknownShape() || IsKnownShape() || IsInputNode()) { + if ((IsUnknownShape() || IsKnownShape() || IsInputNode()) && !IsControlFlow()) { return BuildPartitionFrame(); } else { auto node = nodes_.front(); @@ -889,7 +896,7 @@ Status Cluster::CombinePartitionFrame() { } Status Cluster::BuildPartitionSubgraph() { - if (IsData() || IsNetOutput() || IsIndependent()) { + if (IsData() || IsNetOutput() || IsIndependent() || IsControlFlow()) { return SUCCESS; } int64_t parent_node_index = 0; diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index e8408ff9..93f86d82 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -47,6 +47,7 @@ class DynamicShapePartitioner { bool IsUnknownShape() const; bool IsIndependent() const; bool IsNetOutput() const; + bool IsControlFlow() const; std::vector> Inputs() const; std::vector> Outputs() const; bool IsInputNode() const; @@ -151,7 +152,6 @@ class DynamicShapePartitioner { Status CollectSpreadUnknownShapeNodes(NodePtr node); Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow); - bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor); Status CtrlEdgeTransfer(); ge::ComputeGraphPtr root_graph_; // The original graph to partition std::unordered_map> node_2_cluster_; // Record nodes and the cluster it belongs to diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 24b84fa0..1bc6e9c9 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -182,10 +182,11 @@ Status AtomicAddrCleanPass::HandleLoopGraph(ComputeGraphPtr &graph, const vector } Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec) { - GELOGD("Not loop graph and unknown graph. It will insert only 1 clean node."); + GELOGD("Not loop graph and unknown graph. 
It will insert atomic clean nodes."); vector common_atomic_nodes; - auto ret = HandleDispersedAtomicNodes(graph, atomic_node_vec, common_atomic_nodes); + vector dispersed_atomic_nodes; + auto ret = HandleDispersedAtomicNodes(graph, atomic_node_vec, common_atomic_nodes, dispersed_atomic_nodes); if (ret != SUCCESS) { GELOGE(ret, "Handle dispersed atomic nodes failed, graph name is %s.", graph->GetName().c_str()); return ret; @@ -222,7 +223,7 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect } } } - return LinkToPotentialPrecedenceNode(graph, clean_addr_node); + return LinkToPotentialPrecedenceNode(graph, clean_addr_node, dispersed_atomic_nodes); } // Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean @@ -231,7 +232,8 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect // edges from atomic clean node to the nodes that may be the first node on each stream. Generally, the first nodes on // each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the // successors of Data/Variable. -Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) { +Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node, + const vector &dispersed_atomic_nodes) { GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.", atomic_clean_node->GetName().c_str()); auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); @@ -246,6 +248,10 @@ Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph auto second_nodes = node->GetOutAllNodes(); for (const auto &second_node : second_nodes) { GE_CHECK_NOTNULL(second_node); + if ((std::find(dispersed_atomic_nodes.begin(), dispersed_atomic_nodes.end(), second_node) != + dispersed_atomic_nodes.end()) || (second_node->GetType() == NETOUTPUT)) { + continue; + } auto in_ctrl_anchor = second_node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_ctrl_anchor); if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) { @@ -260,7 +266,8 @@ Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph Status AtomicAddrCleanPass::HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, - std::vector &common_atomic_nodes) { + std::vector &common_atomic_nodes, + vector &dispersed_atomic_nodes) { int index = 0; for (const auto &node : atomic_node_vec) { vector node_anchors_connect_netoutput; @@ -280,7 +287,7 @@ Status AtomicAddrCleanPass::HandleDispersedAtomicNodes(ComputeGraphPtr &graph, oss << node_name << "_" << index; node_name = oss.str(); dispersed_node_op_desc->SetName(node_name); - GELOGD("Inserted dispersed atomic clean node name is %s", node_name.c_str()); + GELOGD("Inserted dispersed atomic clean node [%s] before [%s]", node_name.c_str(), node->GetName().c_str()); ++index; Status ret = LinkToAtomicNode(node, dispersed_clean_addr_node); if (ret != SUCCESS) { @@ -288,6 +295,7 @@ Status AtomicAddrCleanPass::HandleDispersedAtomicNodes(ComputeGraphPtr &graph, node->GetName().c_str(), dispersed_clean_addr_node->GetName().c_str()); return ret; } + dispersed_atomic_nodes.emplace_back(node); } else { common_atomic_nodes.emplace_back(node); } diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index 96147fa2..0d0b8fff 100755 --- 
a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -73,7 +73,8 @@ class AtomicAddrCleanPass : public GraphPass { * @param atomic_clean_node * @return */ - Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node); + Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node, + const std::vector &dispersed_atomic_nodes); /** * Check if this node is atomic op. @@ -90,7 +91,8 @@ class AtomicAddrCleanPass : public GraphPass { Status CompileUnknownGraphOp(const vector &atomic_node_vec); Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, - std::vector &common_atomic_nodes); + std::vector &common_atomic_nodes, + std::vector &dispersed_atomic_nodes); bool CheckAtomicFromOpsKernel(const NodePtr &node); diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 0868b729..2f94c6ad 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -36,6 +36,8 @@ struct DuringPassNodeSets { std::unordered_set nodes_re_pass; std::unordered_set nodes_re_pass_immediately; std::unordered_set nodes_last; + std::unordered_set nodes_suspend; + std::unordered_set nodes_resume; }; void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque &input_edge_nodes, @@ -55,8 +57,15 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque &i } } +bool IsAllInNodesAlive(const Node::Vistor &nodes, const std::unordered_set &nodes_suspend) { + return !std::any_of(nodes.begin(), nodes.end(), [&](const NodePtr &n) { return nodes_suspend.count(n) > 0; }); +} + void AddNextIterNodes(const Node::Vistor &nodes, std::deque &nodes_to_pass, - std::unordered_set &nodes_seen, std::unordered_set &nodes_last) { + DuringPassNodeSets &during_pass_node_set) { + auto &nodes_seen = during_pass_node_set.nodes_seen; + const auto &nodes_last = during_pass_node_set.nodes_last; + const auto &nodes_suspend = during_pass_node_set.nodes_suspend; for (auto &node : nodes) { if (node == nullptr) { continue; @@ -64,16 +73,57 @@ void AddNextIterNodes(const Node::Vistor &nodes, std::deque &n if (nodes_last.count(node) != 0) { continue; } + if (nodes_suspend.count(node) > 0) { + GELOGD("The node %s has suspend by pass, skip it.", node->GetName().c_str()); + continue; + } + bool all_in_nodes_alive = IsAllInNodesAlive(node->GetInAllNodes(), nodes_suspend); bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); - if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { + if (all_in_nodes_seen && all_in_nodes_alive && nodes_seen.insert(node.get()).second) { nodes_to_pass.push_back(node); } } } +void AddRepassNodes(DuringPassNodeSets &during_pass_node_set, std::deque &nodes) { + for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) { + GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str()); + nodes.push_front(node); + } + during_pass_node_set.nodes_re_pass_immediately.clear(); +} + +void AddResumeNodes(DuringPassNodeSets &during_pass_node_set, std::deque &nodes) { + for (auto &node : during_pass_node_set.nodes_resume) { + const auto &it = during_pass_node_set.nodes_suspend.find(node); + if (it != during_pass_node_set.nodes_suspend.end()) { + during_pass_node_set.nodes_suspend.erase(node); + GELOGD("The node %s resumed by pass.", node->GetName().c_str()); + nodes.push_back(node); + } else { + GELOGW("The node %s not suspend, drop from resumed", node->GetName().c_str()); + } + } + 
during_pass_node_set.nodes_resume.clear(); +} + +void PushToSuspendNodes(DuringPassNodeSets &during_pass_node_set, const std::string &pass_name, + const std::unordered_set &nodes_suspend, + const std::unordered_set &nodes_resume) { + for (const auto &node : nodes_suspend) { + GELOGD("The iteration suspend of node %s has been set by pass %s", node->GetName().c_str(), pass_name.c_str()); + during_pass_node_set.nodes_suspend.emplace(node); + } + + for (const auto &node : nodes_resume) { + GELOGD("The iteration suspend of node %s has been resumed by pass %s", node->GetName().c_str(), pass_name.c_str()); + during_pass_node_set.nodes_resume.emplace(node); + } +} + void PushToRePassIfSeen(NodePtr &node, const std::pair &name_to_pass, - std::unordered_set &nodes_seen, std::unordered_set &nodes_to_re_pass, + std::unordered_set &nodes_seen, const std::unordered_set &nodes_to_re_pass, std::unordered_set &nodes_re_pass) { for (const auto &node_to_re_pass : nodes_to_re_pass) { if (node_to_re_pass == nullptr) { @@ -113,15 +163,18 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNo return result; } - auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); + const auto &nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass, during_pass_node_set.nodes_re_pass); - auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately(); + const auto &nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately(); PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately, during_pass_node_set.nodes_re_pass_immediately); - auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); + PushToSuspendNodes(during_pass_node_set, name_to_pass.first, + name_to_pass.second->GetNodesSuspend(), name_to_pass.second->GetNodesResume()); + + const auto &nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); if (nodes_deleted_by_pass.count(node) > 0) { GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(), @@ -221,8 +274,13 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); continue; } + if (during_pass_node_set.nodes_suspend.count(node) > 0) { + GELOGD("The node %s has been added to suspend-iteration nodes list, the iteration of it will be suspend.", + node->GetName().c_str()); + continue; + } - AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last); + AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set); auto ret = RunPasses(node, names_to_passes, during_pass_node_set); if (ret != SUCCESS) { @@ -253,11 +311,9 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { // should be called each time at the begin of the iteration ClearOption(names_to_passes); } - for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) { - GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str()); - nodes.push_front(node); - } - during_pass_node_set.nodes_re_pass_immediately.clear(); + + AddRepassNodes(during_pass_node_set, nodes); + AddResumeNodes(during_pass_node_set, nodes); } for (auto &node : during_pass_node_set.nodes_last) { diff --git 
a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index a9f4f000..d0f125b2 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -51,11 +51,15 @@ class BaseNodePass { virtual ~BaseNodePass() = default; - std::unordered_set GetNodesNeedRePass() { return nodes_need_re_pass_; } + const std::unordered_set &GetNodesNeedRePass() { return nodes_need_re_pass_; } - std::unordered_set GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; } + const std::unordered_set &GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; } - std::unordered_set GetNodesDeleted() { return nodes_deleted_; } + const std::unordered_set &GetNodesDeleted() { return nodes_deleted_; } + + const std::unordered_set &GetNodesSuspend() { return nodes_suspend_; } + + const std::unordered_set &GetNodesResume() { return nodes_resume_; } void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; } @@ -65,6 +69,8 @@ class BaseNodePass { nodes_need_re_pass_.clear(); nodes_deleted_.clear(); nodes_need_re_pass_immediately_.clear(); + nodes_suspend_.clear(); + nodes_resume_.clear(); } protected: @@ -80,7 +86,7 @@ class BaseNodePass { /// optimized by other passes, call this function. /// @param node /// - void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); } + void AddRePassNode(const NodePtr &node) { nodes_need_re_pass_.insert(node); } /// /// Add a node to be optimized immediately again. If you add a new node to the graph, or @@ -88,13 +94,13 @@ class BaseNodePass { /// optimized by other passes, call this function. /// @param node /// - void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); } + void AddImmediateRePassNode(const NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); } /// /// Add a node and it's input/output data nodes to be optimized again. /// @param node /// - void AddRePassNodesWithInOut(NodePtr &node) { + void AddRePassNodesWithInOut(const NodePtr &node) { AddRePassNode(node); auto out_nodes = node->GetOutNodes(); for (auto &out_node : out_nodes) { @@ -116,12 +122,34 @@ class BaseNodePass { /// void AddNodeDeleted(const NodePtr &node) { nodes_deleted_.insert(node); } + /// + /// If you suspend a node from the graph, especially following node. The remain + /// iterate passes will stop process on the suspend node(if it can be + /// reached by edge connections) till the last one. Obviously it is a waste of + /// time. You can add the suspend nodes by calling this function, to stop the + /// next iterations. + /// @param node + /// + void AddNodeSuspend(const NodePtr &node) { nodes_suspend_.insert(node); } + + /// + /// If you resume a node from the graph, especially following node. The remain + /// iterate passes will continue process on the resume node(if it can be + /// reached by edge connections) till the last one. + /// You can add the resume nodes by calling this function, to resume the + /// next iterations. 
+ /// @param node + /// + void AddNodeResume(const NodePtr &node) { nodes_resume_.insert(node); } + bool OptionExists(NodePassOption option) { return options_.count(option) > 0; } private: std::unordered_set nodes_need_re_pass_; std::unordered_set nodes_need_re_pass_immediately_; std::unordered_set nodes_deleted_; + std::unordered_set nodes_suspend_; + std::unordered_set nodes_resume_; std::map options_; }; diff --git a/ge/graph/passes/global_step_insert_pass.cc b/ge/graph/passes/global_step_insert_pass.cc index 9fc1d066..d702e758 100755 --- a/ge/graph/passes/global_step_insert_pass.cc +++ b/ge/graph/passes/global_step_insert_pass.cc @@ -26,6 +26,11 @@ #include "common/ge/ge_util.h" #include "graph/manager/graph_var_manager.h" #include "graph/passes/pass_utils.h" +#include "graph/ge_context.h" + +namespace { +const char *const kFlagOff = "0"; +} // namespace namespace ge { NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, @@ -72,6 +77,13 @@ NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, } Status GlobalStepInsertPass::Run(ComputeGraphPtr compute_graph) { + // run_flag off means offline, no need insert global step node which type is variable + std::string run_flag; + if (ge::GetContext().GetOption(ge::RUN_FLAG, run_flag) == GRAPH_SUCCESS && run_flag == kFlagOff) { + GELOGI("compute_graph [%u] [%s] skip insert global step", compute_graph->GetGraphID(), + compute_graph->GetName().c_str()); + return SUCCESS; + } NodePtr output_node = compute_graph->FindFirstNodeMatchType(NETOUTPUT); if (output_node == nullptr) { GELOGD("Node type %s can't be found in graph %u", NETOUTPUT, compute_graph->GetGraphID()); diff --git a/ge/graph/passes/infershape_pass.cc b/ge/graph/passes/infershape_pass.cc index 728f5512..cb649240 100755 --- a/ge/graph/passes/infershape_pass.cc +++ b/ge/graph/passes/infershape_pass.cc @@ -17,11 +17,13 @@ #include "graph/passes/infershape_pass.h" #include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" -#include "framework/common/ge_inner_error_codes.h" #include "analyzer/analyzer.h" #include "framework/common/util.h" #include "graph/shape_refiner.h" #include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/common/omg_util.h" +#include "graph/debug/ge_attr_define.h" #include "utils/tensor_utils.h" #include "utils/type_utils.h" @@ -94,8 +96,10 @@ Status InferShapePass::Run(NodePtr &node) { GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. 
node: %s", node->GetName().c_str()); return GE_GRAPH_INFERSHAPE_FAILED; } + + GE_CHK_STATUS_RET_NOLOG(RePassLoopNode(node)); bool need_repass = false; - auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "_need_infer_again", need_repass); + auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_NEED_INFER_AGAIN, need_repass); if (has_attr) { if (!OptionExists(kOptimizeAfterSubGraph)) { return SUCCESS; @@ -105,9 +109,65 @@ Status InferShapePass::Run(NodePtr &node) { GELOGD("Node %s need repass immediately.", node->GetName().c_str()); } else { // clear attr on while - node->GetOpDesc()->DelAttr("_need_infer_again"); + node->GetOpDesc()->DelAttr(ATTR_NAME_NEED_INFER_AGAIN); } } return SUCCESS; } + +Status InferShapePass::RePassLoopNode(const NodePtr &node) { + const auto RePassNode = [&](const std::set &re_pass_types) { + for (auto &n : node->GetOutDataNodes()) { + GE_CHECK_NOTNULL(n); + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(n, node_type), "Get original node type failed."); + if (re_pass_types.count(node_type) > 0) { + AddImmediateRePassNode(n); + (void)AttrUtils::SetBool(n->GetOpDesc(), ATTR_NAME_NEED_INFER_AGAIN, false); + GELOGD("Node %s need repass immediately after %s.", n->GetName().c_str(), node->GetName().c_str()); + } + } + return SUCCESS; + }; + + const auto ExProcNode = [&](const std::set &proc_types, + const std::function &proc_func, + const std::string &info) { + for (auto &n : node->GetOutDataNodes()) { + GE_CHECK_NOTNULL(n); + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(n, node_type), "Get original node type failed."); + if (proc_types.count(node_type) > 0) { + proc_func(this, n); + GELOGD("Node %s %s after %s.", n->GetName().c_str(), info.c_str(), node->GetName().c_str()); + } + } + return SUCCESS; + }; + + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(node, node_type), "Get original node type failed."); + if (kNextIterationOpTypes.count(node_type) > 0) { + return RePassNode(kMergeOpTypes); // Re-Pass Merge + } + + if (kMergeOpTypes.count(node_type) > 0) { + if (node->GetOpDesc()->HasAttr(ATTR_NAME_NEED_INFER_AGAIN)) { + node->GetOpDesc()->DelAttr(ATTR_NAME_NEED_INFER_AGAIN); + return RePassNode(kSwitchOpTypes); // Re-Pass Switch + } + return SUCCESS; + } + + if (kSwitchOpTypes.count(node_type) > 0) { + if (node->GetOpDesc()->HasAttr(ATTR_NAME_NEED_INFER_AGAIN)) { + node->GetOpDesc()->DelAttr(ATTR_NAME_NEED_INFER_AGAIN); + return ExProcNode(kExitOpTypes, &InferShapePass::AddNodeResume, "need resume"); // Resume Exit + } else { + return ExProcNode(kExitOpTypes, &InferShapePass::AddNodeSuspend, "need suspend"); // Suspend Exit + } + } + + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/passes/infershape_pass.h b/ge/graph/passes/infershape_pass.h index 30cf0472..9c5d432d 100644 --- a/ge/graph/passes/infershape_pass.h +++ b/ge/graph/passes/infershape_pass.h @@ -30,6 +30,9 @@ class InferShapePass : public BaseNodePass { /// @author /// Status Run(ge::NodePtr &node) override; + + private: + Status RePassLoopNode(const NodePtr &node); }; } // namespace ge #endif // GE_GRAPH_PASSES_INFERSHAPE_PASS_H_ diff --git a/ge/graph/passes/input_output_connection_identify_pass.cc b/ge/graph/passes/input_output_connection_identify_pass.cc index 6e2b3049..e27c2e1f 100644 --- a/ge/graph/passes/input_output_connection_identify_pass.cc +++ b/ge/graph/passes/input_output_connection_identify_pass.cc @@ -48,8 +48,6 @@ Status InputOutputConnectionIdentifyPass::Run(ComputeGraphPtr graph) { } if (graph->GetParentGraph() != nullptr) { - 
REPORT_INNER_ERROR("E19999", "Param graph's parent graph is nullptr, " - "check invalid"); GELOGD("Current graph %s is a subgraph, skip identification of nodes that connect to input and output.", graph->GetName().c_str()); return SUCCESS; diff --git a/ge/graph/passes/isolated_op_remove_pass.cc b/ge/graph/passes/isolated_op_remove_pass.cc deleted file mode 100644 index 5c9093e9..00000000 --- a/ge/graph/passes/isolated_op_remove_pass.cc +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/isolated_op_remove_pass.h" - -#include "common/debug/log.h" -#include "common/types.h" -#include "common/util.h" - -namespace ge { -Status IsolatedOpRemovePass::Run(ge::ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - for (NodePtr &node_ptr : graph->GetDirectNode()) { - GE_IF_BOOL_EXEC(node_ptr->GetOpDesc() == nullptr, continue); - if (node_ptr->GetInDataNodes().size() == 0 && node_ptr->GetOutAllNodes().size() == 0 && - !(node_ptr->GetOpDesc()->HasAttr(TO_BE_OUTPUT))) { - GE_RETURN_WITH_LOG_IF_ERROR(graph->RemoveNode(node_ptr), "remove graph node [%s] fail", - node_ptr->GetOpDesc()->GetName().c_str()); - } - } - - return SUCCESS; -} -} // namespace ge diff --git a/ge/graph/passes/isolated_op_remove_pass.h b/ge/graph/passes/isolated_op_remove_pass.h deleted file mode 100755 index 3b7fe7d1..00000000 --- a/ge/graph/passes/isolated_op_remove_pass.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PASSES_ISOLATED_OP_REMOVE_PASS_H_ -#define GE_GRAPH_PASSES_ISOLATED_OP_REMOVE_PASS_H_ - -#include "inc/graph_pass.h" - -namespace ge { -class IsolatedOpRemovePass : public GraphPass { - public: - Status Run(ge::ComputeGraphPtr graph); -}; -} // namespace ge -#endif // GE_GRAPH_PASSES_ISOLATED_OP_REMOVE_PASS_H_ diff --git a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc new file mode 100644 index 00000000..6729a647 --- /dev/null +++ b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc @@ -0,0 +1,141 @@ +/** + * Copyright 2020-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
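Looking back at the InferShapePass change above, RePassLoopNode wires while-loop control nodes into the new re-pass/suspend/resume machinery. A compact summary of the rules it applies, taken from that hunk:

// NextIteration just inferred -> immediately re-pass its Merge outputs.
// Merge with ATTR_NAME_NEED_INFER_AGAIN -> clear the attr, immediately re-pass its Switch outputs.
// Switch with ATTR_NAME_NEED_INFER_AGAIN -> clear the attr, resume its Exit outputs.
// Switch without the attr -> suspend its Exit outputs for later iterations.
// Any other output type is left to the normal pass queue.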
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mark_force_unknown_for_cond_pass.h" + +#include + +#include "graph/common/omg_util.h" + +namespace ge { +namespace { +const std::set kMergeOpTypes{ MERGE, REFMERGE }; + +const std::set kSwitchOpTypes{ SWITCH, REFSWITCH }; + +const std::set kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; + +inline bool IsMergeInLoop(const NodePtr &node) { + std::string node_type; + (void)GetOriginalType(node, node_type); + return kLoopMergeInputs.count(node_type) > 0; +} +} + +Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { + GELOGD("MarkForceUnknownForCondPass Enter"); + std::map> switch_groups; + for (const auto &node : graph->GetDirectNode()) { + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(node, node_type), "Get original type failed."); + if (kMergeOpTypes.count(node_type) == 0) { + continue; + } + + const auto &all_in_nodes = node->GetInDataNodes(); + if (std::any_of(all_in_nodes.begin(), all_in_nodes.end(), IsMergeInLoop)) { + continue; // LoopCond marked in NextIterationPass. + } + + MarkUnknownForSwitch(node, switch_groups[node]); + } + + MarkUnknownForSwitch(switch_groups); + GELOGD("MarkForceUnknownForCondPass Leave"); + return SUCCESS; +} + +/// +/// @brief Mark force unknown shape for Switch node +/// @param [in] merge node +/// @param [out] switch group +/// @return +/// +void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std::vector &switch_group) { + // Switch --> {Switch --> Merge} --> Merge + std::unordered_set nodes_seen; + std::queue> search_queue({{node, 0}}); + while (!search_queue.empty()) { + const auto dst_node = search_queue.front().first; + const auto dst_span = search_queue.front().second; + search_queue.pop(); + + // Switch --> Identity --> Constant + for (const auto &in_node : dst_node->GetInControlNodes()) { + if (nodes_seen.count(in_node) > 0) { + GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); + continue; + } + nodes_seen.insert(in_node); + + if (in_node->GetType() == IDENTITY) { + GELOGD("Travel node: %s, In control: %s, span is: %u", dst_node->GetName().c_str(), + in_node->GetName().c_str(), dst_span); + search_queue.push({in_node, dst_span}); + } + } + + for (const auto &in_node : dst_node->GetInDataNodes()) { + if (nodes_seen.count(in_node) > 0) { + GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); + continue; + } + nodes_seen.insert(in_node); + + std::string node_type; + (void)GetOriginalType(in_node, node_type); + GELOGD("Travel node: %s, %s node: %s, span is: %u", dst_node->GetName().c_str(), node_type.c_str(), + in_node->GetName().c_str(), dst_span); + if (kSwitchOpTypes.count(node_type) > 0) { // Switch input node. + if (dst_span > 0) { + search_queue.push({in_node, dst_span - 1}); + } else { + switch_group.emplace_back(in_node); + } + } else if (kMergeOpTypes.count(node_type) > 0) { // Merge input node. 
+ search_queue.push({in_node, dst_span + 1}); + } else { + search_queue.push({in_node, dst_span}); + } + } + } +} + +/// +/// @brief Mark force unknown shape for Switch node +/// @param [in] switch groups +/// @return +/// +void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const std::map> &switch_groups) { + std::function callback = [](const NodePtr &n) { + return n->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); + }; + + for (const auto &group : switch_groups) { + const auto &node = group.first; + const auto &switch_group = group.second; + const auto &op_desc = node->GetOpDesc(); + if (IsUnknownShapeTensor(op_desc->GetOutputDesc(0)) || op_desc->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE) || + std::any_of(switch_group.begin(), switch_group.end(), callback)) { + GELOGI("Mark [%s] as force unknown shape", node->GetName().c_str()); + MarkForceUnknownShape(node, true); + for (const auto &n : switch_group) { + MarkForceUnknownShape(n, true); + } + } + } +} +} // namespace ge diff --git a/ge/graph/passes/mark_force_unknown_for_cond_pass.h b/ge/graph/passes/mark_force_unknown_for_cond_pass.h new file mode 100644 index 00000000..528a8fdc --- /dev/null +++ b/ge/graph/passes/mark_force_unknown_for_cond_pass.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
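The span counter in MarkUnknownForSwitch above tracks cond nesting while walking data edges backwards from a Merge, so that Switch nodes of an inner cond are not grouped with the outer Merge. An illustrative trace, with node names assumed:

// Merge(outer)                 span = 0
//   <- Merge(inner)            push with span = 1   // stepped into a nested cond
//     <- Switch(inner)         span > 0, push with span = 0, keep walking outwards
//       <- Switch(outer)       span = 0 -> added to Merge(outer)'s switch group
// Identity nodes reached over control edges are walked through without changing the span.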
+ */ + +#ifndef GE_GRAPH_PASSES_MARK_FORCE_UNKNOWN_FOR_COND_PASS_H_ +#define GE_GRAPH_PASSES_MARK_FORCE_UNKNOWN_FOR_COND_PASS_H_ + +#include "inc/graph_pass.h" + +namespace ge { +class MarkForceUnknownForCondPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph); + + private: + /// + /// @brief Mark force unknown shape for Switch node + /// @param [in] merge node + /// @param [out] switch group + /// @return + /// + void MarkUnknownForSwitch(const NodePtr &node, std::vector &switch_group); + + /// + /// @brief Mark force unknown shape for Switch node + /// @param [in] switch groups + /// @return + /// + void MarkUnknownForSwitch(const std::map> &switch_groups); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_MARK_FORCE_UNKNOWN_FOR_COND_PASS_H_ diff --git a/ge/graph/passes/merge_input_memcpy_pass.cc b/ge/graph/passes/merge_input_memcpy_pass.cc index 99f8712b..c4273584 100644 --- a/ge/graph/passes/merge_input_memcpy_pass.cc +++ b/ge/graph/passes/merge_input_memcpy_pass.cc @@ -15,6 +15,7 @@ */ #include "graph/passes/merge_input_memcpy_pass.h" + #include "common/ge/ge_util.h" #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" @@ -22,16 +23,19 @@ namespace ge { Status MergeInputMemcpyPass::Run(ComputeGraphPtr graph) { GELOGD("MergeInputMemcpyPass Enter"); + std::unordered_map> switch_groups; for (const auto &node : graph->GetDirectNode()) { std::string type; GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed."); if ((type != MERGE) && (type != REFMERGE)) { continue; } + GE_CHECK_NOTNULL(node->GetOpDesc()); GE_CHK_STATUS_RET(AddMemcpyAsyncNodes(graph, node, node->GetOpDesc()->HasAttr(ATTR_INSERT_BY_MBATCH)), "Merge add memcpy node failed."); } + GELOGD("MergeInputMemcpyPass Leave"); return SUCCESS; } diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index 8866831b..f3a437a6 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -69,51 +69,9 @@ Status MergeToStreamMergePass::Run(ComputeGraphPtr graph) { Status MergeToStreamMergePass::ReplaceMergeNode(const ComputeGraphPtr &graph, const NodePtr &merge_node) { OpDescPtr merge_op_desc = merge_node->GetOpDesc(); GE_CHECK_NOTNULL(merge_op_desc); + merge_op_desc->SetType(STREAMMERGE); - const std::string &node_name = merge_node->GetName(); - GELOGI("Create StreamMerge Op, name=%s.", node_name.c_str()); - OpDescPtr op_desc = MakeShared(node_name, STREAMMERGE); - if (op_desc == nullptr) { - REPORT_CALL_ERROR("E19999", "New GeTensor failed"); - GELOGE(FAILED, "Create op_desc failed, StreamMerge:%s.", node_name.c_str()); - return FAILED; - } - - for (const InDataAnchorPtr &in_anchor : merge_node->GetAllInDataAnchors()) { - GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(merge_op_desc->GetInputDesc(in_anchor->GetIdx())) == GRAPH_SUCCESS, - REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed", - op_desc->GetName().c_str(), op_desc->GetType().c_str()); - return FAILED, "Create StreamMerge op: add input desc failed."); - } - - for (const OutDataAnchorPtr &out_anchor : merge_node->GetAllOutDataAnchors()) { - GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(merge_op_desc->GetOutputDesc(out_anchor->GetIdx())) == GRAPH_SUCCESS, - REPORT_CALL_ERROR("E19999", "Add ouput desc to op:%s(%s) failed", - op_desc->GetName().c_str(), op_desc->GetType().c_str()); - return FAILED, "Create StreamMerge op: add output desc failed."); - } - - NodePtr stream_merge = graph->AddNode(op_desc); - GE_CHK_BOOL_EXEC(stream_merge 
!= nullptr, - REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s failed", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - graph->GetName().c_str()); - return FAILED, "Insert StreamMerge node failed."); - GE_CHK_STATUS_RET(MoveEdges(merge_node, stream_merge), "Move edges failed."); - bypass_nodes_.insert(merge_node); - - if (merge_op_desc->HasAttr(ATTR_NAME_NEXT_ITERATION)) { - std::string next_iteration_name; - GE_IF_BOOL_EXEC(!AttrUtils::GetStr(merge_op_desc, ATTR_NAME_NEXT_ITERATION, next_iteration_name), - REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) failed", - ATTR_NAME_NEXT_ITERATION.c_str(), - merge_op_desc->GetName().c_str(), merge_op_desc->GetType().c_str()); - GELOGE(INTERNAL_ERROR, "Get ATTR_NAME_NEXT_ITERATION failed"); - return INTERNAL_ERROR); - GE_CHK_STATUS_RET(SetNextIteration(stream_merge, next_iteration_name), "Set next iteration failed"); - } - - return AddActiveNodes(graph, stream_merge); + return AddActiveNodes(graph, merge_node); } /// @@ -126,6 +84,8 @@ Status MergeToStreamMergePass::AddActiveNodes(const ComputeGraphPtr &graph, cons GE_CHK_BOOL_EXEC(node != nullptr, REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid"); return FAILED, "Param of pre node is null."); + bool force_unknown = node->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); + MarkForceUnknownShape(node, force_unknown); for (const InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); @@ -142,6 +102,7 @@ Status MergeToStreamMergePass::AddActiveNodes(const ComputeGraphPtr &graph, cons GELOGE(FAILED, "SetActiveLabelList for node %s failed.", active_node->GetName().c_str()); return FAILED; } + MarkForceUnknownShape(active_node, force_unknown); } return SUCCESS; diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index 9e1fe80a..8d4bcb66 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -42,6 +42,7 @@ const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; const char *const kGetNextName = "IteratorV2"; +const char *const kMbatchCaseName = "mbatch-switch-name"; } // namespace inline bool IsGetNextType(const NodePtr &node) { @@ -943,6 +944,12 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &node, size_t out_an } } (void)AttrUtils::SetListInt(node->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); + if (!AttrUtils::SetStr(node->GetOpDesc(), kMbatchCaseName, case_node_->GetName())) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed", + kMbatchCaseName, node->GetName().c_str(), node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to add switchn attr on data node %s", node->GetName().c_str()); + return INTERNAL_ERROR; + } GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, kDataOutIndex)); std::vector input_dims_str; diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index c52e6743..5f4fc4d0 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -140,6 +140,7 @@ Status NextIterationPass::FindWhileGroups() { GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); return INTERNAL_ERROR; } + 
loop_group_iter.second->switch_nodes.emplace_back(switch_node); if (loop_group_iter.second->loop_cond == nullptr) { loop_group_iter.second->loop_cond = loop_cond; } else if (loop_group_iter.second->loop_cond != loop_cond) { @@ -181,6 +182,12 @@ bool NextIterationPass::VerifyWhileGroup() { frame_name.c_str()); return false; } + + // Mark loop as unknown shape If any merge has unknown shape output. + const auto &op_desc = pair_iter.first->GetOpDesc(); + if (IsUnknownShapeTensor(op_desc->GetOutputDesc(0))) { + loop_group_iter.second->is_unknown_shape = true; // under check loop, cannot break. + } } } @@ -194,6 +201,7 @@ bool NextIterationPass::VerifyWhileGroup() { /// Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { for (const auto &loop_cond_iter : loop_group_map_) { + const LoopCondGroup &loop_group = *loop_cond_iter.second; const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName(); GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); @@ -215,6 +223,7 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { enter_active->GetName().c_str()); return INTERNAL_ERROR; } + MarkForceUnknownShape(enter_node, loop_group.is_unknown_shape); } for (const auto &pair : loop_cond_iter.second->merge_next_pairs) { @@ -243,6 +252,9 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); return INTERNAL_ERROR; } + + MarkForceUnknownShape(next_node, loop_group.is_unknown_shape); + MarkForceUnknownShape(merge_node, loop_group.is_unknown_shape); } if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || @@ -250,12 +262,39 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); return INTERNAL_ERROR; } + + MarkForceUnknownShape(loop_group.loop_cond, loop_group.is_unknown_shape); + MarkForceUnknownShape(enter_active, loop_group.is_unknown_shape); + MarkForceUnknownShape(next_active, loop_group.is_unknown_shape); + HandleSwitchExitNodes(loop_group); } return SUCCESS; } /// +/// @brief Mark force unknown for Exit node +/// @param [in] group of LoopCond +/// @return void +/// +void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group) { + if (!loop_group.is_unknown_shape) { + return; + } + + for (const auto &switch_node : loop_group.switch_nodes) { + MarkForceUnknownShape(switch_node, loop_group.is_unknown_shape); + for (const auto &node : switch_node->GetOutDataNodes()) { + std::string node_type; + (void)GetOriginalType(node, node_type); + if (node_type == EXIT || node_type == REFEXIT) { + MarkForceUnknownShape(node, loop_group.is_unknown_shape); + } + } + } +} + +/// /// @brief Create Active Node /// @param [in] graph /// @param [in] name diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index 3266254d..e8786516 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -20,10 +20,11 @@ #include "inc/graph_pass.h" struct LoopCondGroup { - LoopCondGroup() : loop_cond(nullptr) {} ge::NodePtr loop_cond; // LoopCond node std::vector enter_nodes; // Enter nodes std::vector> merge_next_pairs; // + std::vector switch_nodes; // Switch nodes + bool is_unknown_shape{false}; }; using LoopCondGroupPtr = std::shared_ptr; @@ -92,6 +93,13 @@ class NextIterationPass : public GraphPass { /// Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, NodePtr &target_node); + /// + 
/// @brief Mark force unknown for Exit node + /// @param [in] group of LoopCond + /// @return void + /// + void HandleSwitchExitNodes(const LoopCondGroup &loop_group); + // map std::unordered_map loop_group_map_; }; diff --git a/ge/graph/passes/remove_nodes_pass.cc b/ge/graph/passes/remove_nodes_pass.cc deleted file mode 100644 index c238f003..00000000 --- a/ge/graph/passes/remove_nodes_pass.cc +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "remove_nodes_pass.h" -#include "debug/ge_log.h" -#include "inc/framework/common/util.h" -#include "inc/graph/utils/node_utils.h" - -namespace ge { -Status RemoveNodesPass::Run(NodePtr &node) { - GE_CHECK_NOTNULL(node); - auto node_type = NodeUtils::GetNodeType(*node); - auto type_iter = remove_node_types_to_arg_.find(node_type); - if (type_iter != remove_node_types_to_arg_.end()) { - GELOGI("Remove node %s by type %s", node->GetName().c_str(), node_type.c_str()); - return IsolateAndDeleteNode(node, type_iter->second); - } - for (const auto &attr_name_to_arg : remove_node_attr_names_to_arg_) { - if (AttrUtils::HasAttr(node->GetOpDesc(), attr_name_to_arg.first)) { - GELOGI("Remove node %s by attr name %s", node->GetName().c_str(), attr_name_to_arg.first.c_str()); - return IsolateAndDeleteNode(node, attr_name_to_arg.second); - } - } - - return SUCCESS; -} -RemoveNodesPass &RemoveNodesPass::AddNodeType(const string &node_type, std::initializer_list arg) { - remove_node_types_to_arg_[node_type] = std::move(arg); - return *this; -} -RemoveNodesPass &RemoveNodesPass::AddAttrName(const string &attr_name, std::initializer_list arg) { - remove_node_attr_names_to_arg_[attr_name] = std::move(arg); - return *this; -} -} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/remove_nodes_pass.h b/ge/graph/passes/remove_nodes_pass.h deleted file mode 100644 index 1d4fced9..00000000 --- a/ge/graph/passes/remove_nodes_pass.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef GE_REMOVE_NODES_PASS_H_ -#define GE_REMOVE_NODES_PASS_H_ -#include "graph/passes/base_pass.h" - -namespace ge { -class RemoveNodesPass : public BaseNodePass { - public: - Status Run(NodePtr &node) override; - RemoveNodesPass &AddNodeType(const std::string &node_type, std::initializer_list arg = {0}); - RemoveNodesPass &AddAttrName(const std::string &attr_name, std::initializer_list arg = {0}); - - private: - std::map> remove_node_types_to_arg_; - std::map> remove_node_attr_names_to_arg_; -}; -} // namespace ge -#endif //GE_REMOVE_NODES_PASS_H_ diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc index 7a9d085b..ba12ba15 100644 --- a/ge/graph/passes/reshape_recovery_pass.cc +++ b/ge/graph/passes/reshape_recovery_pass.cc @@ -60,7 +60,7 @@ Status InsertReshapeIfNeed(const NodePtr &node) { node->GetName().c_str(), src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_node); GE_CHECK_NOTNULL(dst_node->GetOpDesc()); - auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); + auto dst_tensor = dst_node->GetOpDesc()->MutableInputDesc(dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_tensor); bool is_dynamic = false; const auto &src_tensor_dims = src_tensor->GetShape().GetDims(); @@ -71,6 +71,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) { dst_node->GetName().c_str()); is_dynamic = true; } + if (dst_node->GetType() == NETOUTPUT && is_dynamic) { + // NetOutput shape must be continuous when dynamic shape. + // Otherwise, there may be an error waiting for the shape refresh to time out during execution. + dst_tensor->SetShape(src_tensor->GetShape()); + continue; + } bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims && !is_dynamic; if (is_need_insert_reshape) { diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index 97d9926f..949fff41 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -369,6 +369,7 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr & GE_CHK_STATUS(GraphUtils::AddEdge(peer_cond_anchor, stream_switch->GetInDataAnchor(0)), "StreamSwitch node add cond edge failed."); + MarkForceUnknownShape(stream_switch, switch_node->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE)); return stream_switch; } @@ -487,6 +488,12 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) return FAILED; } + std::function callback = [](const NodePtr &n) { + return n->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); + }; + bool is_unknown_shape = std::any_of(same_cond_switch.begin(), same_cond_switch.end(), callback); + MarkForceUnknownShape(active_node, is_unknown_shape); + const std::string &cond_group = cond_node->GetName(); for (uint32_t i = 0; i < SWITCH_OUTPUT_NUM; ++i) { bool true_branch_flag = (i == SWITCH_TRUE_OUTPUT); @@ -515,6 +522,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) GE_CHK_STATUS(GraphUtils::AddEdge(cast_node->GetOutDataAnchor(0), stream_switch->GetInDataAnchor(0)), "Cast add data edge failed."); + MarkForceUnknownShape(stream_switch, is_unknown_shape); for (const NodePtr &node : switch_list) { GE_IF_BOOL_EXEC(node != stream_switch, { GE_CHK_STATUS(GraphUtils::RemoveEdge(peer_cond_anchor, node->GetInDataAnchor(0)), diff --git a/ge/graph/passes/unused_op_remove_pass.cc b/ge/graph/passes/unused_op_remove_pass.cc deleted file mode 
100644 index 41f7c828..00000000 --- a/ge/graph/passes/unused_op_remove_pass.cc +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/unused_op_remove_pass.h" -#include -#include -#include -#include -#include "common/debug/log.h" -#include "common/op/ge_op_utils.h" -#include "common/types.h" -#include "common/util.h" -#include "graph/utils/attr_utils.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/op_desc_utils.h" -#include "inc/pass_manager.h" -#include "graph/passes/isolated_op_remove_pass.h" - -using domi::SUCCESS; - -namespace ge { -const std::set kRemoveOpSet = {DROPOUT, PERMUTE, UNUSEDCONST, ASSERT}; -const std::set kOtherRemoveOpSet = {DROPOUT}; - -Status UnusedOpRemovePass::Run(ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - std::set remove_op_set; - vector nodes_to_be_deleted; - if (fmktype_ == TENSORFLOW) { - remove_op_set = kRemoveOpSet; - } else { - remove_op_set = kOtherRemoveOpSet; - } - - for (auto &node : graph->GetDirectNode()) { - GE_CHECK_NOTNULL(node->GetOpDesc()); - std::string op_type_str = node->GetOpDesc()->GetType(); - if (remove_op_set.count(op_type_str)) { - if (IsExceptions(node)) { - continue; - } - for (auto &out_anchor : node->GetAllOutDataAnchors()) { - for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - NodePtr dst_node = in_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(dst_node->GetOpDesc()); - int dst_index = in_anchor->GetIdx(); - std::vector list_bool; - GE_CHECK_NOTNULL(dst_node->GetOpDesc()); - list_bool = dst_node->GetOpDesc()->GetIsInputConst(); - GE_IF_BOOL_EXEC(list_bool.size() == 0, continue); - list_bool.erase(list_bool.begin() + dst_index); - dst_node->GetOpDesc()->SetIsInputConst(list_bool); - } - } - if (op_type_str == ASSERT) { - GE_CHK_STATUS_RET(CollectParentNode(graph, node, nodes_to_be_deleted), "remove node failed"); - } else { - GE_CHK_STATUS_RET(graph->RemoveNode(node), "remove node failed"); - } - } - } - for (auto &node : nodes_to_be_deleted) { - for (InDataAnchorPtr &inAnchor : node->GetAllInDataAnchors()) { - inAnchor->UnlinkAll(); - } - for (OutDataAnchorPtr &outAnchorPtr : node->GetAllOutDataAnchors()) { - outAnchorPtr->UnlinkAll(); - } - if (node->GetOutControlAnchor() != nullptr) { - node->GetOutControlAnchor()->UnlinkAll(); - } - GE_CHK_STATUS_RET(graph->RemoveNode(node), "remove node:%s failed", node->GetName().c_str()); - } - - return SUCCESS; -} - -Status UnusedOpRemovePass::CollectParentNode(const ComputeGraphPtr &graph, const NodePtr &node, - vector &node_vec) { - GE_CHECK_NOTNULL(graph); - GE_CHECK_NOTNULL(node); - node_vec.push_back(node); - std::queue node_queue; - - for (auto &src_node : node->GetInDataNodes()) { - if (src_node->GetOutDataNodesSize() == 1) { - node_queue.push(src_node); - } - } - - while (!node_queue.empty()) { - NodePtr temp = node_queue.front(); - node_queue.pop(); - - for (auto &src_node : temp->GetInDataNodes()) { - if (src_node->GetOutDataNodesSize() 
== 1) { - node_queue.push(src_node); - } - } - node_vec.push_back(temp); - } - - return SUCCESS; -} - -bool UnusedOpRemovePass::IsExceptions(const NodePtr &node) { - GE_CHK_BOOL_EXEC(node != nullptr, return false, "node is nullptr"); - auto op_def = node->GetOpDesc(); - GE_CHK_BOOL_EXEC(op_def != nullptr, return false, "opdesc is nullptr"); - // permute optimised in permute_pass.cpp - if (op_def->GetType() == PERMUTE) { - GE_IF_BOOL_EXEC( - (node->GetInDataNodes().size() != 0 && - (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && - node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), - return false); - return true; - } - return false; -} -} // namespace ge diff --git a/ge/graph/passes/unused_op_remove_pass.h b/ge/graph/passes/unused_op_remove_pass.h deleted file mode 100755 index b9429cfd..00000000 --- a/ge/graph/passes/unused_op_remove_pass.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PASSES_UNUSED_OP_REMOVE_PASS_H_ -#define GE_GRAPH_PASSES_UNUSED_OP_REMOVE_PASS_H_ - -#include -#include -#include "framework/common/ge_types.h" -#include "inc/graph_pass.h" - -namespace ge { -class UnusedOpRemovePass : public GraphPass { - public: - explicit UnusedOpRemovePass(FrameworkType type) : fmktype_(type) {} - ~UnusedOpRemovePass() {} - Status Run(ge::ComputeGraphPtr graph) override; - bool IsExceptions(const ge::NodePtr &node); - - private: - Status CollectParentNode(const ge::ComputeGraphPtr &graph, const ge::NodePtr &node, - std::vector &node_vec); - std::vector v_remove_ops; - FrameworkType fmktype_; -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_UNUSED_OP_REMOVE_PASS_H_ diff --git a/ge/graph/passes/variable_format_pass.cc b/ge/graph/passes/variable_format_pass.cc deleted file mode 100644 index bd5300a5..00000000 --- a/ge/graph/passes/variable_format_pass.cc +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/variable_format_pass.h" -#include -#include -#include -#include "framework/common/debug/ge_log.h" - -namespace ge { -Status VariableFormatPass::Run(ge::ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - - for (auto &node : graph->GetDirectNode()) { - GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); - GE_IF_BOOL_EXEC(node->GetOpDesc()->GetType() != VARIABLE, continue); - - ge::NodePtr use_node = nullptr; - if (GetApplyMomentumOpByVariableInput(node, use_node)) { - GE_CHK_STATUS_RET(UpdateVariableOutFormat(node, use_node), "update variable out format failed"); - GE_CHK_STATUS_RET(UpdateApplyMomentumInputFormat(use_node), "update apply momentum input format failed"); - } - } - - return domi::SUCCESS; -} - -bool VariableFormatPass::GetApplyMomentumOpByVariableInput(const ge::NodePtr &var_node, ge::NodePtr &use_node) { - GE_IF_BOOL_EXEC(var_node == nullptr, return false); - - std::map> confirm_ops = {{"ApplyMomentum", {1}}}; - for (auto &out_anchor : var_node->GetAllOutDataAnchors()) { - for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - GE_IF_BOOL_EXEC(ConfirmUseOpAndIndexByAnchor(in_anchor, confirm_ops, use_node), return true); - } - } - - return false; -} - -bool VariableFormatPass::ConfirmUseOpAndIndexByAnchor(const ge::InDataAnchorPtr &in_anchor, - const map> &confirm_ops, - ge::NodePtr &use_node) { - GE_IF_BOOL_EXEC(in_anchor == nullptr, return false); - ge::NodePtr dst_node = in_anchor->GetOwnerNode(); - ge::OpDescPtr dst_op_desc = dst_node->GetOpDesc(); - GE_IF_BOOL_EXEC(dst_op_desc == nullptr, return false); - const string &dst_type = dst_op_desc->GetType(); - int input_index = in_anchor->GetIdx(); - - GELOGD("ConfirmUseOpAndIndex, var name %s, dst_type = %s, input index %d", dst_node->GetName().c_str(), - dst_type.c_str(), input_index); - - GE_IF_BOOL_EXEC(confirm_ops.count(dst_type) > 0, - GE_IF_BOOL_EXEC(confirm_ops.at(dst_type).count(input_index) > 0, use_node = dst_node; return true);); - return false; -} - -Status VariableFormatPass::UpdateVariableOutFormat(const ge::NodePtr &var_node, ge::NodePtr &use_node) { - GE_CHECK_NOTNULL(var_node); - GE_CHECK_NOTNULL(use_node); - ge::OpDescPtr op_desc_ptr = use_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc_ptr); - GE_CHECK_NOTNULL(use_node->GetInDataAnchor(0)); - GE_CHECK_NOTNULL(use_node->GetInDataAnchor(0)->GetPeerOutAnchor()); - NodePtr in_node = use_node->GetInDataAnchor(0)->GetPeerOutAnchor()->GetOwnerNode(); - if (in_node != nullptr) { - string in_op_type = in_node->GetType(); - if ((in_op_type == VARIABLE) && (in_node->GetOpDesc() != nullptr) && - (in_node->GetOpDesc()->MutableOutputDesc(0) != nullptr)) { - ge::Format format = in_node->GetOpDesc()->MutableOutputDesc(0)->GetFormat(); - ge::OpDescPtr cur_op_desc_ptr = var_node->GetOpDesc(); - if (cur_op_desc_ptr != nullptr) { - cur_op_desc_ptr->MutableOutputDesc(0)->SetFormat(format); - cur_op_desc_ptr->MutableOutputDesc(0)->SetOriginFormat(format); - } - } - } - return domi::SUCCESS; -} - -Status VariableFormatPass::UpdateApplyMomentumInputFormat(const ge::NodePtr &node) { - GE_CHECK_NOTNULL(node); - ge::OpDescPtr op_desc_ptr = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc_ptr); - GE_CHECK_NOTNULL(node->GetInDataAnchor(0)); - GE_CHECK_NOTNULL(node->GetInDataAnchor(0)->GetPeerOutAnchor()); - GE_CHECK_NOTNULL(op_desc_ptr->MutableInputDesc(0)); - GE_CHECK_NOTNULL(op_desc_ptr->MutableInputDesc(1)); - GE_CHECK_NOTNULL(op_desc_ptr->MutableOutputDesc(0)); - NodePtr in_node = node->GetInDataAnchor(0)->GetPeerOutAnchor()->GetOwnerNode(); 
- if (in_node != nullptr) { - string in_op_type = in_node->GetType(); - if ((in_op_type == VARIABLE) && (in_node->GetOpDesc() != nullptr)) { - ge::Format format = in_node->GetOpDesc()->MutableOutputDesc(0)->GetFormat(); - op_desc_ptr->MutableInputDesc(0)->SetFormat(format); - op_desc_ptr->MutableInputDesc(0)->SetOriginFormat(format); - op_desc_ptr->MutableInputDesc(1)->SetFormat(format); - op_desc_ptr->MutableInputDesc(1)->SetOriginFormat(format); - op_desc_ptr->MutableOutputDesc(0)->SetFormat(format); - op_desc_ptr->MutableOutputDesc(0)->SetOriginFormat(format); - } - } - return domi::SUCCESS; -} -} // namespace ge diff --git a/ge/graph/passes/variable_format_pass.h b/ge/graph/passes/variable_format_pass.h deleted file mode 100755 index e2c32903..00000000 --- a/ge/graph/passes/variable_format_pass.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PASSES_VARIABLE_FORMAT_PASS_H_ -#define GE_GRAPH_PASSES_VARIABLE_FORMAT_PASS_H_ - -#include -#include -#include -#include "graph/types.h" -#include "graph/utils/op_desc_utils.h" -#include "inc/graph_pass.h" - -namespace ge { -class VariableFormatPass : public GraphPass { - public: - Status Run(ge::ComputeGraphPtr graph) override; - - private: - bool GetApplyMomentumOpByVariableInput(const ge::NodePtr &var_node, ge::NodePtr &use_node); - - bool ConfirmUseOpAndIndexByAnchor(const ge::InDataAnchorPtr &in_anchor, - const map > &confirm_ops, ge::NodePtr &use_node); - - Status UpdateApplyMomentumInputFormat(const ge::NodePtr &node); - - Status UpdateVariableOutFormat(const ge::NodePtr &var_node, ge::NodePtr &use_node); -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_VARIABLE_FORMAT_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 2d06cd5d..4e9046e4 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -610,7 +610,7 @@ Status ModifyDataNetOutputFormatAndShape(OpDescPtr &op_desc, uint32_t index, For return SUCCESS; } -Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, NodePtr &switchn_node) { +Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, NodePtr &mbatch_node, int32_t &index) { is_dynamic_batch = false; std::string related_node_name; if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) { @@ -621,13 +621,17 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node data_node->GetName().c_str()); return INTERNAL_ERROR; } - for (const NodePtr &next_node : data_node->GetOutNodes()) { - if (next_node->GetName() == related_node_name) { - switchn_node = next_node; + + auto out_data_nodes_anchors = data_node->GetOutDataNodesAndAnchors(); + for (const auto &out_data_node_anchor : out_data_nodes_anchors) { + if (out_data_node_anchor.first->GetName() == related_node_name) { + mbatch_node = out_data_node_anchor.first; + index = 
out_data_node_anchor.second->GetIdx(); break; } } - if (switchn_node == nullptr) { + + if (mbatch_node == nullptr) { ErrorManager::GetInstance().ATCReportErrMessage( "E15002", {"opname", "value", "reason"}, {data_node->GetName(), related_node_name, "but can not find it on the graph"}); @@ -680,7 +684,7 @@ Status CheckIfNeedSetNdFormat(const NodePtr &node_ptr) { // In the dynamic shape process, transnode insertion by FE is advanced to the stage of whole // graph optimization, GE only sets the final data_type/format/shape information for variable, // data and netoutput, and no longer inserts the transnode. -Status ProcessInputDtDynShape(NodePtr &node_ptr, bool &is_dynamic_batch, NodePtr &switchn_node, DataType &dt_set) { +Status ProcessInputDtDynShape(NodePtr &node_ptr, NodePtr &switchn_node, DataType &dt_set) { GE_CHECK_NOTNULL(node_ptr); auto op_desc = node_ptr->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -713,19 +717,84 @@ Status ProcessInputDtDynShape(NodePtr &node_ptr, bool &is_dynamic_batch, NodePtr GELOGI("[Process][InputDynShape] Set input and output size of node [%s] success.", node_ptr->GetName().c_str()); } - if (is_dynamic_batch) { - GELOGI("The node [%s] dtype set fp16", switchn_node->GetName().c_str()); - auto switchn_op_desc = switchn_node->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_op_desc); - auto switchn_input = switchn_op_desc->MutableInputDesc(0); - GE_CHECK_NOTNULL(switchn_input); - switchn_input->SetDataType(dt_set); - for (uint32_t i = 0; i < switchn_node->GetAllOutDataAnchorsSize(); ++i) { - const GeTensorDescPtr &switchn_output = switchn_op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(switchn_output); - switchn_output->SetDataType(dt_set); + return SUCCESS; +} + +Status UpdateInputOutputDataType(NodePtr &mbatch_node, DataType &dt_set, int32_t index) { + auto mbatch_desc = mbatch_node->GetOpDesc(); + GE_CHECK_NOTNULL(mbatch_desc); + auto mbatch_input = mbatch_desc->MutableInputDesc(index); + GE_CHECK_NOTNULL(mbatch_input); + mbatch_input->SetDataType(dt_set); + + if (mbatch_node->GetType() == SWITCHN) { + for (uint32_t i = 0; i < mbatch_node->GetAllOutDataAnchorsSize(); ++i) { + const GeTensorDescPtr &mbatch_output = mbatch_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL(mbatch_output); + mbatch_output->SetDataType(dt_set); + } + } + + GELOGD("Update input and output data type of node[name: %s, type: %s, input index: %d] to %s.", + mbatch_node->GetName().c_str(), mbatch_node->GetType().c_str(), index, + TypeUtils::DataTypeToSerialString(dt_set).c_str()); + + return SUCCESS; +} + +Status UpdateSubgraphDataOfCase(NodePtr &mbatch_node, DataType &dt_set, int32_t index) { + if (mbatch_node->GetType() != CASE) { + return SUCCESS; + } + + auto subgraphs = NodeUtils::GetAllSubgraphs(*mbatch_node); + for (const auto &subgraph : subgraphs) { + GE_CHECK_NOTNULL(subgraph); + for (auto &sub_node : subgraph->GetDirectNode()) { + GE_CHECK_NOTNULL(sub_node); + if (sub_node->GetType() != DATA) { + continue; + } + + auto data_desc = sub_node->GetOpDesc(); + GE_CHECK_NOTNULL(data_desc); + int32_t parent_node_index = 0; + if (!AttrUtils::GetInt(data_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index) || + (parent_node_index != index)) { + continue; + } + + auto data_input = data_desc->MutableInputDesc(0); + GE_CHECK_NOTNULL(data_input); + data_input->SetDataType(dt_set); + auto data_output = data_desc->MutableOutputDesc(0); + GE_CHECK_NOTNULL(data_output); + data_output->SetDataType(dt_set); + GELOGD("Update input and output data type of node[name: %s, type: %s, parent_node_index: %d] in 
subgraph %s " + "to %s.", data_desc->GetName().c_str(), data_desc->GetType().c_str(), parent_node_index, + subgraph->GetName().c_str(), TypeUtils::DataTypeToSerialString(dt_set).c_str()); } } + + return SUCCESS; +} + +Status ProcessMbatchScene(NodePtr &mbatch_node, DataType &dt_set, int32_t index) { + GELOGI("The node [%s] dtype set fp16.", mbatch_node->GetName().c_str()); + if (UpdateInputOutputDataType(mbatch_node, dt_set, index) != SUCCESS) { + GELOGE(FAILED, "Update input and output data type of node[name: %s, type: %s] to %s failed.", + mbatch_node->GetName().c_str(), mbatch_node->GetType().c_str(), + TypeUtils::DataTypeToSerialString(dt_set).c_str()); + return FAILED; + } + + if (UpdateSubgraphDataOfCase(mbatch_node, dt_set, index) != SUCCESS) { + GELOGE(FAILED, "Update input and output data type of Data node[parent_node_index: %d] in subgraphs of " + "node[name: %s, type: %s] to %s failed.", index, mbatch_node->GetName().c_str(), + mbatch_node->GetType().c_str(), TypeUtils::DataTypeToSerialString(dt_set).c_str()); + return FAILED; + } + return SUCCESS; } @@ -786,21 +855,27 @@ Status ProcessDataNodeDynShape(NodePtr &node_ptr) { DataType dt_set = TypeUtils::SerialStringToDataType(set_dt_str); GELOGI("input_fp16 is found, the node name is %s.", node_ptr->GetName().c_str()); bool is_dynamic_batch = false; - NodePtr switchn_node = nullptr; - if (CheckIfDynamicBatchScene(node_ptr, is_dynamic_batch, switchn_node)) { + NodePtr mbatch_node = nullptr; + int32_t index = 0; + if (CheckIfDynamicBatchScene(node_ptr, is_dynamic_batch, mbatch_node, index)) { GELOGE(INTERNAL_ERROR, "CheckIfDynamicBatchScene failed"); return FAILED; } - if (ProcessInputDtDynShape(node_ptr, is_dynamic_batch, switchn_node, dt_set) != SUCCESS) { + if (ProcessInputDtDynShape(node_ptr, mbatch_node, dt_set) != SUCCESS) { GELOGE(INTERNAL_ERROR, "ProcessInputFP16 failed"); return FAILED; } + if (is_dynamic_batch && ProcessMbatchScene(mbatch_node, dt_set, index) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "ProcessMbatchScene failed"); + return FAILED; + } + // check if need to set format string set_format; bool ret = ge::AttrUtils::GetStr(node_ptr->GetOpDesc(), ATTR_ATC_USER_DEFINE_FORMAT, set_format); if (ret && (!set_format.empty()) && TypeUtils::SerialStringToFormat(set_format) == FORMAT_NC1HWC0) { GELOGI("The format of node [%s] should be set NC1HWC0.", node_ptr->GetName().c_str()); - if (ProcessInputNC1HWC0DynShape(node_ptr, is_dynamic_batch, switchn_node) != SUCCESS) { + if (ProcessInputNC1HWC0DynShape(node_ptr, is_dynamic_batch, mbatch_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "ProcessInputNC1HWC0 failed"); return FAILED; } @@ -1182,7 +1257,8 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i // Since ME dont differentiate between RefSwitch and Switch, and only issue Switch. 
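The key step in UpdateSubgraphDataOfCase above is matching Data nodes inside the Case subgraphs to the one parent input whose dtype changed, using ATTR_NAME_PARENT_NODE_INDEX. The following is a minimal standalone sketch of that matching step only; SubgraphData and UpdateCaseSubgraphInputs are hypothetical names, not the GE API.

#include <cstdint>
#include <string>
#include <vector>

enum class DType { FLOAT32, FLOAT16 };

struct SubgraphData {
  std::string name;
  int32_t parent_node_index = -1;  // which input of the parent Case node this Data maps to
  DType input_dtype = DType::FLOAT32;
  DType output_dtype = DType::FLOAT32;
};

// Re-type only the Data nodes wired to the Case input whose dtype was changed;
// Data nodes bound to other parent inputs keep their original dtype.
void UpdateCaseSubgraphInputs(std::vector<SubgraphData> &subgraph_data_nodes,
                              int32_t changed_input_index, DType dt_set) {
  for (auto &data : subgraph_data_nodes) {
    if (data.parent_node_index != changed_input_index) {
      continue;
    }
    data.input_dtype = dt_set;
    data.output_dtype = dt_set;
  }
}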
static std::set acceptable_types = {ge::VARIABLE, ge::VARIABLEV2, ge::VARHANDLEOP, ge::REFSWITCH, ge::REFMERGE, ge::REFENTER, - ge::REFNEXTITERATION, ge::REFEXIT, ge::SWITCH}; + ge::REFNEXTITERATION, ge::REFEXIT, ge::SWITCH, + ge::DATA}; GE_CHECK_NOTNULL(node); const auto &op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index 5a04c461..b66038d9 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -17,10 +17,7 @@ #include "npu_memory_allocator.h" #include #include "framework/common/debug/log.h" -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/common/tensor_value.h b/ge/hybrid/common/tensor_value.h index 9f68cf2c..348e4e6d 100644 --- a/ge/hybrid/common/tensor_value.h +++ b/ge/hybrid/common/tensor_value.h @@ -21,6 +21,7 @@ #include #include #include "memory/memory_api.h" +#include "framework/common/util.h" namespace ge { namespace hybrid { @@ -39,6 +40,12 @@ class TensorBuffer { TensorBuffer &operator = (const TensorBuffer &) = delete; ~TensorBuffer(); + void* Release() { + auto ret = buffer_; + buffer_ = nullptr; + return ret; + } + void *GetData() { return buffer_; } @@ -47,6 +54,10 @@ class TensorBuffer { return size_; } + MemStorageType GetMemType() const { + return mem_type_; + } + private: TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM); @@ -68,6 +79,10 @@ class TensorValue { void Destroy(); + void *Release() { + return buffer_->Release(); + } + bool IsEmpty() { return ref_buffer_ == nullptr && buffer_ == nullptr; } @@ -79,11 +94,21 @@ class TensorValue { void SetName(const std::string &name) { name_ = name; } + + MemStorageType GetMemType() const { + return buffer_->GetMemType(); + } void *MutableData(); size_t GetSize() const; + template + Status CopyScalarValueToHost(T &value) const { + GE_CHECK_GE(this->GetSize(), sizeof(value)); + return rtMemcpy(&value, sizeof(value), this->GetData(), sizeof(value), RT_MEMCPY_DEVICE_TO_HOST); + } + private: std::shared_ptr buffer_; std::string name_; diff --git a/ge/hybrid/executor/hybrid_execution_context.cc b/ge/hybrid/executor/hybrid_execution_context.cc index f1357285..0f978bf8 100644 --- a/ge/hybrid/executor/hybrid_execution_context.cc +++ b/ge/hybrid/executor/hybrid_execution_context.cc @@ -28,6 +28,8 @@ const int32_t kModelAbortNormalNew = 507024; std::atomic_ulong context_id_gen {}; } // namespace +long GraphExecutionContext::profiling_level = 0; + GraphExecutionContext::GraphExecutionContext() { context_id = context_id_gen++; } diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 67a96e98..f2628409 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -73,7 +73,7 @@ struct GraphExecutionContext { ExceptionDumper exception_dumper; std::vector> davinci_model; std::atomic_bool is_eos_{false}; - long profiling_level = 0; + static long profiling_level; long iteration = 0; void *global_step = nullptr; @@ -82,17 +82,18 @@ struct GraphExecutionContext { mutable std::mutex mu; }; -#define RECORD_PROFILING_EVENT(context, evt_type, fmt, category, node_name, ...) 
\ -do { \ - if ((context != nullptr) && (context)->profiler != nullptr) { \ - if (node_name != nullptr) { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s@%ld] [%s] " fmt, \ - GeLog::GetTid(), node_name, context->iteration, category, \ - ##__VA_ARGS__); \ - } else { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ - }\ - } \ +#define RECORD_PROFILING_EVENT(context, evt_type, fmt, category, node_name, ...) \ +do { \ + if (ge::hybrid::GraphExecutionContext::profiling_level > 0) { \ + if ((context != nullptr) && (context)->profiler != nullptr) { \ + if (node_name != nullptr) { \ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s@%ld] [%s] " fmt, \ + GeLog::GetTid(), node_name, context->iteration, category, ##__VA_ARGS__); \ + } else { \ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ + } \ + } \ + } \ } while (0) #define RECORD_MODEL_EXECUTION_EVENT(context, fmt, ...) \ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 3294a286..930412e3 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -19,6 +19,13 @@ #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "graph/ge_context.h" +#include "graph/types.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { namespace hybrid { @@ -26,6 +33,7 @@ namespace { const int kDataOutputIndex = 0; const size_t kMinimumPiplineStages = 2; const int kDefaultLoopCount = 10; +const size_t kAlignment = 64; } HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) : model_(model), run_flag_(false), data_dumper_(nullptr) { @@ -70,6 +78,8 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis GetThreadLocalContext() = *executor_->GetContext()->ge_context; GetContext().SetSessionId(executor_->GetContext()->session_id); GetContext().SetContextId(executor_->GetContext()->context_id); + GE_CHECK_NOTNULL(executor_->GetContext()->ge_context); + GetThreadLocalContext() = *executor_->GetContext()->ge_context; return RunInternal(); }); @@ -197,7 +207,7 @@ Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, HybridModelExecutor::ExecuteArgs &args, OutputData *output_data) { GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret); - std::vector output_tensor_info_list; + std::vector output_tensor_info_list; if (args.is_eos) { GELOGI("End of sequence, model id = %u", model_id_); GE_CHK_STATUS_RET_NOLOG(OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list)); @@ -368,7 +378,7 @@ Status HybridModelAsyncExecutor::InitInputDesc() { } Status HybridModelAsyncExecutor::OnComputeDone(uint32_t data_index, uint32_t result_code, - std::vector &outputs) { + std::vector &outputs) { GELOGD("OnComputeDone. 
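Promoting profiling_level to a static member lets the reworked RECORD_PROFILING_EVENT macro bail out on a single integer check before it ever touches the context or formats any arguments. A minimal standalone illustration of that gating pattern follows; MiniProfiler and PROF_EVENT are invented names and not the GE macro itself.

#include <cstdarg>
#include <cstdio>

struct MiniProfiler {
  static long level;  // analogous to the static GraphExecutionContext::profiling_level

  static void Record(const char *fmt, ...) {
    va_list args;
    va_start(args, fmt);
    std::vfprintf(stderr, fmt, args);
    va_end(args);
    std::fputc('\n', stderr);
  }
};
long MiniProfiler::level = 0;

// Cheap early-out: formatting work only happens when the level is positive.
#define PROF_EVENT(fmt, ...)                      \
  do {                                            \
    if (MiniProfiler::level > 0) {                \
      MiniProfiler::Record(fmt, ##__VA_ARGS__);   \
    }                                             \
  } while (0)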
model id = %u, data index = %u, execution ret = %u", model_id_, data_index, result_code); if (listener_ != nullptr) { GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs), @@ -378,9 +388,8 @@ Status HybridModelAsyncExecutor::OnComputeDone(uint32_t data_index, uint32_t res return result_code; } -Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args, - OutputData *output_data, - std::vector &outputs) { +Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, + std::vector &outputs) { // copy output data from op to designated position std::vector &output_tensor_desc_list = args.output_desc; std::vector &output_tensors = args.outputs; @@ -395,6 +404,12 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a } GELOGD("Number of outputs = %zu", output_tensor_desc_list.size()); + string execute_mode; + auto result = ge::GetContext().GetOption(OPTION_EXEC_DYNAMIC_EXECUTE_MODE, execute_mode); + if (result != SUCCESS) { + GELOGW("Can not get dynamic execute mode attr"); + } + GELOGD("The dynamic execute is %s", execute_mode.c_str()); for (size_t i = 0; i < output_tensors.size(); ++i) { GELOGD("Start to process output[%zu]", i); auto &output_tensor = output_tensors[i]; @@ -429,37 +444,65 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a return INTERNAL_ERROR; } - ge::OutputTensorInfo output; - output.data_type = static_cast(tensor_desc->GetDataType()); - output.dims = tensor_desc->GetShape().GetDims(); - output.length = output_size; + GeShape ge_shape(tensor_desc->GetShape().GetDims()); + GeTensorDesc ge_tensor_desc; + ge_tensor_desc.SetShape(ge_shape); if (output_size > 0) { - std::unique_ptr data_buf(new(std::nothrow) uint8_t[output_size]); - GE_CHECK_NOTNULL(data_buf); - GE_CHK_RT_RET(rtMemcpy(data_buf.get(), - output_size, - output_tensor.GetData(), - output_size, - RT_MEMCPY_DEVICE_TO_HOST)); - output.data = std::move(data_buf); - output_data->blobs.emplace_back(data_buf.get(), static_cast(output_size), false); + if (execute_mode != kLazyRecompile) { + auto aligned_ptr = MakeShared(output_size, kAlignment); + GE_CHECK_NOTNULL(aligned_ptr); + auto data_buf = aligned_ptr->MutableGet(); + GE_CHECK_NOTNULL(data_buf); + GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); + GeTensor ge_tensor(ge_tensor_desc); + ge_tensor.SetData(aligned_ptr, output_size); + output_data->blobs.emplace_back(data_buf, static_cast(output_size), false); + auto tensor = TensorAdapter::AsTensor(ge_tensor); + outputs.emplace_back(std::move(tensor)); + } else { + BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs); + output_data->blobs.emplace_back(output_tensor.Release(), static_cast(output_size), false, + static_cast(kPlacementDevice)); + } } else { - GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str()); - output.data = nullptr; + GELOGW("Output [%zu] is empty. 
shape = [%s]", i, tensor_desc->GetShape().ToString().c_str()); + GeTensor ge_tensor(ge_tensor_desc); + ge_tensor.SetData(nullptr, 0U); output_data->blobs.emplace_back(nullptr, 0U, false); + auto tensor = TensorAdapter::AsTensor(ge_tensor); + outputs.emplace_back(std::move(tensor)); } - - outputs.emplace_back(std::move(output)); - GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", - i, + GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i, TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), - tensor_desc->GetShape().ToString().c_str(), - output_size); + tensor_desc->GetShape().ToString().c_str(), output_size); } return SUCCESS; } +void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, + int64_t output_size, std::vector &outputs) { + GELOGD("Start to build device tensor"); + auto mem_type = output_tensor.GetMemType(); + GELOGD("Mem type is %d", static_cast(mem_type)); + auto deleter = [=](uint8_t *device_data) { + if (device_data != nullptr) { + if (mem_type == RDMA_HBM) { + MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(device_data, device_id_); + } else if (mem_type == HOST_DDR) { + MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(device_data); + } else { + MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(device_data, device_id_); + } + } + }; + ge_tensor_desc.SetPlacement(kPlacementDevice); + GeTensor ge_tensor(ge_tensor_desc); + auto tensor = TensorAdapter::AsTensor(ge_tensor); + tensor.SetData(reinterpret_cast(output_tensor.Release()), static_cast(output_size), deleter); + outputs.emplace_back(std::move(tensor)); +} + Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, const std::vector &input_desc, std::vector &outputs, @@ -507,7 +550,7 @@ Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector< GELOGD("Done copying input data successfully."); GE_CHK_STATUS_RET(executor_->Execute(args), "[Invoke][Execute] Failed, model_id = %u.", model_id_); - std::vector output_tensor_info_list; + std::vector output_tensor_info_list; OutputData output_data; GE_CHK_STATUS_RET(CopyOutputs(args, &output_data, output_tensor_info_list), "[Invoke][CopyOutputs]Failed to copy outputs, model_id = %u.", model_id_); @@ -517,15 +560,15 @@ Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector< outputs.resize(output_tensor_info_list.size()); for (auto &out_tensor_info : output_tensor_info_list) { auto &ge_tensor = outputs[out_index]; - if (out_tensor_info.length > 0) { - GE_CHK_GRAPH_STATUS_RET(ge_tensor.SetData(out_tensor_info.data.get(), out_tensor_info.length), + if (out_tensor_info.GetSize() > 0) { + GE_CHK_GRAPH_STATUS_RET(ge_tensor.SetData(out_tensor_info.GetData(), out_tensor_info.GetSize()), "Failed to set output[%d].", out_index); } ge_tensor.MutableTensorDesc() = *args.output_desc[out_index]; GELOGD("Set output[%d], tensor size = %ld, shape = [%s]", out_index, - out_tensor_info.length, + out_tensor_info.GetSize(), ge_tensor.MutableTensorDesc().MutableShape().ToString().c_str()); ++out_index; } @@ -544,9 +587,15 @@ Status HybridModelAsyncExecutor::DumpOpDebug() { data_dumper_.SetModelId(model_->GetModelId()); data_dumper_.SetDeviceId(model_->GetDeviceId()); void *global_step = nullptr; - TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); - if (varible_global_step != nullptr) { - global_step = const_cast(varible_global_step->GetData()); + + if (dump_properties.IsInferOpDebug()) { + GELOGD("Init global 
step when infer with op debug."); + global_step = executor_->GetContext()->global_step; + } else { + TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); + if (varible_global_step != nullptr) { + global_step = const_cast(varible_global_step->GetData()); + } } void *loop_per_iter = nullptr; diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index c5a6533a..5ae1a222 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -61,7 +61,7 @@ class HybridModelAsyncExecutor { void SetRunningFlag(bool flag) { running_flag_ = flag; } - const GraphExecutionContext * GeContext() { return executor_->GetContext(); } + const GraphExecutionContext *GeContext() { return executor_->GetContext(); } private: Status InitInputDesc(); @@ -75,11 +75,11 @@ class HybridModelAsyncExecutor { HybridModelExecutor::ExecuteArgs &args, OutputData *output_data); - Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, - OutputData *output_data, - std::vector &outputs); + Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector &outputs); + void BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size, + std::vector &outputs); - Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); + Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); Status PreRun(InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 2ab4ed5d..f8635a97 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -155,9 +155,9 @@ Status HybridModelExecutor::InitExecutionContext() { context_.dump_properties = DumpManager::GetInstance().GetDumpProperties(context_.session_id); const char *profiling_level = std::getenv(kEnvProfilingLevel); if (profiling_level != nullptr) { - context_.profiling_level = std::strtol(profiling_level, nullptr, kIntBase); - GELOGD("Got profiling level = %ld", context_.profiling_level); - if (context_.profiling_level > 0) { + GraphExecutionContext::profiling_level = std::strtol(profiling_level, nullptr, kIntBase); + GELOGD("Got profiling level = %ld", GraphExecutionContext::profiling_level); + if (GraphExecutionContext::profiling_level > 0) { context_.profiler.reset(new(std::nothrow)HybridProfiler()); GE_CHECK_NOTNULL(context_.profiler); } diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc index b2a77653..ba24d78d 100644 --- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc +++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc @@ -187,9 +187,9 @@ void StageExecutor::Reset() { Status HybridModelPipelineExecutor::Init() { const char *profiling_level = std::getenv(kEnvProfilingLevel); if (profiling_level != nullptr) { - context_.profiling_level = std::strtol(profiling_level, nullptr, kIntBase); - GELOGD("Got profiling level = %ld", context_.profiling_level); - if (context_.profiling_level > 0) { + GraphExecutionContext::profiling_level = std::strtol(profiling_level, nullptr, kIntBase); + GELOGD("Got profiling level = %ld", GraphExecutionContext::profiling_level); + if (GraphExecutionContext::profiling_level > 0) { context_.profiler.reset(new (std::nothrow) HybridProfiler()); 
GE_CHECK_NOTNULL(context_.profiler); } @@ -210,7 +210,6 @@ Status HybridModelPipelineExecutor::InitStageExecutors() { if (context_.profiler != nullptr) { // will call unique_ptr::release later stage_executor->context_.profiler.reset(context_.profiler.get()); - stage_executor->context_.profiling_level = context_.profiling_level; } stage_executors_.emplace_back(std::move(stage_executor)); diff --git a/ge/hybrid/executor/node_done_manager.cc b/ge/hybrid/executor/node_done_manager.cc index f33ffcd9..d31765c2 100644 --- a/ge/hybrid/executor/node_done_manager.cc +++ b/ge/hybrid/executor/node_done_manager.cc @@ -36,6 +36,16 @@ bool NodeDoneManager::Cond::Await() { return is_released_; } +void NodeDoneManager::Cond::Reset() { + std::unique_lock lk(cond_mu_); + if (!is_released_ && !is_cancelled_) { + GELOGW("Called before done, released: %d, cancelled: %d", is_released_, is_cancelled_); + } + + is_released_ = false; + is_cancelled_ = false; +} + void NodeDoneManager::Cond::Release() { std::unique_lock lk(cond_mu_); is_released_ = true; @@ -103,5 +113,13 @@ bool NodeDoneManager::Await(const NodePtr &node) { GELOGD("[%s] Await ended. is_released = %s", node->GetName().c_str(), sub->IsRelease() ? "true" : "false"); return ret; } + +void NodeDoneManager::Reset(const NodePtr &node) { + auto sub = GetSubject(node); + if (sub != nullptr) { + sub->Reset(); + GELOGD("[%s] Node reset.", node->GetName().c_str()); + } +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/node_done_manager.h b/ge/hybrid/executor/node_done_manager.h index faf12b46..292d1369 100644 --- a/ge/hybrid/executor/node_done_manager.h +++ b/ge/hybrid/executor/node_done_manager.h @@ -31,6 +31,8 @@ class NodeDoneManager { bool Await(const NodePtr &node); + void Reset(const NodePtr &node); + void Destroy(); private: @@ -40,6 +42,7 @@ class NodeDoneManager { void Release(); void Cancel(); bool Await(); + void Reset(); private: std::mutex cond_mu_; std::condition_variable cv_; diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index ce8304b0..aaa7801f 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -30,6 +30,10 @@ constexpr auto kWaitInternal = 5; constexpr auto kMaxWaitTimes = 120; } ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(node_item) { + InitShapeState(); +} + +void ShapeInferenceState::InitShapeState() { this->num_pending_shapes_ = node_item.num_inputs - node_item.num_static_input_shapes; GELOGD("[%s] ShapeInferenceState created, pending shape count = %d", node_item.NodeName().c_str(), @@ -135,19 +139,23 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex } } - for (size_t i = 0; i < input_tensor_desc.size(); ++i) { - auto dst_tensor_desc = node_item.op_desc->MutableInputDesc(i); - if (dst_tensor_desc == nullptr) { - continue; - } + { + const auto &guard = node_item.MutexGuard("AwaitShapesReady"); + for (size_t i = 0; i < input_tensor_desc.size(); ++i) { + auto dst_tensor_desc = node_item.MutableInputDesc(i); + if (dst_tensor_desc == nullptr) { + continue; + } - auto &tensor_desc = input_tensor_desc[i]; - int64_t tensor_size = -1; - (void) TensorUtils::GetSize(tensor_desc, tensor_size); + auto &tensor_desc = input_tensor_desc[i]; + int64_t tensor_size = -1; + (void)TensorUtils::GetSize(tensor_desc, tensor_size); - dst_tensor_desc->SetShape(tensor_desc.MutableShape()); - dst_tensor_desc->SetOriginShape(tensor_desc.GetOriginShape()); - (void) TensorUtils::SetSize(*dst_tensor_desc, 
tensor_size); + dst_tensor_desc->SetShape(tensor_desc.MutableShape()); + dst_tensor_desc->SetOriginShape(tensor_desc.GetOriginShape()); + (void)TensorUtils::SetSize(*dst_tensor_desc, tensor_size); + } + (void)guard; } for (auto &p : shape_futures) { @@ -159,8 +167,6 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex GE_CHECK_NOTNULL(src_tensor_desc); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); - auto input_desc = node_item.MutableInputDesc(idx); - GE_CHECK_NOTNULL(input_desc); int64_t tensor_size = -1; (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", @@ -169,9 +175,13 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex src_tensor_desc->GetShape().ToString().c_str(), src_tensor_desc->GetOriginShape().ToString().c_str(), tensor_size); + const auto &guard = node_item.MutexGuard("AwaitShapesReady"); + auto input_desc = node_item.MutableInputDesc(idx); + GE_CHECK_NOTNULL(input_desc); input_desc->SetShape(src_tensor_desc->GetShape()); input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); (void) TensorUtils::SetSize(*input_desc, tensor_size); + (void)guard; } return SUCCESS; @@ -207,6 +217,11 @@ NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_contex } Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { + if (node_item_->IsMergeOp()) { + GELOGD("[%s] merge index %d, input nodes: %zu", GetName().c_str(), merge_index_, node_item_->data_recv_.size()); + return SUCCESS; + } + for (auto &src_node : node_item_->dependents_for_execution) { GELOGD("[%s] Start to wait for data dependent node: [%s]", node_item_->NodeName().c_str(), @@ -225,7 +240,7 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { node_item_->NodeName().c_str(), "[AwaitNodeDone] [%s] End", src_node->GetName().c_str()); - GELOGD("[%s] Done waiting node.", src_node->GetName().c_str()); + GELOGD("[%s] Done waiting node: [%s]", node_item_->NodeName().c_str(), src_node->GetName().c_str()); } return SUCCESS; @@ -255,6 +270,126 @@ std::shared_ptr NodeState::GetTaskContext() { return task_context_; } +void NodeState::ResetContext(int group) { + SetGroup(group); + if (loop_count_ == 0) { + ++loop_count_; + return; + } + + ++loop_count_; + if (loop_count_ == UINT64_MAX) { + loop_count_ = 1; + } + + switch_index_ = -1; + const auto &guard = node_item_->MutexGuard("ResetContext"); + shape_inference_state_.InitShapeState(); + subgraph_context_->ResetContext(node_item_->node); + GELOGD("Node[%s] in while loop, current loop: %lu, merge index: %d", GetName().c_str(), loop_count_, merge_index_); + (void)guard; +} + +void NodeState::ResetSchedule() { + std::lock_guard lk(mu_); + data_scheduled_ = static_cast(node_item_->root_data_.size()); + ctrl_scheduled_ = static_cast(node_item_->root_ctrl_.size()); + GELOGD("[%s] set schedule for root nodes, data: %u, ctrl: %u", GetName().c_str(), data_scheduled_, ctrl_scheduled_); +} + +Status NodeState::NodeScheduled(const std::function &ready) const { + // Schedule data output. + for (const auto &node : node_item_->data_send_) { + const auto &dst_node_state = subgraph_context_->GetOrCreateNodeState(node); + GE_CHECK_NOTNULL(dst_node_state); + dst_node_state->SetDataSchedule(node_item_, ready); + } + + // Schedule ctrl output. 
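The new scheduling path counts how many data and control predecessors have scheduled a node and fires it once the counts line up, with Merge treated specially so that the first data arrival is enough; this mirrors the IsScheduleReady and SetDataSchedule logic that continues below. The fragment here is a self-contained illustration of that readiness check under those assumptions; MiniSchedState and its fields are hypothetical, not the NodeState class.

#include <cstddef>
#include <mutex>

struct MiniSchedState {
  std::size_t data_inputs = 0;   // analogous to the size of data_recv_
  std::size_t ctrl_inputs = 0;   // analogous to the size of ctrl_recv_
  bool is_merge = false;

  std::size_t data_scheduled = 0;
  std::size_t ctrl_scheduled = 0;
  std::mutex mu;

  bool IsReady() const {
    if (ctrl_scheduled != ctrl_inputs) {
      return false;  // every control edge must have fired
    }
    return is_merge ? (data_scheduled > 0)            // Merge runs on the first data arrival
                    : (data_scheduled >= data_inputs);
  }

  // Called by an upstream node when one of this node's data inputs is produced.
  template <typename ReadyFn>
  void OnDataScheduled(ReadyFn &&ready) {
    std::lock_guard<std::mutex> lk(mu);
    ++data_scheduled;
    if (IsReady()) ready();
  }
};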
+ for (const auto &node : node_item_->ctrl_send_) { + const auto &dst_node_state = subgraph_context_->GetOrCreateNodeState(node); + GE_CHECK_NOTNULL(dst_node_state); + dst_node_state->SetCtrlSchedule(node_item_, ready); + } + + // Schedule switch group. + if (switch_index_ >= 0 && static_cast(switch_index_) < node_item_->switch_groups_.size()) { + GELOGI("After [%s] scheduled, switch index: %d", GetName().c_str(), switch_index_); + for (const auto &node : node_item_->switch_groups_[switch_index_]) { + const auto &dst_node_state = subgraph_context_->GetOrCreateNodeState(node); + GE_CHECK_NOTNULL(dst_node_state); + dst_node_state->SetCtrlSchedule(node_item_, ready); + } + } + + return SUCCESS; +} + +bool NodeState::IsScheduleReady() const { + GELOGD("[%s] data[input: %zu, scheduled: %u], ctrl[input: %zu, scheduled: %u]", GetName().c_str(), + node_item_->data_recv_.size(), data_scheduled_, node_item_->ctrl_recv_.size(), ctrl_scheduled_); + if (ctrl_scheduled_ != node_item_->ctrl_recv_.size()) { + return false; + } + + if (node_item_->IsMergeOp()) { + return data_scheduled_ > 0; + } + + // Exit may feed loop times... + return data_scheduled_ >= node_item_->data_recv_.size(); +} + +void NodeState::SetDataSchedule(const NodeItem *node_item, const std::function &ready) { + GELOGD("[%s] data schedule node[%s], data num: %zu, current scheduled: %u, ctrl num: %zu, current scheduled: %u", + node_item->node_name.c_str(), GetName().c_str(), node_item_->data_recv_.size(), data_scheduled_, + node_item_->ctrl_recv_.size(), ctrl_scheduled_); + + std::lock_guard lk(mu_); + ++data_scheduled_; + + if (node_item_->IsMergeOp()) { + const auto it = node_item_->data_recv_.find(node_item); + if (it != node_item_->data_recv_.end()) { + merge_index_ = it->second; + (void)AttrUtils::SetInt(node_item_->node->GetOpDesc(), ATTR_NAME_MERGE_INPUT_INDEX, it->second); + GELOGD("[%s] scheduled, [%s] set merge index: %d", node_item->node_name.c_str(), GetName().c_str(), it->second); + } else { + GELOGW("[%s] scheduled, [%s] not followed", node_item->node_name.c_str(), GetName().c_str()); + } + } + + if (IsScheduleReady()) { + ready(node_item_); + } +} + +void NodeState::SetCtrlSchedule(const NodeItem *node_item, const std::function &ready) { + GELOGD("[%s] ctrl schedule node[%s], data num: %zu, current scheduled: %u, ctrl num: %zu, current scheduled: %u", + node_item->node_name.c_str(), GetName().c_str(), node_item_->data_recv_.size(), data_scheduled_, + node_item_->ctrl_recv_.size(), ctrl_scheduled_); + + std::lock_guard lk(mu_); + ++ctrl_scheduled_; + + if (IsScheduleReady()) { + ready(node_item_); + } +} + +void NodeState::SetScheduleFuture(std::future &&future) { + schedule_future_ = std::move(future); +} + +Status NodeState::WaitForScheduleDone() { + if (schedule_future_.valid()) { + GELOGD("[%s] Start to wait for schedule future.", GetName().c_str()); + GE_CHK_STATUS_RET(schedule_future_.get(), "[Check][Status][%s] wait thread failed", GetName().c_str()); + } + + return SUCCESS; +} + Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_->GetNodeItem()->node), "cancelled"); diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 84a52abd..49861611 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -20,6 +20,8 @@ #include #include #include + +#include "common/blocking_queue.h" #include 
"external/ge/ge_api_error_codes.h" #include "hybrid/model/node_item.h" #include "node_done_manager.h" @@ -32,6 +34,8 @@ class SubgraphContext; class TaskContext; struct NodeState; +using NodeStatePtr = std::shared_ptr; + class ShapeFuture { public: ShapeFuture(NodeState *src_node, uint32_t src_index, SubgraphContext *subgraph_context); @@ -48,6 +52,8 @@ class ShapeFuture { struct ShapeInferenceState { explicit ShapeInferenceState(const NodeItem &node_item); + void InitShapeState(); + Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); void UpdateInputShapeFuture(int idx, ShapeFuture &&future); @@ -100,6 +106,43 @@ struct NodeState { Status UpdateOutputShapes(int index, const GeShape &shape, const GeShape &ori_shape); + inline bool IsShapeDependence() const { + return node_item_->IsControlFlowOp() || node_item_->shape_inference_type >= DEPEND_SHAPE_RANGE; + } + + void ResetContext(int group); + + void ResetSchedule(); + + Status NodeScheduled(const std::function &ready) const; + + void SetScheduleFuture(std::future &&future); + Status WaitForScheduleDone(); + + void SetSwitchIndex(int index) { + switch_index_ = index; + } + + int GetSwitchIndex() const { + return switch_index_; + } + + void SetMergeIndex(int index) { + merge_index_ = index; + } + + int GetMergeIndex() const { + return merge_index_; + } + + void SetGroup(int group) { + group_ = group; + } + + int GetGroup() const { + return group_; + } + const shared_ptr &GetKernelTask() const { return kernel_task_; } @@ -120,6 +163,10 @@ struct NodeState { std::shared_ptr GetTaskContext(); private: + bool IsScheduleReady() const; + void SetDataSchedule(const NodeItem *node_item, const std::function &ready); + void SetCtrlSchedule(const NodeItem *node_item, const std::function &ready); + const NodeItem *node_item_ = nullptr; std::shared_ptr kernel_task_ = nullptr; std::future prepare_future_; @@ -128,9 +175,15 @@ struct NodeState { SubgraphContext *subgraph_context_; std::shared_ptr task_context_ = nullptr; std::mutex mu_; -}; -using NodeStatePtr = std::shared_ptr; + std::future schedule_future_; + uint64_t loop_count_ = 0; + uint32_t ctrl_scheduled_ = 0; + uint32_t data_scheduled_ = 0; + int merge_index_ = -1; // Use for Execute (Reset after Executed). + int switch_index_ = -1; // Use for Schedule (Reset after Prepared). 
+ int group_ = -1; +}; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index b26afb9c..9a9a97c2 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -37,11 +37,17 @@ Status SubgraphContext::Init() { return SUCCESS; } +void SubgraphContext::ResetContext(const NodePtr &node) { + node_done_manager_.Reset(node); +} + NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { std::lock_guard lk(mu_); auto &node_state = node_states_[node_item]; if (node_state == nullptr) { + const auto &guard = node_item->MutexGuard("GetOrCreateNodeState"); node_state.reset(new(std::nothrow)NodeState(*node_item, this)); + (void)guard; } return node_state; diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index 8ce33f23..ff692ed9 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -34,6 +34,7 @@ class SubgraphContext { ~SubgraphContext() = default; Status Init(); + void ResetContext(const NodePtr &node); NodeStatePtr GetOrCreateNodeState(const NodeItem *node_item); void OnError(Status error); diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index e41ab253..60895c7e 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -177,8 +177,11 @@ Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vectorSetTaskContext(known_shape_task_context_); - HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_), + std::function callback; + GE_CHK_STATUS_RET_NOLOG(InitCallback(node_state.get(), callback)); + HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_, callback), "[%s] Failed to execute node [%s] for known subgraph.", graph_item_->GetName().c_str(), known_shape_task_context_->GetNodeName()); @@ -206,76 +209,260 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) { return SUCCESS; } +BlockingQueue &SubgraphExecutor::GetPrepareQueue(int group) { + std::lock_guard lk(mu_); + return prepare_queues_[group]; +} + +Status SubgraphExecutor::NodeEnqueue(NodeState *node_state) { + if (!ready_queue_.Push(node_state)) { + if (context_->is_eos_) { + GELOGD("Got end of sequence"); + return SUCCESS; + } + GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. quit from preparing nodes.", + graph_item_->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. 
quit from preparing nodes.", + graph_item_->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGD("[%s] Push node [%s] to queue.", graph_item_->GetName().c_str(), node_state->GetName().c_str()); + return SUCCESS; +} + +Status SubgraphExecutor::PrepareNode(const NodeItem &node_item, int group) { + GELOGD("[%s] Start to prepare node [%s].", graph_item_->GetName().c_str(), node_item.NodeName().c_str()); + // for while op + if (force_infer_shape_ && !node_item.is_dynamic) { + GELOGD("[%s] Force infer shape is set, updating node to dynamic.", node_item.NodeName().c_str()); + auto &mutable_node_item = const_cast(node_item); + mutable_node_item.SetToDynamic(); + } + + auto node_state = subgraph_context_->GetOrCreateNodeState(&node_item); + GE_CHECK_NOTNULL(node_state); + node_state->ResetContext(group); + auto p_node_state = node_state.get(); + + if (node_item.node_type == NETOUTPUT) { + GE_CHK_STATUS_RET_NOLOG(NodeEnqueue(p_node_state)); + return AfterPrepared(p_node_state); + } + + // only do shape inference and compilation for nodes with dynamic shapes. + if (node_item.is_dynamic) { + auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { + GetContext().SetSessionId(context_->session_id); + GetContext().SetContextId(context_->context_id); + GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); + GE_CHK_STATUS_RET_NOLOG(PrepareForExecution(context_, *p_node_state)); + return AfterPrepared(p_node_state); + }); + + p_node_state->SetPrepareFuture(std::move(prepare_future)); + return NodeEnqueue(p_node_state); + } else { + GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", + node_item.NodeName().c_str()); + if (node_item.kernel_task == nullptr) { + GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str()); + GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), + "[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str()); + } else { + node_state->SetKernelTask(node_item.kernel_task); + } + auto unique_task_context = TaskContext::Create(node_state.get(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state->GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str()); + REPORT_CALL_ERROR("E19999", "GetKernelTask failed for %s, nodetask is null.", node_state->GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + GE_CHK_STATUS_RET_NOLOG(NodeEnqueue(p_node_state)); + return AfterPrepared(p_node_state); + } +} + Status SubgraphExecutor::PrepareNodes(int group) { - GELOGD("[%s] Start to prepare nodes. group = %d", - graph_item_->GetName().c_str(), - group); - auto &all_nodes = graph_item_->GetAllNodes(group); - for (auto all_node : all_nodes) { - auto &node_item = *all_node; - // for while op - if (force_infer_shape_ && !node_item.is_dynamic) { - GELOGD("[%s] Force infer shape is set, updating node to dynamic.", node_item.NodeName().c_str()); - auto &mutable_node_item = const_cast(node_item); - mutable_node_item.SetToDynamic(); + const size_t node_size = graph_item_->GetNodeSize(group); + GELOGD("[%s] Start to prepare nodes. 
group = %d, size = %zu", graph_item_->GetName().c_str(), group, node_size); + if (!graph_item_->HasCtrlFlowOp()) { + for (const auto &node_item : graph_item_->GetAllNodes(group)) { + RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] Start"); + GE_CHK_STATUS_RET(PrepareNode(*node_item, group), "[%s] failed to prepare task.", node_item->NodeName().c_str()); + RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] End"); } - GELOGD("[%s] Start to prepare node [%s].", graph_item_->GetName().c_str(), node_item.NodeName().c_str()); - auto node_state = subgraph_context_->GetOrCreateNodeState(&node_item); - GE_CHECK_NOTNULL(node_state); - auto p_node_state = node_state.get(); - - if (node_item.node_type != NETOUTPUT) { - // only do shape inference and compilation for nodes with dynamic shapes. - if (node_item.is_dynamic) { - auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { - GetContext().SetSessionId(context_->session_id); - GetContext().SetContextId(context_->context_id); - GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); - return PrepareForExecution(context_, *p_node_state); - }); - - p_node_state->SetPrepareFuture(std::move(prepare_future)); - } else { - GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", - node_item.NodeName().c_str()); - if (node_item.kernel_task == nullptr) { - GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str()); - GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), - "[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str()); - } else { - node_state->SetKernelTask(node_item.kernel_task); - } - auto unique_task_context = - TaskContext::Create(node_state.get(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(unique_task_context); - const auto &task = node_state->GetKernelTask(); - if (task == nullptr) { - GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str()); - REPORT_CALL_ERROR("E19999", "GetKernelTask failed for %s, nodetask is null.", node_state->GetName().c_str()); - return INTERNAL_ERROR; - } - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); + GELOGD("[%s] Done preparing nodes successfully.", graph_item_->GetName().c_str()); + return SUCCESS; + } + + // Initialize the ready queue + size_t node_count = 0; + bool node_complete = false; + for (const auto &node_item : graph_item_->GetRootNodes(group)) { + RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] Start"); + GE_CHK_STATUS_RET(PrepareNode(*node_item, group), "[%s] failed to prepare task.", node_item->NodeName().c_str()); + RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] End"); + node_complete = node_item->NodeType() == NETOUTPUT; + node_count++; + } + + GELOGD("[%s] Done preparing root nodes.", graph_item_->GetName().c_str()); + BlockingQueue &prepare_queue = GetPrepareQueue(group); + while (((group != -1) && (node_count < node_size)) || ((group == -1) && !node_complete)) { + const NodeItem *node_item = nullptr; + if (!prepare_queue.Pop(node_item)) { + if (context_->is_eos_) { + GELOGD("[%s] Got end of sequence.", graph_item_->GetName().c_str()); + break; } + if (context_->GetStatus() != SUCCESS) { + GELOGD("[%s] Graph execution Got failed.", graph_item_->GetName().c_str()); + return SUCCESS; + } + GELOGE(INTERNAL_ERROR, 
"[%s] failed to pop node.", graph_item_->GetName().c_str()); + return INTERNAL_ERROR; } - if (!ready_queue_.Push(p_node_state)) { + if (node_item == nullptr) { + GELOGD("[%s] Got EOF from queue.", graph_item_->GetName().c_str()); + break; + } + + RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] Start"); + GE_CHK_STATUS_RET(PrepareNode(*node_item, group), "[%s] failed to prepare task.", node_item->NodeName().c_str()); + RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] End"); + node_complete = node_item->NodeType() == NETOUTPUT; + node_count++; + } + + GELOGD("[%s] Done preparing nodes successfully.", graph_item_->GetName().c_str()); + return SUCCESS; +} + +Status SubgraphExecutor::NodeScheduled(NodeState *node_state) { + GELOGD("Graph[%s] After [%s] scheduled, data size: %zu, ctrl size: %zu, switch index: %d, merge index: %d", + graph_item_->GetName().c_str(), node_state->GetName().c_str(), + node_state->GetNodeItem()->data_send_.size(), node_state->GetNodeItem()->ctrl_send_.size(), + node_state->GetSwitchIndex(), node_state->GetMergeIndex()); + + auto future = pre_run_pool_.commit([this, node_state]() -> Status { + RECORD_CALLBACK_EVENT(context_, node_state->GetName().c_str(), "[NodeScheduled] Start"); + std::function callback = [&](const NodeItem *node_item) { + const auto &node_name = node_item->node_name; + int group = (node_state->GetGroup() != -1) ? node_item->group : -1; + GELOGI("After [%s] scheduled, [%s] is ready for prepare.", node_state->GetName().c_str(), node_name.c_str()); + BlockingQueue &prepare_queue = GetPrepareQueue(group); + if (!prepare_queue.Push(node_item)) { + if (!context_->is_eos_) { + GELOGE(INTERNAL_ERROR, "[Check][State][%s] error occurs when push to queue.", graph_item_->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "[%s] error occurs when push to queue.", graph_item_->GetName().c_str()); + } + } + }; + + GE_CHK_STATUS_RET_NOLOG(node_state->NodeScheduled(callback)); + node_state->ResetSchedule(); + RECORD_CALLBACK_EVENT(context_, node_state->GetName().c_str(), "[NodeScheduled] End"); + return SUCCESS; + }); + + node_state->SetScheduleFuture(std::move(future)); + if (schedule_queue_.Push(node_state)) { + return SUCCESS; + } + + if (context_->is_eos_) { + GELOGD("[%s] Got end of sequence", graph_item_->GetName().c_str()); + return SUCCESS; + } + + GELOGE(INTERNAL_ERROR, "[Check][State][%s] error occurs when push to queue.", graph_item_->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "[%s] error occurs when push to queue.", graph_item_->GetName().c_str()); + return INTERNAL_ERROR; +} + +Status SubgraphExecutor::AfterPrepared(NodeState *node_state) { + if (!graph_item_->HasCtrlFlowOp()) { + return SUCCESS; + } + if (node_state->IsShapeDependence()) { + return SUCCESS; + } + + // Not control flow node, propagate state. + return NodeScheduled(node_state); +} + +void SubgraphExecutor::AfterExecuted(NodeState *node_state) { + if (!node_state->IsShapeDependence()) { + return; + } + + // For control flow node, propagate state. 
+ auto error = NodeScheduled(node_state); + if (error != SUCCESS) { + auto task_context = node_state->GetTaskContext(); + task_context->OnError(error); + } +} + +void SubgraphExecutor::OnNodeDone(NodeState *node_state) { + auto task_context = node_state->GetTaskContext(); + NodeDoneCallback cb(context_, task_context); + auto error = cb.OnNodeDone(); + if (error != SUCCESS) { + task_context->OnError(error); + } + + if (node_state->IsShapeDependence() && graph_item_->HasCtrlFlowOp()) { + AfterExecuted(node_state); + } +} + +Status SubgraphExecutor::InitCallback(NodeState *node_state, std::function &callback) { + auto task_context = node_state->GetTaskContext(); + GE_CHECK_NOTNULL(task_context); + if (task_context->NeedCallback()) { + callback = std::bind(&SubgraphExecutor::OnNodeDone, this, node_state); + } else if (node_state->IsShapeDependence() && graph_item_->HasCtrlFlowOp()) { + callback = std::bind(&SubgraphExecutor::AfterExecuted, this, node_state); + } + + return SUCCESS; +} + +Status SubgraphExecutor::ScheduleNodes() { + GELOGD("[%s] Start to schedule nodes.", graph_item_->GetName().c_str()); + while (true) { + NodeState *node_state = nullptr; + if (!schedule_queue_.Pop(node_state)) { if (context_->is_eos_) { - GELOGD("Got end of sequence"); + GELOGD("[%s] Got end of sequence.", graph_item_->GetName().c_str()); + break; + } + if (context_->GetStatus() != SUCCESS) { + GELOGD("[%s] Graph execution Got failed.", graph_item_->GetName().c_str()); return SUCCESS; } - GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. quit from preparing nodes.", - graph_item_->GetName().c_str()); - REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. quit from preparing nodes.", - graph_item_->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[%s] failed to pop node.", graph_item_->GetName().c_str()); return INTERNAL_ERROR; } - GELOGD("[%s] Push node [%s] to queue.", graph_item_->GetName().c_str(), node_item.NodeName().c_str()); + if (node_state == nullptr) { + GELOGD("[%s] Got EOF from queue.", graph_item_->GetName().c_str()); + break; + } + + GE_CHK_STATUS_RET_NOLOG(node_state->WaitForScheduleDone()); } - GELOGD("[%s] Done preparing nodes successfully.", graph_item_->GetName().c_str()); + GELOGD("[%s] Done schedule nodes successfully.", graph_item_->GetName().c_str()); return SUCCESS; } @@ -341,7 +528,10 @@ Status SubgraphExecutor::LaunchTasks() { auto shared_task_context = node_state->GetTaskContext(); GE_CHECK_NOTNULL(shared_task_context); shared_task_context->SetForceInferShape(force_infer_shape_); - HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), + + std::function callback; + GE_CHK_STATUS_RET_NOLOG(InitCallback(node_state, callback)); + HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_, callback), "[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str()); GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str()); } @@ -354,22 +544,38 @@ Status SubgraphExecutor::ScheduleTasks(int group) { GetContext().SetContextId(context_->context_id); auto ret = PrepareNodes(group); ready_queue_.Push(nullptr); + schedule_queue_.Push(nullptr); + for (auto &item : prepare_queues_) { + item.second.Push(nullptr); + } return ret; }); + auto schedule_future = std::async(std::launch::async, [&]() -> Status { + return ScheduleNodes(); + }); + GELOGD("[%s] Start to execute subgraph.", graph_item_->GetName().c_str()); auto ret = LaunchTasks(); if (ret 
!= SUCCESS) { subgraph_context_->OnError(ret); context_->SetErrorCode(ret); ready_queue_.Stop(); + schedule_queue_.Stop(); + for (auto &item : prepare_queues_) { + item.second.Stop(); + } prepare_future.wait(); + schedule_future.wait(); return ret; } GE_CHK_STATUS_RET(prepare_future.get(), "[Invoke][get] [%s] Error occurred in task preparation.", graph_item_->GetName().c_str()); + GE_CHK_STATUS_RET(schedule_future.get(), "[Invoke][get] [%s] Error occurred in task preparation.", + graph_item_->GetName().c_str()); + GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index b9bfceb4..758bf426 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -105,6 +105,18 @@ class SubgraphExecutor { Status PrepareNodes(int group = -1); Status LaunchTasks(); Status SetOutputsToParentNode(TaskContext &task_context); + Status InitCallback(NodeState *node_state, std::function &callback); + + Status NodeEnqueue(NodeState *node_state); + Status PrepareNode(const NodeItem &node_item, int group); + + BlockingQueue &GetPrepareQueue(int group); + + Status ScheduleNodes(); + Status NodeScheduled(NodeState *node_state); + Status AfterPrepared(NodeState *node_state); + void AfterExecuted(NodeState *node_state); + void OnNodeDone(NodeState *node_state); const GraphItem *graph_item_; GraphExecutionContext *context_; @@ -114,6 +126,10 @@ class SubgraphExecutor { BlockingQueue ready_queue_; std::unique_ptr shape_inference_engine_; std::shared_ptr known_shape_task_context_; + + std::mutex mu_; // Guard for prepare_queues_. + std::map> prepare_queues_; + BlockingQueue schedule_queue_; }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index d6bbc36d..32758f61 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -16,13 +16,9 @@ #include "hybrid/executor/worker/execution_engine.h" #include "graph/runtime_inference_context.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/tensor_adapter.h" -#include "graph/debug/ge_attr_define.h" #include "graph/load/model_manager/model_manager.h" #include "hybrid/node_executor/node_executor.h" #include "hybrid/executor//worker//shape_inference_engine.h" -#include "common/dump/dump_op.h" #include "common/profiling/profiling_manager.h" namespace ge { @@ -62,22 +58,6 @@ Status LogOutputs(const NodeItem &node_item, const TaskContext &task_context) { return SUCCESS; } } // namespace -class NodeDoneCallback { - public: - NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context); - ~NodeDoneCallback() = default; - Status OnNodeDone(); - private: - Status PrepareConstInputs(const NodeItem &node_item); - Status DumpDynamicNode(); - Status ProfilingReport(); - Status SaveDumpOpInfo(); - Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, - std::vector &task_desc_info); - GraphExecutionContext *graph_context_; - std::shared_ptr context_; - DumpOp dump_op_; -}; NodeDoneCallback::NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context) @@ -320,7 +300,7 @@ Status NodeDoneCallback::OnNodeDone() { GE_CHK_STATUS_RET(SaveDumpOpInfo(), "[Save][DumpOpInfo] Failed to dump op info."); } - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + if 
(ProfilingManager::Instance().ProfilingModelLoadOn()) { GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.", node_item.NodeName().c_str()); } @@ -334,8 +314,10 @@ Status NodeDoneCallback::OnNodeDone() { GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { // update output tensor sizes + const auto &guard = node_item.MutexGuard("OnNodeDone"); GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); GE_CHK_STATUS_RET_NOLOG(context_->GetNodeState()->GetShapeInferenceState().UpdateOutputDesc()); + (void)guard; } // PropagateOutputs for type == DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { @@ -361,31 +343,15 @@ Status NodeDoneCallback::OnNodeDone() { Status ExecutionEngine::ExecuteAsync(NodeState &node_state, const std::shared_ptr &task_context, - GraphExecutionContext &execution_context) { + GraphExecutionContext &execution_context, + const std::function &callback) { GELOGI("[%s] Node is ready for execution", task_context->GetNodeName()); RECORD_EXECUTION_EVENT(&execution_context, task_context->GetNodeName(), "Start"); - std::function callback = nullptr; - GE_CHK_STATUS_RET_NOLOG(InitCallback(task_context, execution_context, callback)); GE_CHK_STATUS_RET_NOLOG(DoExecuteAsync(node_state, *task_context, execution_context, callback)); GE_CHK_STATUS_RET_NOLOG(PropagateOutputs(*node_state.GetNodeItem(), *task_context, execution_context)); return SUCCESS; } -Status ExecutionEngine::InitCallback(const std::shared_ptr &task_context, - GraphExecutionContext &execution_context, std::function &callback) { - if (task_context->NeedCallback()) { - auto cb = std::shared_ptr(new(std::nothrow) NodeDoneCallback(&execution_context, task_context)); - GE_CHECK_NOTNULL(cb); - callback = [task_context, cb]() { - auto ret = cb->OnNodeDone(); - if (ret != SUCCESS) { - task_context->OnError(ret); - } - }; - } - return SUCCESS; -} - Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, TaskContext &task_context, GraphExecutionContext &context, @@ -423,7 +389,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, node_state.GetName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ValidateInputTensors] End"); - if (context.profiling_level > 0) { + if (GraphExecutionContext::profiling_level > 0) { auto *ctx = &context; const string &name = node_state.GetName(); (void)task_context.RegisterCallback([ctx, name]() { diff --git a/ge/hybrid/executor/worker/execution_engine.h b/ge/hybrid/executor/worker/execution_engine.h index c10ad729..24557bc6 100644 --- a/ge/hybrid/executor/worker/execution_engine.h +++ b/ge/hybrid/executor/worker/execution_engine.h @@ -19,14 +19,33 @@ #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/task_context.h" +#include "common/dump/dump_op.h" namespace ge { namespace hybrid { +class NodeDoneCallback { + public: + NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context); + ~NodeDoneCallback() = default; + Status OnNodeDone(); + private: + Status PrepareConstInputs(const NodeItem &node_item); + Status DumpDynamicNode(); + Status ProfilingReport(); + Status SaveDumpOpInfo(); + Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, + std::vector &task_desc_info); + GraphExecutionContext *graph_context_; + std::shared_ptr context_; + DumpOp dump_op_; +}; + class ExecutionEngine { public: static 
Status ExecuteAsync(NodeState &node_state, const std::shared_ptr &task_context, - GraphExecutionContext &execution_context); + GraphExecutionContext &execution_context, + const std::function &callback); private: static Status ValidateInputTensors(const NodeState &node_state, const TaskContext &task_context); @@ -35,8 +54,6 @@ class ExecutionEngine { TaskContext &task_context, GraphExecutionContext &context, const std::function &callback); - static Status InitCallback(const std::shared_ptr &task_context, - GraphExecutionContext &execution_context, std::function &callback); }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 33e8fce6..a2efbb25 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -45,11 +45,13 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { return SUCCESS; } + const auto &guard = node_item.MutexGuard("InferShape"); if (node_item.fused_subgraph != nullptr) { GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); return SUCCESS; } + (void)guard; // Skip shape inference for node of type DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { @@ -123,8 +125,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(NodeState &node_state) { node_item.shape_inference_type); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[PropagateOutputShapes] Start"); // propagate each output + const auto &guard = node_item.MutexGuard("PropagateOutputShapes"); for (int i = 0; i < node_item.num_outputs; ++i) { - auto output_desc = node_item.op_desc->MutableOutputDesc(i); + auto output_desc = node_item.MutableOutputDesc(i); auto &output_nodes = node_item.outputs[i]; // propagate output to all sub-inputs @@ -148,6 +151,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(NodeState &node_state) { } } } + (void)guard; RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[PropagateOutputShapes] End"); GELOGD("[%s] Propagating output shapes finished successfully.", node_item.NodeName().c_str()); return SUCCESS; diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index 0ad1c865..7368784c 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -86,7 +86,7 @@ class HybridDavinciModel::Impl { return model_.GetDeviceId(); } - const GraphExecutionContext * GeContext() { return executor_.GeContext(); } + const GraphExecutionContext *GeContext() { return executor_.GeContext(); } uint64_t GetSessionId() { return model_.GetSessionId(); @@ -126,6 +126,11 @@ class HybridDavinciModel::Impl { return SUCCESS; } + Status GetOpAttr(const std::string &op_name, const std::string &attr_name, + std::string &attr_value) { + return model_.GetOpAttr(op_name, attr_name, attr_value); + } + private: std::shared_ptr listener_; HybridModel model_; @@ -273,5 +278,11 @@ bool HybridDavinciModel::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpD } return ret; } + +Status HybridDavinciModel::GetOpAttr(const std::string &op_name, const std::string &attr_name, + std::string &attr_value) const { + GE_CHECK_NOTNULL(impl_); + return impl_->GetOpAttr(op_name, attr_name, attr_value); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h 
index 472fff17..34503b01 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -84,6 +84,8 @@ class HybridDavinciModel { bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; + Status GetOpAttr(const std::string &op_name, const std::string &attr_name, std::string &attr_value) const; + private: HybridDavinciModel() = default; class Impl; diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc index 2d4fbe03..67cd29b8 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -107,5 +107,9 @@ Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &c bool HybridDavinciModel::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { return true; } +Status HybridDavinciModel::GetOpAttr(const std::string &op_name, const std::string &attr_name, + std::string &attr_value) const { + return UNSUPPORTED; +} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/model/graph_item.cc b/ge/hybrid/model/graph_item.cc index 09e0a117..c38e0a0d 100644 --- a/ge/hybrid/model/graph_item.cc +++ b/ge/hybrid/model/graph_item.cc @@ -43,6 +43,27 @@ const vector &GraphItem::GetAllNodes(int group) const { return grouped_node_items_[group]; } +const vector &GraphItem::GetRootNodes(int group) const { + if (group == -1) { + return root_items_; + } + + if (static_cast(group) >= grouped_root_items_.size()) { + static vector empty_nodes; + return empty_nodes; + } + + return grouped_root_items_[group]; +} + +size_t GraphItem::GetNodeSize(int group) const { + if (group == -1) { + return node_items_.size(); + } + + return (static_cast(group) < grouped_node_items_.size()) ? 
grouped_node_items_[group].size() : 0; +} + const vector &GraphItem::GetInputNodes() const { return input_nodes_; } @@ -88,10 +109,12 @@ const vector> &GraphItem::GetOutputEdges() cons return output_edges_; } -Status GraphItem::GroupNodes() { +Status GraphItem::GroupNodes(const std::vector &node_items, + std::vector> &grouped_node_items) const { + int curr_group = 0; int last_group = INT32_MIN; std::set seen_groups; - for (auto node : node_items_) { + for (auto node : node_items) { int group = node->group; if (group != last_group) { if (seen_groups.find(group) != seen_groups.end()) { @@ -101,15 +124,23 @@ Status GraphItem::GroupNodes() { } else { last_group = group; seen_groups.insert(group); - grouped_node_items_.emplace_back(std::vector()); + curr_group = static_cast(grouped_node_items.size()); + grouped_node_items.emplace_back(std::vector()); } } - GELOGD("Adding node [%s] to group %d", node->NodeName().c_str(), group); - grouped_node_items_.back().emplace_back(node); + node->group = curr_group; + GELOGD("Adding node [%s] to group %d", node->NodeName().c_str(), node->group); + grouped_node_items.back().emplace_back(node); } return SUCCESS; } + +Status GraphItem::GroupNodes() { + GE_CHK_STATUS_RET_NOLOG(GroupNodes(node_items_, grouped_node_items_)); + GE_CHK_STATUS_RET_NOLOG(GroupNodes(root_items_, grouped_root_items_)); + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/graph_item.h b/ge/hybrid/model/graph_item.h index f034f385..e387f3da 100644 --- a/ge/hybrid/model/graph_item.h +++ b/ge/hybrid/model/graph_item.h @@ -29,6 +29,7 @@ class GraphItem { Status GroupNodes(); const vector &GetAllNodes() const; const vector &GetAllNodes(int group) const; + const vector &GetRootNodes(int group) const; const vector &GetInputNodes() const; Status GetOutputDescList(std::vector &output_desc_list) const; const vector> &GetOutputEdges() const; @@ -40,6 +41,12 @@ class GraphItem { return total_outputs_; } + size_t GetNodeSize(int group) const; + + bool HasCtrlFlowOp() const { + return has_ctrl_flow_op_; + } + const std::string& GetName() const { return name_; } @@ -60,9 +67,14 @@ class GraphItem { private: friend class HybridModelBuilder; + Status GroupNodes(const std::vector &node_items, + std::vector> &grouped_node_items) const; + std::string name_; std::vector node_items_; std::vector> grouped_node_items_; + std::vector root_items_; + std::vector> grouped_root_items_; std::vector input_nodes_; const NodeItem *output_node_ = nullptr; // @@ -71,6 +83,7 @@ class GraphItem { int total_outputs_ = 0; bool is_dynamic_ = true; + bool has_ctrl_flow_op_ = false; std::vector input_index_mapping_; std::vector output_index_mapping_; }; diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index a669c06f..5e496c3b 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -48,6 +48,7 @@ Status HybridModel::Init(bool is_single_op) { } else { GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "[Build][HybridModel] failed."); } + SaveSpecifyAttrValues(); GELOGD("HybridModel initialized successfully."); return SUCCESS; } @@ -120,6 +121,10 @@ const GraphItem *HybridModel::GetRootGraphItem() const { return root_graph_item_.get(); } +const ComputeGraphPtr &HybridModel::GetRootGraph() const { + return root_graph_; +} + const GraphItem *HybridModel::GetSubgraphItem(const std::string &graph_name) const { GELOGD("To find subgraph item by name = %s", graph_name.c_str()); auto it = subgraph_items_.find(graph_name); @@ -409,5 +414,45 
@@ TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const { } return it->second.get(); } + +// save specify attr values of op, such as ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES +// it will save more attr values in the future +void HybridModel::SaveSpecifyAttrValues() { + for (const auto &node : root_graph_->GetAllNodes()) { + if (node == nullptr) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + std::vector value; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, value)) { + std::map> attr_name_to_value; + attr_name_to_value[ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES] = value; + op_name_to_attrs_[op_desc->GetName()] = attr_name_to_value; + GELOGD("Get op:%s attr:%s success.", op_desc->GetName().c_str(), ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES.c_str()); + } + } + return; +} +Status HybridModel::GetOpAttr(const std::string &op_name, const std::string &attr_name, + std::string &attr_value) const { + auto itr = op_name_to_attrs_.find(op_name); + if (itr == op_name_to_attrs_.end()) { + GELOGW("Did not save op:%s attr", op_name.c_str()); + return SUCCESS; + } + auto attr_itr = itr->second.find(attr_name); + if (attr_itr == itr->second.end()) { + GELOGW("Did not save attr:%s of op:%s", attr_name.c_str(), op_name.c_str()); + return SUCCESS; + } + for (const auto &name : attr_itr->second) { + attr_value += "[" + std::to_string(name.size()) + "]" + name; + } + GELOGD("Get attr:%s of op:%s success, attr value:%s", attr_name.c_str(), op_name.c_str(), attr_value.c_str()); + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 18daed4f..9821242a 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -101,6 +101,8 @@ class HybridModel { const GraphItem *GetRootGraphItem() const; + const ComputeGraphPtr &GetRootGraph() const; + const GraphItem *GetSubgraphItem(const std::string &graph_name) const; const GraphItem *GetSubgraphItem(const ComputeGraphPtr &subgraph) const; @@ -131,6 +133,9 @@ class HybridModel { void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, InputOutputDescInfo &input); + void SaveSpecifyAttrValues(); + + Status GetOpAttr(const std::string &op_name, const std::string &attr_name, std::string &attr_value) const; private: friend class HybridModelBuilder; @@ -165,6 +170,8 @@ class HybridModel { RuntimeParam root_runtime_param_; string om_name_; std::unique_ptr global_step_; + // op name to attrs mapping + std::map>> op_name_to_attrs_; }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index a047a05b..86530fe0 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -17,6 +17,7 @@ #include "hybrid/model/hybrid_model_builder.h" #include #include "common/math/math_util.h" +#include "common/op/ge_op_utils.h" #include "graph/ge_context.h" #include "graph/build/memory/var_mem_assign_util.h" #include "graph/debug/ge_attr_define.h" @@ -25,8 +26,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include 
"hybrid/node_executor/node_executor.h" @@ -42,6 +42,11 @@ const uint64_t kProfilingFpStartLogid = 1U; const uint64_t kProfilingBpEndLogid = 2U; const uint64_t kProfilingIterEndLogid = 65535U; const int kBytes = 8; +const int kDecimal = 10; +const uint8_t kStreamActiveIdx = 0; +const uint8_t kStreamActiveNum = 1; +const uint8_t kStreamSwitchIdx = 1; +const uint8_t kStreamSwitchNum = 2; const uint32_t kStringHeadElems = 2; const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; const char *const kProfilingGraph = "ProfilingGraph"; @@ -213,6 +218,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite "[Invoke][GetCanonicalInputIndex] failed, dst_node:[%s].", dst_node->GetName().c_str()); node_item.outputs[i].emplace_back(canonical_index, dst_node_item); + node_item.SetDataSend(dst_node_item, dst_in_anchor->GetIdx()); } } @@ -253,6 +259,10 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n return SUCCESS; } + if (node->GetType() == MEMCPYASYNC) { // Convert MemcpyAsync to Identity. + node->GetOpDesc()->SetType(IDENTITY); + } + std::unique_ptr new_node; GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "[Invoke][Create] failed, model_name_:[%s]", GetGraphName()); GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor)); @@ -284,6 +294,46 @@ Status HybridModelBuilder::ParseForceInfershapeNodes(const NodePtr &node, NodeIt return SUCCESS; } +Status HybridModelBuilder::ParseDependencies(NodeItem &node_item, const std::vector &dependencies, + std::set &dependent_for_shape_inference) { + for (const auto &input_name : dependencies) { + int input_index = node_item.op_desc->GetInputIndexByName(input_name); + if (input_index < 0) { + GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.", + node_item.NodeName().c_str(), input_name.c_str()); + REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed, node:[%s] inputname: %s.", + node_item.NodeName().c_str(), input_name.c_str()); + return INTERNAL_ERROR; + } + + const auto &in_anchor = node_item.node->GetInDataAnchor(input_index); + GE_CHECK_NOTNULL(in_anchor); + const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_anchor); + const auto &src_node = peer_out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(src_node); + auto src_node_item = MutableNodeItem(src_node); + GE_CHECK_NOTNULL(src_node_item); + if (src_node_item->NodeType() == DATA) { + auto op_desc = src_node_item->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto tensor = op_desc->MutableInputDesc(0); + if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) { + GELOGD("Skip d2h memcpy, get hostmem from node %s.", src_node_item->NodeName().c_str()); + continue; + } + } + src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); + dependent_for_shape_inference.emplace(src_node); + host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item); + GELOGD("[%s] Dependent added from output of [%s:%d]", + node_item.NodeName().c_str(), + src_node_item->NodeName().c_str(), + peer_out_anchor->GetIdx()); + } + return SUCCESS; +} + Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies) { std::set dependent_for_shape_inference; std::set dependent_for_execution; @@ -300,8 +350,9 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s } auto src_node = peer_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(src_node); - auto 
src_node_item = MutableNodeItem(src_node); - GE_CHECK_NOTNULL(src_node_item); + NodeItem *src_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(src_node, &src_node_item), + "[%s] failed to get or create node item", src_node->GetName().c_str()); if (src_node_item->shape_inference_type == DEPEND_COMPUTE || is_hccl_op || src_node_item->IsHcclOp()) { GELOGD("[%s](%s) Add input data dependent node [%s](%s), shape inference type = %d", @@ -323,15 +374,17 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s } } - for (const auto &src_node : ge_node->GetInControlNodes()) { - auto src_node_item = MutableNodeItem(src_node); - if ((src_node_item != nullptr) && (is_hccl_op || src_node_item->IsHcclOp())) { - GELOGD("[%s](%s) Add input control dependent node [%s](%s)", - ge_node->GetName().c_str(), - ge_node->GetType().c_str(), - src_node->GetName().c_str(), - src_node->GetType().c_str()); - dependent_for_execution.emplace(src_node); + if (node_item.node_type == NETOUTPUT) { + for (const auto &src_node : ge_node->GetInControlNodes()) { + auto src_node_item = MutableNodeItem(src_node); + if ((src_node_item != nullptr) && src_node_item->IsHcclOp()) { + GELOGD("[%s](%s) Add input control dependent node [%s](%s)", + ge_node->GetName().c_str(), + ge_node->GetType().c_str(), + src_node->GetName().c_str(), + src_node->GetType().c_str()); + dependent_for_execution.emplace(src_node); + } } } @@ -347,31 +400,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s src_node_item->NodeName().c_str()); } - for (const auto &input_name : dependencies) { - int input_index = node_item.op_desc->GetInputIndexByName(input_name); - if (input_index < 0) { - GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.", - node_item.NodeName().c_str(), input_name.c_str()); - REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed, node:[%s] inputname: %s.", - node_item.NodeName().c_str(), input_name.c_str()); - return INTERNAL_ERROR; - } - - const auto &in_anchor = ge_node->GetInDataAnchor(input_index); - GE_CHECK_NOTNULL(in_anchor); - const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_anchor); - const auto &src_node = peer_out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(src_node); - auto src_node_item = MutableNodeItem(src_node); - src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); - dependent_for_shape_inference.emplace(src_node); - host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item); - GELOGD("[%s] Dependent added from output of [%s:%d]", - node_item.NodeName().c_str(), - src_node_item->NodeName().c_str(), - peer_out_anchor->GetIdx()); - } + GE_CHK_STATUS_RET(ParseDependencies(node_item, dependencies, dependent_for_shape_inference)); GE_CHK_STATUS_RET(ParseDependentForFusedSubgraph(node_item, dependent_for_shape_inference)); for (const auto &dep_node : dependent_for_shape_inference) { @@ -794,6 +823,7 @@ Status HybridModelBuilder::LoadGraph() { } hybrid_model_.root_graph_ = root_graph; + GE_CHK_STATUS_RET(RelinkNextIteration(), "[%s] Relink NextIteration failed", GetGraphName()); // Reset node id by topological order across all subgraphs int64_t index = 0; for (const auto &node : root_graph->GetAllNodes()) { @@ -831,22 +861,24 @@ Status HybridModelBuilder::LoadGraph() { "[Invoke][LoadDynamicSubgraph]Failed to load subgraph: [%s]", sub_graph->GetName().c_str()); } else { - GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item), - 
"[Invoke][IdentifyVariableOutputs][%s] Failed to identify ref outputs.", - parent_node_item->NodeName().c_str()); - GE_CHK_STATUS_RET(IdentifySameInputs(*parent_node_item), - "[Invoke][IdentifySameInputs][%s] Failed to identify same outputs.", - parent_node_item->NodeName().c_str()); - // if parent is function control op. need add a virtual partitioned call - if (parent_node_item->IsControlOp()) { + if (parent_node_item->IsControlFlowV2Op()) { GE_CHK_STATUS_RET(LoadKnownShapedSubgraph(*sub_graph, parent_node_item), "[Invoke][LoadKnownShapedSubgraph]Failed to load function control op subgraph [%s]", sub_graph->GetName().c_str()); } } } - + for (auto &it : hybrid_model_.known_shape_sub_models_) { + auto node_item = MutableNodeItem(it.first); + AscendString graph_name; + GE_CHK_GRAPH_STATUS_RET(it.second->GetGraph().GetName(graph_name), "Failed to get subgraph name"); + auto subgraph = hybrid_model_.GetRootGraph()->GetSubgraph(graph_name.GetString()); + GE_CHECK_NOTNULL(subgraph); + GE_CHK_STATUS_RET(IdentifyVariableOutputs(*node_item, subgraph), + "[Invoke][IdentifyVariableOutputs][%s] Failed to identify ref outputs.", + node_item->NodeName().c_str()); + } GE_CHK_STATUS_RET(ParseDependentByParallelGroup(), "[Invoke][ParseDependentByParallelGroup]Failed to establish dependencies for hccl ops," "model_name_:%s.", GetGraphName()); @@ -1169,7 +1201,7 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr auto parent_node = sub_graph.GetParentNode(); GE_CHECK_NOTNULL(parent_node); auto op_type = parent_node->GetType(); - if (IsControlOp(op_type)) { + if (IsControlFlowV2Op(op_type)) { GELOGD("Set ge_model for control op subgraph: [%s], task_size = %d", sub_graph.GetName().c_str(), ge_model->GetModelTaskDefPtr()->task_size()); @@ -1325,6 +1357,10 @@ Status HybridModelBuilder::IndexSpecialNodes() { } } else if (op_type == CONSTANTOP) { constant_op_nodes_.emplace(node->GetName(), node); + } else if (op_type == STREAMMERGE) { + stream_merge_op_nodes_.emplace(node->GetName(), node); + } else if (op_type == NEXTITERATION || op_type == REFNEXTITERATION) { + next_iteration_op_nodes_.emplace(node->GetName(), node); } else if (op_type == DATA && node->GetOwnerComputeGraph() != root_graph) { NodePtr src_node; int peer_out_index = -1; @@ -1478,50 +1514,8 @@ Status HybridModelBuilder::InitRuntimeParams() { return SUCCESS; } -Status HybridModelBuilder::IdentifySameInputs(NodeItem &node_item) { - GELOGD("Start to parse same inputs on net output: %s", node_item.NodeName().c_str()); - auto subgraph = NodeUtils::GetSubgraph(*node_item.node, kSubgraphIndex); - GE_CHECK_NOTNULL(subgraph); - auto net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); - if (net_output_node == nullptr) { - GELOGD("Subgraph [%s] does not have net output", subgraph->GetName().c_str()); - return SUCCESS; - } - - auto net_output_desc = net_output_node->GetOpDesc(); - GE_CHECK_NOTNULL(net_output_desc); - - std::map connected_inputs; - for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { - auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - if (out_data_anchor == nullptr) { - continue; - } - auto src_node = out_data_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(src_node); - auto op_desc = src_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - std::string input_key = std::to_string(op_desc->GetId()) + "_" + std::to_string(out_data_anchor->GetIdx()); - auto it = connected_inputs.find(input_key); - if (it == connected_inputs.end()) { - connected_inputs.emplace(input_key, 
in_data_anchor->GetIdx()); - } else { - GELOGD("[%s] output [%d] reuse output [%d] input node = %s, idx = %d.", node_item.NodeName().c_str(), - in_data_anchor->GetIdx(), - it->second, - src_node->GetName().c_str(), - out_data_anchor->GetIdx()); - node_item.reuse_outputs.emplace(in_data_anchor->GetIdx(), it->second); - } - } - return SUCCESS; -} - -Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { +Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item, const ComputeGraphPtr &subgraph) { GELOGD("Start to parse outputs of node: %s", node_item.NodeName().c_str()); - auto subgraph = NodeUtils::GetSubgraph(*node_item.node, kSubgraphIndex); - GE_CHECK_NOTNULL(subgraph); auto net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); if (net_output_node == nullptr) { GELOGD("[%s] Subgraph do not got net output", subgraph->GetName().c_str()); @@ -1530,36 +1524,13 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { auto net_output_desc = net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); - // constant/variable connected to net output + // constants connected to net output for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { auto src_node = GetPeerNode(in_data_anchor); GE_CHECK_NOTNULL(src_node); auto src_op_type = src_node->GetType(); - GELOGD("Node %s, output %d, src node = %s, src node type = %s", - node_item.NodeName().c_str(), - in_data_anchor->GetIdx(), - src_node->GetName().c_str(), - src_op_type.c_str()); - uint32_t parent_index = 0; - if (GetParentNodeOutputIndex(*net_output_desc, in_data_anchor->GetIdx(), parent_index) != SUCCESS) { - continue; - } - GELOGD("Got parent output index = %u", parent_index); - if (src_op_type == DATA) { - int ref_i = 0; - (void)AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, ref_i); - node_item.reuse_inputs.emplace(static_cast(parent_index), ref_i); - GELOGD("[%s] output[%u] resues input[%d]", node_item.NodeName().c_str(), parent_index, ref_i); - } - - if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) { - continue; - } - - GE_CHECK_LE(parent_index, INT32_MAX); - node_item.ref_outputs.emplace(static_cast(parent_index), src_node); if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) { - known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node); + known_subgraph_constant_output_refs_[&node_item].emplace(in_data_anchor->GetIdx(), src_node); } } @@ -1825,7 +1796,7 @@ Status HybridModelBuilder::GenerateEndProfilingTask(const OpDescPtr &op_desc, ve return SUCCESS; } -Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node) { +Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node, uint32_t &prev_num) { GE_CHECK_NOTNULL(node); const OpDescPtr &op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -1871,7 +1842,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons if (!node_task_map.empty()) { for (const auto &node_task : node_task_map) { NodePtr profiling_node = node_task.first; - vector task_def_lists = node_task.second; + const vector &task_def_lists = node_task.second; for (const auto &task_def : task_def_lists) { hybrid_model_.task_defs_[profiling_node].emplace_back(task_def); } @@ -1886,6 +1857,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons node_item->input_start = 0; node_item->output_start = 0; 
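  // Editorial note (annotation, not part of the original change): prev_num counts the
  // profiling NodeItems inserted ahead of the profiled node; together with post_num from
  // CreateProfilingNodeAfter() it is recorded in LoadDynamicSubgraph()'s profiling_nodes map
  // and later consumed by BuildProfilingControl() to wire control edges around the node.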
graph_item.node_items_.emplace_back(node_item); + ++prev_num; } } else { GELOGD("No need to create profiling node before."); @@ -1894,7 +1866,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons return SUCCESS; } -Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node) { +Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node, uint32_t &post_num) { GE_CHECK_NOTNULL(node); const OpDescPtr &op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -1952,7 +1924,7 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const if (!node_task_map.empty()) { for (const auto &node_task : node_task_map) { NodePtr profiling_node = node_task.first; - vector task_def_lists = node_task.second; + const vector &task_def_lists = node_task.second; for (const auto &task_def : task_def_lists) { hybrid_model_.task_defs_[profiling_node].emplace_back(task_def); } @@ -1967,6 +1939,7 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const node_item->input_start = 0; node_item->output_start = 0; graph_item.node_items_.emplace_back(node_item); + ++post_num; } } else { GELOGD("No need to create profiling node after."); @@ -1986,20 +1959,23 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root int input_start = 0; int output_start = 0; std::vector data_nodes; + std::map> profiling_nodes; for (auto &node : graph.GetDirectNode()) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); const auto &op_type = node->GetType(); - if (op_type == NOOP) { - GELOGD("[%s] Skip NoOp", node->GetName().c_str()); - continue; - } NodeItem *node_item = nullptr; GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node, &node_item)); GE_CHK_STATUS_RET_NOLOG(BuildNodeItem(node, *node_item)); GE_CHK_STATUS_RET_NOLOG(UpdateAnchorStatus(node)); // needed by FE generate task + GE_CHK_STATUS_RET_NOLOG(BuildControlFlowGroup(*graph_item, node, node_item)); + if (node->GetInAllNodes().empty()) { + graph_item->root_items_.emplace_back(node_item); + GELOGD("[%s] add to root node list", node->GetName().c_str()); + } + node_item->input_start = input_start; node_item->output_start = output_start; input_start += node_item->num_inputs; @@ -2011,9 +1987,16 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root graph_item->output_node_ = node_item; GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph)); } - GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node)); + + uint32_t prev_num = 0; + uint32_t post_num = 0; + GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node, prev_num)); + size_t node_index = graph_item->node_items_.size(); graph_item->node_items_.emplace_back(node_item); - GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node)); + GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node, post_num)); + if (prev_num > 0 || post_num > 0) { + profiling_nodes[node_index] = { prev_num, post_num }; + } // parse var outputs GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item)); GELOGD("NodeItem created: %s", node_item->DebugString().c_str()); @@ -2022,6 +2005,7 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root graph_item->total_inputs_ = input_start; graph_item->total_outputs_ = output_start; GE_CHK_STATUS_RET_NOLOG(BuildInputMapping(*graph_item, data_nodes, is_root_graph)); + 
GE_CHK_STATUS_RET_NOLOG(BuildProfilingControl(*graph_item, profiling_nodes)); if (is_root_graph) { graph_item->SetName("Root-Graph"); GELOGD("Done loading dynamic subgraph: [%s]", graph_item->GetName().c_str()); @@ -2271,5 +2255,299 @@ Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor)); return SUCCESS; } + +Status HybridModelBuilder::RelinkNextIteration() { + for (const auto &item : stream_merge_op_nodes_) { + const auto &merge = item.second; + std::string node_name; + if (!AttrUtils::GetStr(merge->GetOpDesc(), ATTR_NAME_NEXT_ITERATION, node_name)) { + GELOGD("[%s] no attribute[%s], not in while loop", merge->GetName().c_str(), ATTR_NAME_NEXT_ITERATION.c_str()); + continue; + } + + const auto it = next_iteration_op_nodes_.find(node_name); + if (it == next_iteration_op_nodes_.end()) { + GELOGE(INTERNAL_ERROR, "[%s] expect NextIteration[%s] not found", merge->GetName().c_str(), node_name.c_str()); + return INTERNAL_ERROR; + } + + const auto &iteration = it->second; + if (GraphUtils::AddEdge(iteration->GetOutDataAnchor(0), merge->GetInDataAnchor(1)) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "[%s] -> [%s] Add edge failed", node_name.c_str(), merge->GetName().c_str()); + return INTERNAL_ERROR; + } + } + + stream_merge_op_nodes_.clear(); + next_iteration_op_nodes_.clear(); + return SUCCESS; +} + +Status HybridModelBuilder::BuildProfilingControl(GraphItem &graph_item, + const std::map> &nodes) { + const auto node_size = graph_item.node_items_.size(); + for (const auto &item : nodes) { + const auto node_index = item.first; + GE_CHK_BOOL_RET_STATUS(node_index < node_size, FAILED, "node index invalid"); + const auto &node_item = graph_item.node_items_[node_index]; + if (item.second.first > 0) { + const auto prev_num = item.second.first; + if (node_index == prev_num) { + // Profiling Before root node. + for (uint32_t i = 1; i <= prev_num; ++i) { + GE_CHK_BOOL_RET_STATUS(node_index - i < node_size, FAILED, "prev index invalid"); + const auto &curr_item = graph_item.node_items_[node_index - i]; + graph_item.root_items_.emplace(graph_item.root_items_.begin(), curr_item); + } + } else { + GE_CHK_BOOL_RET_STATUS((node_index - prev_num) - 1 < node_size, FAILED, "prev index invalid"); + const auto &prev_item = graph_item.node_items_[(node_index - prev_num) - 1]; + for (uint32_t i = 1; i <= prev_num; ++i) { + GE_CHK_BOOL_RET_STATUS(node_index - i < node_size, FAILED, "prev index invalid"); + const auto &curr_item = graph_item.node_items_[node_index - i]; + prev_item->SetCtrlSend(curr_item, UINT32_MAX); + curr_item->SetCtrlSend(node_item, UINT32_MAX); + } + } + } + + if (item.second.second > 0) { + const auto post_num = item.second.second; + if (node_size == node_index + post_num + 1) { + // Profiling After last node. 
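  // Editorial note (annotation, not part of the original change): when the profiling items are
  // the tail of node_items_ there is no following node to chain into, so each of them is simply
  // made a control successor of the profiled node; otherwise (else branch below) they are also
  // chained into the next node via SetCtrlSend(post_item, UINT32_MAX).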
+ for (uint32_t i = 1; i <= post_num; ++i) { + GE_CHK_BOOL_RET_STATUS(node_index + i < node_size, FAILED, "post index invalid"); + const auto &curr_item = graph_item.node_items_[node_index + i]; + node_item->SetCtrlSend(curr_item, UINT32_MAX); + } + } else { + GE_CHK_BOOL_RET_STATUS((node_index + post_num) + 1 < node_size, FAILED, "post index invalid"); + const auto &post_item = graph_item.node_items_[(node_index + post_num) + 1]; + for (uint32_t i = 1; i <= post_num; ++i) { + GE_CHK_BOOL_RET_STATUS(node_index + i < node_size, FAILED, "post index invalid"); + const auto &curr_item = graph_item.node_items_[node_index + i]; + node_item->SetCtrlSend(curr_item, UINT32_MAX); + curr_item->SetCtrlSend(post_item, UINT32_MAX); + } + } + } + } + return SUCCESS; +} + +Status HybridModelBuilder::BuildControlFlowGroup(GraphItem &graph_item, const NodePtr &node, NodeItem *node_item) { + GELOGD("Build control flow for node %s", node->GetName().c_str()); + using GroupBuilder = std::function; + static const std::map control_flow{ + { STREAMACTIVE, &HybridModelBuilder::CreateStreamActiveGroup }, + { STREAMSWITCH, &HybridModelBuilder::CreateStreamSwitchGroup }, + { STREAMSWITCHN, &HybridModelBuilder::CreateStreamSwitchNGroup }, + { NEXTITERATION, &HybridModelBuilder::CreateNextIterationGroup }, + { REFNEXTITERATION, &HybridModelBuilder::CreateNextIterationGroup }, + { SWITCH, &HybridModelBuilder::CreateSwitchGroup }, + { REFSWITCH, &HybridModelBuilder::CreateSwitchGroup }, + { LABELSET, &HybridModelBuilder::CreateLabelSetGroup }, + { LABELGOTO, &HybridModelBuilder::CreateLabelGotoGroup }, + { LABELGOTOEX, &HybridModelBuilder::CreateLabelGotoGroup }, + { LABELSWITCH, &HybridModelBuilder::CreateLabelSwitchGroup }, + { LABELSWITCHBYINDEX, &HybridModelBuilder::CreateLabelSwitchGroup } + }; + + Status ret = SUCCESS; + auto it = control_flow.find(node_item->node_type); + if (it == control_flow.end()) { + ret = CreateNormalNodeGroup(node, node_item); + } else { + graph_item.has_ctrl_flow_op_ = true; + ret = it->second(this, node, node_item); + } + GELOGD("Node: %s, control by: %zu, control for: %zu, switch group: %zu", node->GetName().c_str(), + node_item->ctrl_recv_.size(), node_item->ctrl_send_.size(), node_item->switch_groups_.size()); + return ret; +} + +Status HybridModelBuilder::CreateNormalNodeGroup(const NodePtr &node, NodeItem *node_item) { + const auto out_ctrl_anchor = node->GetOutControlAnchor(); + for (const auto &peer_in_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + const auto &dst_node = peer_in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(dst_node); + + NodeItem *dst_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] failed to get or create node item", dst_node->GetName().c_str()); + node_item->SetCtrlSend(dst_node_item, UINT32_MAX); + } + return SUCCESS; +} + +Status HybridModelBuilder::CreateStreamActiveGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != STREAMACTIVE) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node_item->node_type.c_str()); + return INTERNAL_ERROR; + } + + node_item->switch_groups_.resize(kStreamActiveNum); + const auto &out_ctrl_anchor = node->GetOutControlAnchor(); + for (const auto &peer_in_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + const auto &dst_node = peer_in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(dst_node); + if (dst_node->GetType() == STREAMMERGE) { + GELOGI("[%s] skip control node: %s", node->GetName().c_str(), dst_node->GetName().c_str()); + continue; + } + + 
NodeItem *dst_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] failed to get or create node item", dst_node->GetName().c_str()); + node_item->SetCtrlSend(dst_node_item, kStreamActiveIdx); + } + return SUCCESS; +} + +Status HybridModelBuilder::CreateStreamSwitchGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != STREAMSWITCH) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node_item->node_type.c_str()); + return INTERNAL_ERROR; + } + + // Consider as two groups, group[0] set empty for false, group[1] for true. + node_item->switch_groups_.resize(kStreamSwitchNum); + const auto &out_ctrl_anchor = node->GetOutControlAnchor(); + for (const auto &peer_in_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + const auto &dst_node = peer_in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(dst_node); + + NodeItem *dst_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] failed to get or create node item", dst_node->GetName().c_str()); + node_item->SetCtrlSend(dst_node_item, kStreamSwitchIdx); + } + return SUCCESS; +} + +Status HybridModelBuilder::CreateStreamSwitchNGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != STREAMSWITCHN) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + uint32_t batch_num = 0; + if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_BATCH_NUM, batch_num)) { + GELOGE(INTERNAL_ERROR, "[%s] Get ATTR_NAME_BATCH_NUM failed", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (batch_num == 0) { + GELOGW("[%s] Got empty branch for SwitchN, Please check.", node->GetName().c_str()); + return SUCCESS; + } + + node_item->switch_groups_.resize(batch_num); + const auto &out_ctrl_anchor = node->GetOutControlAnchor(); + for (const auto &peer_in_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + const auto &dst_node = peer_in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(dst_node); + + std::string batch_label; + if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { + GELOGE(INTERNAL_ERROR, "[%s] Get ATTR_NAME_BATCH_LABEL failed", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + std::string::size_type pos = batch_label.rfind("_"); + if (pos == std::string::npos) { + GELOGW("[%s] Separator not found in batch label: %s.", node->GetName().c_str(), batch_label.c_str()); + continue; + } + + ++pos; // Skip Separator + uint64_t batch_index = std::strtoul(batch_label.data() + pos, nullptr, kDecimal); + if (batch_index >= batch_num) { + GELOGW("batch label: %s, batch index: %lu great than batch num: %u", batch_label.c_str(), batch_index, batch_num); + continue; + } + + NodeItem *dst_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] failed to get or create node item", dst_node->GetName().c_str()); + node_item->SetCtrlSend(dst_node_item, batch_index); + } + + return SUCCESS; +} + +Status HybridModelBuilder::CreateNextIterationGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != NEXTITERATION && node_item->node_type != REFNEXTITERATION) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + return SUCCESS; +} + +Status HybridModelBuilder::CreateSwitchGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != SWITCH && node_item->node_type != REFSWITCH) { + GELOGE(INTERNAL_ERROR, "Called by %s is 
invalid", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + const auto &out_ctrl_anchor = node->GetOutControlAnchor(); + for (const auto &peer_in_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + const auto &dst_node = peer_in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(dst_node); + + NodeItem *dst_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] failed to get or create node item", dst_node->GetName().c_str()); + node_item->SetCtrlSend(dst_node_item, UINT32_MAX); + } + + // Group switch flow by out put data. + node_item->switch_groups_.resize(SWITCH_OUTPUT_NUM); + for (uint32_t i = 0; i < SWITCH_OUTPUT_NUM; ++i) { + const auto &out_anchor = node->GetOutDataAnchor(i); + for (const auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { + const auto &dst_node = peer_in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(dst_node); + + NodeItem *dst_node_item = nullptr; + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] failed to get or create node item", dst_node->GetName().c_str()); + node_item->SetCtrlSend(dst_node_item, i); // take switch data as ctrl. + } + } + + return SUCCESS; +} + +Status HybridModelBuilder::CreateLabelSetGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != LABELSET) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGE(UNSUPPORTED, "[%s] Not implemented.", node->GetName().c_str()); + return UNSUPPORTED; +} + +Status HybridModelBuilder::CreateLabelGotoGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != LABELGOTO && node_item->node_type != LABELGOTOEX) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGE(UNSUPPORTED, "[%s] Not implemented.", node->GetName().c_str()); + return UNSUPPORTED; +} + +Status HybridModelBuilder::CreateLabelSwitchGroup(const NodePtr &node, NodeItem *node_item) { + if (node_item->node_type != LABELSWITCH && node_item->node_type != LABELSWITCHBYINDEX) { + GELOGE(INTERNAL_ERROR, "Called by %s is invalid", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGE(UNSUPPORTED, "[%s] Not implemented.", node->GetName().c_str()); + return UNSUPPORTED; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index 3e467dc8..ad288317 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -59,13 +59,14 @@ class HybridModelBuilder { Status LoadGeModel(ComputeGraph &graph, const GeModelPtr &ge_model); Status LoadTask(NodeItem &node_item); Status LoadTasks(); - Status IdentifyVariableOutputs(NodeItem &node_item); - Status IdentifySameInputs(NodeItem &node_item); + Status IdentifyVariableOutputs(NodeItem &node_item, const ComputeGraphPtr &subgraph); Status BuildNodeItem(const NodePtr &node, NodeItem &node_item); Status GetOrCreateNodeItem(const NodePtr &node, NodeItem **node_item); Status ParseForceInfershapeNodes(const NodePtr &node, NodeItem &node_item); Status CollectParallelGroups(NodeItem *node_item); Status ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies); + Status ParseDependencies(NodeItem &node_item, const std::vector &dependencies, + std::set &dependent_for_shape_inference); Status ParseDependentForFusedSubgraph(NodeItem &node_item, std::set &dependencies); Status ParseDependentByParallelGroup(); Status IndexTaskDefs(); @@ 
-85,8 +86,8 @@ class HybridModelBuilder { Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); Status RecoverGraphUnknownFlag(); Status CheckAicpuOpList(); - Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node); - Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node); + Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node, uint32_t &prev_num); + Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node, uint32_t &post_num); Status GenerateFpProfilingTask(const OpDescPtr &op_desc, vector &task_def_list); Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector &task_def_list); Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector &task_def_list); @@ -94,6 +95,20 @@ class HybridModelBuilder { Status OptimizeDependenciesForConstantInputs(); Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx); + Status RelinkNextIteration(); + Status BuildProfilingControl(GraphItem &graph_item, const std::map> &nodes); + Status BuildControlFlowGroup(GraphItem &graph_item, const NodePtr &node, NodeItem *node_item); + Status CreateNormalNodeGroup(const NodePtr &node, NodeItem *node_item); + Status CreateStreamActiveGroup(const NodePtr &node, NodeItem *node_item); + Status CreateStreamSwitchGroup(const NodePtr &node, NodeItem *node_item); + Status CreateStreamSwitchNGroup(const NodePtr &node, NodeItem *node_item); + Status CreateNextIterationGroup(const NodePtr &node, NodeItem *node_item); + + Status CreateSwitchGroup(const NodePtr &node, NodeItem *node_item); + Status CreateLabelSetGroup(const NodePtr &node, NodeItem *node_item); + Status CreateLabelGotoGroup(const NodePtr &node, NodeItem *node_item); + Status CreateLabelSwitchGroup(const NodePtr &node, NodeItem *node_item); + const char* GetGraphName() const { return hybrid_model_.model_name_.c_str(); } @@ -104,6 +119,8 @@ class HybridModelBuilder { GeRootModelPtr ge_root_model_; std::map subgraph_models_; std::map constant_op_nodes_; + std::map stream_merge_op_nodes_; + std::map next_iteration_op_nodes_; std::map> parallel_group_to_nodes_; std::map> node_to_parallel_groups_; diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index 8211dde3..c6adce00 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -29,10 +29,19 @@ namespace hybrid { namespace { const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; const char *const kNodeTypeRetVal = "_RetVal"; -std::set kControlOpTypes{ +const std::set kControlOpTypes{ IF, STATELESSIF, CASE, WHILE, STATELESSWHILE }; +const std::set kControlFlowOpTypes{ + STREAMACTIVE, STREAMSWITCH, STREAMSWITCHN, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, + NEXTITERATION, REFNEXTITERATION +}; + +const std::set kMergeOpTypes{ + MERGE, REFMERGE, STREAMMERGE +}; + Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { @@ -107,7 +116,7 @@ Status ParseFusedSubgraph(NodeItem &node_item) { } } // namespace -bool IsControlOp(const std::string &op_type) { +bool IsControlFlowV2Op(const std::string &op_type) { return kControlOpTypes.count(op_type) > 0; } @@ -226,7 +235,7 @@ Status NodeItem::ResolveStaticInputsAndOutputs() { } void NodeItem::ResolveUnknownShapeType() { - if (IsControlOp() || node_type == PARTITIONEDCALL) { + if (IsControlFlowV2Op() || (is_dynamic && node_type 
== PARTITIONEDCALL)) { shape_inference_type = DEPEND_COMPUTE; } else { int32_t unknown_shape_type_val = 0; @@ -236,6 +245,10 @@ void NodeItem::ResolveUnknownShapeType() { } Status NodeItem::Init() { + is_ctrl_flow_v2_op_ = ge::hybrid::IsControlFlowV2Op(node_type); + is_ctrl_flow_op_ = kControlFlowOpTypes.count(node_type) > 0; + is_merge_op_ = kMergeOpTypes.count(node_type) > 0; + is_root_node_ = node->GetInAllNodes().empty(); GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); ResolveUnknownShapeType(); @@ -244,14 +257,12 @@ Status NodeItem::Init() { GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[Invoke][ParseFusedSubgraph][%s] Failed to parse fused subgraph", node_name.c_str()); } + copy_mu_ = MakeShared(); + GE_CHECK_NOTNULL(copy_mu_); return SUCCESS; } -bool NodeItem::IsControlOp() const { - return ge::hybrid::IsControlOp(op_desc->GetType()); -} - bool NodeItem::IsHcclOp() const { return NodeExecutorManager::GetInstance().ResolveExecutorType(*node) == NodeExecutorManager::ExecutorType::HCCL; } @@ -383,5 +394,45 @@ bool NodeItem::IsInputShapeStatic(int index) const { return is_input_shape_static_[index]; } + +void NodeItem::SetDataSend(NodeItem *node_item, int anchor_index) { + data_send_.emplace(node_item); + node_item->data_recv_[this] = anchor_index; + if (is_root_node_) { + node_item->root_data_.emplace(this); + } + GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); +} + +void NodeItem::SetCtrlSend(NodeItem *node_item, uint32_t switch_index) { + if (switch_index < switch_groups_.size()) { + std::vector &switch_group = switch_groups_[switch_index]; + switch_group.emplace_back(node_item); + } else { + ctrl_send_.insert(node_item); + } + + node_item->ctrl_recv_.emplace(this); + if (is_root_node_) { + node_item->root_ctrl_.emplace(this); + } + + GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); +} + +OptionalMutexGuard::OptionalMutexGuard(std::mutex *mutex, const string &name) : mu_(mutex), name_(name) { + if (mu_ != nullptr) { + GELOGD("lock for %s", name_.c_str()); + mu_->lock(); + } +} + +OptionalMutexGuard::~OptionalMutexGuard() { + if (mu_ != nullptr) { + GELOGD("unlock for %s", name_.c_str()); + mu_->unlock(); + mu_ = nullptr; + } +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 54c5e938..606e58fe 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -37,7 +37,17 @@ struct FusedSubgraph { ComputeGraphPtr graph; }; -bool IsControlOp(const std::string &op_type); +bool IsControlFlowV2Op(const std::string &op_type); + +class OptionalMutexGuard { + public: + OptionalMutexGuard(std::mutex *mutex, const std::string &name); + ~OptionalMutexGuard(); + + private: + std::mutex *mu_{nullptr}; + std::string name_; +}; // for caching static information across execution struct NodeItem { @@ -70,12 +80,29 @@ struct NodeItem { Status GetCanonicalInputIndex(uint32_t index, int &canonical_index) const; - bool IsControlOp() const; + bool IsControlFlowV2Op() const { + return is_ctrl_flow_v2_op_; + } + + bool IsControlFlowOp() const { + return is_ctrl_flow_op_; + } + + bool IsMergeOp() const { + return is_merge_op_; + } bool IsHcclOp() const; void SetToDynamic(); + void SetDataSend(NodeItem *node_item, int anchor_index); + void SetCtrlSend(NodeItem *node_item, uint32_t switch_index); + + OptionalMutexGuard MutexGuard(const std::string &name) const { + return 
OptionalMutexGuard(copy_mu_.get(), name + "_" + node_name); + } + std::string DebugString() const; NodePtr node; @@ -99,7 +126,20 @@ struct NodeItem { std::set to_const_output_id_list; // src_output_id, dst_anchor_id, dst_node - vector>> outputs; + std::vector>> outputs; + + // for linked drive + bool is_root_node_ = false; + bool is_ctrl_flow_v2_op_ = false; + bool is_ctrl_flow_op_ = false; + bool is_merge_op_ = false; + std::set root_ctrl_; // Recv ctrl from root node + std::set root_data_; // Recv data from root node + std::set data_send_; // Send data notify to + std::map data_recv_; // Recv data notify from + std::set ctrl_send_; // Send ctrl notify to + std::set ctrl_recv_; // Recv ctrl notify from + std::vector> switch_groups_; // Send ctrl notify to std::shared_ptr kernel_task; std::unique_ptr fused_subgraph; @@ -122,6 +162,7 @@ struct NodeItem { std::vector is_input_shape_static_; std::vector input_desc_indices_; + std::shared_ptr copy_mu_; mutable std::mutex mu_; }; } // namespace hybrid diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 8bb871fb..40118af3 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -23,6 +23,7 @@ #include "graph/load/model_manager/tbe_handle_store.h" #include "graph/types.h" #include "single_op/task/build_task_utils.h" +#include "single_op/task/tbe_task_builder.h" using optiling::OpRunInfo; @@ -131,8 +132,8 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name), GELOGI("Get original type of kernel_name")); GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); - GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), - stub_name_.c_str(), kernel_name.c_str(), 0)); + auto stub_func = KernelBinRegistry::GetInstance().GetUnique(stub_name_); + GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_func, stub_name_.c_str(), kernel_name.c_str(), 0)); } return SUCCESS; } @@ -309,7 +310,7 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef auto rt_ret = ValidateTaskDef(task_def); if (rt_ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "op:%s(op_type:%s) failed to validate task def:%s", - op_desc.GetName().c_str(), op_desc.GetType().c_str(), task_def.DebugString().c_str()); + op_desc.GetName().c_str(), op_desc.GetType().c_str(), task_def.DebugString().c_str()); GELOGE(rt_ret, "[Invoke][ValidateTaskDef]failed for op:%s(op_type:%s) to validate task def:%s", op_desc.GetName().c_str(), op_desc.GetType().c_str(), task_def.DebugString().c_str()); return rt_ret; @@ -401,9 +402,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { } RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] Start"); - GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), - tiling_data_.c_str(), tiling_data_.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), tiling_data_.c_str(), + tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, context.GetStream())); RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] End"); GELOGD("[%s] Done updating tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc index 
b6c48157..c607a43e 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -24,6 +24,12 @@ namespace hybrid { namespace { // if dim count is not reach kMaxShapeDims(8), use INT64_MIN to mark dim end. constexpr int64_t kDimEndFlag = INT64_MIN; +const std::map kTopicTypeToRtsFlagMap { + {static_cast(aicpu::FWKAdapter::FWK_ADPT_TOPIC_DEVICE_ONLY), 0}, + {static_cast(aicpu::FWKAdapter::FWK_ADPT_TOPIC_DEVICE_FIRST), RT_KERNEL_DEVICE_FIRST}, + {static_cast(aicpu::FWKAdapter::FWK_ADPT_TOPIC_HOST_ONLY), RT_KERNEL_HOST_ONLY}, + {static_cast(aicpu::FWKAdapter::FWK_ADPT_TOPIC_HOST_FIRST), RT_KERNEL_HOST_FIRST} +}; } Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { @@ -72,6 +78,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { case aicpu::FWKAdapter::FWK_ADPT_EXT_UPDATE_ADDR: GE_CHK_STATUS_RET(ParseExtUpdateAddr(aicpu_ext_info), "[Parse][ExtUpdateAddr] failed."); break; + case aicpu::FWKAdapter::FWK_ADPT_EXT_TOPIC_TYPE: + GE_CHK_STATUS_RET(ParseExtTopicType(aicpu_ext_info), "[Parse][ExtTopicType] failed."); + break; default: GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); @@ -207,6 +216,45 @@ Status AicpuExtInfoHandler::ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info) { return SUCCESS; } +Status AicpuExtInfoHandler::ParseExtTopicType(AicpuExtInfo *aicpu_ext_info) { + if (aicpu_ext_info->infoLen != sizeof(int32_t)) { + REPORT_INNER_ERROR("E19999", + "Node[%s] parse topic_type info failed as infoLen must be %zu but %u.", + node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "[Check][DataLen]Node[%s] parse topic_type info failed as infoLen must be %zu but %u.", + node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen); + return ACL_ERROR_GE_PARAM_INVALID; + } + GE_CHECK_NOTNULL(aicpu_ext_info->infoMsg); + auto type_info = reinterpret_cast(aicpu_ext_info->infoMsg); + int32_t type = *type_info; + + topic_type_flag_ = TopicTypeToRtsFlag(type); + if (topic_type_flag_ == -1) { + REPORT_INNER_ERROR("E19999", "Node[%s] parse ext topic type failed as need %d %d %d %d but %d.", + node_name_.c_str(), + aicpu::FWKAdapter::FWK_ADPT_TOPIC_DEVICE_ONLY, + aicpu::FWKAdapter::FWK_ADPT_TOPIC_DEVICE_FIRST, + aicpu::FWKAdapter::FWK_ADPT_TOPIC_HOST_ONLY, + aicpu::FWKAdapter::FWK_ADPT_TOPIC_HOST_FIRST, + type); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "[Check][Type]Node[%s] parse ext shape type failed as need %d %d %d %d but %d.", + node_name_.c_str(), + aicpu::FWKAdapter::FWK_ADPT_TOPIC_DEVICE_ONLY, + aicpu::FWKAdapter::FWK_ADPT_TOPIC_DEVICE_FIRST, + aicpu::FWKAdapter::FWK_ADPT_TOPIC_HOST_ONLY, + aicpu::FWKAdapter::FWK_ADPT_TOPIC_HOST_FIRST, + type); + return ACL_ERROR_GE_PARAM_INVALID; + } + + GELOGI("Node[%s] parse ext topic type info success infoLen=%u, topic_type=%d, rts_flag=%d.", + node_name_.c_str(), aicpu_ext_info->infoLen, type, topic_type_flag_); + return SUCCESS; +} + Status AicpuExtInfoHandler::UpdateExecuteMode(bool flag) { if (bit_map_ == nullptr) { GELOGD("There is no bit_map in ext_info, no need update."); @@ -341,5 +389,14 @@ void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_typ data_type = static_cast(shape_and_type->type); shape = GeShape(dims); } + +int32_t AicpuExtInfoHandler::TopicTypeToRtsFlag(int32_t topic_type) { + auto it = kTopicTypeToRtsFlagMap.find(topic_type); + if (it != kTopicTypeToRtsFlagMap.end()) { + return it->second; + } + + return -1; +} 
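+
+// Note: -1 is the sentinel for an unrecognized topic type; ParseExtTopicType reports it as
+// ACL_ERROR_GE_PARAM_INVALID, so only the four FWK_ADPT_TOPIC_* values map to an RTS flag.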
} // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h index 01092204..46fb7c05 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h @@ -62,6 +62,7 @@ class AicpuExtInfoHandler { Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); bool IsNeedRefreshIOAddr(); + int32_t GetTopicTypeFlag() const { return topic_type_flag_; } private: @@ -71,6 +72,7 @@ class AicpuExtInfoHandler { Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); Status ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info); + Status ParseExtTopicType(AicpuExtInfo *aicpu_ext_info); static Status UpdateShapeAndType(const GeShape &shape, DataType data_type, @@ -81,6 +83,8 @@ class AicpuExtInfoHandler { DataType &data_type); private: + int32_t TopicTypeToRtsFlag(int32_t topic_type); + const std::string node_name_; const uint32_t input_num_; const uint32_t output_num_; @@ -88,6 +92,7 @@ class AicpuExtInfoHandler { AicpuSessionInfo *session_info_ = nullptr; uint64_t *bit_map_ = nullptr; uint32_t *update_addr_ = nullptr; + int32_t topic_type_flag_ = -1; std::unique_ptr ext_info_; size_t ext_info_len_ = 0; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 48c3ab9e..c800e93d 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -22,6 +22,7 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" #include "graph/load/model_manager/model_utils.h" #include "graph/load/model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" @@ -32,7 +33,7 @@ REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::COMPILED_SUBGR Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start"); - GELOGD("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName()); + GELOGD("[%s] KnownNodeTask::ExecuteAsync in, model id: %u.", context.GetNodeName(), davinci_model_->Id()); if (davinci_model_->GetTaskList().empty()) { GELOGW("KnownNodeExecutor::ExecuteAsync davinci model has no taskinfo."); @@ -62,7 +63,7 @@ Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function d RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] End"); GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(done_callback)); - GELOGD("[%s] KnownNodeTask::ExecuteAsync success.", context.GetNodeName()); + GELOGD("[%s] KnownNodeTask::ExecuteAsync success, model id: %u.", context.GetNodeName(), davinci_model_->Id()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] End"); return SUCCESS; } @@ -184,13 +185,15 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node GELOGI("[%s] KnownNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); - const GeModelPtr ge_model = model.GetGeModel(node); - GE_CHECK_NOTNULL(ge_model); - - AscendString graph_name; - GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph 
name"); - auto weight_buffer = model.GetModelWeight(graph_name.GetString()); - + GeModelPtr ge_model; + ComputeGraphPtr compute_graph; + GE_CHK_STATUS_RET(GetModelAndGraph(model, node, ge_model, compute_graph), + "[%s] Failed to get model and graph", + node->GetName().c_str()); + auto node_item = const_cast(model.GetNodeItem(node)); + GE_CHECK_NOTNULL(node_item); + GE_CHK_STATUS_RET_NOLOG(ParseAttrForAllocatingOutputs(*node_item, *compute_graph)); + auto weight_buffer = model.GetModelWeight(compute_graph->GetName()); std::shared_ptr davinci_model = MakeShared(0, nullptr); GE_CHECK_NOTNULL(davinci_model); @@ -223,5 +226,102 @@ Status KnownNodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] End"); return SUCCESS; } + +Status KnownNodeExecutor::ParseAttrForAllocatingOutputs(NodeItem &node_item, ComputeGraph &graph) { + GELOGD("[%s] Start to parse attributes for outputs", node_item.NodeName().c_str()); + auto net_output_node = graph.FindFirstNodeMatchType(NETOUTPUT); + if (net_output_node == nullptr) { + GELOGD("[%s] Subgraph do not got net output", graph.GetName().c_str()); + return SUCCESS; + } + + auto net_output_desc = net_output_node->GetOpDesc(); + GE_CHECK_NOTNULL(net_output_desc); + std::map connected_inputs; + std::map data_indices; + GE_CHK_STATUS_RET(GetDataNodes(graph, data_indices), + "[%s] Failed to get data node indices", + node_item.NodeName().c_str()); + for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { + auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (out_data_anchor == nullptr) { + continue; + } + auto src_node = out_data_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(src_node); + auto op_desc = src_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto src_op_type = src_node->GetType(); + auto output_index = in_data_anchor->GetIdx(); + GELOGD("Node %s, output %d, src node = %s, src node type = %s", + node_item.NodeName().c_str(), + output_index, + src_node->GetName().c_str(), + src_op_type.c_str()); + // parse reuse outputs + std::string input_key = std::to_string(op_desc->GetId()) + "_" + std::to_string(out_data_anchor->GetIdx()); + auto it = connected_inputs.find(input_key); + if (it == connected_inputs.end()) { + connected_inputs.emplace(input_key, output_index); + } else { + GELOGD("[%s] output [%d] reuse output [%d] input node = %s, idx = %d.", node_item.NodeName().c_str(), + output_index, + it->second, + src_node->GetName().c_str(), + out_data_anchor->GetIdx()); + node_item.reuse_outputs.emplace(output_index, it->second); + } + + if (src_op_type == DATA) { + int data_index = data_indices[src_node]; + node_item.reuse_inputs.emplace(output_index, data_index); + GELOGD("[%s] output[%u] reuses input[%d]", node_item.NodeName().c_str(), output_index, data_index); + } else if (src_op_type == CONSTANTOP || src_op_type == CONSTANT || src_op_type == VARIABLE) { + node_item.ref_outputs.emplace(output_index, src_node); + GELOGD("[%s] output[%d] ref to node [%s]", + node_item.NodeName().c_str(), + output_index, + src_node->GetName().c_str()); + } + } + + GELOGD("[%s] Done parsing attributes for outputs successfully", node_item.NodeName().c_str()); + return SUCCESS; +} + +Status KnownNodeExecutor::GetDataNodes(ComputeGraph &graph, std::map &data_indices) { + std::map ordered_data_nodes; + for (const auto &node : graph.GetDirectNode()) { + GE_CHECK_NOTNULL(node); + if (node->GetType() == DATA) { + int index = -1; + (void) 
AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_INDEX, index); + ordered_data_nodes.emplace(index, node); + } + } + + // reindex + int data_index = 0; + for (const auto &it : ordered_data_nodes) { + data_indices.emplace(it.second, data_index++); + } + + return SUCCESS; +} + +Status KnownNodeExecutor::GetModelAndGraph(const HybridModel &model, + const NodePtr &node, + GeModelPtr &ge_model, + ComputeGraphPtr &graph) { + ge_model = model.GetGeModel(node); + GE_CHECK_NOTNULL(ge_model); + const auto &root_graph = model.GetRootGraph(); + GE_CHECK_NOTNULL(root_graph); + AscendString graph_name; + GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get subgraph name"); + graph = root_graph->GetSubgraph(graph_name.GetString()); + GE_CHECK_NOTNULL(graph); + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 629cb543..11cda846 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -51,6 +51,14 @@ class KnownNodeExecutor : public NodeExecutor { Status PrepareTask(NodeTask &task, TaskContext &context) const; Status ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const; ~KnownNodeExecutor() {} + + private: + static Status ParseAttrForAllocatingOutputs(NodeItem &node_item, ComputeGraph &graph); + static Status GetDataNodes(ComputeGraph &graph, std::map &data_indices); + static Status GetModelAndGraph(const HybridModel &model, + const NodePtr &node, + GeModelPtr &ge_model, + ComputeGraphPtr &graph); }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc index df7da661..d55607ff 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -22,18 +22,6 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::CONTROL_OP, ControlOpNodeExecutor); -namespace { -template -Status CopyScalarValueToHost(const TensorValue &tensor, T &value) { - GE_CHECK_GE(tensor.GetSize(), sizeof(value)); - GE_CHK_RT_RET(rtMemcpy(&value, - sizeof(value), - tensor.GetData(), - sizeof(value), - RT_MEMCPY_DEVICE_TO_HOST)); - return SUCCESS; -} -} Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, TaskContext &task_context, @@ -60,12 +48,12 @@ Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, Status ControlOpNodeTask::ToBool(const TensorValue &tensor, DataType data_type, bool &value) { switch (data_type) { -#define CASE(DT, T) \ - case (DT): { \ - T val{}; \ - GE_CHK_STATUS_RET(CopyScalarValueToHost(tensor, val)); \ - value = val != 0; \ - break; \ +#define CASE(DT, T) \ + case (DT): { \ + T val{}; \ + GE_CHK_STATUS_RET(tensor.CopyScalarValueToHost(val)); \ + value = val != 0; \ + break; \ } // DT_STRING was handled in CondPass CASE(DT_FLOAT, float) @@ -77,7 +65,7 @@ Status ControlOpNodeTask::ToBool(const TensorValue &tensor, DataType data_type, CASE(DT_INT64, int64_t) #undef CASE case DT_BOOL: - GE_CHK_STATUS_RET(CopyScalarValueToHost(tensor, value)); + GE_CHK_STATUS_RET(tensor.CopyScalarValueToHost(value)); break; default: GELOGE(UNSUPPORTED, "Data type %s is not support by cond.", TypeUtils::DataTypeToSerialString(data_type).c_str()); @@ -182,7 +170,7 @@ Status 
CaseOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::func auto branch_tensor = task_context.GetInput(kCaseBranchIndex); GE_CHECK_NOTNULL(branch_tensor); int32_t branch_index = 0; - GE_CHK_STATUS_RET(CopyScalarValueToHost(*branch_tensor, branch_index)); + GE_CHK_STATUS_RET(branch_tensor->CopyScalarValueToHost(branch_index)); const GraphItem *subgraph = SelectBranch(branch_index); GELOGI("[%s] Taking subgraph [%s] by branch = [%d]", task_context.GetNodeName(), diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 6e8a1eb9..d35989a1 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,8 +18,7 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 370bb286..9d196a45 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -35,7 +35,7 @@ Status AssignKernel::Compute(TaskContext& context) { GE_CHECK_NOTNULL(value_tensor); if (value_tensor->GetSize() > ref_tensor->GetSize()) { REPORT_INNER_ERROR("E19999", "[%s] value_input_size=%zu bigger than ref_input_size=%zu. check invalid", - node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); + node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); GELOGE(INTERNAL_ERROR, "[Check][Size][%s] value_input_size=%zu, but ref_input_size=%zu.", node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); return INTERNAL_ERROR; diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index d5d868ab..5f3d6e45 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -97,7 +97,7 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node return ExecutorType::GE_LOCAL; } - if (IsControlOp(op_type)) { + if (IsControlFlowV2Op(op_type)) { return ExecutorType::CONTROL_OP; } @@ -119,8 +119,7 @@ Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executo auto executor_type = ResolveExecutorType(node); const auto it = executors_.find(executor_type); if (it == executors_.end()) { - REPORT_INNER_ERROR("E19999", "Failed to get executor by type: %d.", - static_cast(executor_type)); + REPORT_INNER_ERROR("E19999", "Failed to get executor by type: %d.", static_cast(executor_type)); GELOGE(INTERNAL_ERROR, "[Check][ExecutorType]Failed to get executor by type: %d.", static_cast(executor_type)); return INTERNAL_ERROR; diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h index 17ccc012..fffd4e7d 100644 --- a/ge/hybrid/node_executor/node_executor.h +++ b/ge/hybrid/node_executor/node_executor.h @@ -27,6 +27,8 @@ const uint32_t MEMORY_ALIGN_RATIO = 2; const uint32_t MEMORY_ALIGN_SIZE = 32; namespace hybrid { class HybridModel; +using NodeTaskPtr = std::shared_ptr; + // Base class of Node Task class NodeTask { public: diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.cc 
b/ge/hybrid/node_executor/rts/rts_node_executor.cc index aa833de0..3ad791b6 100644 --- a/ge/hybrid/node_executor/rts/rts_node_executor.cc +++ b/ge/hybrid/node_executor/rts/rts_node_executor.cc @@ -14,10 +14,13 @@ * limitations under the License. */ -#include "rts_node_executor.h" +#include "hybrid/node_executor/rts/rts_node_executor.h" +#include "hybrid/node_executor/rts/rts_task_factory.h" + #include "common/debug/log.h" #include "common/ge/ge_util.h" #include "common/types.h" +#include "graph/common/omg_util.h" #include "graph/utils/tensor_utils.h" #include "hybrid/model/hybrid_model.h" #include "runtime/rt.h" @@ -26,6 +29,11 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::RTS, RtsNodeExecutor); +REGISTER_RTS_TASK_CREATOR(IDENTITY, IdentityNodeTask); +REGISTER_RTS_TASK_CREATOR(IDENTITYN, IdentityNNodeTask); +REGISTER_RTS_TASK_CREATOR(READVARIABLEOP, ReadVariableOpNodeTask); +REGISTER_RTS_TASK_CREATOR(PROFILINGTRAININGTRACE, ProfilingTraceNodeTask); + Status IdentityNodeTask::DoCopyTensor(TaskContext &context, int index) { auto input_desc = context.MutableInputDesc(index); GE_CHECK_NOTNULL(input_desc); @@ -77,10 +85,6 @@ Status IdentityNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start to execute.", context.GetNodeName()); for (int i = 0; i < context.NumInputs(); ++i) { @@ -95,7 +99,15 @@ Status IdentityNNodeTask::ExecuteAsync(TaskContext &context, std::functionempty()) { + GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute."); + return INTERNAL_ERROR; + } + + task_defs_ = *task_defs; + GELOGD("[%s] Done initialization successfully.", node->GetName().c_str()); return SUCCESS; } @@ -116,32 +128,21 @@ Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function< } return SUCCESS; -}; +} Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GE_CHECK_NOTNULL(node); - - auto op_type = node->GetType(); - if (op_type == IDENTITY) { - task = MakeShared(); - } else if (op_type == IDENTITYN) { - task = MakeShared(); - } else if (op_type == READVARIABLEOP) { - task = MakeShared(); - } else if (op_type == PROFILINGTRAININGTRACE) { - auto *task_defs = model.GetTaskDefs(node); - if (task_defs == nullptr || task_defs->empty()) { - GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute."); - return INTERNAL_ERROR; - } - task = MakeShared(*task_defs); - } else { - GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str()); - return INTERNAL_ERROR; + GELOGD("[%s] Load for local task.", node->GetName().c_str()); + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(node, node_type), "Get original type failed."); + RtsNodeTaskPtr rts_task = RtsTaskFactory::GetInstance().Create(node_type); + if (rts_task == nullptr) { + GELOGE(UNSUPPORTED, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), node_type.c_str()); + return UNSUPPORTED; } - GE_CHECK_NOTNULL(task); - return SUCCESS; + task = rts_task; + return rts_task->Init(model, node); } } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.h b/ge/hybrid/node_executor/rts/rts_node_executor.h index aecf138b..94c2c06a 100644 --- a/ge/hybrid/node_executor/rts/rts_node_executor.h +++ b/ge/hybrid/node_executor/rts/rts_node_executor.h @@ -18,13 +18,12 @@ #define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ #include 
"hybrid/node_executor/node_executor.h" -#include "proto/task.pb.h" +#include "hybrid/node_executor/rts/rts_node_task.h" namespace ge { namespace hybrid { -class IdentityNodeTask : public NodeTask { +class IdentityNodeTask : public RtsNodeTask { public: - Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; protected: @@ -41,12 +40,10 @@ class ReadVariableOpNodeTask : public IdentityNodeTask { Status ExecuteAsync(TaskContext &context, std::function done_callback) override; }; -class ProfilingTraceNodeTask : public NodeTask { +class ProfilingTraceNodeTask : public RtsNodeTask { public: - explicit ProfilingTraceNodeTask(const std::vector &task_defs) : task_defs_(task_defs) {} - ~ProfilingTraceNodeTask() override = default; + Status Init(const HybridModel &model, const NodePtr &node) override; - Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; private: diff --git a/ge/hybrid/node_executor/rts/rts_node_task.cc b/ge/hybrid/node_executor/rts/rts_node_task.cc new file mode 100644 index 00000000..f6d6ddb6 --- /dev/null +++ b/ge/hybrid/node_executor/rts/rts_node_task.cc @@ -0,0 +1,245 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hybrid/node_executor/rts/rts_node_task.h" +#include "hybrid/node_executor/rts/rts_task_factory.h" + +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" +#include "common/ge/ge_util.h" +#include "common/op/ge_op_utils.h" + +namespace { +constexpr uint8_t kSwitchPredIndex = 0; +constexpr uint8_t kSwitchCompIndex = 1; + +const static std::map> kCompHandle = { + {RT_EQUAL, [](int64_t pred_value, int64_t comp_value) { return pred_value == comp_value; }}, + {RT_NOT_EQUAL, [](int64_t pred_value, int64_t comp_value) { return pred_value != comp_value; }}, + {RT_GREATER, [](int64_t pred_value, int64_t comp_value) { return pred_value > comp_value; }}, + {RT_GREATER_OR_EQUAL, [](int64_t pred_value, int64_t comp_value) { return pred_value >= comp_value; }}, + {RT_LESS, [](int64_t pred_value, int64_t comp_value) { return pred_value < comp_value; }}, + {RT_LESS_OR_EQUAL, [](int64_t pred_value, int64_t comp_value) { return pred_value <= comp_value; }}, +}; +} + +namespace ge { +namespace hybrid { +REGISTER_RTS_TASK_CREATOR(STREAMACTIVE, StreamActiveNodeTask); +REGISTER_RTS_TASK_CREATOR(STREAMSWITCH, StreamSwitchNodeTask); +REGISTER_RTS_TASK_CREATOR(STREAMMERGE, StreamMergeNodeTask); +REGISTER_RTS_TASK_CREATOR(MEMCPYASYNC, MemcpyAsyncNodeTask); + +REGISTER_RTS_TASK_CREATOR(ENTER, PassThroughNodeTask); +REGISTER_RTS_TASK_CREATOR(REFENTER, PassThroughNodeTask); +REGISTER_RTS_TASK_CREATOR(LOOPCOND, PassThroughNodeTask); +REGISTER_RTS_TASK_CREATOR(NEXTITERATION, PassThroughNodeTask); +REGISTER_RTS_TASK_CREATOR(REFNEXTITERATION, PassThroughNodeTask); +REGISTER_RTS_TASK_CREATOR(EXIT, PassThroughNodeTask); +REGISTER_RTS_TASK_CREATOR(REFEXIT, PassThroughNodeTask); + +REGISTER_RTS_TASK_CREATOR(LABELSET, LabelSetNodeTask); +REGISTER_RTS_TASK_CREATOR(LABELGOTO, LabelGotoNodeTask); +REGISTER_RTS_TASK_CREATOR(LABELGOTOEX, LabelGotoNodeTask); +REGISTER_RTS_TASK_CREATOR(LABELSWITCH, LabelSwitchNodeTask); +REGISTER_RTS_TASK_CREATOR(LABELSWITCHBYINDEX, LabelSwitchNodeTask); + +Status RtsNodeTask::GetScalarIndexValue(TaskContext &task_context, uint32_t index, int64_t &value) { + auto tensor_value = task_context.GetInput(index); + GE_CHECK_NOTNULL(tensor_value); + auto tensor_desc = task_context.MutableInputDesc(index); + GE_CHECK_NOTNULL(tensor_desc); + + auto data_type = tensor_desc->GetDataType(); + switch (data_type) { +#define CASE_TYPE(DT, VT) \ + case (DT): { \ + VT data_val{}; \ + GE_CHK_STATUS_RET(tensor_value->CopyScalarValueToHost(data_val)); \ + value = static_cast(data_val); \ + break; \ + } + // Just accept index data type. 
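+      // Only DT_INT32 and DT_INT64 are accepted as index values; any other data type falls through
+      // to the UNSUPPORTED branch below.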
+ CASE_TYPE(DT_INT32, int32_t) + CASE_TYPE(DT_INT64, int64_t) +#undef CASE_TYPE + default: { + GELOGE(UNSUPPORTED, "Data type %s not index type.", TypeUtils::DataTypeToSerialString(data_type).c_str()); + return UNSUPPORTED; + } + } + + return SUCCESS; +} + +Status StreamActiveNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + const auto &node_state = task_context.GetNodeState(); + node_state->SetSwitchIndex(0); + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGI("[%s] Done executing successfully.", task_context.GetNodeName()); + return SUCCESS; +} + +Status StreamSwitchNodeTask::Init(const HybridModel &model, const NodePtr &node) { + uint32_t value = 0; + if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, value)) { + GELOGE(INTERNAL_ERROR, "[%s] Get %s failed.", node->GetName().c_str(), ATTR_NAME_STREAM_SWITCH_COND.c_str()); + return INTERNAL_ERROR; + } + rtCondition_t cond = static_cast(value); + const auto it = kCompHandle.find(cond); + if (it == kCompHandle.end()) { + GELOGE(INTERNAL_ERROR, "[%s] Get Condition: %u handle failed.", node->GetName().c_str(), value); + return INTERNAL_ERROR; + } + + comp_func_ = it->second; + GELOGD("[%s] Done initialization successfully, condition is %u.", node->GetName().c_str(), value); + return SUCCESS; +} + +Status StreamSwitchNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + GE_CHECK_NOTNULL(comp_func_); + + int64_t pred_value = 0; + GE_CHK_STATUS_RET(GetScalarIndexValue(task_context, kSwitchPredIndex, pred_value)); + int64_t comp_value = 0; + GE_CHK_STATUS_RET(GetScalarIndexValue(task_context, kSwitchCompIndex, comp_value)); + + bool switch_idx = comp_func_(pred_value, comp_value); + auto node_state = task_context.GetNodeState(); + node_state->SetSwitchIndex(static_cast(switch_idx)); + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGI("[%s] Done executing successfully, pred value: %ld, comp value: %ld, switch index: %d.", + task_context.GetNodeName(), pred_value, comp_value, static_cast(switch_idx)); + return SUCCESS; +} + +Status StreamMergeNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + int index = task_context.GetNodeState()->GetMergeIndex(); + GELOGD("[%s] Start to execute, merge index: %d.", task_context.GetNodeName(), index); + if (index < 0 || index >= task_context.NumInputs()) { + GELOGE(INTERNAL_ERROR, "[%s] Invalid merge param, inputs num: %d, merge index: %d.", + task_context.GetNodeName(), task_context.NumInputs(), index); + return INTERNAL_ERROR; + } + + const auto in_x = task_context.MutableInput(index); // x + GE_CHECK_NOTNULL(in_x); + GE_CHK_STATUS_RET_NOLOG(task_context.SetOutput(MERGE_DATA_OUTPUT, *in_x)); // y + + const auto out_y = task_context.MutableOutput(MERGE_INDEX_OUTPUT); // value_index + GE_CHECK_NOTNULL(out_y); + if (out_y->GetSize() > 0) { + GE_CHK_RT_RET(rtMemcpyAsync(out_y->MutableData(), out_y->GetSize(), &index, sizeof(index), + RT_MEMCPY_HOST_TO_DEVICE_EX, task_context.GetStream())); + } + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + task_context.GetNodeState()->SetMergeIndex(-1); // Invalidate for loop. 
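+  // Resetting to -1 ensures a stale merge index is never reused on the next loop iteration.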
+ GELOGD("[%s] Done executing successfully.", task_context.GetNodeName()); + return SUCCESS; +} + +Status MemcpyAsyncNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + auto input_desc = task_context.MutableInputDesc(0); + GE_CHECK_NOTNULL(input_desc); + int64_t copy_size = 0; + GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*input_desc, copy_size)); + // copy_size would not be negative since GetTensorSizeInBytes returned successfully. + if (copy_size > 0) { + const auto in_v = task_context.MutableInput(0); + const auto out_v = task_context.MutableOutput(0); + GE_CHECK_NOTNULL(in_v); + GE_CHECK_NOTNULL(out_v); + GELOGD("[%s] input size: %zu, output size: %zu, copy size: %ld", task_context.GetNodeName(), + in_v->GetSize(), out_v->GetSize(), copy_size); + GE_CHK_RT_RET(rtMemcpyAsync(out_v->MutableData(), out_v->GetSize(), in_v->GetData(), copy_size, + RT_MEMCPY_DEVICE_TO_DEVICE, task_context.GetStream())); + } else { + GELOGW("[%s] invalid copy size: %ld", task_context.GetNodeName(), copy_size); + } + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGD("[%s] Done executing successfully.", task_context.GetNodeName()); + return SUCCESS; +} + +Status PassThroughNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + const auto in_x = task_context.GetInput(0); // x + GE_CHECK_NOTNULL(in_x); + GE_CHK_STATUS_RET_NOLOG(task_context.SetOutput(0, *in_x)); // y + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGD("[%s] Done executing successfully.", task_context.GetNodeName()); + return SUCCESS; +} + +Status LabelSetNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGD("[%s] Done executing successfully.", task_context.GetNodeName()); + return UNSUPPORTED; +} + +Status LabelGotoNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGD("[%s] Done executing successfully.", task_context.GetNodeName()); + return UNSUPPORTED; +} + +Status LabelSwitchNodeTask::ExecuteAsync(TaskContext &task_context, std::function done_callback) { + GELOGD("[%s] Start to execute.", task_context.GetNodeName()); + + if (done_callback) { + GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback)); + } + + GELOGD("[%s] Done executing successfully.", task_context.GetNodeName()); + return UNSUPPORTED; +} +} // namespace hybrid +} // namespace ge \ No newline at end of file diff --git a/ge/hybrid/node_executor/rts/rts_node_task.h b/ge/hybrid/node_executor/rts/rts_node_task.h new file mode 100644 index 00000000..d7d63eb5 --- /dev/null +++ b/ge/hybrid/node_executor/rts/rts_node_task.h @@ -0,0 +1,89 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_TASK_H_ +#define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_TASK_H_ + +#include "hybrid/node_executor/node_executor.h" +#include "proto/task.pb.h" + +namespace ge { +namespace hybrid { +class RtsNodeTask : public NodeTask { + public: + Status Init(TaskContext &task_context) override { + return SUCCESS; + } + + virtual Status Init(const HybridModel &model, const NodePtr &node) { + GELOGD("[%s] Done initialization successfully.", node->GetName().c_str()); + return SUCCESS; + } + + Status UpdateArgs(TaskContext &task_context) override { + GELOGD("[%s] Done update args successfully.", task_context.GetNodeName()); + return SUCCESS; + } + + static Status GetScalarIndexValue(TaskContext &task_context, uint32_t index, int64_t &value); +}; + +class StreamActiveNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; + +class StreamSwitchNodeTask : public RtsNodeTask { + public: + Status Init(const HybridModel &model, const NodePtr &node) override; + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; + + private: + std::function comp_func_{nullptr}; +}; + +class StreamMergeNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; + +class MemcpyAsyncNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; + +class PassThroughNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; + +class LabelSetNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; + +class LabelGotoNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; + +class LabelSwitchNodeTask : public RtsNodeTask { + public: + Status ExecuteAsync(TaskContext &task_context, std::function done_callback) override; +}; +} // namespace hybrid +} // namespace ge +#endif // GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_TASK_H_ diff --git a/ge/hybrid/node_executor/rts/rts_task_factory.cc b/ge/hybrid/node_executor/rts/rts_task_factory.cc new file mode 100644 index 00000000..0072fdf6 --- /dev/null +++ b/ge/hybrid/node_executor/rts/rts_task_factory.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hybrid/node_executor/rts/rts_task_factory.h" + +namespace ge { +namespace hybrid { +RtsNodeTaskPtr RtsTaskFactory::Create(const std::string &task_type) const { + auto it = creators_.find(task_type); + if (it == creators_.end()) { + GELOGW("Cannot find task type %s in inner map.", task_type.c_str()); + return nullptr; + } + + return it->second(); +} + +void RtsTaskFactory::RegisterCreator(const std::string &task_type, const RtsTaskCreatorFun &creator) { + if (creator == nullptr) { + GELOGW("Register %s creator is null", task_type.c_str()); + return; + } + + auto it = creators_.find(task_type); + if (it != creators_.end()) { + GELOGW("Task %s creator already exist", task_type.c_str()); + return; + } + + creators_[task_type] = creator; +} +} // namespace hybrid +} // namespace ge diff --git a/ge/hybrid/node_executor/rts/rts_task_factory.h b/ge/hybrid/node_executor/rts/rts_task_factory.h new file mode 100644 index 00000000..a2d2bf56 --- /dev/null +++ b/ge/hybrid/node_executor/rts/rts_task_factory.h @@ -0,0 +1,66 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_NODE_EXECUTOR_RTS_TASK_FACTORY_H_ +#define GE_HYBRID_NODE_EXECUTOR_RTS_TASK_FACTORY_H_ + +#include "hybrid/node_executor/rts/rts_node_task.h" + +namespace ge { +namespace hybrid { +using RtsNodeTaskPtr = std::shared_ptr; +using RtsTaskCreatorFun = std::function; + +class RtsTaskFactory { + public: + static RtsTaskFactory &GetInstance() { + static RtsTaskFactory instance; + return instance; + } + + RtsNodeTaskPtr Create(const std::string &task_type) const; + + class RtsTaskRegistrar { + public: + RtsTaskRegistrar(const std::string &task_type, const RtsTaskCreatorFun &creator) { + RtsTaskFactory::GetInstance().RegisterCreator(task_type, creator); + } + ~RtsTaskRegistrar() = default; + }; + + private: + RtsTaskFactory() = default; + ~RtsTaskFactory() = default; + + /** + * Register build of executor + * @param executor_type type of executor + * @param builder build function + */ + void RegisterCreator(const std::string &task_type, const RtsTaskCreatorFun &creator); + + std::map creators_; +}; +} // namespace hybrid +} // namespace ge + +#define REGISTER_RTS_TASK_CREATOR(task_type, task_clazz) \ + REGISTER_RTS_TASK_CREATOR_UNIQ_HELPER(__COUNTER__, task_type, task_clazz) + +#define REGISTER_RTS_TASK_CREATOR_UNIQ_HELPER(ctr, type, clazz) \ + RtsTaskFactory::RtsTaskRegistrar g_##type##_Creator##ctr(type, []()-> RtsNodeTaskPtr { return MakeShared(); }) + +#endif // GE_HYBRID_NODE_EXECUTOR_RTS_TASK_FACTORY_H_ diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index db8fe9fe..59250d8c 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -418,13 +418,14 @@ Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr return MEMALLOC_FAILED; } - GELOGD("Allocating workspace of size = %zu successfully", size); + GELOGD("[%s] 
Allocating workspace of size = %zu successfully", node_item_->NodeName().c_str(), size); workspaces_.emplace_back(*buffer); return SUCCESS; } Status TaskContext::PropagateOutputs() { // propagate outputs + const auto &guard = node_item_->MutexGuard("PropagateOutputs"); for (int i = 0; i < NumOutputs(); ++i) { auto tensor = MutableOutput(i); GE_CHECK_NOTNULL(tensor); @@ -461,7 +462,7 @@ Status TaskContext::PropagateOutputs() { } } } - + (void)guard; return SUCCESS; } @@ -561,8 +562,8 @@ const DumpProperties &TaskContext::GetDumpProperties() const { } bool TaskContext::NeedCallback() { - return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0 || - !execution_context_->model->IsSingleOp(); + return node_item_->has_observer || IsDumpEnabled() || GraphExecutionContext::profiling_level > 0 || + !execution_context_->model->IsSingleOp() || ProfilingManager::Instance().ProfilingModelLoadOn(); } Status TaskContext::Synchronize() { @@ -571,7 +572,7 @@ Status TaskContext::Synchronize() { Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, uint32_t block_dim) { - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 39a18fd1..2374e75f 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -39,7 +39,7 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "runtime/kernel.h" @@ -60,8 +60,6 @@ static std::shared_ptr instancePtr_ = nullptr; // Initial each module of GE, if one failed, release all Status GELib::Initialize(const map &options) { - - GELOGI("initial start"); GEEVENT("[GEPERFTRACE] GE Init Start"); // Multiple initializations are not allowed diff --git a/ge/ir_build/option_utils.cc b/ge/ir_build/option_utils.cc index 1be996b2..c23da519 100755 --- a/ge/ir_build/option_utils.cc +++ b/ge/ir_build/option_utils.cc @@ -239,7 +239,8 @@ bool CheckDynamicImagesizeInputShapeValid(map> shape_map bool is_char_valid = isdigit(c) || (c == ',') || (c == ' ') || (c == ';'); if (!is_char_valid) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10033", {"value", "reason"}, {dynamic_image_size, kDynamicImageSizeError}); + "E10001", {"parameter", "value", "reason"}, + {"dynamic_image_size", dynamic_image_size.c_str(), kDynamicImageSizeError}); GELOGE(ge::PARAM_INVALID, "[Check][DynamicImageSizeInputShape] --dynamic_image_size:%s is invalid. 
reason: %s", dynamic_image_size.c_str(), kDynamicImageSizeError); return false; @@ -846,18 +847,23 @@ Status UpdateDataOpShapeRange(const OpDescPtr &op, GELOGE(PARAM_INVALID, "[Check][OpDescPtr] Check shape by shape range failed for op:%s.", data_op_name.c_str()); return PARAM_INVALID; } - for (size_t idx = 0; idx < cur_shape_range.size(); idx++) { + std::vector dims; + for (size_t idx = 0; idx < cur_shape_range.size(); ++idx) { auto left_range = cur_shape_range[idx].first; auto right_range = cur_shape_range[idx].second; if (left_range != right_range) { - origin_shape.SetDim(idx, UNKNOWN_DIM); + dims.push_back(UNKNOWN_DIM); + } else { + dims.push_back(left_range); } } + origin_shape = GeShape(dims); tensor_input->SetShape(origin_shape); tensor_input->SetShapeRange(cur_shape_range); tensor_output->SetShape(origin_shape); tensor_output->SetShapeRange(cur_shape_range); - GELOGI("Update input [%s] shape range info", data_op_name.c_str()); + GELOGI("Update input [%s] shape range and shape [%s] info success.", + data_op_name.c_str(), origin_shape.ToString().c_str()); } else { GELOGI("No need to update input [%s] attr because not found from input_shape_range.", data_op_name.c_str()); } @@ -899,18 +905,23 @@ Status UpdateDataOpShapeRange(const OpDescPtr &op, GELOGE(PARAM_INVALID, "[Check][OpDescPtr] Check shape by shape range failed for op:%s.", data_op_name.c_str()); return PARAM_INVALID; } + std::vector dims; for (size_t idx = 0; idx < cur_shape_range.size(); ++idx) { auto left_range = cur_shape_range[idx].first; auto right_range = cur_shape_range[idx].second; if (left_range != right_range) { - origin_shape.SetDim(idx, UNKNOWN_DIM); + dims.push_back(UNKNOWN_DIM); + } else { + dims.push_back(left_range); } } + origin_shape = GeShape(dims); tensor_input->SetShape(origin_shape); tensor_input->SetShapeRange(cur_shape_range); tensor_output->SetShape(origin_shape); tensor_output->SetShapeRange(cur_shape_range); - GELOGI("Update input [%s] shape range info success.", data_op_name.c_str()); + GELOGI("Update input [%s] shape range and shape [%s] info success.", + data_op_name.c_str(), origin_shape.ToString().c_str()); return SUCCESS; } diff --git a/ge/model/ge_root_model.h b/ge/model/ge_root_model.h index b8ff7b7a..9e8e116e 100755 --- a/ge/model/ge_root_model.h +++ b/ge/model/ge_root_model.h @@ -40,6 +40,10 @@ class GeRootModel { } uint32_t GetModelId() const { return model_id_; } + void SetIsSpecificStream(bool is_specific_stream) { is_specific_stream_ = is_specific_stream; } + + bool IsSpecificStream() const { return is_specific_stream_; } + void SetModelName(const std::string &model_name) { model_name_ = model_name; } const std::string &GetModelName() const { return model_name_; } @@ -64,6 +68,7 @@ class GeRootModel { std::vector model_ids_; bool train_flag_ = false; std::string model_name_; + bool is_specific_stream_ = false; }; } // namespace ge using GeRootModelPtr = std::shared_ptr; diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 6603a3f5..8eb83010 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -216,10 +216,6 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path"); DEFINE_string(display_model_info, "0", "Optional; display model info"); -DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance." 
- "normal: no need to compile, used saved .o files directly;" - "high: need to recompile, high execute performance mode."); - DEFINE_string(device_id, "0", "Optional; user device id"); class GFlagUtils { @@ -336,8 +332,7 @@ class GFlagUtils { "Default value: $HOME/atc_data\n" " --op_compiler_cache_mode Set the operator compilation cache mode." "Options are disable(default), enable and force(force to refresh the cache)\n" - " --display_model_info enable for display model info; 0(default): close display, 1: open display.\n" - " --performance_mode Set high performance mode of compile or execute."); + " --display_model_info enable for display model info; 0(default): close display, 1: open display."); gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); // Using gflags to analyze input parameters @@ -1085,7 +1080,6 @@ static void SetEnvForSingleOp(std::map &options) { options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); - options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode); options.emplace(ge::TUNE_DEVICE_IDS, FLAGS_device_id); } @@ -1240,7 +1234,6 @@ domi::Status GenerateOmModel() { options.insert(std::pair(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); - options.insert(std::pair(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode)); // set enable scope fusion passes SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); // print atc option map @@ -1343,6 +1336,9 @@ domi::Status ConvertPbtxtToJson() { int init(int argc, char* argv[]) { GFlagUtils::InitGFlag(argc, argv); + const char *gflag_argv = gflags::GetArgv(); + string cmdline = gflag_argv == nullptr ? "" : gflag_argv; + domi::GetContext().atc_cmdline = cmdline; // set log level int ret = -1; const std::set log_level = {"null", "debug", "info", "warning", "error"}; diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index e8b3ae0e..39c87107 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -32,6 +32,7 @@ #include "graph/common/local_context.h" #include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" @@ -155,6 +156,11 @@ Status InnerSession::Finalize() { // release var memory GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); + + for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { + (void)MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id_); + } + // release analyzer saved info(Session Level) Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); @@ -262,6 +268,51 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector &inpu } } +Status InnerSession::RunGraphWithStreamAsync(uint32_t graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs) { + GELOGI("Run graph with stream, session id = %lu, graph id = %u, stream = %p.", + session_id_, graph_id, stream); + if (mutex_.try_lock()) { + std::lock_guard lock(mutex_, std::adopt_lock); + if (!init_flag_) { + GELOGE(GE_SESS_INIT_FAILED, "[Run][GraphWithStream]failed because GraphManager not Init," + "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); + REPORT_INNER_ERROR("E19999", "RunGraphWithStreamAsync failed because GraphManager not Init," + "session id = %lu, 
graph id = %u, stream = %p.", session_id_, graph_id, stream); + return GE_SESS_INIT_FAILED; + } + UpdateThreadContext(graph_id); + vector ge_inputs; + for (auto &item : inputs) { + ge_inputs.emplace_back(TensorAdapter::AsGeTensor(item)); + } + vector ge_outputs; + for (auto &item : outputs) { + ge_outputs.emplace_back(TensorAdapter::AsGeTensor(item)); + } + Status ret = graph_manager_.RunGraphWithStreamAsync(graph_id, stream, session_id_, ge_inputs, ge_outputs); + domi::GetContext().out_nodes_map.clear(); + domi::GetContext().user_out_nodes.clear(); + if (ret != SUCCESS) { + GELOGE(ret, "[Run][GraphWithStreamAsync]failed," + "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); + REPORT_CALL_ERROR("E19999", "GraphManager RunGraphWithStreamAsync failed," + "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); + return ret; + } + + GELOGI("Run graph with stream success, session id = %lu, graph id = %u, stream = %p.", + session_id_, graph_id, stream); + return SUCCESS; + } else { + GELOGE(GE_SESS_ALREADY_RUNNING, "[Run][GraphWithStreamAsync]failed because mutex try_lock false," + "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); + REPORT_INNER_ERROR("E19999", "[Run][GraphWithStreamAsync]failed because mutex try_lock false," + "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); + return GE_SESS_ALREADY_RUNNING; + } +} + Status InnerSession::RemoveGraph(uint32_t graph_id) { std::lock_guard lock(resource_mutex_); if (!init_flag_) { @@ -363,7 +414,26 @@ Status InnerSession::BuildGraph(uint32_t graph_id, const std::vector &inputs, +Status InnerSession::BuildGraph(uint32_t graph_id, const std::vector &inputs) { + UpdateThreadContext(graph_id); + GELOGI("[InnerSession:%lu] build graph on session, graph_id=%u.", session_id_, graph_id); + std::vector ge_inputs; + for (const auto &input : inputs) { + ge_inputs.emplace_back(TensorAdapter::AsGeTensor(input)); + } + GeRootModelPtr ge_root_model = nullptr; + Status ret = graph_manager_.BuildGraph(graph_id, ge_inputs, ge_root_model, session_id_, true); + if (ret != SUCCESS) { + GELOGE(ret, "[Build][Graph] failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id); + REPORT_CALL_ERROR("E19999", + "GraphManager BuildGraph failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id); + return ret; + } + GELOGI("[InnerSession:%lu] build graph success, graph_id=%u.", session_id_, graph_id); + return ret; +} + +Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback) { UpdateThreadContext(graph_id); GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); @@ -377,7 +447,6 @@ Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, diff --git a/ge/session/inner_session.h b/ge/session/inner_session.h index 5cab43d8..a2ec35df 100644 --- a/ge/session/inner_session.h +++ b/ge/session/inner_session.h @@ -41,11 +41,16 @@ class InnerSession { Status RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs); + Status RunGraphWithStreamAsync(uint32_t graph_id, rtStream_t stream, const std::vector &inputs, + std::vector &outputs); + Status RemoveGraph(uint32_t graph_id); Status BuildGraph(uint32_t graph_id, const std::vector &inputs); - Status RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback); + Status BuildGraph(uint32_t graph_id, const std::vector &inputs); + + Status 
RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback); Status Finalize(); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index ca5043b1..46c39b2a 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -867,7 +867,12 @@ FMK_FUNC_HOST_VISIBILITY void PrintModelInfo(ge::proto::ModelDef *model_def, uin auto soc_version = (iter != model_attr_map->end()) ? iter->second.s() : ""; iter = model_attr_map->find("framework_type"); auto framework_type = (iter != model_attr_map->end()) ? iter->second.s() : ""; - std::cout << "system info: " + // original atc cmdline + iter = model_attr_map->find(ATTR_MODEL_ATC_CMDLINE); + auto cmdline = (iter != model_attr_map->end()) ? iter->second.s() : ""; + std::cout << "Original Atc command line: " + << cmdline << std::endl + << "system info: " << ATTR_MODEL_ATC_VERSION << "[" << atc_version << "], " << "soc_version" diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 1e4efa6b..fdf37d06 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -242,6 +242,33 @@ Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const s return innerSession->RunGraph(graph_id, inputs, outputs); } +Status SessionManager::RunGraphWithStreamAsync(SessionId session_id, + uint32_t graph_id, + rtStream_t stream, + const std::vector &inputs, + std::vector &outputs) { + if (!init_flag_) { + GELOGE(GE_SESSION_MANAGER_NOT_INIT, + "[RunWithStream][Graph]Session manager is not initialized," + "session id = %lu, graph id = %u, stream = %p.", session_id, graph_id, stream); + REPORT_INNER_ERROR("E19999", + "RunGraphWithStreamAsync fail for Session manager is not initialized," + "session id = %lu, graph id = %u, stream = %p.", session_id, graph_id, stream); + return GE_SESSION_MANAGER_NOT_INIT; + } + SessionPtr innerSession = nullptr; + { + std::lock_guard lock(mutex_); + std::map::iterator it = session_manager_map_.find(session_id); + if (it == session_manager_map_.end()) { + return GE_SESSION_NOT_EXIST; + } else { + innerSession = it->second; + } + } + return innerSession->RunGraphWithStreamAsync(graph_id, stream, inputs, outputs); +} + Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) { if (!init_flag_) { GELOGE(GE_SESSION_MANAGER_NOT_INIT, @@ -357,8 +384,29 @@ Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const return innerSession->BuildGraph(graph_id, inputs); } +Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector &inputs) { + if (!init_flag_) { + GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[Build][Graph]fail for Session manager is not initialized," + "session_id:%lu, graph_id:%u.", session_id, graph_id); + REPORT_INNER_ERROR("E19999", "BuildGraph fail for Session manager is not initialized," + "session_id:%lu, graph_id:%u.", session_id, graph_id); + return GE_SESSION_MANAGER_NOT_INIT; + } + SessionPtr innerSession = nullptr; + { + std::lock_guard lock(mutex_); + std::map::iterator it = session_manager_map_.find(session_id); + if (it == session_manager_map_.end()) { + return GE_SESSION_NOT_EXIST; + } else { + innerSession = it->second; + } + } + return innerSession->BuildGraph(graph_id, inputs); +} + Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id, - const std::vector &inputs, RunAsyncCallback callback) { + const std::vector &inputs, RunAsyncCallback callback) { if (!init_flag_) { GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[AsyncRun][Graph]fail for Session manager is not 
initialized, session_id:%lu, graph_id:%u.", diff --git a/ge/session/session_manager.h b/ge/session/session_manager.h index da23219c..17152b0a 100644 --- a/ge/session/session_manager.h +++ b/ge/session/session_manager.h @@ -25,6 +25,7 @@ #include "common/ge_inner_error_codes.h" #include "ge/ge_api_types.h" #include "session/inner_session.h" +#include "runtime/base.h" namespace ge { using SessionPtr = std::shared_ptr; @@ -98,6 +99,19 @@ class SessionManager { /// /// @ingroup ge_session + /// @brief run a graph of the session with specific stream asynchronously + /// @param [in] session_id session id + /// @param [in] graph_id graph id + /// @param [in] stream specific stream + /// @param [in] inputs input data + /// @param [out] outputs output data + /// @return Status result of function + /// + Status RunGraphWithStreamAsync(SessionId session_id, uint32_t graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs); + + /// + /// @ingroup ge_session /// @brief remove a graph from the session with specific session id /// @param [in] session_id session id /// @param [in] graph_id graph id @@ -125,6 +139,8 @@ class SessionManager { /// Status BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector &inputs); + Status BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector &inputs); + /// /// @ingroup ge_session /// @brief run a graph of the session with specific session id for train asynchronously @@ -133,7 +149,7 @@ class SessionManager { /// @param [in] inputs input data /// @return Status result of function /// - Status RunGraphAsync(SessionId session_id, uint32_t graph_id, const std::vector &inputs, + Status RunGraphAsync(SessionId session_id, uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback); /// diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 4b3f17cf..36ca1850 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -361,6 +361,37 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector &input_desc, + const vector &input_buffers) { + for (auto &tensor_map : tensor_with_hostmem_) { + auto index = static_cast(tensor_map.first); + if (index >= input_desc.size() || index >= input_buffers.size()) { + GELOGE(INTERNAL_ERROR, "[Check][Size]Index %zu should smaller then input desc size %zu " + "and input buffers size %zu.", index, input_desc.size(), input_buffers.size()); + return INTERNAL_ERROR; + } + auto ge_tensor_desc = input_desc[index]; + // reconstruct GeTensor by DataBuffer + GeTensorPtr ge_tensor = MakeShared(ge_tensor_desc); + GE_CHECK_NOTNULL(ge_tensor); + GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", + index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length); + if (ge_tensor->SetData(reinterpret_cast(input_buffers[index].data), + static_cast(input_buffers[index].length)) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor."); + return INTERNAL_ERROR; + } + for (auto &tensor_desc : tensor_map.second) { + GE_CHECK_NOTNULL(tensor_desc); + if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) { + GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE."); + return FAILED; + } + } + } + return SUCCESS; +} + Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, @@ -374,6 +405,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, if 
(!inputs_size.empty()) { StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); + GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); } if (hybrid_model_executor_ != nullptr) { diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index 01d6dfc0..deb4532e 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -81,9 +81,12 @@ class DynamicSingleOp { std::vector &outputs) const; Status SetHostTensorValue(const std::vector> &inputs_size, const vector &input_desc, const std::vector &input_buffers); + Status SetHostTensorValue(const vector &input_desc, const vector &input_buffers); std::unique_ptr op_task_; std::unique_ptr hybrid_model_; std::unique_ptr hybrid_model_executor_; + std::map> tensor_with_hostmem_; + uintptr_t resource_id_ = 0; std::mutex *stream_mutex_; rtStream_t stream_ = nullptr; diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index 180b50c1..d09dd802 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -19,8 +19,7 @@ #include #include -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index a4135999..6959c6b3 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -235,6 +235,13 @@ Status SingleOpModel::LoadAllNodes() { if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) { data_ops_.emplace_back(op_desc); + auto tensor = op_desc->MutableInputDesc(0); + if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) { + int32_t index = 0; + (void) AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index); + GELOGD("Node %s, index %d, has host mem.", node->GetName().c_str(), index); + op_with_hostmem_[index] = node; + } continue; } @@ -616,6 +623,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & if (need_hybrid_model) { GELOGD("Build single op HybridModel."); GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); + GE_CHK_STATUS(SetHostMemTensor(single_op), "[Init][HostMem]Failed."); auto root_model = model_helper_.GetGeRootModel(); GE_CHECK_NOTNULL(root_model); root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); @@ -634,4 +642,28 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & } return BuildTaskListForDynamicOp(&resource, single_op); } + +Status SingleOpModel::SetHostMemTensor(DynamicSingleOp &single_op) { + for (auto &node_map : op_with_hostmem_) { + auto node = node_map.second; + auto out_anchor = node->GetOutDataAnchor(0); + GE_CHECK_NOTNULL(out_anchor); + auto in_anchors = out_anchor->GetPeerInDataAnchors(); + vector tensor_descs; + auto idx = node_map.first; + for (auto anchor : in_anchors) { + GE_CHECK_NOTNULL(anchor); + auto output_node = anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(output_node); + auto op_desc = output_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto tensor_desc = op_desc->MutableInputDesc(anchor->GetIdx()); + tensor_descs.emplace_back(tensor_desc); + GELOGD("Get %d th input tensor desc of %s by %d data node: %s.", anchor->GetIdx(), + output_node->GetName().c_str(), idx, 
node->GetName().c_str()); + } + single_op.tensor_with_hostmem_[idx] = tensor_descs; + } + return SUCCESS; +} } // namespace ge diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index d900f09f..e7d07ee0 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -77,6 +77,7 @@ class SingleOpModel { static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); void ParseArgTable(OpTask *task, SingleOp &op); Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); + Status SetHostMemTensor(DynamicSingleOp &single_op); std::string model_name_; uint32_t model_id_ = 0; @@ -86,6 +87,7 @@ class SingleOpModel { ModelHelper model_helper_; map op_list_; + map op_with_hostmem_; SingleOpModelParam model_params_; std::vector input_offset_list_; diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index c8b5a8ec..c2cbe794 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -123,6 +123,18 @@ class GE_FUNC_VISIBILITY Session { /// /// @ingroup ge_graph + /// @brief run a graph of the session with specific session id and specific stream asynchronously + /// @param [in] graph_id graph id + /// @param [in] stream specific stream + /// @param [in] inputs input data + /// @param [out] outputs output data + /// @return Status result of function + /// + Status RunGraphWithStreamAsync(uint32_t graph_id, void *stream, const std::vector &inputs, + std::vector &outputs); + + /// + /// @ingroup ge_graph /// @brief build graph in the session with specific session id /// @param [in] graphId: graph id /// @param [in] inputs: input data @@ -130,6 +142,8 @@ class GE_FUNC_VISIBILITY Session { /// Status BuildGraph(uint32_t graphId, const std::vector &inputs); + Status BuildGraph(uint32_t graphId, const std::vector &inputs); /*lint !e148*/ + /// /// @ingroup ge_graph /// @brief run graph in the session with specific session id asynchronously @@ -140,7 +154,7 @@ class GE_FUNC_VISIBILITY Session { /// Please ensure that the implementation of the function is trusted. 
/// @return Status result of function /// - Status RunGraphAsync(uint32_t graphId, const std::vector &inputs, RunAsyncCallback callback); + Status RunGraphAsync(uint32_t graphId, const std::vector &inputs, RunAsyncCallback callback); /// /// @ingroup ge_graph diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index cef3fc42..388f0fe0 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -23,6 +23,7 @@ #include #include #include +#include "graph/tensor.h" namespace ge { // Option key: graph run mode @@ -356,7 +357,8 @@ struct OutputTensorInfo { }; using Status = uint32_t; -using RunAsyncCallback = std::function &)>; +using RunAsyncCallback = std::function &)>; + // for ir build namespace ir_option { static const char *const INPUT_FORMAT = "input_format"; diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index b37574f7..64231b8c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -61,6 +61,9 @@ const std::string kTaskTypeAicore = "AI_CORE"; const std::string kTaskTypeAicpu = "AI_CPU"; const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; +// dynamic execute mode +const char *const kLazyRecompile = "lazy_recompile"; + // Data cache, including data address and length struct DataBuffer { public: @@ -226,7 +229,7 @@ class GE_FUNC_VISIBILITY ModelListener { /// @param [in] resultCode Execution results /// virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, - std::vector &outputs) = 0; + std::vector &outputs) = 0; }; // OMM configuration item @@ -293,6 +296,7 @@ struct DumpConfig { std::string dump_mode; std::string dump_status; std::string dump_op_switch; + std::string dump_debug; std::vector dump_list; }; } // namespace ge diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 89529520..bc965d13 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -54,6 +54,10 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_TRUE_OUTPUT; GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; +// Merge +GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT; +GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT; + // FunctionOp GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_START_INPUT; @@ -129,7 +133,7 @@ class GE_FUNC_VISIBILITY OpUtils { /// @param [out] output Data pointer after conversion. 
The format is HWCK /// static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output); - + static vector GetWeights(const ge::Node &node); static vector GetWeights(ge::ConstNodePtr node); static vector MutableWeights(const ge::Node &node); diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 9da630c9..fcca561c 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -153,6 +153,10 @@ class GE_FUNC_VISIBILITY GeExecutor { const kAippDynamicPara &aippParms); ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + + ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + std::string &attr_value); + ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index f5837b3a..173cc64e 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 84f6ef46..1024f7e6 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -124,6 +124,7 @@ struct OmgContext { std::vector data_nodes; std::vector getnext_nosink_nodes; bool fuzz_compile_flag = false; + std::string atc_cmdline; }; } // namespace ge diff --git a/metadef b/metadef index 22ab76ec..7cb171b9 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 22ab76ecd8461f679606374be4b3b6b6f7cad321 +Subproject commit 7cb171b9c511fec57ccc0ad746ef2126267fe18b diff --git a/parser b/parser index f12a4159..8d44bebf 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit f12a4159641de89e38460205be89da8ef74eba8a +Subproject commit 8d44bebfeeb71b793bc7325acc95345090789e19 diff --git a/tests/depends/error_manager/src/error_manager_stub.cc b/tests/depends/error_manager/src/error_manager_stub.cc index 7ed8dbcb..1b4c140d 100644 --- a/tests/depends/error_manager/src/error_manager_stub.cc +++ b/tests/depends/error_manager/src/error_manager_stub.cc @@ -48,6 +48,14 @@ int FormatErrorMessage(char *str_dst, size_t dst_max, const char *format, ...) { return 0; } + std::string ErrorManager::GetErrorMessage() { + return std::string(); + } + + std::string ErrorManager::GetWarningMessage() { + return std::string(); + } + int ErrorManager::ReportInterErrMessage(std::string error_code, const std::string &error_msg) { return 0; } @@ -99,7 +107,7 @@ int FormatErrorMessage(char *str_dst, size_t dst_max, const char *format, ...) 
{ const std::string &ErrorManager::GetLogHeader() { return error_context_.log_header; } struct error_message::Context &ErrorManager::GetErrorManagerContext() { - struct error_message::Context error_context; + static struct error_message::Context error_context; return error_context; } diff --git a/tests/depends/runtime/CMakeLists.txt b/tests/depends/runtime/CMakeLists.txt index 349af39a..544a2f55 100644 --- a/tests/depends/runtime/CMakeLists.txt +++ b/tests/depends/runtime/CMakeLists.txt @@ -15,7 +15,7 @@ #cmake_minimum_required(VERSION 2.8) -project(STUB_MMPA) +project(runtime_stub) file(GLOB_RECURSE SRCS RELATIVE ${CMAKE_CURRENT_LIST_DIR} "src/runtime_stub.cc" @@ -26,7 +26,13 @@ include_directories(${GE_CODE_DIR}/inc/framework) add_library(runtime_stub SHARED ${SRCS}) +target_compile_options(runtime_stub PRIVATE + -g +) + target_link_libraries(runtime_stub PRIVATE $ c_sec ) + +target_include_directories(runtime_stub INTERFACE ${CMAKE_CURRENT_LIST_DIR}/src) diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 00873b8f..59a98978 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -17,6 +17,9 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif #define EVENT_LENTH 10 rtError_t rtCtxSetCurrent(rtContext_t ctx) { return RT_ERROR_NONE; } @@ -96,15 +99,16 @@ rtError_t rtSetDevice(int32_t device) { return RT_ERROR_NONE; } rtError_t rtStreamSynchronize(rtStream_t stream) { return RT_ERROR_NONE; } rtError_t rtMemcpy(void *dst, uint64_t dest_max, const void *src, uint64_t count, rtMemcpyKind_t kind) { -#ifdef OTQT_UT - if (dest_max == 12 && count == 12) { // UTEST_kernelinfo_manager.all_success special treatment + if (dst != nullptr && src != nullptr) { memcpy_s(dst, dest_max, src, count); } -#endif return RT_ERROR_NONE; } rtError_t rtMemcpyAsync(void *dst, uint64_t dest_max, const void *src, uint64_t count, rtMemcpyKind_t kind, rtStream_t stream) { + if (dst != nullptr && src != nullptr) { + memcpy_s(dst, dest_max, src, count); + } return RT_ERROR_NONE; } @@ -125,9 +129,6 @@ rtError_t rtEventElapsedTime(float *time, rtEvent_t start, rtEvent_t end) { *time = 10.0f; return RT_ERROR_NONE; } -rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char *stub_name, const void *dev_func) { - return RT_ERROR_NONE; -} rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char *stub_name, const void *dev_func, uint32_t func_mode) { @@ -156,7 +157,7 @@ rtError_t rtConfigureCall(uint32_t num_blocks, rtSmDesc_t *sm_desc, rtStream_t s rtError_t rtSetProfDir(char *prof_dir) { return RT_ERROR_NONE; } -rtError_t rtSetProfDirEx(char *prof_dir, char *address, char *job_ctx) { return RT_ERROR_NONE; } +rtError_t rtSetProfDirEx(const char *profDir, const char *address, const char *jobCtx) { return RT_ERROR_NONE; } rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aicore_memory_size) { return RT_ERROR_NONE; } @@ -218,9 +219,8 @@ rtError_t rtGetFunctionByName(const char *stub_name, void **stub_func) { *(char **)stub_func = "func"; return RT_ERROR_NONE; } -rtError_t rtGetAddrByFun(const void *stubFunc, void **addr) -{ - *(char**)addr = "dev_func"; +rtError_t rtGetAddrByFun(const void *stubFunc, void **addr) { + *(char **)addr = "dev_func"; return RT_ERROR_NONE; } rtError_t rtQueryFunctionRegistered(const char *stub_name) { return RT_ERROR_NONE; } @@ -244,7 +244,9 @@ rtError_t rtEndGraphEx(rtModel_t model, rtStream_t stream, uint32_t flags) { return RT_ERROR_NONE; } 
-rtError_t rtProfilerStop(void) { return RT_ERROR_NONE; } +rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList) { + return RT_ERROR_NONE; +} rtError_t rtSetDvfsProfile(DvfsProfileMode mode) { return RT_ERROR_NONE; } @@ -256,7 +258,9 @@ rtError_t rtCtxDestroy(rtContext_t ctx) { return RT_ERROR_NONE; } rtError_t rtProfilerInit(const char *prof_dir, const char *address, const char *job_ctx) { return RT_ERROR_NONE; } -rtError_t rtProfilerStart(void) { return RT_ERROR_NONE; } +rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList) { + return RT_ERROR_NONE; +} rtError_t rtLabelCreate(rtLabel_t *label) { *label = new uint64_t; @@ -305,7 +309,9 @@ rtError_t rtLabelGotoEx(rtLabel_t label, rtStream_t stream) { } -rtError_t rtInvalidCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; } +rtError_t rtInvalidCache(void *base, size_t len) { + return RT_ERROR_NONE; +} rtError_t rtModelLoadComplete(rtModel_t model) { return RT_ERROR_NONE; } @@ -314,7 +320,9 @@ rtError_t rtStreamCreateWithFlags(rtStream_t *stream, int32_t priority, uint32_t return RT_ERROR_NONE; } -rtError_t rtFlushCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; } +rtError_t rtFlushCache(void *base, size_t len) { + return RT_ERROR_NONE; +} rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; } @@ -445,4 +453,7 @@ rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { return RT_ERROR_NONE; -} \ No newline at end of file +} +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/tests/depends/slog/src/slog_stub.cc b/tests/depends/slog/src/slog_stub.cc index edc245b4..d0eb49c5 100644 --- a/tests/depends/slog/src/slog_stub.cc +++ b/tests/depends/slog/src/slog_stub.cc @@ -15,6 +15,7 @@ */ #include "toolchain/slog.h" +#include "toolchain/plog.h" #include #include @@ -46,3 +47,22 @@ int CheckLogLevel(int moduleId, int logLevel) { return 1; } + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +int DlogReportInitialize() { + return 0; +} + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. 
+ * @return: 0: SUCCEED, others: FAILED + */ +int DlogReportFinalize() { + return 0; +} + diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 12b329d7..2b68d8fe 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -97,6 +97,7 @@ set(GRAPH_SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_tensor.cc" "${GE_CODE_DIR}/metadef/graph/ref_relation.cc" "${GE_CODE_DIR}/metadef/graph/tensor.cc" + "${GE_CODE_DIR}/metadef/graph/types.cc" "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" @@ -166,7 +167,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" - "${GE_CODE_DIR}/ge/common/dump/exception_dumper.cc" + "${GE_CODE_DIR}/ge/common/dump/exception_dumper.cc" "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" @@ -215,12 +216,10 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/get_original_format_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/shape_operate_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/unused_op_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/assert_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/dropout_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/infershape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/unused_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/isolated_op_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/permute_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/ctrl_edge_transfer_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/end_of_sequence_add_control_pass.cc" @@ -239,6 +238,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/merge_to_stream_merge_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/merge_input_memcpy_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/switch_to_stream_switch_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_force_unknown_for_cond_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/attach_stream_label_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/multi_batch_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/multi_batch_clone_pass.cc" @@ -261,7 +261,6 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/switch_logic_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/switch_data_edges_bypass.cc" "${GE_CODE_DIR}/ge/graph/passes/merge_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/variable_format_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/variable_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/cast_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transpose_transdata_pass.cc" @@ -335,8 +334,10 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -394,8 +395,10 @@ set(GRAPH_LOAD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + 
"${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) @@ -493,8 +496,6 @@ set(GRAPH_PASS_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/placeholder_with_default_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/snapshot_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/shape_operate_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/unused_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/isolated_op_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/permute_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/var_is_initialized_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/cast_translate_pass.cc" @@ -512,8 +513,8 @@ set(GRAPH_PASS_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/reshape_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/resource_pair_add_control_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/resource_pair_remove_control_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_breadth_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_without_reshape_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_depth_fusion_pass.cc" @@ -621,6 +622,8 @@ set(SINGLE_OP_SRC_FILES "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/rts/rts_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/rts/rts_node_task.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/rts/rts_task_factory.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/task_context.cc" "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model.cc" @@ -666,8 +669,7 @@ set(PASS_TEST_FILES "graph/passes/permute_pass_unittest.cc" "graph/passes/print_op_pass_unittest.cc" "graph/passes/shape_operate_op_remove_pass_unittest.cc" - "graph/passes/unused_and_isolated_op_remove_pass_unittest.cc" - "graph/passes/variable_op_pass_unittest.cc" + "graph/passes/variable_op_pass_unittest.cc" "graph/passes/base_pass_unittest.cc" "graph/passes/addn_pass_unittest.cc" "graph/passes/save_pass_unittest.cc" @@ -689,6 +691,7 @@ set(PASS_TEST_FILES "graph/passes/stop_gradient_pass_unittest.cc" "graph/passes/prevent_gradient_pass_unittest.cc" "graph/passes/identity_pass_unittest.cc" + "graph/passes/global_step_insert_pass_unittest.cc" "graph/passes/placeholder_with_default_pass_unittest.cc" "graph/passes/snapshot_pass_unittest.cc" "graph/passes/guarantee_const_pass_unittest.cc" @@ -701,14 +704,15 @@ set(PASS_TEST_FILES "graph/passes/net_output_pass_unittest.cc" "graph/passes/no_use_reshape_remove_pass_unittest.cc" "graph/passes/infershape_pass_unittest.cc" + "graph/passes/mark_force_unknown_for_cond_pass_unittest.cc" "graph/passes/multi_batch_clone_pass_unittest.cc" "graph/passes/replace_with_empty_const_pass_unittest.cc" "graph/passes/link_gen_mask_nodes_pass_unittest.cc" "graph/passes/transpose_transdata_pass_unittest.cc" "graph/passes/parallel_group_pass_unittest.cc" "graph/passes/buffer_pool_memory_pass_unittest.cc" - "graph/passes/mark_node_unknown_shape_pass_unittest.cc" - "graph/passes/reshape_recovery_pass_unittest.cc" 
+ "graph/passes/mark_node_unknown_shape_pass_unittest.cc" + "graph/passes/reshape_recovery_pass_unittest.cc" "graph/passes/cast_remove_pass_unittest.cc" ) @@ -751,12 +755,12 @@ set(KERNEL_TEST_FILES set(MULTI_PARTS_TEST_FILES "graph_ir/ge_operator_factory_unittest.cc" - "graph_ir/ge_ir_build_unittest.cc" + "graph_ir/ge_ir_build_unittest.cc" "graph/transop_util_unittest.cc" "common/datatype_transfer_unittest.cc" "common/dump_manager_unittest.cc" "common/dump_op_unittest.cc" - "common/dump_exception_unittest.cc" + "common/dump_exception_unittest.cc" "common/opdebug_register_unittest.cc" "common/format_transfer_unittest.cc" "common/format_transfer_transpose_unittest.cc" @@ -775,7 +779,7 @@ set(MULTI_PARTS_TEST_FILES "common/format_transfer_fracz_nhwc_unittest.cc" "common/format_transfer_fracz_hwcn_unittest.cc" "common/ge_format_util_unittest.cc" - "common/ge_auth_file_saver_unittest.cc" + "common/ge_auth_file_saver_unittest.cc" "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" "graph/build/model_builder_unittest.cc" @@ -786,9 +790,14 @@ set(MULTI_PARTS_TEST_FILES "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" "graph/manager/graph_caching_allocator_unittest.cc" + "graph/manager/session_scope_mem_allocator_unittest.cc" + "graph/manager/run_graph_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc" "graph/manager/graph_manager_unittest.cc" "session/omg_omg_unittest.cc" + "session/ge_api_unittest.cc" + "session/inner_session_unittest.cc" + "session/session_manager_unittest.cc" ) set(GENERATOR_TEST_FILES @@ -804,7 +813,7 @@ set(SINGLE_OP_TEST_FILES "single_op/single_op_manager_unittest.cc" "single_op/stream_resource_unittest.cc" "single_op/single_op_task_unittest.cc" - "single_op/single_op_unittest.cc" + "single_op/single_op_unittest.cc" ) set(PROFILING_MNG_TEST_FILES @@ -814,7 +823,12 @@ set(PROFILING_MNG_TEST_FILES set(HYBRID_TEST_FILES "hybrid/ge_hybrid_unittest.cc" "hybrid/known_node_executor_unittest.cc" - "hybrid/executor/worker/execution_engine_unittest.cc" + "hybrid/executor/worker/execution_engine_unittest.cc" + "hybrid/executor/subgraph_executor_unittest.cc" + "hybrid/executor/worker/execution_engine_unittest.cc" + "hybrid/model/hybrid_model_builder_unittest.cc" + "hybrid/node_executor/rts/rts_node_task_unittest.cc" + "hybrid/executor/hybrid_model_async_executor_unittest.cc" ) set(OTHERS_TEST_FILES @@ -830,6 +844,8 @@ list(APPEND COMMON_SHARED_LIBRARIES mmpa_stub hccl_stub error_manager_stub + ascend_protobuf + json ) # build graph @@ -875,7 +891,7 @@ target_link_libraries(ge_ut_common PRIVATE ) # build common format -add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_FILES} ${PROTO_HDRS}) +add_library(ge_ut_common_format STATIC ${COMMON_FORMAT_SRC_FILES} ${PROTO_HDRS}) target_compile_definitions(ge_ut_common_format PRIVATE google=ascend_private @@ -1052,7 +1068,6 @@ target_link_libraries(ge_single_op PRIVATE # libge_mutiparts_utest add_executable(ut_libge_multiparts_utest ${COMMON_TEST_FILES} - ${COMMON_FORMAT_SRC_FILES} ${MULTI_PARTS_TEST_FILES} ) @@ -1067,14 +1082,14 @@ target_compile_definitions(ut_libge_multiparts_utest PRIVATE target_link_libraries(ut_libge_multiparts_utest $ - ge_build_common ge_load_common ge_execute_common ge_optimize_common ge_partition_common ge_prepare_common ge_single_op ge_ut_common - gtest gtest_main gmock gmock_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov + ge_build_common ge_load_common ge_execute_common 
ge_optimize_common ge_partition_common ge_prepare_common + ge_single_op ge_ut_common_format ge_ut_common + gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lgcov ) # libge_others_utest add_executable(ut_libge_others_utest ${COMMON_TEST_FILES} - ${COMMON_FORMAT_SRC_FILES} ${PASS_TEST_FILES} ${EXECUTE_TEST_FILES} ${OTHERS_TEST_FILES} @@ -1087,16 +1102,15 @@ target_compile_options(ut_libge_others_utest PRIVATE target_link_libraries(ut_libge_others_utest $ - ge_load_common ge_execute_common ge_ut_common - gtest gtest_main gmock gmock_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov + ge_load_common ge_execute_common ge_ut_common ge_ut_common_format + gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lgcov ) # libge_kernel_utest add_executable(ut_libge_kernel_utest - ${COMMON_TEST_FILES} - ${COMMON_FORMAT_SRC_FILES} - ${KERNEL_TEST_FILES} - ${KERNEL_SRC_FILES} + ${COMMON_TEST_FILES} + ${KERNEL_TEST_FILES} + ${KERNEL_SRC_FILES} ) target_compile_options(ut_libge_kernel_utest PRIVATE @@ -1106,8 +1120,8 @@ target_compile_options(ut_libge_kernel_utest PRIVATE target_link_libraries(ut_libge_kernel_utest $ - ge_load_common ge_ut_common - gtest gtest_main gmock gmock_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov + ge_load_common ge_ut_common ge_ut_common_format + gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lgcov ) # libge_distinct_load_utest @@ -1133,10 +1147,11 @@ target_compile_definitions(ut_libge_distinct_load_utest PRIVATE ) target_link_libraries(ut_libge_distinct_load_utest - ${COMMON_SHARED_LIBRARIES} $ - ge_execute_common ge_ut_common_format ge_load_common - ge_single_op ge_prepare_common - ge_optimize_common ge_build_common ge_partition_common ge_ut_common - gtest gtest_main gmock gmock_main ascend_protobuf json c_sec -lrt -ldl -lpthread -lgcov + -Wl,--whole-archive + ge_single_op + -Wl,--no-whole-archive + ge_execute_common ge_load_common + ge_prepare_common ge_optimize_common ge_build_common ge_partition_common ge_ut_common ge_ut_common_format + gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lpthread -lgcov ) diff --git a/tests/ut/ge/common/dump_manager_unittest.cc b/tests/ut/ge/common/dump_manager_unittest.cc index 7f3880f2..50eabc4a 100644 --- a/tests/ut/ge/common/dump_manager_unittest.cc +++ b/tests/ut/ge/common/dump_manager_unittest.cc @@ -67,6 +67,35 @@ TEST_F(UTEST_dump_manager, is_dump_single_op_close_success) { EXPECT_EQ(ret, ge::SUCCESS); } + // dump_debug and debug_status are on + TEST_F(UTEST_dump_manager, dump_op_debug_on) { + DumpConfig dump_config; + dump_config.dump_debug = "on"; + dump_config.dump_status = "on"; + auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); + EXPECT_EQ(ret, ge::SUCCESS); + } + + // just dump_status is on + TEST_F(UTEST_dump_manager, dump_status_without_dump_list) { + DumpConfig dump_config; + dump_config.dump_status = "on"; + auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); + EXPECT_EQ(ret, ge::PARAM_INVALID); + } + + // dump_status is on with dump_list + TEST_F(UTEST_dump_manager, dump_status_with_dump_list) { + DumpConfig dump_config; + dump_config.dump_status = "on"; + ModelDumpConfig dump_list; + dump_list.model_name = "test"; + dump_list.layers.push_back("first"); + dump_config.dump_list.push_back(dump_list); + auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); + EXPECT_EQ(ret, ge::PARAM_INVALID); + } + TEST_F(UTEST_dump_manager, add_dump_properties_success) { 
DumpProperties dump_properties; DumpManager::GetInstance().AddDumpProperties(0, dump_properties); diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index c883e87f..c9b0b579 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -44,12 +44,13 @@ using domi::GetContext; class UtestMemoryAssignerTest : public testing::Test { public: - ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", + int64_t size = 1024) { ge::OpDescPtr op_def = make_shared(name, type); auto desc_temp_ptr = make_shared(); auto desc_temp = *desc_temp_ptr; - TensorUtils::SetSize(desc_temp, 1024); + TensorUtils::SetSize(desc_temp, size); op_def->AddInputDesc(desc_temp); op_def->AddOutputDesc(desc_temp); @@ -215,28 +216,158 @@ class UtestMemoryAssignerTest : public testing::Test { return builder.GetGraph(); } + void MakeFftsReuseGraph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, + int32_t thread_scope_id_2 = kInvalidThreadScopeId) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 0); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + if (thread_scope_id_1 != kInvalidThreadScopeId) { + (void)ge::AttrUtils::SetInt(op_def_a, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_1); + (void)ge::AttrUtils::SetInt(op_def_b, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_1); + (void)ge::AttrUtils::SetInt(op_def_c, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_1); + } + + if (thread_scope_id_2 != kInvalidThreadScopeId) { + (void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_2); + (void)ge::AttrUtils::SetInt(op_def_e, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_2); + (void)ge::AttrUtils::SetInt(op_def_f, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_2); + } + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 
512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeContinuousReuseGraph(ge::ComputeGraphPtr graph, bool nopading = false) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + if (nopading) { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_OUTPUT_REUSE_INPUT, true); + (void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, 0); + } else { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_OUTPUT, true); + } + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeMultiBatchReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = 
CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + (void)ge::AttrUtils::SetStr(op_def_b, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_c, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_e, ATTR_NAME_BATCH_LABEL, "Batch_1"); + (void)ge::AttrUtils::SetStr(op_def_f, ATTR_NAME_BATCH_LABEL, "Batch_1"); + vector workspace_no_reuse_scope = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + protected: void SetUp() {} void TearDown() { GetContext().out_nodes_map.clear(); } }; -/* -TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { - ge::ComputeGraphPtr graph = make_shared(""); - ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); - ge::NodePtr node_a = graph->AddNode(op_def_a); - MemoryBlock* memory_block = new MemoryBlock(0); - memory_block->Init(1, kOutput, node_a, 0, 1); - memory_block->real_size_list_.clear(); - memory_block->Resize(); - - EXPECT_EQ(memory_block->Size(), 0); - - delete memory_block; -} -*/ - namespace ge { class MockBlockMemAssigner : public BlockMemAssigner { @@ -275,18 +406,50 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); } +TEST_F(UtestMemoryAssignerTest, block_memory_assign_nopading_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph, true); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 8192); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, block_memory_assign_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph); + map mem_offset; + size_t zero_copy_mem_size = 0; + MemoryAssigner memoryAssigner(graph); + ge::Status ret = memoryAssigner.AssignMemory(false, mem_offset, zero_copy_mem_size); + size_t offset = 0; + auto it = mem_offset.find(RT_MEMORY_HBM); + if (it != mem_offset.end()) { + offset = it->second; + } 
+ + EXPECT_EQ(offset, 11264); + EXPECT_EQ(ret, SUCCESS); +} + TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { ge::ComputeGraphPtr graph = make_shared(""); MakeGraph(graph); auto node_f = graph->FindNode("F"); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); - int32_t flag = 0; - (void) ge::AttrUtils::GetInt(node_f->GetOpDesc()->GetInputDesc(0), ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, flag); - EXPECT_EQ(flag, 1); + bool flag = 0; + (void) ge::AttrUtils::GetBool(node_f->GetOpDesc()->GetInputDesc(0), ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, flag); + EXPECT_EQ(flag, true); } TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { @@ -297,7 +460,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { std::string value = "A"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -318,7 +481,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { std::string value = "M"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -361,3 +524,147 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_update_ref_op_offset_reverse GraphMemoryAssigner memoryAssigner(graph); EXPECT_EQ(memoryAssigner.UpdateRefOpOffsetReverse(add), SUCCESS); } + +TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) { + ge::ut::GraphBuilder builder("graph"); + auto data_input = builder.AddNode("data", "Data", 1, 1); + auto const_input = builder.AddNode("const", "Const", 1, 1); + auto add = builder.AddNode("add", "Add", 2, 1); + // add link + builder.AddDataEdge(data_input, 0, add, 0); + builder.AddDataEdge(const_input, 0, add, 1); + ge::ComputeGraphPtr graph = builder.GetGraph(); + + auto node = graph->FindNode("add"); + EXPECT_NE(node, nullptr); + auto output_tensor_desc = node->GetOpDesc()->MutableOutputDesc(0); + ge::TensorUtils::SetSize(*output_tensor_desc, 100); + vector output_list = {0}; + node->GetOpDesc()->SetOutputOffset(output_list); + vector workspace_list = {0}; + node->GetOpDesc()->SetWorkspace(workspace_list); + vector atomic_output_index = {0}; + bool set_attr = ge::AttrUtils::SetListInt(node->GetOpDesc(), ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + EXPECT_EQ(set_attr, true); + + map> workspace_info; + workspace_info["add"][0] = 100; + set_attr = node->GetOpDesc()->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, workspace_info); + EXPECT_EQ(set_attr, true); + + { + bool is_fusion_node = false; + set_attr = ge::AttrUtils::SetBool(node->GetOpDesc(), ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); + EXPECT_EQ(set_attr, true); + + GraphMemoryAssigner graph_memory_assigner(graph); + graph_memory_assigner.memory_offset_.insert({RT_MEMORY_HBM, MemoryOffset(RT_MEMORY_HBM, 0)}); + vector mem_offset_end; + Status ret = graph_memory_assigner.AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end); + EXPECT_EQ(ret, 
SUCCESS); + EXPECT_EQ(mem_offset_end.size(), 2); + MemoryOffset mem_offset = graph_memory_assigner.memory_offset_.at(RT_MEMORY_HBM); + EXPECT_EQ(mem_offset.mem_offset_, 1024); + } + + { + bool is_fusion_node = true; + set_attr = ge::AttrUtils::SetBool(node->GetOpDesc(), ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); + EXPECT_EQ(set_attr, true); + + GraphMemoryAssigner graph_memory_assigner(graph); + graph_memory_assigner.memory_offset_.insert({RT_MEMORY_HBM, MemoryOffset(RT_MEMORY_HBM, 0)}); + vector mem_offset_end; + Status ret = graph_memory_assigner.AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end); + EXPECT_EQ(ret, SUCCESS); + EXPECT_EQ(mem_offset_end.size(), 2); + MemoryOffset mem_offset = graph_memory_assigner.memory_offset_.at(RT_MEMORY_HBM); + EXPECT_EQ(mem_offset.mem_offset_, 1024); + } +} + +TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_function_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeFftsReuseGraph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + EXPECT_EQ(offset, 5120); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_function_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeFftsReuseGraph(graph, 0, 1); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + EXPECT_EQ(offset, 6656); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_function_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeFftsReuseGraph(graph, 0, kInvalidThreadScopeId); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + EXPECT_EQ(offset, 5632); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, one_session_scope_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 5120); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, multi_batch_reuse) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeMultiBatchReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + 
size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 6656); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); +} \ No newline at end of file diff --git a/tests/ut/ge/graph/build/model_builder_unittest.cc b/tests/ut/ge/graph/build/model_builder_unittest.cc index b9204dbc..628d0fda 100644 --- a/tests/ut/ge/graph/build/model_builder_unittest.cc +++ b/tests/ut/ge/graph/build/model_builder_unittest.cc @@ -30,6 +30,7 @@ #define protected public #define private public #include "graph/build/model_builder.h" +#include "memory/memory_assigner.h" #undef protected #undef private @@ -127,6 +128,41 @@ class UtestModelBuilderTest : public testing::Test { graph->TopologicalSorting(); } +void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } protected: void SetUp() {} @@ -161,3 +197,40 @@ TEST_F(UtestModelBuilderTest, test_save_atomic_bin) { op_desc->SetExtAttr("atomic_clean_node_ptr", atomic_node); EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS); } + +TEST_F(UtestModelBuilderTest, build_model_for_get_task) { + Graph2SubGraphInfoList subgraphs; + std::map stream_max_parallel_num; + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + std::map option; + ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); + + MemoryAssigner mem_assigner(graph); + EXPECT_EQ(mem_assigner.AssignMemory(false, builder.mem_type_to_mem_offset_, builder.zero_copy_mem_size_), SUCCESS); + + ge::Model model; + EXPECT_EQ(builder.BuildModelDef(model), SUCCESS); + int64_t session_scope_mem_offset = 0; + ge::AttrUtils::GetInt(&model, 
ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset); + EXPECT_EQ(session_scope_mem_offset, 1536); +} + +TEST_F(UtestModelBuilderTest, test_model_save) { + Graph2SubGraphInfoList subgraphs; + std::map stream_max_parallel_num; + ge::ComputeGraphPtr graph = make_shared(""); + ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); + + auto op_desc = make_shared("Conv2d", "Conv2d"); + auto kernel_buffer = static_cast(Buffer(10)); + AttrUtils::SetStr(op_desc, ATTR_NAME_TBE_KERNEL_NAME, "Conv2d"); + AttrUtils::SetBytes(op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); + + ge::NodePtr node = graph->AddNode(op_desc); + ge::Model ge_model; + ge::GeModel ge_gemodel; + builder.SaveDataToModel(ge_model, ge_gemodel); + auto tbe_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + EXPECT_NE(tbe_kernel, nullptr); +} diff --git a/tests/ut/ge/graph/execute/graph_execute_unittest.cc b/tests/ut/ge/graph/execute/graph_execute_unittest.cc index e340df2f..6d982454 100644 --- a/tests/ut/ge/graph/execute/graph_execute_unittest.cc +++ b/tests/ut/ge/graph/execute/graph_execute_unittest.cc @@ -115,7 +115,7 @@ TEST_F(UtestGraphExecuteTest, test_set_callback) { ComputeGraphPtr graph = MakeShared("test"); // is_unknown_shape_graph_ = false GeRootModelPtr ge_root_model = MakeShared(graph); - RunAsyncCallback callback = [](Status, std::vector &) {}; + RunAsyncCallback callback = [](Status, std::vector &) {}; auto model_manager = ModelManager::GetInstance(); auto listener = MakeShared(); diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index e26aa86e..19b8aeab 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -34,13 +34,16 @@ #include "common/types.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/davinci_model.h" +#include "hybrid/hybrid_davinci_model.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/task_info/kernel_task_info.h" #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" +#include "graph/execute/graph_execute.h" #include "ge/common/dump/dump_properties.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" #include "proto/ge_ir.pb.h" +#include "graph/manager/graph_var_manager.h" #undef private #undef protected @@ -73,7 +76,7 @@ class DModelListener : public ge::ModelListener { DModelListener() { }; Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t resultCode, - std::vector &outputs) { + std::vector &outputs) { GELOGI("In Call back. 
OnComputeDone"); return SUCCESS; } @@ -115,7 +118,7 @@ TEST_F(UtestGeExecutor, load_data_from_file) { string test_smap = "/tmp/" + std::to_string(getpid()) + "_maps"; string self_smap = "/proc/" + std::to_string(getpid()) + "/maps"; - string copy_smap = "cp " + self_smap + " " + test_smap; + string copy_smap = "cp -f " + self_smap + " " + test_smap; EXPECT_EQ(system(copy_smap.c_str()), 0); ModelData model_data; @@ -190,4 +193,139 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { kernel_ex_task_info.davinci_model_ = &model; kernel_ex_task_info.InitDumpTask(nullptr, op_desc); } + +TEST_F(UtestGeExecutor, execute_graph_with_stream) { + VarManager::Instance(0)->Init(0, 0, 0, 0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); + + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = make_shared("default"); + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 10240); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + { + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index = 0 + } + + { + OpDescPtr op_desc = CreateOpDesc("square", "Square"); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index = 1 + + domi::TaskDef *task_def = model_task_def->add_task(); + task_def->set_stream_id(0); + task_def->set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel_def = task_def->mutable_kernel(); + kernel_def->set_stub_func("stub_func"); + kernel_def->set_args_size(64); + string args(64, '1'); + kernel_def->set_args(args.data(), 64); + domi::KernelContext *context = kernel_def->mutable_context(); + context->set_op_index(op_desc->GetId()); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + } + + { + OpDescPtr op_desc = CreateOpDesc("memcpy", MEMCPYASYNC); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({5120}); + NodePtr node = graph->AddNode(op_desc); // op_index = 2 + + domi::TaskDef *task_def = model_task_def->add_task(); + task_def->set_stream_id(0); + task_def->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + domi::MemcpyAsyncDef *memcpy_async = task_def->mutable_memcpy_async(); + memcpy_async->set_src(1024); + memcpy_async->set_dst(5120); + memcpy_async->set_dst_max(512); + memcpy_async->set_count(1); + memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); + memcpy_async->set_op_index(op_desc->GetId()); + } + + { + OpDescPtr op_desc = CreateOpDesc("output", NETOUTPUT); + op_desc->AddInputDesc(tensor); + op_desc->SetInputOffset({5120}); + op_desc->SetSrcName( { "memcpy" } ); + op_desc->SetSrcIndex( { 0 } ); + NodePtr node = graph->AddNode(op_desc); // op_index = 3 + } + + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + 
EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.task_list_.size(), 2); + + OutputData output_data; + vector outputs; + EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); + + GraphExecutor graph_executer; + graph_executer.init_flag_ = true; + GeRootModelPtr ge_root_model = make_shared(graph); + std::vector input_tensor; + std::vector output_tensor; + std::vector output_desc; + InputOutputDescInfo desc0; + output_desc.push_back(desc0); + graph_executer.ExecuteGraphWithStream(0, nullptr, ge_root_model, input_tensor, output_tensor); +} + +TEST_F(UtestGeExecutor, get_op_attr) { + shared_ptr model = MakeShared(1, g_label_call_back); + model->SetId(1); + model->om_name_ = "testom"; + model->name_ = "test"; + + shared_ptr hybrid_model = MakeShared(); + model->SetId(2); + model->om_name_ = "testom_hybrid"; + model->name_ = "test_hybrid"; + + std::shared_ptr model_manager = ModelManager::GetInstance(); + model_manager->InsertModel(1, model); + model_manager->InsertModel(2, hybrid_model); + + OpDescPtr op_desc = CreateOpDesc("test", "test"); + std::vector value{"test"}; + ge::AttrUtils::SetListStr(op_desc, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, value); + + model->SaveSpecifyAttrValues(op_desc); + + GeExecutor ge_executor; + GeExecutor::isInit_ = true; + std::string attr_value; + auto ret = ge_executor.GetOpAttr(1, "test", ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, attr_value); + EXPECT_EQ(ret, SUCCESS); + EXPECT_EQ(attr_value, "[4]test"); + ret = ge_executor.GetOpAttr(2, "test", ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, attr_value); + EXPECT_EQ(ret, PARAM_INVALID); + ret = ge_executor.GetOpAttr(3, "test", ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, attr_value); + EXPECT_EQ(ret, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID); +} } \ No newline at end of file diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 56a91ef8..4771ca8d 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -32,7 +32,7 @@ extern OpDescPtr CreateOpDesc(string name, string type); class DModelListener : public ModelListener { public: DModelListener(){}; - uint32_t OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result, vector &outputs) { + uint32_t OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result, vector &outputs) { return 0; } }; @@ -138,7 +138,7 @@ TEST_F(UtestDavinciModel, init_success) { EXPECT_EQ(model.task_list_.size(), 2); OutputData output_data; - vector outputs; + vector outputs; EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); EXPECT_EQ(output_data.blobs.size(), 1); EXPECT_EQ(outputs.size(), 1); @@ -333,8 +333,8 @@ TEST_F(UtestDavinciModel, init_unknown) { TEST_F(UtestDavinciModel, Init_variable_op) { DavinciModel model(0, g_local_call_back); model.ge_model_ = make_shared(); - model.runtime_param_.mem_base = (uint8_t *)0x08000000; - model.runtime_param_.mem_size = 5120000; + model.runtime_param_.mem_size = 51200; + model.runtime_param_.mem_base = (uint8_t *)malloc(model.runtime_param_.mem_size); ComputeGraphPtr graph = make_shared("default"); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); @@ -365,6 +365,8 @@ TEST_F(UtestDavinciModel, Init_variable_op) { EXPECT_EQ(model.CopyOutputData(1, output_data, RT_MEMCPY_DEVICE_TO_HOST), SUCCESS); EXPECT_EQ(model.ReturnResult(1, false, true, &output_data), INTERNAL_ERROR); + free(model.runtime_param_.mem_base); + model.runtime_param_.mem_base = nullptr; } 
TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { @@ -1022,7 +1024,7 @@ TEST_F(UtestDavinciModel, NnExecute) { rtStream_t stream = nullptr; InputData input_data; OutputData output_data; - vector outputs; + vector outputs; EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); EXPECT_EQ(output_data.blobs.size(), 1); EXPECT_EQ(outputs.size(), 1); diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 44d4d042..327dd248 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -91,8 +91,8 @@ TEST_F(UtestKernelExTaskInfo, success_kernel_ex_task_release) { // test kernel_ex_task_Release TEST_F(UtestKernelExTaskInfo, success_kernel_ex_task_info_copy) { DavinciModel model(0, nullptr); - model.runtime_param_.mem_base = (uint8_t *)0x12345; - model.runtime_param_.mem_size = 100332000; + model.runtime_param_.mem_size = 10240; + model.runtime_param_.mem_base = new uint8_t[model.runtime_param_.mem_size]; rtStream_t stream = nullptr; rtStreamCreate(&stream, 0); @@ -108,19 +108,20 @@ TEST_F(UtestKernelExTaskInfo, success_kernel_ex_task_info_copy) { EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); // workspace empty. - model.op_list_[0]->SetWorkspace({100331008}); // offset + model.op_list_[0]->SetWorkspace({1008}); // offset model.op_list_[0]->SetWorkspaceBytes({0}); // length EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); // workspace addr is null. - model.op_list_[0]->SetWorkspace({100331008}); // offset + model.op_list_[0]->SetWorkspace({1208}); // offset model.op_list_[0]->SetWorkspaceBytes({10}); // length EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); // workspace addr is small. 
- model.op_list_[0]->SetWorkspace({100331008}); // offset + model.op_list_[0]->SetWorkspace({1308}); // offset model.op_list_[0]->SetWorkspaceBytes({150}); // length EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), SUCCESS); task_def.clear_kernel_ex(); + delete [] model.runtime_param_.mem_base; model.runtime_param_.mem_base = nullptr; } @@ -154,4 +155,58 @@ TEST_F(UtestKernelExTaskInfo, parse_update_addr) { KernelExTaskInfo kernel_ex_task_info; EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); } + +TEST_F(UtestKernelExTaskInfo, parse_topic_type_success_1) { + const string ext_info = {7,0,0,0,4,0,0,0,0,0,0,0}; + const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); + AttrUtils::SetBool(op_desc, "_AllShape", true); + + KernelExTaskInfo kernel_ex_task_info; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); +} + +TEST_F(UtestKernelExTaskInfo, parse_topic_type_success_2) { + const string ext_info = {7,0,0,0,4,0,0,0,1,0,0,0}; + const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); + AttrUtils::SetBool(op_desc, "_AllShape", true); + + KernelExTaskInfo kernel_ex_task_info; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); +} + +TEST_F(UtestKernelExTaskInfo, parse_topic_type_success_3) { + const string ext_info = {7,0,0,0,4,0,0,0,2,0,0,0}; + const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); + AttrUtils::SetBool(op_desc, "_AllShape", true); + + KernelExTaskInfo kernel_ex_task_info; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); +} + +TEST_F(UtestKernelExTaskInfo, parse_topic_type_success_4) { + const string ext_info = {7,0,0,0,4,0,0,0,3,0,0,0}; + const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); + AttrUtils::SetBool(op_desc, "_AllShape", true); + + KernelExTaskInfo kernel_ex_task_info; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); +} + +TEST_F(UtestKernelExTaskInfo, parse_topic_type_failed_1) { + const string ext_info = {7,0,0,0,4,0,0,0,4,0,0,0}; + const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); + AttrUtils::SetBool(op_desc, "_AllShape", true); + + KernelExTaskInfo kernel_ex_task_info; + EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); +} + +TEST_F(UtestKernelExTaskInfo, parse_topic_type_failed_2) { + const string ext_info = {7,0,0,0,2,0,0,0,2,0,0,0}; + const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); + AttrUtils::SetBool(op_desc, "_AllShape", true); + + KernelExTaskInfo kernel_ex_task_info; + EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/graph/load/model_manager_unittest.cc b/tests/ut/ge/graph/load/model_manager_unittest.cc index 342f6362..de891072 100644 --- a/tests/ut/ge/graph/load/model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/model_manager_unittest.cc @@ -25,6 +25,7 @@ #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/davinci_model.h" +#include "graph/ops_stub.h" using namespace std; using namespace testing; @@ -56,6 +57,23 @@ class UtestModelManagerModelManager : public testing::Test { void TearDown() {} + void CreateGraph(Graph &graph) { + TensorDesc desc(ge::Shape({1, 3, 224, 224})); + uint32_t size = desc.GetShape().GetShapeSize(); + desc.SetSize(size); + auto data = op::Data("Data").set_attr_index(0); + data.update_input_desc_data(desc); + 
data.update_output_desc_out(desc); + + auto flatten = op::Flatten("Flatten").set_input_x(data, data.name_out_out()); + + std::vector inputs{data}; + std::vector outputs{flatten}; + std::vector targets{flatten}; + // Graph graph("test_graph"); + graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets); + } + void GenUnencryptModelData(ModelData &data) { const int model_len = 10; data.model_len = sizeof(ModelFileHeader) + model_len; @@ -414,10 +432,34 @@ TEST_F(UtestModelManagerModelManager, test_data_input_tensor) { mm.model_map_[1] = model; mm.hybrid_model_map_[1] = std::make_shared(); - auto input_tensor = InputTensorInfo(); - vector inputs; + ge::Tensor input_tensor; + vector inputs; inputs.emplace_back(input_tensor); auto ret = mm.DataInputTensor(model_id,inputs); - EXPECT_EQ(UNSUPPORTED, ret); + EXPECT_EQ(PARAM_INVALID, ret); // HybridDavinciModel::impl_ is null. +} + +TEST_F(UtestModelManagerModelManager, test_init_dump_properties_with_new_session_id) { + ModelManager model_manager; + uint64_t session_id = 1; + model_manager.InitDumPropertiesWithNewSessionId(session_id); +} + +TEST_F(UtestModelManagerModelManager, test_update_session_id) { + ModelManager model_manager; + uint32_t model_id = 0; + uint64_t session_id = 0; + GeModelPtr ge_model = MakeShared(); + std::shared_ptr davinci_model = MakeShared(0, nullptr); + model_manager.UpdateSessionId(model_id, ge_model, davinci_model, session_id); +} + +TEST_F(UtestModelManagerModelManager, test_has_var_node) { + ModelManager model_manager; + uint64_t session_id = 1; + Graph graph("test"); + CreateGraph(graph); + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + model_manager.HasVarNode(compute_graph); } } // namespace ge diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc index 7863a70f..5833a13a 100644 --- a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -28,8 +28,7 @@ #define protected public #define private public -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/manager/graph_manager_unittest.cc b/tests/ut/ge/graph/manager/graph_manager_unittest.cc index 79beb02d..96a0fa64 100644 --- a/tests/ut/ge/graph/manager/graph_manager_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_manager_unittest.cc @@ -206,6 +206,52 @@ TEST_F(UtestGraphManagerTest, test_add_graph_3) { EXPECT_EQ(status2, ge::SUCCESS); } +TEST_F(UtestGraphManagerTest, test_add_graph_4) { + GraphId graph_id = 1; + GraphManager graph_manager; + // create graph + Graph graph("test_graph"); + CreateGraph(graph); + auto compute_graph = GraphUtils::GetComputeGraph(graph); + (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); + + std::map options; + OmgContext context; + Status status = graph_manager.AddGraph(graph_id, graph, options, context); + EXPECT_NE(status, ge::SUCCESS); +} + +TEST_F(UtestGraphManagerTest, test_add_graph_5) { + Graph graph("test_graph"); + auto data = op::Data("Data").set_attr_index(1); + auto flatten = op::Flatten("Flatten").set_input_x(data, data.name_out_out()); + std::vector inputs{data}; + std::vector outputs{flatten}; + graph.SetInputs(inputs).SetOutputs(outputs); + + std::map options = {{"ge.exec.dataInputsShapeRange", "0:[-1]"}}; + OmgContext context; + GraphId graph_id = 1; + 
GraphManager graph_manager; + EXPECT_EQ(graph_manager.AddGraph(graph_id, graph, options, context), GRAPH_PARAM_INVALID); +} + +TEST_F(UtestGraphManagerTest, test_add_graph_with_copy_1) { + GraphId graph_id = 1; + GraphManager graph_manager; + + // create graph + Graph graph("test_graph"); + CreateGraph(graph); + GraphNodePtr graph_node = MakeShared(graph_id); + graph_manager.graph_map_.insert({1, graph_node}); + + std::map options; + OmgContext context; + Status status = graph_manager.AddGraphWithCopy(graph_id, graph, options, context); + EXPECT_NE(status, ge::SUCCESS); +} + TEST_F(UtestGraphManagerTest, test_remove_graph_1) { GraphId graph_id = 1; GraphManager graph_manager; @@ -249,7 +295,7 @@ TEST_F(UtestGraphManagerTest, test_pre_run_thread) { graph_manager.thread_run_flag_ = true; GraphId graph_id = 1; - std::vector input_tensor; + std::vector input_tensor; uint64_t session_id = 0; error_message::Context error_context; GEThreadLocalContext context; @@ -275,7 +321,7 @@ TEST_F(UtestGraphManagerTest, test_pre_run_thread_2) { graph_manager.IncreaseGraphCount(graph_id); graph_manager.IncreaseGraphCount(graph_id); graph_node_1->SetBuildFlag(true); - std::vector input_tensor; + std::vector input_tensor; uint64_t session_id = 0; error_message::Context error_context; GEThreadLocalContext context; @@ -350,7 +396,7 @@ TEST_F(UtestGraphManagerTest, test_check_incre_build_and_pre_run_2) { ComputeGraphPtr compute_graph = MakeShared("test_graph"); GeRootModelPtr ge_root_model = MakeShared(compute_graph); GraphManager::PreRunArgs arg; - arg.callback = [](Status, std::vector &) {}; + arg.callback = [](Status, std::vector &) {}; GraphNodePtr graph_node = MakeShared(graph_id); graph_node->SetBuildFlag(true); graph_node->Lock(); @@ -366,7 +412,7 @@ TEST_F(UtestGraphManagerTest, test_check_incre_build_and_pre_run_3) { ComputeGraphPtr compute_graph = MakeShared("test_graph"); GeRootModelPtr ge_root_model = MakeShared(compute_graph); GraphManager::PreRunArgs arg; - arg.callback = [](Status, std::vector &) {}; + arg.callback = [](Status, std::vector &) {}; GraphNodePtr graph_node = MakeShared(graph_id); graph_node->SetBuildFlag(false); graph_node->Lock(); @@ -403,3 +449,34 @@ TEST_F(UtestGraphManagerTest, test_add_graph_with_copy_fail) { status = graph_manager.AddGraphWithCopy(graph_id, graph, options, context); EXPECT_NE(status, ge::SUCCESS); } + +TEST_F(UtestGraphManagerTest, ParseInputsDimsForData_success) { + GraphManager graph_manager; + std::vector input_tensors; + ge::Tensor tensor; + input_tensors.emplace_back(tensor); + graph_manager.ParseInputsDimsForData(input_tensors); +} + +// TEST_F(UtestGraphManagerTest, ParseInputsDimsForGetNexNosinkAndData_success) { +// GraphManager graph_manager; + +// ge::ComputeGraphPtr graph = std::make_shared("default"); + +// // save1 +// ge::OpDescPtr save_op = std::make_shared(); +// save_op->SetType("Save"); +// save_op->SetName("Save1"); +// save_op->AddInputDesc(ge::GeTensorDesc()); +// save_op->AddOutputDesc(ge::GeTensorDesc()); +// AttrUtils::SetInt(save_op, ATTR_NAME_INDEX, 1); +// ge::NodePtr save_node = graph->AddNode(save_op); + +// std::vector nodes; +// nodes.emplace_back(save_node); +// ge::Tensor tensor; +// std::vector input_tensors; +// input_tensors.emplace_back(tensor); +// auto ret = graph_manager.ParseInputsDimsForGetNexNosinkAndData(nodes, input_tensors); +// EXPECT_EQ(ret, ge::SUCCESS); +// } diff --git a/tests/ut/ge/graph/manager/run_graph_unittest.cc b/tests/ut/ge/graph/manager/run_graph_unittest.cc new file mode 100644 index 
00000000..445a5864 --- /dev/null +++ b/tests/ut/ge/graph/manager/run_graph_unittest.cc @@ -0,0 +1,60 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include"graph/manager/graph_manager_utils.h" +#include "graph/manager/graph_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestGraphRunTest : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestGraphRunTest, RunGraphWithStreamAsync) { + GraphManager graph_manager; + GeTensor input0, input1; + std::vector inputs{input0, input1}; + std::vector outputs; + GraphNodePtr graph_node = std::make_shared(1); + graph_manager.AddGraphNode(1, graph_node); + GraphPtr graph = std::make_shared("test"); + graph_node->SetGraph(graph); + graph_node->SetRunFlag(false); + graph_node->SetBuildFlag(true); + auto ret = graph_manager.RunGraphWithStreamAsync(1, nullptr, 0, inputs, outputs); +} diff --git a/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc new file mode 100644 index 00000000..87af585a --- /dev/null +++ b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_mem_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestSessionScopeMemAllocator : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestSessionScopeMemAllocator, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1000, 0); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, free_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + + EXPECT_EQ(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + EXPECT_NE(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, free_success_session) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + mem_type.push_back(RT_MEMORY_P2P_DDR); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { + if (RT_MEMORY_P2P_DDR == memory_type) { + EXPECT_NE(MemManager::Instance().SessionScopeMemInstance(memory_type).Free(0), SUCCESS); + } else { + EXPECT_EQ(MemManager::Instance().SessionScopeMemInstance(memory_type).Free(0), SUCCESS); + } + } + MemManager::Instance().Finalize(); +} diff --git a/tests/ut/ge/graph/passes/global_step_insert_pass_unittest.cc b/tests/ut/ge/graph/passes/global_step_insert_pass_unittest.cc new file mode 100644 index 00000000..9da2565d --- /dev/null +++ b/tests/ut/ge/graph/passes/global_step_insert_pass_unittest.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define protected public +#define private public +#include "graph/passes/global_step_insert_pass.h" + +#include "common/op/ge_op_utils.h" +#include "common/types.h" +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/compute_graph.h" +#include "graph/op_desc.h" +#include "graph/passes/base_pass.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/tuning_utils.h" +#include "graph_builder_utils.h" +#include "graph/ge_context.h" +#include "graph/ge_local_context.h" +#include "inc/pass_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; + +class UtestGlobalStepInsertPass : public Test { + protected: +}; + +static ComputeGraphPtr BuildGraph1() { + ge::ut::GraphBuilder builder("g1"); + auto var1 = builder.AddNode("var1", "Variable", 0, 1); + auto var2 = builder.AddNode("var2", "Variable", 0, 1); + auto identity1 = builder.AddNode("identity1", "Identity", 1, 1); + auto out = builder.AddNode("out", "NetOutput", 1, 1); + + builder.AddDataEdge(var1, 0, identity1, 0); + builder.AddControlEdge(var2, identity1); + builder.AddDataEdge(identity1, 0, out, 0); + return builder.GetGraph(); +} + +TEST_F(UtestGlobalStepInsertPass, skip_insert) { + auto graph = BuildGraph1(); + std::string build_mode; + std::map options_map; + options_map.insert({ge::RUN_FLAG, "0"}); + ge::GetThreadLocalContext().SetGraphOption(options_map); + GlobalStepInsertPass pass; + Status status = pass.Run(graph); + EXPECT_EQ(status, SUCCESS); + NodePtr found_node = graph->FindNode(NODE_NAME_GLOBAL_STEP); + EXPECT_EQ(found_node, nullptr); +} diff --git a/tests/ut/ge/graph/passes/infershape_pass_unittest.cc b/tests/ut/ge/graph/passes/infershape_pass_unittest.cc index a7628b2e..13e66c50 100644 --- a/tests/ut/ge/graph/passes/infershape_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/infershape_pass_unittest.cc @@ -20,9 +20,8 @@ #define private public #include "graph/passes/infershape_pass.h" -#include "graph/compute_graph.h" -#include "graph/node.h" -#include "graph/operator.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/graph_utils.h" #include "graph/operator_factory.h" #include "graph/operator_reg.h" #include "graph_builder_utils.h" @@ -36,6 +35,40 @@ class UtestGraphInfershapePass : public testing::Test { void TearDown() {} }; +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(1024); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(1024); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + const auto stub_func = [](Operator &op) { return GRAPH_SUCCESS; }; + op_desc->AddInferFunc(stub_func); + op_desc->AddInferFormatFunc(stub_func); + 
op_desc->AddVerifierFunc(stub_func); + + return graph.AddNode(op_desc); +} + TEST_F(UtestGraphInfershapePass, infershape_pass_failed) { GeTensorDesc ge_tensor_desc(GeShape({-2, 2, 3, 4}), ge::FORMAT_NCHW, DT_FLOAT16); string type = "AddN"; @@ -62,4 +95,67 @@ TEST_F(UtestGraphInfershapePass, delete_need_infer_again) { EXPECT_EQ(infershape_pass.Run(no_op_node), SUCCESS); } +TEST_F(UtestGraphInfershapePass, stop_node_for_while_loop) { +/******************************************************************************* + * Exit Identify + * \ / \. + * \ / \. + * Switch Add + * / | | + * / | | + * / | | + * LoopCond | | + * \ | | + * \ | | + * \ | | + * Less | | + * \ | NextIteration + * \ | | + * \ | | + * Merge <---------| + * | + * | + * Enter + ******************************************************************************/ + auto graph = std::make_shared("test_infer_shape"); + auto data1 = CreateNode(*graph, "data", DATA, 1, 1); + auto enter1 = CreateNode(*graph, "enter", ENTER, 1, 1); + auto merge1 = CreateNode(*graph, "merge", MERGE, 2, 2); + auto less1 = CreateNode(*graph, "less", LESS, 2, 1); + auto loop1 = CreateNode(*graph, "loopcond", LOOPCOND, 1, 1); + auto switch1 = CreateNode(*graph, "switch", SWITCH, 2, 2); + auto ident1 = CreateNode(*graph, "identity", IDENTITY, 1, 1); + auto add1 = CreateNode(*graph, "add", ADD, 2, 1); + auto next1 = CreateNode(*graph, "next", NEXTITERATION, 1, 1); + auto exit1 = CreateNode(*graph, "exit", EXIT, 1, 1); + auto value0 = CreateNode(*graph, "const", CONSTANT, 0, 1); + auto value1 = CreateNode(*graph, "const", CONSTANT, 0, 1); + auto output1 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), enter1->GetInDataAnchor(0)); + GraphUtils::AddEdge(enter1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), loop1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(loop1->GetOutDataAnchor(0), switch1->GetInDataAnchor(0)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), switch1->GetInDataAnchor(1)); + + GraphUtils::AddEdge(switch1->GetOutDataAnchor(0), exit1->GetInDataAnchor(0)); + GraphUtils::AddEdge(switch1->GetOutDataAnchor(1), ident1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(ident1->GetOutDataAnchor(0), add1->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), add1->GetInDataAnchor(1)); + GraphUtils::AddEdge(add1->GetOutDataAnchor(0), next1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(next1->GetOutDataAnchor(0), merge1->GetInDataAnchor(1)); + GraphUtils::AddEdge(exit1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); + + GEPass ge_passes(graph); + NamesToPass names_to_passes; + InferShapePass infer_shape_pass; + names_to_passes.emplace_back("InferShapePass", &infer_shape_pass); + + EXPECT_EQ(ge_passes.Run(names_to_passes), SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/graph/passes/mark_force_unknown_for_cond_pass_unittest.cc b/tests/ut/ge/graph/passes/mark_force_unknown_for_cond_pass_unittest.cc new file mode 100644 index 00000000..b416d958 --- /dev/null +++ b/tests/ut/ge/graph/passes/mark_force_unknown_for_cond_pass_unittest.cc @@ -0,0 +1,230 @@ +/** + * Copyright 2020-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define protected public +#define private public +#include "graph/passes/mark_force_unknown_for_cond_pass.h" + +#include "graph/utils/tensor_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/operator_factory.h" +#include "graph/operator_reg.h" +#include "graph_builder_utils.h" + +using namespace std; +using namespace testing; +namespace ge { +class UtestMarkForceUnknownForCondPass : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(1024); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(1024); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + const auto stub_func = [](Operator &op) { return GRAPH_SUCCESS; }; + op_desc->AddInferFunc(stub_func); + op_desc->AddInferFormatFunc(stub_func); + op_desc->AddVerifierFunc(stub_func); + + return graph.AddNode(op_desc); +} + +static void CreateLoopGraph(ComputeGraphPtr &graph, NodePtr &merge) { +/******************************************************************************* + * Exit Identify + * \ / \. + * \ / \. 
+ * Switch Add + * / | | + * / | | + * / | | + * LoopCond | | + * \ | | + * \ | | + * \ | | + * Less | | + * \ | NextIteration + * \ | | + * \ | | + * Merge <---------| + * | + * | + * Enter + ******************************************************************************/ + auto data1 = CreateNode(*graph, "data", DATA, 1, 1); + auto enter1 = CreateNode(*graph, "enter", ENTER, 1, 1); + auto merge1 = CreateNode(*graph, "merge", MERGE, 2, 2); + auto less1 = CreateNode(*graph, "less", LESS, 2, 1); + auto loop1 = CreateNode(*graph, "loopcond", LOOPCOND, 1, 1); + auto switch1 = CreateNode(*graph, "switch", SWITCH, 2, 2); + auto ident1 = CreateNode(*graph, "identity", IDENTITY, 1, 1); + auto add1 = CreateNode(*graph, "add", ADD, 2, 1); + auto next1 = CreateNode(*graph, "next", NEXTITERATION, 1, 1); + auto exit1 = CreateNode(*graph, "exit", EXIT, 1, 1); + auto value0 = CreateNode(*graph, "const", CONSTANT, 0, 1); + auto value1 = CreateNode(*graph, "const", CONSTANT, 0, 1); + auto output1 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), enter1->GetInDataAnchor(0)); + GraphUtils::AddEdge(enter1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), loop1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(loop1->GetOutDataAnchor(0), switch1->GetInDataAnchor(0)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), switch1->GetInDataAnchor(1)); + + GraphUtils::AddEdge(switch1->GetOutDataAnchor(0), exit1->GetInDataAnchor(0)); + GraphUtils::AddEdge(switch1->GetOutDataAnchor(1), ident1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(ident1->GetOutDataAnchor(0), add1->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), add1->GetInDataAnchor(1)); + GraphUtils::AddEdge(add1->GetOutDataAnchor(0), next1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(next1->GetOutDataAnchor(0), merge1->GetInDataAnchor(1)); + GraphUtils::AddEdge(exit1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); + + merge = merge1; +} + +static void CreateCondGraph(ComputeGraphPtr &graph, NodePtr &merge) { +/******************************************************************************* + * NetOutput + * | + * | + * Merge + * / \. + * / \. + * / \. + * Add Sub + * | \ | \. + * | \ | \. + * | \ | Const + * | \ | \. 
+ * | \ | Identify + * | \ | | + * Switch Switch Switch Switch + * | | | | | + * | | | | | + * x y Cond z + ******************************************************************************/ + auto data1 = CreateNode(*graph, "data_x", DATA, 1, 1); + auto data2 = CreateNode(*graph, "data_y", DATA, 1, 1); + auto data3 = CreateNode(*graph, "data_z", DATA, 1, 1); + + auto less1 = CreateNode(*graph, "less", LESS, 2, 1); + + auto switch1 = CreateNode(*graph, "switch_x", SWITCH, 2, 2); + auto switch2 = CreateNode(*graph, "switch_y", SWITCH, 2, 2); + auto switch3 = CreateNode(*graph, "switch_z", SWITCH, 2, 2); + auto switch4 = CreateNode(*graph, "switch_i", SWITCH, 2, 2); + + auto add1 = CreateNode(*graph, "add", ADD, 2, 1); + auto sub1 = CreateNode(*graph, "add", SUB, 2, 1); + auto ident1 = CreateNode(*graph, "identity", IDENTITY, 1, 1); + auto const1 = CreateNode(*graph, "const", CONSTANT, 0, 1); + + auto merge1 = CreateNode(*graph, "merge", MERGE, 2, 2); + auto output1 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), switch1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), switch2->GetInDataAnchor(0)); + GraphUtils::AddEdge(data3->GetOutDataAnchor(0), switch3->GetInDataAnchor(0)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch4->GetInDataAnchor(0)); + + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch1->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch2->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch3->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch4->GetInDataAnchor(1)); + + GraphUtils::AddEdge(switch1->GetOutDataAnchor(0), add1->GetInDataAnchor(0)); + GraphUtils::AddEdge(switch2->GetOutDataAnchor(0), add1->GetInDataAnchor(1)); + GraphUtils::AddEdge(switch3->GetOutDataAnchor(0), sub1->GetInDataAnchor(0)); + GraphUtils::AddEdge(switch4->GetOutDataAnchor(0), ident1->GetInDataAnchor(1)); + GraphUtils::AddEdge(ident1->GetOutControlAnchor(), const1->GetInControlAnchor()); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), sub1->GetInDataAnchor(1)); + + GraphUtils::AddEdge(add1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(sub1->GetOutDataAnchor(0), merge1->GetInDataAnchor(1)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); + + merge = merge1; +} + +TEST_F(UtestMarkForceUnknownForCondPass, skip_while_loop_merge) { + auto graph = std::make_shared("test_graph"); + NodePtr merge; + CreateLoopGraph(graph, merge); + + AttrUtils::SetBool(merge->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); + + MarkForceUnknownForCondPass mark_force_unknown_pass; + EXPECT_EQ(mark_force_unknown_pass.Run(graph), SUCCESS); // skip LoopCond +} + +TEST_F(UtestMarkForceUnknownForCondPass, skip_known_shape_merge) { + auto graph = std::make_shared("test_graph"); + NodePtr merge; + CreateCondGraph(graph, merge); + + MarkForceUnknownForCondPass mark_force_unknown_pass; + EXPECT_EQ(mark_force_unknown_pass.Run(graph), SUCCESS); // skip known shape merge +} + + +TEST_F(UtestMarkForceUnknownForCondPass, mark_unknown_shape_merge) { + auto graph = std::make_shared("test_graph"); + NodePtr merge; + CreateCondGraph(graph, merge); + + auto tensor_desc = merge->GetOpDesc()->GetOutputDesc(0); + tensor_desc.SetShape(GeShape({-1})); // Set for 
unknown. + merge->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + + MarkForceUnknownForCondPass mark_force_unknown_pass; + EXPECT_EQ(mark_force_unknown_pass.Run(graph), SUCCESS); +} +} // namespace ge diff --git a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc index af60021c..3be11452 100644 --- a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc @@ -42,8 +42,8 @@ ut::GraphBuilder Graph1Builder() { auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1}); auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224}); auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); - auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); - auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0); + auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {-1, 224}); + auto netoutput1 = builder.AddNode("netoutput1", "NetOutput", 2, 0); builder.AddDataEdge(var1, 0, transdata1, 0); builder.AddDataEdge(const1, 0, transdata2, 0); @@ -58,10 +58,10 @@ TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) { auto builder = Graph1Builder(); auto graph = builder.GetGraph(); ReshapeRecoveryPass reshape_recovery_pass; - EXPECT_EQ(graph->GetDirectNodesSize(),5); + EXPECT_EQ(graph->GetDirectNodesSize(), 5); Status ret = reshape_recovery_pass.Run(graph); EXPECT_EQ(ret, SUCCESS); - EXPECT_EQ(graph->GetDirectNodesSize(),8); + EXPECT_EQ(graph->GetDirectNodesSize(), 7); auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0"); EXPECT_NE(reshape1, nullptr); diff --git a/tests/ut/ge/graph/passes/unused_and_isolated_op_remove_pass_unittest.cc b/tests/ut/ge/graph/passes/unused_and_isolated_op_remove_pass_unittest.cc deleted file mode 100644 index 21b5d7e3..00000000 --- a/tests/ut/ge/graph/passes/unused_and_isolated_op_remove_pass_unittest.cc +++ /dev/null @@ -1,191 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/unused_op_remove_pass.h" - -#include -#include "graph/passes/isolated_op_remove_pass.h" -#include "pass_manager.h" - -using namespace ge; - -class UtestGraphPassesUnusedAndIsolatedOpRemovePass : public testing::Test { - protected: - void SetUp() {} - - void TearDown() {} - - NodePtr AddNode(ComputeGraphPtr graph, const string &name, const string &type, int32_t in_anchors_num = 1, - int32_t out_anchors_num = 1) { - GeTensorDesc tensor_desc; - OpDescPtr op_desc = make_shared(name, type); - for (int32_t i = 0; i < in_anchors_num; i++) { - op_desc->AddInputDesc(tensor_desc); - } - for (int32_t i = 0; i < out_anchors_num; i++) { - op_desc->AddOutputDesc(tensor_desc); - } - - NodePtr node = graph->AddNode(op_desc); - return node; - } -}; - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_reshape) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - NodePtr reshape_node = AddNode(graph, "reshape1", RESHAPE); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), reshape_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node = graph->FindNode("transpose1"); - EXPECT_EQ(transpose_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_squeeze) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - NodePtr squeeze_node = AddNode(graph, "squeeze1", SQUEEZE); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), squeeze_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node = graph->FindNode("transpose1"); - EXPECT_EQ(transpose_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_conv) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - vector order_list = {0, 2, 3, 1}; - AttrUtils::SetListInt(transpose_node->GetOpDesc(), PERMUTE_ATTR_ORDER, order_list); - AttrUtils::SetInt(transpose_node->GetOpDesc(), ATTR_NAME_FORMAT, (int64_t)DT_FLOAT); - - NodePtr conv_node = AddNode(graph, "conv1", CONVOLUTION); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), conv_node->GetInDataAnchor(0)); - - NodePtr conv2_node = AddNode(graph, "conv2", CONVOLUTION); - GraphUtils::AddEdge(conv_node->GetOutDataAnchor(0), conv2_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - 
passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node0 = graph->FindNode("transpose1"); - NodePtr found_node = graph->FindNode("conv1"); - EXPECT_EQ(conv_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_conv3) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - vector order_list = {0, 1, 3, 2}; - AttrUtils::SetListInt(transpose_node->GetOpDesc(), PERMUTE_ATTR_ORDER, order_list); - AttrUtils::SetInt(transpose_node->GetOpDesc(), ATTR_NAME_FORMAT, (int64_t)DT_FLOAT); - - NodePtr conv_node = AddNode(graph, "conv1", CONVOLUTION); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), conv_node->GetInDataAnchor(0)); - - NodePtr conv2_node = AddNode(graph, "conv2", CONVOLUTION); - GraphUtils::AddEdge(conv_node->GetOutDataAnchor(0), conv2_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node0 = graph->FindNode("transpose1"); - EXPECT_EQ(transpose_node, found_node0); - NodePtr found_node = graph->FindNode("conv1"); - EXPECT_EQ(conv_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, cast_and_cast) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr conv3_node = AddNode(graph, "cast3", CAST); - NodePtr transpose_node = AddNode(graph, "cast1", CAST); - NodePtr transpose_node_1 = AddNode(graph, "cast2", CAST); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), conv3_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(conv3_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), transpose_node_1->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, remove_parent_node) { - ComputeGraphPtr graph = std::make_shared("test"); - vector node_vec; - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr conv3_node = AddNode(graph, "cast3", CAST); - NodePtr transpose_node = AddNode(graph, "cast1", CAST); - NodePtr transpose_node_1 = AddNode(graph, "cast2", CAST); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), conv3_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(conv3_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), transpose_node_1->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); -} diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc 
b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index d6af6de9..f1ea7a27 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -35,7 +35,7 @@ #include "graph/manager/graph_context.h" #include "graph/optimize/graph_optimize.h" #include "graph/manager/util/variable_accelerate_ctrl.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph_builder_utils.h" #include "cce/dnn.h" diff --git a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc index ff49f34c..6c5babfc 100644 --- a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc +++ b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc @@ -72,6 +72,54 @@ ComputeGraphPtr BuildGraph3() { return builder.GetGraph(); } +ComputeGraphPtr BuildGraph5() { + auto builder = ut::GraphBuilder("g5"); + auto data1 = builder.AddNode("input1", DATA, 1, 1, FORMAT_NCHW, DT_FLOAT, {1, 2, 3}); + auto data2 = builder.AddNode("input2", DATA, 1, 1, FORMAT_NCHW, DT_FLOAT, {4, 10}); + auto add = builder.AddNode("add", ADD, 2, 1); + auto netoutput = builder.AddNode("netoutput", NETOUTPUT, 1, 0); + + builder.AddDataEdge(data1, 0, add, 0); + builder.AddDataEdge(data2, 0, add, 1); + builder.AddDataEdge(add, 0,netoutput, 0); + return builder.GetGraph(); +} + +/* + * MapIndex Data1 subgraph1 subgraph2 + * \ / + * Case ===> Data2 Data3 + * | + * Netoutput + */ +ComputeGraphPtr BuildGraph4() { + auto builder = ut::GraphBuilder("mbatch_Case"); + + auto data1 = builder.AddNode("data1", DATA, 1, 1); + auto data_desc = data1->GetOpDesc(); + AttrUtils::SetStr(data_desc, ATTR_ATC_USER_DEFINE_DATATYPE, "DT_FLOAT16"); + AttrUtils::SetStr(data_desc, "mbatch-switch-name", "case1"); + AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, 0); + + auto mapindex1 = builder.AddNode("mapindex1", "MapIndex", 0, 1); + auto case1 = builder.AddNode("case1", CASE, 2, 1); + auto netoutput1 = builder.AddNode("netoutput1", NETOUTPUT, 1, 0); + + builder.AddDataEdge(mapindex1, 0, case1, 0); + builder.AddDataEdge(data1, 0, case1, 1); + builder.AddDataEdge(case1, 0, netoutput1, 0); + + return builder.GetGraph(); +} + +ComputeGraphPtr BuildGraph4_Subgraph(string graph_name) { + auto builder = ut::GraphBuilder(graph_name); + auto data1 = builder.AddNode(graph_name + "_data1", DATA, 1, 1); + auto data_desc = data1->GetOpDesc(); + AttrUtils::SetInt(data_desc, ATTR_NAME_PARENT_NODE_INDEX, 1); + return builder.GetGraph(); +} + TEST_F(UtestGraphPreproces, test_dynamic_input_shape_parse) { ge::GraphPrepare graph_prepare; graph_prepare.compute_graph_ = BuildGraph1(); @@ -118,4 +166,61 @@ TEST_F(UtestGraphPreproces, test_update_input_output1) { Status ret = graph_prepare.UpdateInputOutputByOptions(); EXPECT_EQ(ret, SUCCESS); } + + +TEST_F(UtestGraphPreproces, check_ref_op_data_succ) { + GraphPrepare graph_preparer; + ComputeGraphPtr graph_test = BuildGraph5(); + NodePtr add_node = nullptr; + for (auto &node : graph_test->GetAllNodes()) { + if (node->GetName() == "add") { + add_node = node; + } + } + EXPECT_NE(add_node, nullptr); + string input_name = "__input0"; + std::set ref_nodes; + auto ret = graph_preparer.CheckRefInputNode(add_node, input_name, ref_nodes); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestGraphPreproces, test_update_dtype_mbatch_case) { + ge::GraphPrepare graph_prepare; + graph_prepare.compute_graph_ = BuildGraph4(); + auto parent_graph = 
graph_prepare.compute_graph_; + auto subgraph1 = BuildGraph4_Subgraph("subgraph1"); + auto subgraph2 = BuildGraph4_Subgraph("subgraph2"); + + auto data1 = parent_graph->FindNode("data1"); + auto data_desc = data1->GetOpDesc(); + + auto case_node = parent_graph->FindNode("case1"); + EXPECT_NE(case_node, nullptr); + case_node->GetOpDesc()->AddSubgraphName("subgraph1"); + case_node->GetOpDesc()->SetSubgraphInstanceName(0, "subgraph1"); + subgraph1->SetParentNode(case_node); + subgraph1->SetParentGraph(parent_graph); + EXPECT_EQ(parent_graph->AddSubgraph("subgraph1", subgraph1), GRAPH_SUCCESS); + + case_node->GetOpDesc()->AddSubgraphName("subgraph2"); + case_node->GetOpDesc()->SetSubgraphInstanceName(1, "subgraph2"); + subgraph2->SetParentNode(case_node); + subgraph2->SetParentGraph(parent_graph); + EXPECT_EQ(parent_graph->AddSubgraph("subgraph2", subgraph2), GRAPH_SUCCESS); + + Status ret = graph_prepare.UpdateInputOutputByOptions(); + EXPECT_EQ(ret, SUCCESS); + + auto case_desc = case_node->GetOpDesc(); + auto case_input = case_desc->MutableInputDesc(1); + EXPECT_EQ(case_input->GetDataType(), 1); + + auto sub1_data1 = subgraph1->FindNode("subgraph1_data1"); + EXPECT_NE(sub1_data1, nullptr); + auto data1_desc = sub1_data1->GetOpDesc(); + auto data1_input = data1_desc->MutableInputDesc(0); + EXPECT_EQ(data1_input->GetDataType(), 1); + auto data1_output = data1_desc->MutableOutputDesc(0); + EXPECT_EQ(data1_output->GetDataType(), 1); +} } \ No newline at end of file diff --git a/tests/ut/ge/graph/utils/buffer_pool_graph_builder.cc b/tests/ut/ge/graph/utils/buffer_pool_graph_builder.cc index dd52f287..7866def1 100644 --- a/tests/ut/ge/graph/utils/buffer_pool_graph_builder.cc +++ b/tests/ut/ge/graph/utils/buffer_pool_graph_builder.cc @@ -114,9 +114,9 @@ void BufferPoolGraphBuilder::SetPrefetchNodeInfo(NodePtr &node, int64_t pool_id, /// Normal graph /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 ----- net_output /// /// @@ -188,10 +188,10 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildNormalGraph() { /// Normal graph with multi buffer pool /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 /// (pool0) (pool1) (pool0) (pool0) (pool1) -/// \ \ \ \ \ +/// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 ----- net_output /// /// @@ -265,9 +265,9 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildNormalGraphWithMultiBufferPool() { /// SerialGraph: Buffer pool size only can contain one prefetch node /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 ----- net_output /// /// @@ -345,7 +345,7 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildSerialGraph() { /// GraphWithMultiPrefetch: Calc node with more prefetch node /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 const1 /// \ / \ / \ / /// \ / \ / \ / @@ -426,9 +426,9 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildGraphWithMultiPrefetch() { /// Subgraph1: Subgraph2: /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 -/// \ \ \ \ \ +/// \ \ \ \ \. 
/// const1 ----- add1 ----- add2 ----- add3 ---- subgraph1_out data1 ---- add4 ----- add5 ---- subgraph2_out /// /// @@ -540,9 +540,9 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildGraphWithSubgraph() { /// Subgraph1: Subgraph2: /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- subgraph1_out data1 ---- add3 ---- add4 ----- add5 ---- subgraph2_out /// /// @@ -651,10 +651,10 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildSubgraphWithInnerDependency() { /// batch_label_128 /// /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 --- -/// / / / / / / \ -/// /c prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 \ -/// const1 switch_false / / / / / \ -/// \ / / / / / / \ +/// / / / / / / \. +/// /c prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 \. +/// const1 switch_false / / / / / \. +/// \ / / / / / / \. /// switch1 w1 w2 w3 w4 w5 merge1 -- net_output /// / \ \ \ \ \ \ / /// const2 switch_true \ \ \ \ \ / @@ -809,7 +809,7 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildGraphWithMultiBatch() { /// GraphWithMultiOutputPrefetch: Prefetch has more than one output /// /// w1 w2 w3 w4 w5 -/// \ \ \ \ \ +/// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 /// / \ / \ / \ / \ / /// / \ / \ / \ / \ / @@ -892,7 +892,7 @@ ComputeGraphPtr BufferPoolGraphBuilder::BuildGraphWithMultiOutputPrefetch() { /// GraphWithMultiOutputPrefetch: Prefetch has more than one output /// /// w1 w2 w3 w4 w5 -/// \ / \ / \ / \ / \ +/// \ / \ / \ / \ / \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 /// / \ / \ / \ / \ / /// / \ / \ / \ / \ / diff --git a/tests/ut/ge/graph/utils/buffer_pool_graph_builder.h b/tests/ut/ge/graph/utils/buffer_pool_graph_builder.h index 24382dd2..29ed6efd 100644 --- a/tests/ut/ge/graph/utils/buffer_pool_graph_builder.h +++ b/tests/ut/ge/graph/utils/buffer_pool_graph_builder.h @@ -54,9 +54,9 @@ class BufferPoolGraphBuilder { /// Normal graph /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 ----- net_output /// /// @@ -72,10 +72,10 @@ class BufferPoolGraphBuilder { /// Normal graph with multi buffer pool /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 /// (pool0) (pool1) (pool0) (pool0) (pool1) - /// \ \ \ \ \ + /// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 ----- net_output /// /// @@ -92,9 +92,9 @@ class BufferPoolGraphBuilder { /// SerialGraph: Buffer pool size only can contain one prefetch node /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 ----- net_output /// /// @@ -116,7 +116,7 @@ class BufferPoolGraphBuilder { /// GraphWithMultiPrefetch: Calc node with more prefetch node /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 const1 /// \ / \ / \ / /// \ / \ / \ / @@ -144,9 +144,9 @@ class BufferPoolGraphBuilder { /// Subgraph1: Subgraph2: /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 - /// \ \ \ \ \ + /// \ \ \ \ \. 
/// const1 ----- add1 ----- add2 ----- add3 ---- subgraph1_out data1 ---- add4 ----- add5 ---- subgraph2_out /// /// @@ -168,9 +168,9 @@ class BufferPoolGraphBuilder { /// Subgraph1: Subgraph2: /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// const1 ----- add1 ----- add2 ----- subgraph1_out data1 ---- add3 ---- add4 ----- add5 ---- subgraph2_out /// /// @@ -189,10 +189,10 @@ class BufferPoolGraphBuilder { /// batch_label_128 /// /// const1 ----- add1 ----- add2 ----- add3 ----- add4 ----- add5 --- - /// / / / / / / \ - /// /c prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 \ - /// const1 switch_false / / / / / \ - /// \ / / / / / / \ + /// / / / / / / \. + /// /c prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 \. + /// const1 switch_false / / / / / \. + /// \ / / / / / / \. /// switch1 w1 w2 w3 w4 w5 merge1 -- net_output /// / \ \ \ \ \ \ / /// const2 switch_true \ \ \ \ \ / @@ -215,7 +215,7 @@ class BufferPoolGraphBuilder { /// GraphWithMultiOutputPrefetch: Prefetch has more than one output /// /// w1 w2 w3 w4 w5 - /// \ \ \ \ \ + /// \ \ \ \ \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 /// / \ / \ / \ / \ / /// / \ / \ / \ / \ / @@ -238,7 +238,7 @@ class BufferPoolGraphBuilder { /// GraphWithMultiOutputPrefetch: Prefetch has more than one output /// /// w1 w2 w3 w4 w5 - /// \ / \ / \ / \ / \ + /// \ / \ / \ / \ / \. /// prefetch1 prefetch2 prefetch3 prefetch4 prefetch5 /// / \ / \ / \ / \ / /// / \ / \ / \ / \ / diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc new file mode 100644 index 00000000..b4091a50 --- /dev/null +++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#define private public +#define protected public +#include "hybrid/executor/hybrid_model_async_executor.h" +#include "hybrid/executor/hybrid_model_executor.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/debug/ge_attr_define.h" + + +using namespace std; +using namespace testing; + + +namespace ge { +using namespace hybrid; + +class UtestHybridModelAsyncExecutor : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { } +}; + +TEST_F(UtestHybridModelAsyncExecutor, CopyOutputs_success) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + HybridModel hybrid_model(ge_root_model); + HybridModelAsyncExecutor executor(&hybrid_model); + + TensorValue input_tensor; + HybridModelExecutor::ExecuteArgs args; + args.inputs.emplace_back(input_tensor); + auto desc = MakeShared(); + GeShape geshape({2,2,2,2}); + desc->SetShape(geshape); + + auto allocator = NpuMemoryAllocator::GetAllocator(); + auto tensor_buffer = TensorBuffer::Create(allocator, 100); + auto output_tensor = TensorValue(shared_ptr(tensor_buffer.release())); + args.outputs.emplace_back(output_tensor); + args.output_desc.emplace_back(desc); + + OutputData output_data; + std::vector outputs; + auto ret = executor.CopyOutputs(args, &output_data, outputs); + ASSERT_EQ(ret,SUCCESS); +} + +TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + HybridModel hybrid_model(ge_root_model); + HybridModelAsyncExecutor executor(&hybrid_model); + + auto allocator = NpuMemoryAllocator::GetAllocator(); + auto tensor_buffer = TensorBuffer::Create(allocator, 100); + auto tensor = TensorValue(shared_ptr(tensor_buffer.release())); + GeTensorDesc ge_tensor_desc; + int64_t output_size = 100; + std::vector outputs; + executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs); + auto size = tensor.GetSize(); + ASSERT_EQ(size, 100); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc b/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc new file mode 100644 index 00000000..fbda3776 --- /dev/null +++ b/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc @@ -0,0 +1,267 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#define private public +#define protected public +#include "hybrid/executor/subgraph_executor.h" +#include "hybrid/node_executor/node_executor.h" +#include "hybrid/node_executor/rts/rts_node_executor.h" +#include "hybrid/node_executor/ge_local/ge_local_node_executor.h" +#include "hybrid/model/hybrid_model_builder.h" +#include "graph/utils/graph_utils.h" + +using namespace std; +using namespace testing; + +namespace ge { +using namespace hybrid; + +class UtestSubgraphExecutor : public testing::Test { + protected: + void SetUp() { + NodeExecutorManager::GetInstance().engine_mapping_.clear(); + auto &engine_mapping = NodeExecutorManager::GetInstance().engine_mapping_; + engine_mapping.emplace("DNN_VM_RTS_OP_STORE", NodeExecutorManager::ExecutorType::RTS); + engine_mapping.emplace("DNN_VM_GE_LOCAL_OP_STORE", NodeExecutorManager::ExecutorType::GE_LOCAL); + + NodeExecutorManager::GetInstance().executors_.clear(); + auto &task_executor = NodeExecutorManager::GetInstance().executors_; + task_executor.emplace(NodeExecutorManager::ExecutorType::RTS, std::unique_ptr(new RtsNodeExecutor())); + task_executor.emplace(NodeExecutorManager::ExecutorType::GE_LOCAL, std::unique_ptr(new GeLocalNodeExecutor())); + } + void TearDown() { + NodeExecutorManager::GetInstance().engine_mapping_.clear(); + NodeExecutorManager::GetInstance().executors_.clear(); + } +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(index * 64 + i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(index * 64 + in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + return graph.AddNode(op_desc); +} + +static void CreateSimpleCondGraph(ComputeGraph &graph, NodePtr &switch_t, NodePtr &switch_f) { +/******************************************************************************* + * | + * Merge + * / \. + * / \. + * / \. 
+ * Add Sub + * | \ / | + * | \ _ / | + * | / \ | + * | / \ | + * Switch Switch + * | \ / | + * | \ / | + * | \ / | + * | \ / | + * | Less | + * | / \ | + * | / \ | + * Data Data + ******************************************************************************/ + const auto data0 = CreateNode(graph, "data", DATA, 1, 1); + const auto data1 = CreateNode(graph, "data1", DATA, 1, 1); + data0->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + data1->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_INDEX, 0); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); + + const auto const0 = CreateNode(graph, "const", CONSTANT, 0, 1); + const auto const1 = CreateNode(graph, "const1", CONSTANT, 0, 1); + const0->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + const1->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + { + uint64_t const_value = 101; + const auto op_desc = const0->GetOpDesc(); + auto weight = make_shared(op_desc->GetOutputDesc(0), (uint8_t *)&const_value, sizeof(uint64_t)); + AttrUtils::SetTensor(op_desc, ATTR_NAME_WEIGHTS, weight); + } + { + uint64_t const_value = 101; + const auto op_desc = const1->GetOpDesc(); + auto weight = make_shared(op_desc->GetOutputDesc(0), (uint8_t *)&const_value, sizeof(uint64_t)); + AttrUtils::SetTensor(op_desc, ATTR_NAME_WEIGHTS, weight); + } + + const auto less1 = CreateNode(graph, "less", ENTER, 2, 1); + + const auto active1 = CreateNode(graph, "active1", STREAMACTIVE, 0, 0); + switch_t = CreateNode(graph, "switch_t", STREAMSWITCH, 2, 0); + switch_f = CreateNode(graph, "switch_f", STREAMSWITCH, 2, 0); + AttrUtils::SetInt(switch_t->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, RT_EQUAL); // 101 for true. + AttrUtils::SetInt(switch_f->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, RT_NOT_EQUAL); + + const auto add1 = CreateNode(graph, "add", ENTER, 2, 1); + const auto sub1 = CreateNode(graph, "sub", ENTER, 2, 1); + + const auto merge1 = CreateNode(graph, "merge", STREAMMERGE, 2, 2); + const auto active2 = CreateNode(graph, "active2", STREAMACTIVE, 0, 0); + const auto active3 = CreateNode(graph, "active3", STREAMACTIVE, 0, 0); + + const auto output1 = CreateNode(graph, "net_output", NETOUTPUT, 1, 1); + output1->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + + GraphUtils::AddEdge(data0->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch_t->GetInDataAnchor(0)); + GraphUtils::AddEdge(const0->GetOutDataAnchor(0), switch_t->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch_f->GetInDataAnchor(0)); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), switch_f->GetInDataAnchor(1)); + + GraphUtils::AddEdge(less1->GetOutControlAnchor(), active1->GetInControlAnchor()); + GraphUtils::AddEdge(active1->GetOutControlAnchor(), switch_t->GetInControlAnchor()); + GraphUtils::AddEdge(active1->GetOutControlAnchor(), switch_f->GetInControlAnchor()); + + GraphUtils::AddEdge(data0->GetOutDataAnchor(0), add1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), add1->GetInDataAnchor(1)); + GraphUtils::AddEdge(add1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(switch_t->GetOutControlAnchor(), add1->GetInControlAnchor()); + GraphUtils::AddEdge(add1->GetOutControlAnchor(), active2->GetInControlAnchor()); + GraphUtils::AddEdge(active2->GetOutControlAnchor(), 
merge1->GetInControlAnchor()); + + GraphUtils::AddEdge(data0->GetOutDataAnchor(0), sub1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), sub1->GetInDataAnchor(1)); + GraphUtils::AddEdge(sub1->GetOutDataAnchor(0), merge1->GetInDataAnchor(1)); + GraphUtils::AddEdge(switch_f->GetOutControlAnchor(), sub1->GetInControlAnchor()); + GraphUtils::AddEdge(sub1->GetOutControlAnchor(), active3->GetInControlAnchor()); + GraphUtils::AddEdge(active3->GetOutControlAnchor(), merge1->GetInControlAnchor()); + + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); +} + +TEST_F(UtestSubgraphExecutor, simple_schedule_tasks) { + ComputeGraphPtr graph = std::make_shared("test"); + const auto data0 = CreateNode(*graph, "data", DATA, 1, 1); + const auto output0 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); + GraphUtils::AddEdge(data0->GetOutDataAnchor(0), output0->GetInDataAnchor(0)); + data0->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + output0->GetOpDesc()->SetOpKernelLibName("DNN_VM_GE_LOCAL_OP_STORE"); + + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + + HybridModel hybrid_model(ge_root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + ASSERT_EQ(hybrid_model_builder.Build(), SUCCESS); + + uint64_t value_0 = 110; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + const std::vector inputs{ in_tensor0 }; + + uint64_t value_1 = 123; + TensorValue out_tensor0(&value_1, sizeof(value_1)); + const std::vector outputs{ out_tensor0 }; + + auto input_desc = output0->GetOpDesc()->GetInputDescPtr(0); + const std::vector input_descs{ input_desc }; + + GraphExecutionContext graph_context; + graph_context.model = &hybrid_model; + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &graph_context); + ASSERT_EQ(executor.ExecuteAsync(inputs, input_descs, outputs), SUCCESS); + ASSERT_EQ(executor.Synchronize(), SUCCESS); +} + +TEST_F(UtestSubgraphExecutor, cond_graph_schedule_tasks) { + ComputeGraphPtr graph = std::make_shared("test"); + NodePtr switch_t = nullptr; + NodePtr switch_f = nullptr; + CreateSimpleCondGraph(*graph, switch_t, switch_f); + + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + std::vector weights_value{101, 102}; + ge_sub_model->SetWeight(Buffer::CopyFrom((uint8_t *)weights_value.data(), weights_value.size() * sizeof(uint64_t))); + ge_sub_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + + HybridModel hybrid_model(ge_root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + ASSERT_EQ(hybrid_model_builder.Build(), SUCCESS); + + uint64_t value_0 = 101; // Enter used for Less, will pass this value to switch. 
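+  // Descriptive note (added for readability): value_0 == 101 matches the const weights
+  // built in CreateSimpleCondGraph, so the RT_EQUAL StreamSwitch (switch_t) should be
+  // selected (switch index 1) while the RT_NOT_EQUAL branch (switch_f) stays at 0,
+  // which is what the assertions at the end of this test check.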
+ TensorValue in_tensor0(&value_0, sizeof(value_0)); + uint64_t value_1 = 110; + TensorValue in_tensor1(&value_1, sizeof(value_1)); + const std::vector inputs{ in_tensor0, in_tensor1 }; + uint64_t value_2 = 123; + TensorValue out_tensor0(&value_2, sizeof(value_2)); + const std::vector outputs{ out_tensor0 }; + + GeTensorDescPtr tensor_desc = make_shared(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(*tensor_desc, 64); + const std::vector input_desc{ tensor_desc, tensor_desc }; + + GraphExecutionContext graph_context; + graph_context.model = &hybrid_model; + graph_context.allocator = NpuMemoryAllocator::GetAllocator(0); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + ASSERT_EQ(graph_context.callback_manager->Init(), SUCCESS); + + const auto node_it_t = hybrid_model.node_items_.find(switch_t); + const auto node_it_f = hybrid_model.node_items_.find(switch_f); + ASSERT_NE(hybrid_model.node_items_.end(), node_it_t); + ASSERT_NE(hybrid_model.node_items_.end(), node_it_f); + + SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &graph_context); + ASSERT_EQ(executor.ExecuteAsync(inputs, input_desc, outputs), SUCCESS); + ASSERT_EQ(executor.Synchronize(), SUCCESS); + + const auto state_it_t = executor.subgraph_context_->node_states_.find(node_it_t->second.get()); + const auto state_it_f = executor.subgraph_context_->node_states_.find(node_it_f->second.get()); + ASSERT_NE(executor.subgraph_context_->node_states_.end(), state_it_t); + ASSERT_NE(executor.subgraph_context_->node_states_.end(), state_it_f); + ASSERT_EQ(state_it_t->second->GetSwitchIndex(), 1); + ASSERT_EQ(state_it_f->second->GetSwitchIndex(), 0); + ASSERT_EQ(graph_context.callback_manager->Destroy(), SUCCESS); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc index 5fa0d22c..07022230 100644 --- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc +++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc @@ -26,6 +26,7 @@ #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/hybrid_model_executor.h" #include "hybrid/executor/worker/execution_engine.h" +#include "hybrid/executor/subgraph_executor.h" #undef private #undef protected @@ -75,6 +76,10 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_kernel_task) { node_item->output_start = 0; GraphExecutionContext execution_context; + GeRootModelPtr ge_root_model = make_shared(graph); + HybridModel hybrid_model(ge_root_model); + hybrid_model.root_graph_item_ = std::unique_ptr(new(std::nothrow)GraphItem()); + execution_context.model = &hybrid_model; execution_context.profiling_level = 1; SubgraphContext subgraph_context(nullptr, &execution_context); @@ -85,7 +90,11 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_kernel_task) { ExecutionEngine execution_engine; ASSERT_TRUE(node_state.GetTaskContext() != nullptr); - EXPECT_EQ(execution_engine.ExecuteAsync(node_state, node_state.GetTaskContext(), execution_context), INTERNAL_ERROR); + + std::function callback; + SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &execution_context); + executor.InitCallback(&node_state, callback); + EXPECT_EQ(execution_engine.ExecuteAsync(node_state, node_state.GetTaskContext(), execution_context, callback), INTERNAL_ERROR); } TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { @@ -105,15 +114,25 @@ TEST_F(UtestExecutionEngine, 
ExecuteAsync_without_callback_and_kernel_task) { GraphExecutionContext execution_context; GeRootModelPtr ge_root_model = make_shared(graph); HybridModel hybrid_model(ge_root_model); + hybrid_model.root_graph_item_ = std::unique_ptr(new(std::nothrow)GraphItem()); execution_context.model = &hybrid_model; SubgraphContext subgraph_context(nullptr, &execution_context); NodeState node_state(*node_item, &subgraph_context); auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + uint32_t task_id = 0; + uint32_t stream_id = 1; + std::string task_type = "rts"; + uint32_t block_dim = 0; + task_context->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); auto shared_task_context = std::shared_ptr(task_context.release()); node_state.SetTaskContext(shared_task_context); ExecutionEngine execution_engine; ASSERT_TRUE(node_state.GetTaskContext() != nullptr); - EXPECT_EQ(execution_engine.ExecuteAsync(node_state, node_state.GetTaskContext(), execution_context), INTERNAL_ERROR); + + std::function callback; + SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &execution_context); + executor.InitCallback(&node_state, callback); + EXPECT_EQ(execution_engine.ExecuteAsync(node_state, node_state.GetTaskContext(), execution_context, callback), INTERNAL_ERROR); } diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index b5aac527..7a2a5dfe 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -19,9 +19,9 @@ #include #include "runtime/rt.h" -#include "graph/utils/node_utils.h" #define protected public #define private public +#include "graph/utils/node_utils.h" #include "hybrid/model/hybrid_model_builder.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/node_executor/node_executor.h" @@ -109,16 +109,61 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); } -TEST_F(UtestGeHybrid, task_update_tiling_info) { +TEST_F(UtestGeHybrid, aicore_op_task_init_success2) { + // build aicore task auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); aicore_task->is_single_op_ = true; + domi::TaskDef task_def; + task_def.set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel = task_def.mutable_kernel(); + kernel->set_block_dim(32); + kernel->set_args_size(64); + string args(64, '1'); + kernel->set_args(args.data(), 64); + domi::KernelContext *context = kernel->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + OpDescPtr op_desc = CreateOpDesc("Add", "Add"); + std::vector kernelBin; + TBEKernelPtr tbe_kernel = std::make_shared("name/Add", std::move(kernelBin)); + op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); + std::string kernel_name("kernel/Add"); + AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); + ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); + char *handle = ""; + aicore_task->handle_ = handle; + aicore_task->tiling_key_ = 1; + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); +} + +TEST_F(UtestGeHybrid, task_update_tiling_info) { + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); auto graph = make_shared("graph"); 
OpDescPtr op_desc = CreateOpDesc("Add", "Add"); ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key"); ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); + ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", true); + ge::AttrUtils::SetInt(op_desc, "op_para_size", 1); auto node = graph->AddNode(op_desc); - optiling::OpRunInfo tiling_info; - ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS); + + std::unique_ptr node_item; + NodeItem::Create(node, node_item); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphExecutionContext execution_context; + SubgraphContext subgraph_context(nullptr, &execution_context); + NodeState node_state(*node_item, &subgraph_context); + auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + ASSERT_TRUE(task_context != nullptr); + ASSERT_EQ(aicore_task->InitTilingInfo(*op_desc), SUCCESS); + ASSERT_EQ(aicore_task->UpdateTilingInfo(*task_context), SUCCESS); } TEST_F(UtestGeHybrid, index_taskdefs_failed) { @@ -202,7 +247,7 @@ TEST_F(UtestGeHybrid, data_direct_connect) { GeRootModelPtr ge_root_model = make_shared(root_graph); HybridModel hybrid_model(ge_root_model); HybridModelBuilder hybrid_model_builder(hybrid_model); - auto ret = hybrid_model_builder.IdentifyVariableOutputs(*new_node.get()); + auto ret = hybrid_model_builder.IdentifyVariableOutputs(*new_node.get(), sub_graph); ASSERT_EQ(ret, SUCCESS); } @@ -288,7 +333,7 @@ TEST_F(UtestGeHybrid, hybrid_model_executor) { HybridModel *model_ptr = &model; uint32_t device_id = 0; - rtStream_t stream; + rtStream_t stream = nullptr; HybridModelExecutor executor(model_ptr, device_id, stream); executor.Init(); } @@ -644,17 +689,58 @@ TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) { std::unique_ptr node_item_1; NodeItem::Create(node_1, node_item_1); node_item_1->node_id = 1; - node->GetOutControlAnchor()->LinkTo(node_1->GetInControlAnchor()); + OpDescPtr op_desc_2 = CreateOpDesc("net_output", NETOUTPUT); + auto node_2 = compute_graph->AddNode(op_desc_2); + std::unique_ptr node_item_2; + NodeItem::Create(node_2, node_item_2); + node_item_2->node_id = 2; + node_1->GetOutControlAnchor()->LinkTo(node_2->GetInControlAnchor()); + GeRootModelPtr root_model = MakeShared(compute_graph); HybridModel model(root_model); model.root_graph_ = compute_graph; model.node_items_.emplace(node, std::move(node_item)); + model.node_items_.emplace(node_1, std::move(node_item_1)); + model.node_items_.emplace(node_2, std::move(node_item_2)); HybridModelBuilder builder(model); std::vector deps; - ASSERT_EQ(builder.ParseDependentInputNodes(*node_item_1, deps), SUCCESS); - ASSERT_TRUE(model.GetNodeItem(node)->has_observer); - ASSERT_EQ(node_item_1->dependents_for_execution.size(), 1); + ASSERT_EQ(builder.ParseDependentInputNodes(*model.node_items_[node_1], deps), SUCCESS); + ASSERT_EQ(builder.ParseDependentInputNodes(*model.node_items_[node_2], deps), SUCCESS); + ASSERT_FALSE(model.GetNodeItem(node)->has_observer); + ASSERT_TRUE(model.GetNodeItem(node_1)->has_observer); + ASSERT_EQ(model.node_items_[node_1]->dependents_for_execution.size(), 0); + ASSERT_EQ(model.node_items_[node_2]->dependents_for_execution.size(), 1); } + +TEST_F(UtestGeHybrid, TestParseDependencies) { + // make graph + ut::GraphBuilder graph_builder = ut::GraphBuilder("graph"); + auto data = graph_builder.AddNode("Data", "Data", 0, 1); + auto netoutput = graph_builder.AddNode("Netoutput", "NetOutput", 1, 0); + graph_builder.AddDataEdge(data, 0, netoutput, 0); + auto graph = 
graph_builder.GetGraph(); + + GeRootModelPtr root_model = MakeShared(graph); + HybridModel model(root_model); + HybridModelBuilder builder(model); + + std::unique_ptr node_item; + NodeItem::Create(netoutput, node_item); + std::unique_ptr node_item2; + NodeItem::Create(data, node_item2); + model.node_items_.emplace(data, std::move(node_item2)); + + std::vector deps; + deps.push_back("Data"); + auto op_desc = netoutput->GetOpDesc(); + op_desc->input_name_idx_["Data"] = 0; + auto data_desc = data->GetOpDesc(); + auto tensor = std::make_shared(); + auto tensor_desc = data_desc->MutableInputDesc(0); + AttrUtils::SetTensor(tensor_desc, "_value", tensor); + std::set dependent_for_shape_inference; + ASSERT_EQ(builder.ParseDependencies(*node_item, deps, dependent_for_shape_inference), SUCCESS); +} \ No newline at end of file diff --git a/tests/ut/ge/hybrid/known_node_executor_unittest.cc b/tests/ut/ge/hybrid/known_node_executor_unittest.cc index 16bbe3a0..98e985f7 100644 --- a/tests/ut/ge/hybrid/known_node_executor_unittest.cc +++ b/tests/ut/ge/hybrid/known_node_executor_unittest.cc @@ -26,6 +26,7 @@ #undef private #undef protected #include "graph/manager/graph_mem_allocator.h" +#include "../graph/passes/graph_builder_utils.h" using namespace std; using namespace testing; @@ -69,3 +70,22 @@ TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) { model.weight_buffer_map_.emplace("subgraph", TensorBuffer::Create(buffer, sizeof(buffer))); ASSERT_EQ(mock.InitDavinciModel(model, model.GetModelWeight("subgraph")), SUCCESS); } + +TEST_F(UnknownNodeExecutorTest, TestParseAttrForAllocatingOutputs) { + ut::GraphBuilder builder("test-graph"); + auto data_node = builder.AddNode("Data0", DATA, 1, 1); + auto netoutput_node = builder.AddNode("NodeOutput", NETOUTPUT, 2, 2); + builder.AddDataEdge(data_node, 0, netoutput_node, 0); + auto const_node = builder.AddNode("Const0", CONSTANT, 0, 1); + builder.AddDataEdge(const_node, 0, netoutput_node, 1); + auto graph = builder.GetGraph(); + + ut::GraphBuilder builder2("root-graph"); + auto partitioned_call = builder2.AddNode("Node0", PARTITIONEDCALL, 1, 2); + NodeItem node_item(partitioned_call); + ASSERT_EQ(KnownNodeExecutor::ParseAttrForAllocatingOutputs(node_item, *graph), SUCCESS); + ASSERT_EQ(node_item.ref_outputs.size(), 1); + ASSERT_EQ(node_item.ref_outputs[1], const_node); + ASSERT_EQ(node_item.reuse_inputs.size(), 1); + ASSERT_EQ(node_item.reuse_inputs[0], 0); +} \ No newline at end of file diff --git a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc new file mode 100644 index 00000000..9630b193 --- /dev/null +++ b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc @@ -0,0 +1,233 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#define private public +#define protected public +#include "hybrid/model/hybrid_model_builder.h" +#include "hybrid/node_executor/node_executor.h" + +#include "graph/utils/tensor_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/debug/ge_attr_define.h" + +using namespace std; +using namespace testing; + +namespace ge { +using namespace hybrid; + +class UtestHybridModelBuilder : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { } +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(1024); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(1024); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + return graph.AddNode(op_desc); +} + +TEST_F(UtestHybridModelBuilder, normal_hybrid_model_build) { +/******************************************************************************* + * Exit Identify + * \ / \. + * \ / \. + * Switch Add + * / | | + * / | | + * / | | + * LoopCond | | + * \ | | + * \ | | + * \ | | + * Less | | + * \ | NextIteration + * \ | | + * \ | | + * Merge <---------| + * | + * | + * Enter + ******************************************************************************/ + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + + auto enter1 = CreateNode(*graph, "enter", ENTER, 1, 1); + auto merge1 = CreateNode(*graph, "merge", STREAMMERGE, 2, 2); + auto less1 = CreateNode(*graph, "less", LESS, 2, 1); + less1->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); + auto loop1 = CreateNode(*graph, "loopcond", LOOPCOND, 1, 1); + auto switch_t = CreateNode(*graph, "switch_t", STREAMSWITCH, 2, 0); + auto switch_f = CreateNode(*graph, "switch_f", STREAMSWITCH, 2, 0); + auto ident1 = CreateNode(*graph, "identity", IDENTITY, 2, 1); + auto add1 = CreateNode(*graph, "add", ADD, 2, 1); + add1->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); + auto next1 = CreateNode(*graph, "next", NEXTITERATION, 1, 1); + auto exit1 = CreateNode(*graph, "exit", EXIT, 1, 1); + auto value0 = CreateNode(*graph, "const", CONSTANT, 0, 1); + auto value1 = CreateNode(*graph, "const", CONSTANT, 0, 1); + auto active1 = CreateNode(*graph, "active1", STREAMACTIVE, 0, 0); + auto active2 = CreateNode(*graph, "active2", STREAMACTIVE, 0, 0); + auto active3 = CreateNode(*graph, "active3", STREAMACTIVE, 0, 0); + auto output1 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); + + GraphUtils::AddEdge(enter1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), loop1->GetInDataAnchor(0)); + + 
GraphUtils::AddEdge(loop1->GetOutDataAnchor(0), switch_t->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), switch_t->GetInDataAnchor(1)); + GraphUtils::AddEdge(loop1->GetOutDataAnchor(0), switch_f->GetInDataAnchor(0)); + GraphUtils::AddEdge(value0->GetOutDataAnchor(0), switch_f->GetInDataAnchor(1)); + + GraphUtils::AddEdge(switch_f->GetOutControlAnchor(), exit1->GetInControlAnchor()); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), exit1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(switch_t->GetOutControlAnchor(), ident1->GetInControlAnchor()); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), ident1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(ident1->GetOutDataAnchor(0), add1->GetInDataAnchor(0)); + GraphUtils::AddEdge(value1->GetOutDataAnchor(0), add1->GetInDataAnchor(1)); + GraphUtils::AddEdge(add1->GetOutDataAnchor(0), next1->GetInDataAnchor(0)); + + GraphUtils::AddEdge(enter1->GetOutControlAnchor(), active1->GetInControlAnchor()); + GraphUtils::AddEdge(active1->GetOutControlAnchor(), merge1->GetInControlAnchor()); + + GraphUtils::AddEdge(loop1->GetOutControlAnchor(), active2->GetInControlAnchor()); + GraphUtils::AddEdge(active2->GetOutControlAnchor(), switch_f->GetInControlAnchor()); + GraphUtils::AddEdge(active2->GetOutControlAnchor(), switch_t->GetInControlAnchor()); + + GraphUtils::AddEdge(next1->GetOutControlAnchor(), active3->GetInControlAnchor()); + + GraphUtils::AddEdge(exit1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); + AttrUtils::SetStr(merge1->GetOpDesc(), ATTR_NAME_NEXT_ITERATION, next1->GetName()); + + AttrUtils::SetBool(enter1->GetOpDesc(), ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true); + AttrUtils::SetBool(output1->GetOpDesc(), ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + AttrUtils::SetBool(add1->GetOpDesc(), ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true); + AttrUtils::SetBool(add1->GetOpDesc(), ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + + // Build -> IndexSpecialNodes --> stream_merge_op_nodes_ + // Build -> LoadGraph -> RelinkNextIteration + // Build -> LoadGraph -> LoadDynamicSubgraph --> BuildNodeItem --> NodeItem::SetDataSend + // Build -> LoadGraph -> LoadDynamicSubgraph --> BuildControlFlowGroup --> NodeItem::SetCtrlSend + auto &engine_mapping = NodeExecutorManager::GetInstance().engine_mapping_; + engine_mapping.emplace("AIcoreEngine", NodeExecutorManager::ExecutorType::AICORE); + engine_mapping.emplace("DNN_VM_GE_LOCAL_OP_STORE", NodeExecutorManager::ExecutorType::GE_LOCAL); + engine_mapping.emplace("aicpu_tf_kernel", NodeExecutorManager::ExecutorType::AICPU_TF); + engine_mapping.emplace("aicpu_ascend_kernel", NodeExecutorManager::ExecutorType::AICPU_TF); + engine_mapping.emplace("ops_kernel_info_hccl", NodeExecutorManager::ExecutorType::HCCL); + engine_mapping.emplace("DNN_VM_RTS_OP_STORE", NodeExecutorManager::ExecutorType::RTS); + engine_mapping.emplace("DNN_VM_HOST_CPU_OP_STORE", NodeExecutorManager::ExecutorType::HOST_CPU); + + auto &task_executor = NodeExecutorManager::GetInstance().executors_; + task_executor.emplace(NodeExecutorManager::ExecutorType::AICORE, std::unique_ptr(new NodeExecutor())); + task_executor.emplace(NodeExecutorManager::ExecutorType::GE_LOCAL, std::unique_ptr(new NodeExecutor())); + task_executor.emplace(NodeExecutorManager::ExecutorType::AICPU_TF, std::unique_ptr(new NodeExecutor())); + task_executor.emplace(NodeExecutorManager::ExecutorType::HCCL, std::unique_ptr(new NodeExecutor())); + task_executor.emplace(NodeExecutorManager::ExecutorType::RTS, std::unique_ptr(new NodeExecutor())); + 
task_executor.emplace(NodeExecutorManager::ExecutorType::HOST_CPU, std::unique_ptr(new NodeExecutor())); + + HybridModel hybrid_model(ge_root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + ASSERT_EQ(hybrid_model_builder.Build(), SUCCESS); + engine_mapping.clear(); + task_executor.clear(); +} + +TEST_F(UtestHybridModelBuilder, create_called_invalid) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + HybridModel hybrid_model(ge_root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + + auto node = CreateNode(*graph, "node", PARTITIONEDCALL, 1, 1); + NodeItem node_item(node); + + ASSERT_EQ(hybrid_model_builder.CreateStreamActiveGroup(node, &node_item), INTERNAL_ERROR); + ASSERT_EQ(hybrid_model_builder.CreateStreamSwitchGroup(node, &node_item), INTERNAL_ERROR); + ASSERT_EQ(hybrid_model_builder.CreateNextIterationGroup(node, &node_item), INTERNAL_ERROR); + ASSERT_EQ(hybrid_model_builder.CreateStreamSwitchNGroup(node, &node_item), INTERNAL_ERROR); + ASSERT_EQ(hybrid_model_builder.CreateSwitchGroup(node, &node_item), INTERNAL_ERROR); + + ASSERT_EQ(hybrid_model_builder.CreateLabelSetGroup(node, &node_item), INTERNAL_ERROR); + node_item.node_type = LABELSET; + ASSERT_EQ(hybrid_model_builder.CreateLabelSetGroup(node, &node_item), UNSUPPORTED); + + ASSERT_EQ(hybrid_model_builder.CreateLabelGotoGroup(node, &node_item), INTERNAL_ERROR); + node_item.node_type = LABELGOTO; + ASSERT_EQ(hybrid_model_builder.CreateLabelGotoGroup(node, &node_item), UNSUPPORTED); + + ASSERT_EQ(hybrid_model_builder.CreateLabelSwitchGroup(node, &node_item), INTERNAL_ERROR); + node_item.node_type = LABELSWITCH; + ASSERT_EQ(hybrid_model_builder.CreateLabelSwitchGroup(node, &node_item), UNSUPPORTED); +} + +TEST_F(UtestHybridModelBuilder, stream_switch_n_group) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + HybridModel hybrid_model(ge_root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + + auto switch_n = CreateNode(*graph, "switch_n", STREAMSWITCHN, 1, 0); + NodeItem node_item(switch_n); + + // no batch_num + ASSERT_EQ(hybrid_model_builder.CreateStreamSwitchNGroup(switch_n, &node_item), INTERNAL_ERROR); + + uint32_t batch_num = 0; + AttrUtils::SetInt(switch_n->GetOpDesc(), ATTR_NAME_BATCH_NUM, batch_num); + ASSERT_EQ(hybrid_model_builder.CreateStreamSwitchNGroup(switch_n, &node_item), SUCCESS); + + batch_num = 3; + AttrUtils::SetInt(switch_n->GetOpDesc(), ATTR_NAME_BATCH_NUM, batch_num); + ASSERT_EQ(hybrid_model_builder.CreateStreamSwitchNGroup(switch_n, &node_item), SUCCESS); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc b/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc new file mode 100644 index 00000000..c4c2c65b --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc @@ -0,0 +1,484 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#define private public +#define protected public +#include "hybrid/executor/subgraph_context.h" +#include "hybrid/node_executor/rts/rts_node_executor.h" +#include "model/ge_root_model.h" + +using namespace std; +using namespace testing; + +namespace ge { +using namespace hybrid; + +class UtestRtsNodeTask : public testing::Test { + protected: + void SetUp() {} + void TearDown() { } +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + return graph.AddNode(op_desc); +} + +TEST_F(UtestRtsNodeTask, test_stream_switch_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "switch", STREAMSWITCH, 2, 0); + ASSERT_TRUE(AttrUtils::SetInt(node->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, 0)); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 2; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + uint64_t value_0 = 110; + uint64_t value_1 = 120; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + TensorValue in_tensor1(&value_1, sizeof(value_1)); + subgraph_context.SetInput(*node_item, 0, in_tensor0); + subgraph_context.SetInput(*node_item, 1, in_tensor1); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + ASSERT_EQ(node_state->GetSwitchIndex(), -1); + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), SUCCESS); + 
ASSERT_EQ(node_state->GetSwitchIndex(), 0); // not equal, active 0 + + uint64_t value_2 = 110; + TensorValue in_tensor2(&value_2, sizeof(value_2)); + subgraph_context.SetInput(*node_item, 1, in_tensor2); + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), SUCCESS); + ASSERT_EQ(node_state->GetSwitchIndex(), 1); // equal, active 1 +} + +TEST_F(UtestRtsNodeTask, test_stream_active_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "active", STREAMACTIVE, 0, 0); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + ASSERT_EQ(node_state->GetSwitchIndex(), -1); + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), SUCCESS); + ASSERT_EQ(node_state->GetSwitchIndex(), 0); +} + +TEST_F(UtestRtsNodeTask, test_stream_merge_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "merge", STREAMMERGE, 2, 2); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 2; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + uint64_t value_0 = 110; + 
TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, 0, in_tensor0); + uint64_t value_1 = 220; + TensorValue in_tensor1(&value_1, sizeof(value_1)); + subgraph_context.SetInput(*node_item, 1, in_tensor1); + + uint64_t value_2 = 123; + TensorValue out_tensor0(&value_2, sizeof(value_2)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + uint64_t value_3 = 223; + TensorValue out_tensor1(&value_3, sizeof(value_3)); + subgraph_context.SetOutput(*node_item, 1, out_tensor1); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + node_state->SetMergeIndex(1); + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), SUCCESS); + ASSERT_EQ(node_state->GetSwitchIndex(), -1); + + uint64_t value_4 = 323; + ASSERT_EQ(node_state->GetTaskContext()->GetOutput(0)->CopyScalarValueToHost(value_4), SUCCESS); + ASSERT_EQ(value_4, value_1); + + uint64_t value_5 = 423; + ASSERT_EQ(node_state->GetTaskContext()->GetOutput(1)->CopyScalarValueToHost(value_5), SUCCESS); + ASSERT_EQ(value_5, 1); +} + +TEST_F(UtestRtsNodeTask, test_memcpy_async_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "memcpy", MEMCPYASYNC, 1, 1); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 1; + graph_item.total_outputs_ = 1; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + uint64_t value_0 = 110; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, 0, in_tensor0); + + uint64_t value_1 = 123; + TensorValue out_tensor0(&value_1, sizeof(value_1)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), SUCCESS); + + uint64_t value_4 = 323; + ASSERT_EQ(node_state->GetTaskContext()->GetOutput(0)->CopyScalarValueToHost(value_4), SUCCESS); + ASSERT_EQ(value_4, value_0); + ASSERT_EQ(value_1, value_0); +} + +TEST_F(UtestRtsNodeTask, test_pass_through_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = 
std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "enter", ENTER, 1, 1); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 1; + graph_item.total_outputs_ = 1; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + uint64_t value_0 = 110; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, 0, in_tensor0); + + uint64_t value_1 = 123; + TensorValue out_tensor0(&value_1, sizeof(value_1)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), SUCCESS); + + uint64_t value_4 = 323; + ASSERT_EQ(node_state->GetTaskContext()->GetOutput(0)->CopyScalarValueToHost(value_4), SUCCESS); + ASSERT_EQ(value_4, value_0); +} + +TEST_F(UtestRtsNodeTask, test_unsupport_label_set) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "labelset", LABELSET, 0, 0); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 2; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 2; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + 
std::function done = []() {}; + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), UNSUPPORTED); +} + +TEST_F(UtestRtsNodeTask, test_unsupport_label_goto) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "labelgoto", LABELGOTO, 0, 0); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 2; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 2; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), UNSUPPORTED); +} + +TEST_F(UtestRtsNodeTask, test_unsupport_label_switch) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(*graph, "labelswitch", LABELSWITCH, 0, 0); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 2; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 2; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + ASSERT_NE(unique_task_context, nullptr); + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); + + NodeTaskPtr task = nullptr; + RtsNodeExecutor node_executor; + ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); + ASSERT_NE(task, nullptr); + + std::function done = []() {}; + ASSERT_EQ(task->ExecuteAsync(*node_state->GetTaskContext(), done), 
UNSUPPORTED); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/session/ge_api_unittest.cc b/tests/ut/ge/session/ge_api_unittest.cc new file mode 100644 index 00000000..371efdfa --- /dev/null +++ b/tests/ut/ge/session/ge_api_unittest.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#define protected public +#define private public +#include "common/ge/ge_util.h" +#include "proto/ge_ir.pb.h" +#include "inc/external/ge/ge_api.h" +#include "session/session_manager.h" +#undef protected +#undef private + +using namespace std; + +namespace ge { +class UtestGeApi : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(UtestGeApi, run_graph_with_stream) { + vector inputs; + vector outputs; + std::map options; + Session session(options); + auto ret = session.RunGraphWithStreamAsync(10, nullptr, inputs, outputs); + ASSERT_NE(ret, SUCCESS); + SessionManager session_manager; + session_manager.init_flag_ = true; + ret = session_manager.RunGraphWithStreamAsync(10, 10, nullptr, inputs, outputs); + ASSERT_NE(ret, SUCCESS); + InnerSession inner_session(1, options); + inner_session.init_flag_ = true; + ret = inner_session.RunGraphWithStreamAsync(10, nullptr, inputs, outputs); + ASSERT_NE(ret, SUCCESS); +} + +TEST_F(UtestGeApi, build_graph_success) { + vector inputs; + std::map options; + Session session(options); + auto ret = session.BuildGraph(1, inputs); + ASSERT_NE(ret, SUCCESS); +} +} // namespace ge diff --git a/tests/ut/ge/session/inner_session_unittest.cc b/tests/ut/ge/session/inner_session_unittest.cc new file mode 100644 index 00000000..19f75d9f --- /dev/null +++ b/tests/ut/ge/session/inner_session_unittest.cc @@ -0,0 +1,47 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#define private public +#define protected public +#include "session/inner_session.h" +#undef private +#undef protected + + +using namespace std; + +namespace ge { +class Utest_Inner_session : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(Utest_Inner_session, build_graph_success) { + std::map options; + uint64_t session_id = 1; + InnerSession inner_seesion(session_id, options); + std::vector inputs; + ge::Tensor tensor; + inputs.emplace_back(tensor); + Status ret = inner_seesion.BuildGraph(1, inputs); + EXPECT_NE(ret, ge::SUCCESS); +} + +} // namespace ge diff --git a/tests/ut/ge/session/session_manager_unittest.cc b/tests/ut/ge/session/session_manager_unittest.cc new file mode 100644 index 00000000..3ba5def1 --- /dev/null +++ b/tests/ut/ge/session/session_manager_unittest.cc @@ -0,0 +1,78 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define private public +#define protected public +#include "session/session_manager.h" +#undef private +#undef protected + + +using namespace std; + +namespace ge { +class Utest_SessionManager : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(Utest_SessionManager, build_graph_failed) { + map session_manager_option; + map session_option; + SessionManager *session_manager = new SessionManager(); + uint64_t session_id = 0; + uint32_t graph_id = 0; + std::vector inputs; + + Status ret = session_manager->BuildGraph(session_id, graph_id, inputs); + EXPECT_EQ(ret, ge::GE_SESSION_MANAGER_NOT_INIT); + + session_manager->Initialize(session_manager_option); + ret = session_manager->BuildGraph(session_id, graph_id, inputs); + EXPECT_NE(ret, ge::SUCCESS); + delete session_manager; +} + +TEST_F(Utest_SessionManager, RungraphAsync_before_init) { + SessionManager *session_manager = new SessionManager(); + SessionId session_id; + uint32_t graph_id = 0; + std::vector inputs; + RunAsyncCallback callback; + Status ret = session_manager->RunGraphAsync(session_id, graph_id, inputs, callback); + EXPECT_EQ(ret, ge::GE_SESSION_MANAGER_NOT_INIT); + delete session_manager; +} + +TEST_F(Utest_SessionManager, RungraphAsync_failed) { + map session_manager_option; + SessionManager *session_manager = new SessionManager(); + session_manager->Initialize(session_manager_option); + + SessionId session_id; + uint32_t graph_id = 0; + std::vector inputs; + RunAsyncCallback callback; + Status ret = session_manager->RunGraphAsync(session_id, graph_id, inputs, callback); + EXPECT_EQ(ret, ge::GE_SESSION_NOT_EXIST); + delete session_manager; +} + +} // namespace ge diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index dadabaf6..f5d1a83c 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -27,6 +27,7 @@ #include 
"single_op/task/tbe_task_builder.h" #undef private #undef protected +#include "graph/passes/graph_builder_utils.h" using namespace std; using namespace testing; @@ -223,3 +224,19 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { model.BuildDynamicOp(res, dynamic_single_op); } +TEST_F(UtestSingleOpModel, test_host_mem) { + string model_data_str = "123456789"; + SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); + + // make graph + ut::GraphBuilder builder = ut::GraphBuilder("graph"); + auto data = builder.AddNode("Data", "Data", 0, 1); + auto netoutput = builder.AddNode("Netoutput", "NetOutput", 1, 0); + builder.AddDataEdge(data, 0, netoutput, 0); + auto graph = builder.GetGraph(); + model.op_with_hostmem_[0] = data; + + std::mutex stream_mu_; + DynamicSingleOp single_op(0, &stream_mu_, nullptr); + ASSERT_EQ(model.SetHostMemTensor(single_op), SUCCESS); +} diff --git a/tests/ut/ge/single_op/single_op_unittest.cc b/tests/ut/ge/single_op/single_op_unittest.cc index 8c2f6e51..3519811b 100644 --- a/tests/ut/ge/single_op/single_op_unittest.cc +++ b/tests/ut/ge/single_op/single_op_unittest.cc @@ -160,4 +160,23 @@ TEST_F(UtestSingleOp, test_singleop_execute_async2) { EXPECT_EQ(single_op.running_param_->mem_base, nullptr); EXPECT_EQ(single_op.tasks_.size(), 0); EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); -} \ No newline at end of file +} + +TEST_F(UtestSingleOp, test_set_host_mem) { + std::mutex stream_mu_; + DynamicSingleOp single_op(0, &stream_mu_, nullptr); + + vector input_buffers; + DataBuffer data_buffer; + input_buffers.emplace_back(data_buffer); + + vector input_descs; + GeTensorDesc tensor_desc1; + input_descs.emplace_back(tensor_desc1); + + vector op_input_descs; + auto tensor_desc2 = std::make_shared(); + op_input_descs.emplace_back(tensor_desc2); + single_op.tensor_with_hostmem_[0] = op_input_descs; + EXPECT_EQ(single_op.SetHostTensorValue(input_descs, input_buffers), SUCCESS); +} diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 7a2cbc50..df57c82e 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -61,9 +61,18 @@ enum FWKTaskExtInfoType { FWK_ADPT_EXT_OP_NAME, FWK_ADPT_EXT_SESSION_INFO, FWK_ADPT_EXT_BITMAP, + FWK_ADPT_EXT_TOPIC_TYPE, FWK_ADPT_EXT_INVALID }; +enum FWKExtTopicType { + FWK_ADPT_TOPIC_DEVICE_ONLY = 0, + FWK_ADPT_TOPIC_DEVICE_FIRST, + FWK_ADPT_TOPIC_HOST_ONLY, + FWK_ADPT_TOPIC_HOST_FIRST, + FWK_ADPT_TOPIC_INVALID +}; + enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_NULL = 0, FWK_ADPT_UPDATE_INPUT, diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index cf5347cb..338e8854 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -185,6 +185,11 @@ typedef void (*rtCallback_t)(void *fnData); #define RT_FUSION_KERNEL_DUMPFLAG (0x04) #define RT_KERNEL_CUSTOM_AICPU (0x08) +// STARS topic scheduler sqe : topic_type +#define RT_KERNEL_DEVICE_FIRST (0X10) +#define RT_KERNEL_HOST_ONLY (0X20) +#define RT_KERNEL_HOST_FIRST (0X30) + /** * @ingroup rt_kernel * @brief kernel mode