Synchronize latest Ascend software suite 24 Dec 2020

Merry xmas by the way
4 years ago · 274dbb5dc9
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -607,7 +607,7 @@ set(INFER_SRC_LIST

 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_runner.so ############
 add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)
 add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})

 target_compile_definitions(ge_runner PRIVATE
    PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -648,11 +648,14 @@ target_include_directories(ge_runner PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_libraries(ge_runner
 target_link_libraries(ge_runner PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    ge_memory
    adump_server
    static_mmpa
    -Wl,--whole-archive
    msprofiler_fwk
    -Wl,--no-whole-archive
    -Wl,--no-as-needed
    graph
    ge_common
@@ -712,7 +715,7 @@ target_include_directories(ge_compiler PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_libraries(ge_compiler
 target_link_libraries(ge_compiler PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    ge_memory
    static_mmpa
@@ -766,7 +769,14 @@ target_link_options(opensrc_ascendcl PRIVATE
    -Wl,--allow-multiple-definition
    -Wl,-z,muldefs
    -Wl,-Bsymbolic
    -Wl,--exclude-libs,ALL
    -Wl,--exclude-libs,libascend_protobuf.a
    -Wl,--exclude-libs,libge_executor.a
    -Wl,--exclude-libs,libge_common.a
    -Wl,--exclude-libs,libgraph.a
    -Wl,--exclude-libs,libmmpa.a
    -Wl,--exclude-libs,libregister.a
    -Wl,--exclude-libs,liberror_manager.a
    -Wl,--exclude-libs,libadump_server.a
 )
 target_link_libraries(opensrc_ascendcl PRIVATE
                     -Wl,--whole-archive
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
    for (auto dim : output_descs.at(i).GetShape().GetDims()) {
      output.mutable_shape()->add_dim(dim);
    }
    for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
      output.mutable_origin_shape()->add_dim(dim);
    }
    int64_t output_size = 0;
    if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Get output size filed");
@@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
    for (auto dim : input_descs.at(i).GetShape().GetDims()) {
      input.mutable_shape()->add_dim(dim);
    }
    for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
      input.mutable_origin_shape()->add_dim(dim);
    }
    int64_t input_size = 0;
    if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Get output size filed");
@@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() {
  SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
  GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
         dump_path.c_str());

  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
  }
  aicpu::dump::Task task;
  task.set_task_id(task_id);
  task.set_stream_id(stream_id);
  task.mutable_op()->set_op_name(op_desc_->GetName());
  task.mutable_op()->set_op_type(op_desc_->GetType());
  if (dump_properties_.GetDumpMode() == kDumpOutput) {
--- a/ge/common/ge/tbe_plugin_manager.cc
+++ b/ge/common/ge/tbe_plugin_manager.cc
@@ -181,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
 void TBEPluginManager::LoadCustomOpLib() {
  LoadPluginSo(options_);

  std::string fmk_type = std::to_string(domi::TENSORFLOW);
  auto it = options_.find(ge::FRAMEWORK_TYPE);
  if (it != options_.end()) {
   fmk_type = it->second;
  }
  std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
  GELOGI("The size of registration_datas is: %zu", registration_datas.size());
  for (OpRegistrationData reg_data : registration_datas) {
    GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
           TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
    domi::OpRegistry::Instance()->Register(reg_data);
    if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
      GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
             TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
      (void)domi::OpRegistry::Instance()->Register(reg_data);
    }
  }
 }

--- a/ge/common/profiling/ge_profiling.cc
+++ b/ge/common/profiling/ge_profiling.cc
@@ -112,7 +112,6 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
  if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
    GELOGW("Msprof ctrl callback is exist, just ignore it.");
  } else {
    GELOGI("GE register Msprof ctrl callback.");
    ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
  }
  return ge::SUCCESS;
@@ -124,7 +123,6 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
    return ge::PARAM_INVALID;
  }
  // Pass MsprofSetDeviceCallback to runtime
  GELOGI("GE pass setdevice callback to runtime.");
  ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
  if (rt_ret != ge::SUCCESS) {
    GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!");
@@ -158,7 +156,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
  if (type != kProfCommandhandleFinalize) {
    GE_CHECK_NOTNULL(data);
  }
  ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data;
  ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data);
  auto iter = kProfCommandTypeMap.find(type);
  if (iter == kProfCommandTypeMap.end()) {
    GELOGW("The prof comand type is invalid.");
@@ -183,7 +181,8 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
  if (type != kProfCommandhandleFinalize) {
    command.module_index = prof_config_param->profSwitch;
  }
  GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index);
  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
         command.module_index);
  if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
    GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
  }
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
 }  // namespace

 namespace ge {
 ProfilingManager::ProfilingManager() : is_load_profiling_(false),
                                       is_execute_profiling_(false),
                                       is_training_trace_(false),
                                       subscribe_count_(0) {
 ProfilingManager::ProfilingManager()
    : is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) {
  prof_cb_.msprofCtrlCallback = nullptr;
  prof_cb_.msprofReporterCallback = nullptr;
 }
@@ -102,8 +100,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
      return INTERNAL_ERROR;
    }
    is_execute_profiling_ = true;
    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(),
          prof_conf.options, options.profiling_options.c_str());
    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
           options.profiling_options.c_str());
  } else {
    (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
    (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
@@ -143,6 +141,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
  }
  try {
    Json prof_options = Json::parse(options);
    if (options.find(kTrainingTrace) == std::string::npos) {
      return ge::SUCCESS;
    }
    const std::string training_trace = prof_options[kTrainingTrace];
    if (training_trace.empty()) {
      GELOGI("Training trace will not take effect.");
@@ -802,32 +803,46 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
  if (!fp_point_.empty() && !bp_point_.empty()) {
    fp_point = fp_point_;
    bp_point = bp_point_;
    GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str());
    GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(),
           fp_point.c_str());
    return;
  }
  // ProfApi mode and training trace is set
  try {
    char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
  // Parse options first
  char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
  bool is_profiling_valid = false;
  std::string profiling_options;
  if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS &&
      !profiling_options.empty()) {
    is_profiling_valid = true;
  } else {
    INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX);
    if (ret != EN_OK) {
      GELOGI("PROFILING_OPTIONS env is not exist.");
      return;
    }
    GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options);
    Json prof_options = Json::parse(env_profiling_options);
    profiling_options = env_profiling_options;
    is_profiling_valid = true;
  }
  if (is_profiling_valid) {
    try {
      Json prof_options = Json::parse(profiling_options);

    fp_point_ = prof_options[kFpPoint];
    bp_point_ = prof_options[kBpPoint];
      fp_point_ = prof_options[kFpPoint];
      bp_point_ = prof_options[kBpPoint];

    fp_point = fp_point_;
    bp_point = bp_point_;
    if (!fp_point_.empty() && !bp_point_.empty()) {
      GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
      fp_point = fp_point_;
      bp_point = bp_point_;
      if (!fp_point_.empty() && !bp_point_.empty()) {
        GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
      }
    } catch (...) {
      GELOGW("Json prof options is invalid.");
      return;
    }
  } catch (...) {
    GELOGE(FAILED, "Json prof options is invalid.");
    return;
  }
  
  return;
 }

--- a/ge/common/profiling/profiling_manager.h
+++ b/ge/common/profiling/profiling_manager.h
@@ -36,21 +36,21 @@ using Json = nlohmann::json;
 namespace {
  const std::string GE_PROFILING_MODULE = "Framework";
  // DataTypeConfig MASK
  #define PROF_ACL_API_MASK                0x0001
  #define PROF_TASK_TIME_MASK              0x0002
  #define PROF_AICORE_METRICS_MASK         0x0004
  #define PROF_AICPU_TRACE_MASK            0x0008
  #define PROF_MODEL_EXECUTE_MASK          0x0010
  #define PROF_RUNTIME_API_MASK            0x0020
  #define PROF_RUNTIME_TRACE_MASK          0x0040
  #define PROF_SCHEDULE_TIMELINE_MASK      0x0080
  #define PROF_SCHEDULE_TRACE_MASK         0x0100
  #define PROF_AIVECTORCORE_METRICS_MASK   0x0200
  #define PROF_SUBTASK_TIME_MASK           0x0400
  #define PROF_TRAINING_TRACE_MASK         0x0800
  #define PROF_HCCL_TRACE_MASK             0x1000
  #define PROF_DATA_PROCESS_MASK           0x2000
  #define PROF_MODEL_LOAD_MASK             0x8000000000000000
  const uint64_t PROF_ACL_API_MASK = 0x0001;
  const uint64_t PROF_TASK_TIME_MASK = 0x0002;
  const uint64_t PROF_AICORE_METRICS_MASK = 0x0004;
  const uint64_t PROF_AICPU_TRACE_MASK = 0x0008;
  const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010;
  const uint64_t PROF_RUNTIME_API_MASK = 0x0020;
  const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040;
  const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080;
  const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100;
  const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200;
  const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400;
  const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800;
  const uint64_t PROF_HCCL_TRACE_MASK = 0x1000;
  const uint64_t PROF_DATA_PROCESS_MASK = 0x2000;
  const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000;

 }  // namespace
 namespace ge {
@@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
  bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
  bool ProfilingModelLoadOn() const { return is_load_profiling_; }
  bool ProfilingModelExecuteOn() const;
  bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env
  // is_execute_profiling_ only used by ge option and env
  bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
  void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
                           const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
  void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
--- a/ge/common/proto/op_mapping_info.proto
+++ b/ge/common/proto/op_mapping_info.proto
@@ -15,6 +15,7 @@ message Output {
    int32 original_output_data_type = 7;
    int32 original_output_format = 8;
    uint64 size = 9;
    Shape origin_shape = 10;
 }

 message Input {
@@ -23,6 +24,7 @@ message Input {
    Shape shape = 3;
    uint64 address = 4;
    uint64 size = 5;
    Shape origin_shape = 6;
 }

 enum BufferType {
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -209,19 +209,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,

 namespace ge {
 bool GeExecutor::isInit_ = false;
 // Adapter that relays model-completion callbacks to an optional wrapped ModelListener.
 class ModelListenerAdapter : public ModelListener {
 public:
  // Forwards the completion notification to `listener` and returns whatever it returns.
  // When no listener is attached, logs an error and returns FAILED instead of dereferencing null.
  domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
                             std::vector<ge::OutputTensorInfo> &outputs) {
    if (listener == nullptr) {
      GELOGE(ge::FAILED, "listener is null.");
      return FAILED;
    }
    return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
  }

  // Wrapped listener. Public so that owners can assign it after construction; null is tolerated
  // by OnComputeDone (it reports FAILED).
  std::shared_ptr<ge::ModelListener> listener;
 };

 static void InitOpsProtoManger() {
  string opsproto_path;
@@ -573,60 +560,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
  return SUCCESS;
 }

 // Load model
 // Loads an offline model from the file `path` and writes the resulting id to `model_id`.
 //   model_id: output, forwarded to GraphLoader::LoadModelFromFile.
 //   path:     model file path; it must be resolvable by RealPath.
 //   key:      forwarded to the loader as its key path argument.
 //   priority: forwarded to the loader unchanged.
 //   listener: user listener, wrapped in a ModelListenerAdapter before being handed to the loader.
 // Returns SUCCESS, or one of ACL_ERROR_GE_EXEC_NOT_INIT, ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
 // ACL_ERROR_GE_MEMORY_ALLOCATION, ACL_ERROR_GE_LOAD_MODEL.
 Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
                                    int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
  GELOGI("load model offline begin.");
  // Refuse to do anything until the executor has been initialized.
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
  }

  // An empty result from RealPath is treated as an invalid path.
  // NOTE(review): filePath is only used for this check; the unresolved `path` is what is passed
  // to the loader below - confirm that this is intended.
  string filePath = RealPath(path.c_str());
  if (filePath.empty()) {
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
           "File path is invalid. please check your text file '%s'.", path.c_str());
    return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
  }

  // Wrap the caller's listener so that a null listener is reported as FAILED rather than dereferenced.
  std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
  if (listener_adapter == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  listener_adapter->listener = listener;

  Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
  if (ret != SUCCESS) {
    // The loader's own status is only logged; callers always see ACL_ERROR_GE_LOAD_MODEL.
    GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
    return ACL_ERROR_GE_LOAD_MODEL;
  }
  return SUCCESS;
 }

 // Loads a model from in-memory `model_data` and writes the resulting id to `model_id`.
 // The user `listener` is wrapped in a ModelListenerAdapter before being passed to GraphLoader::LoadModel.
 // Returns SUCCESS, or one of ACL_ERROR_GE_EXEC_NOT_INIT, ACL_ERROR_GE_MEMORY_ALLOCATION,
 // ACL_ERROR_GE_LOAD_MODEL.
 Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
                             std::shared_ptr<ge::ModelListener> listener) {
  GELOGI("Load model begin.");
  // Refuse to do anything until the executor has been initialized.
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
  }

  // Wrap the caller's listener so that a null listener is reported as FAILED rather than dereferenced.
  std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
  if (listener_adapter == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  listener_adapter->listener = listener;

  Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
  if (ret != SUCCESS) {
    // The loader's own status is only logged; callers always see ACL_ERROR_GE_LOAD_MODEL.
    GELOGE(ret, "[GeExecutor] LoadModel failed.");
    return ACL_ERROR_GE_LOAD_MODEL;
  }
  // ret can only be SUCCESS at this point.
  return ret;
 }

 Status GeExecutor::UnloadModel(uint32_t model_id) {
  GELOGD("unload model %u begin.", model_id);
  if (!isInit_) {
@@ -659,21 +592,6 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
  return SUCCESS;
 }

 // Runs a model using `input_data` and `output_data` and returns the status of GraphExecutor::DataInput.
 // Returns ACL_ERROR_GE_EXEC_NOT_INIT when the executor has not been initialized.
 Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
  GELOGI("run model begin.");
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
  }

  // Presumably converts the public RunModelData structures into the internal InputData/OutputData
  // form expected by GraphExecutor - TODO confirm against GetDomiInputData/GetDomiOutputData.
  InputData inputs;
  GetDomiInputData(input_data, inputs);
  OutputData outputs;
  GetDomiOutputData(output_data, outputs);

  return GraphExecutor::DataInput(inputs, outputs);
 }

 // Get input and output descriptor
 Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
                                    std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {
--- a/ge/executor/proto/op_mapping_info.proto
+++ b/ge/executor/proto/op_mapping_info.proto
@@ -15,6 +15,7 @@ message Output {
    int32 original_output_data_type = 7;
    int32 original_output_format = 8;
    uint64 size = 9;
    Shape origin_shape = 10;
 }

 message Input {
@@ -23,6 +24,7 @@ message Input {
    Shape shape = 3;
    uint64 address = 4;
    uint64 size = 5;
    Shape origin_shape = 6;
 }

 enum BufferType {
--- a/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -39,7 +39,7 @@ namespace {
      }                                                                                                                \
      ge_tensor = MakeShared<GeTensor>(out_desc);                                                                      \
      GE_CHECK_NOTNULL(ge_tensor);                                                                                     \
      GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
      GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
      if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) {      \
        GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str());          \
        return MEMALLOC_FAILED;                                                                                        \
@@ -50,8 +50,7 @@ namespace {
    } else {                                                                                                           \
      ge_tensor = outputs[i];                                                                                          \
      GE_CHECK_NOTNULL(ge_tensor);                                                                                     \
      GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i,                          \
             reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size());             \
      GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i);                                             \
    }                                                                                                                  \
    auto tensor = TensorAdapter::AsTensor(*ge_tensor);                                                                 \
    auto tensor_name = op_desc->GetOutputNameByIndex(i);                                                               \
--- a/ge/generator/ge_generator.cc
+++ b/ge/generator/ge_generator.cc
@@ -563,6 +563,19 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr

  GE_CHECK_NOTNULL(ge_root_model);
  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
  ModelHelper model_helper;
  string model_name = "";
  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
                                                                 model_name);
  if (name_ret != SUCCESS) {
    ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
    GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
    return PARAM_INVALID;
  }
  map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
  GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
  GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
  ge_model->SetName(model_name);
  ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
  if (ret != SUCCESS) {
    GELOGE(ret, "Save model failed");
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);

  if (mem_assigner->GetP2PMemOffset() > 0) {
  if (mem_assigner->GetP2PMemOffset() >= 0) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
--- a/ge/graph/build/stream_graph_optimizer.cc
+++ b/ge/graph/build/stream_graph_optimizer.cc
@@ -48,26 +48,41 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap
  }
 }

 bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) {
 bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) {
  if (comp_graph == nullptr) {
    return false;
  }
  std::set<int64_t> stream_set;
  std::set<std::string> label_set;
  for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) {
    GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue);
    int64_t stream_id = cur_node->GetOpDesc()->GetStreamId();
    if (stream_id == kInvalidStream) {
      continue;
    }
    GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(),
           comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
    stream_set.insert(stream_id);

    std::string batch_label;
    if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
      label_set.insert(batch_label);
    } else {
      GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
             cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
      continue;
    }

    GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
           comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
  }
  if (stream_set.size() > 1) {
    GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.",
  if (stream_set.size() > 1 || label_set.size() > 1) {
    GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
           comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size());
    return false;
  }

  if (!label_set.empty()) {
    (void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin());
  }
  return true;
 }

@@ -99,8 +114,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
          continue;
        }

        if (!IsSameStreamId(subgraph)) {
          GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str());
        if (!IsSameStreamIdOrBatchLabel(subgraph)) {
          GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str());
          continue;
        }
        OpDescPtr op_desc = nodes.at(0)->GetOpDesc();
@@ -112,9 +127,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
          return FAILED;
        }
        run_context.stream = run_context.graphStreamList[stream_id];
        GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.",
               subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
               static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)));
        std::string batch_label;
        (void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
        GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
          "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
               static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());
        for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
          GE_CHECK_NOTNULL(*iter);
          Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);
--- a/ge/graph/build/stream_graph_optimizer.h
+++ b/ge/graph/build/stream_graph_optimizer.h
@@ -41,7 +41,7 @@ class StreamGraphOptimizer {
 private:
  void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map);

  bool IsSameStreamId(const ComputeGraphPtr &comp_graph);
  bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph);
 };
 }  // namespace ge
 #endif  // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_
--- a/ge/graph/build/task_generator.cc
+++ b/ge/graph/build/task_generator.cc
@@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
      continue;
    }
    string op_type = op_desc->GetType();
    if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) {
    if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) {
      continuous_op_lists.emplace_back(vector<OpDescPtr>());
    } else {
      continuous_op_lists.back().emplace_back(op_desc);
--- a/ge/graph/load/graph_loader.cc
+++ b/ge/graph/load/graph_loader.cc
@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
                                     ModelData &model_data) {
  Status ret;
  if (!CheckInputPathValid(path)) {
    GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
    return GE_EXEC_MODEL_PATH_INVALID;
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
    return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
  }

  GELOGI("Load model begin, model path is: %s", path.c_str());
  if (!key_path.empty() && !CheckInputPathValid(key_path)) {
    GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
    return GE_EXEC_MODEL_KEY_PATH_INVALID;
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
    return SUCCESS;
 }

 // Reads the model file at `path` via LoadDataFromFile, then loads it with LoadModel.
 //   key_path / priority: forwarded to LoadDataFromFile.
 //   listener:            forwarded to LoadModel.
 //   model_id:            output, forwarded to LoadModel.
 // Returns the status of whichever step failed, otherwise the status of LoadModel.
 // The buffer in model_data.model_data is released before returning on every path.
 Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
                                      const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
  Status ret;
  ModelData model_data;
  ret = LoadDataFromFile(path, key_path, priority, model_data);
  if (ret != SUCCESS) {
    GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
    // Release any buffer that was attached to model_data before the failure.
    // NOTE(review): delete[] on a char* assumes the buffer was allocated with new char[] - confirm
    // against LoadDataFromFile.
    if (model_data.model_data != nullptr) {
      delete[] static_cast<char *>(model_data.model_data);
      model_data.model_data = nullptr;
    }
    return ret;
  }

  ret = LoadModel(model_data, listener, model_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
    // Frees the buffer and nulls the pointer, so the unconditional cleanup below becomes a no-op
    // on this path (the two cleanups are redundant, not a double free).
    if (model_data.model_data != nullptr) {
      delete[] static_cast<char *>(model_data.model_data);
      model_data.model_data = nullptr;
    }
  }

  // Cleanup for the success path (and a no-op after the failure branch above).
  if (model_data.model_data != nullptr) {
    delete[] static_cast<char *>(model_data.model_data);
    model_data.model_data = nullptr;
  }

  return ret;
 }

 // Loads `model_data` through ModelManager::LoadModelOffline and then starts the model.
 //   listener: forwarded to ModelManager::LoadModelOffline.
 //   model_id: passed by reference to LoadModelOffline, then used for Start/Unload.
 // Returns SUCCESS when both load and start succeed, otherwise the failing step's status.
 Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
                              uint32_t &model_id) {
  // The id logged here is whatever value the caller passed in; LoadModelOffline has not run yet.
  GELOGI("Load model begin, model_id:%u.", model_id);

  // For GeOp, Open Device 0 here.
  // Presumably GE_CHK_RT_RET returns early when rtSetDevice fails - TODO confirm the macro definition.
  GE_CHK_RT_RET(rtSetDevice(0));
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
  if (ret != SUCCESS) {
    // Undo the rtSetDevice(0) above before reporting the load failure.
    GE_CHK_RT(rtDeviceReset(0));
    GELOGE(ret, "LoadModel: Load failed.");
    return ret;
  }
  ret = model_manager->Start(model_id);
  if (ret != SUCCESS) {
    // Best-effort rollback: an Unload failure is only logged, and the Start status is still returned.
    // NOTE(review): unlike the load-failure path, device 0 is not reset here - confirm whether
    // Unload is expected to take care of that.
    if (model_manager->Unload(model_id) != SUCCESS) {
      GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
    }
    GELOGE(ret, "LoadModel: Start failed.");
    return ret;
  }
  GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
  return SUCCESS;
 }

 Status GraphLoader::CommandHandle(const Command &command) {
  try {
    auto model_manager = ModelManager::GetInstance();
@@ -225,16 +168,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
 }

 Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
                                      size_t memsize, void *weight_ptr, size_t weightsize) {
                                      size_t mem_size, void *weight_ptr, size_t weight_size) {
  GELOGI("Load model begin, model_id:%u.", model_id);
  // For ACL, Open Device from App.
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->LoadModelOffline(
      model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
      model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
  if (ret != SUCCESS) {
    GELOGE(ret, "Load model failed, model_id:%u.", model_id);
    return ret;
    GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
    return ACL_ERROR_GE_LOAD_MODEL;
  }
  GELOGI("Load model success, model_id:%u.", model_id);
  return SUCCESS;
@@ -259,8 +202,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
  if (ret != SUCCESS) {
    GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
    return ret;
    GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
    return ACL_ERROR_GE_LOAD_MODEL;
  }

  GELOGI("Load model with queue success, model_id:%u.", model_id);
--- a/ge/graph/load/graph_loader.h
+++ b/ge/graph/load/graph_loader.h
@@ -44,12 +44,6 @@ class GraphLoader {

  static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);

  static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
                          uint32_t &model_id);

  static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
                                  const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);

  static Status CommandHandle(const Command &command);

  static Status GetMemoryInfo(int64_t &free);
--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@@ -319,6 +319,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
  for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
    output.mutable_shape()->add_dim(dim);
  }
  for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
    output.mutable_origin_shape()->add_dim(dim);
  }
  int64_t output_size = 0;
  if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
    GELOGE(PARAM_INVALID, "Get output size filed");
@@ -476,6 +479,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
  for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
    input.mutable_shape()->add_dim(dim);
  }
  for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
    input.mutable_origin_shape()->add_dim(dim);
  }
  int64_t input_size = 0;
  if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
    GELOGI("Get aipp input size according to attr is %ld", input_size);
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
    if (weight_ptr == nullptr) {
      weights_mem_base_ = MallocWeightsMem(weights_size);
      if (weights_mem_base_ == nullptr) {
        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
        GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size);
        return ACL_ERROR_GE_MEMORY_ALLOCATION;
      }
      is_inner_weight_base_ = true;
    }
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh

 Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
  if (is_feature_map_mem_has_inited_) {
    GELOGE(FAILED, "call InitFeatureMapMem more than once .");
    return FAILED;
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once .");
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  is_feature_map_mem_has_inited_ = true;

@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
    return FAILED;
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }

  mem_base_ = static_cast<uint8_t *>(dev_ptr);
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
  if (TotalMemSize() && mem_base_ == nullptr) {
    mem_base_ = MallocFeatureMapMem(data_size);
    if (mem_base_ == nullptr) {
      GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
      return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    }
    GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
            runtime_param_.graph_id, mem_base_, data_size);
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
  if (p2p_data_size != 0) {
    p2p_mem_base_ = MallocP2PMem(p2p_data_size);
    if (p2p_mem_base_ == nullptr) {
      GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
      return GE_EXEC_ALLOC_P2P_MEM_FAILED;
      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size);
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    }
    GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
           p2p_mem_base_, p2p_data_size);
@@ -710,6 +710,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
  }

  // collect profiling for ge
  GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed");
  auto &profiling_manager = ProfilingManager::Instance();
  if (profiling_manager.ProfilingModelLoadOn()) {
    Status p_ret = ReportProfilingData();
@@ -970,7 +971,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
  uint32_t parent_index = 0;  // Ignore subgraph Data Node.
  if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str());
    return InitInputBatchLabel(node);
    return SUCCESS;
  }

  data_op_list_.push_back(op_desc);
@@ -1011,10 +1012,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
  }

  data_op_index++;
  if (InitInputZeroCopyNodes(node) != SUCCESS) {
    GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!");
    return PARAM_INVALID;
  }
  return SUCCESS;
 }

@@ -1036,39 +1033,6 @@ void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_inde
  }
 }

 ///
 /// @ingroup ge
 /// @brief Record the batch label of every op that consumes the output of a Data op.
 ///        Ops without a batch label attribute are recorded with kDefaultBatchLable.
 /// @param [in] node: Data Op whose output anchor kDataIndex is inspected.
 /// @return SUCCESS when every consumer was processed; FAILED when the output anchor,
 ///         a consumer node or a consumer op desc is nullptr.
 ///
 Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) {
  auto out_data_anchor = node->GetOutDataAnchor(kDataIndex);
  if (out_data_anchor == nullptr) {
    GELOGE(FAILED, "Out data anchor is nullptr");
    return FAILED;
  }
  for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
    // Named peer_node so that the Data node parameter is not shadowed inside the loop.
    auto peer_node = peer_in_data_anchor->GetOwnerNode();
    if (peer_node == nullptr) {
      GELOGE(FAILED, "Peer node is nullptr");
      return FAILED;
    }
    auto op_desc = peer_node->GetOpDesc();
    if (op_desc == nullptr) {
      GELOGE(FAILED, "Op desc is nullptr");
      return FAILED;
    }
    string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    if (batch_label.empty()) {
      batch_label = kDefaultBatchLable;
    }
    // emplace leaves an already recorded op untouched, so log only when a new entry was added.
    if (zero_copy_op_id_batch_label_.emplace(op_desc->GetId(), batch_label).second) {
      GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", op_desc->GetName().c_str(),
             op_desc->GetId(), batch_label.c_str());
    }
  }
  return SUCCESS;
 }

 bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
  bool getnext_sink_dynamic = false;
  if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) {
@@ -1094,7 +1058,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
  if (owner_graph->GetParentGraph() != nullptr) {
    GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str());
    op_list_.erase(op_desc->GetId());
    return InitOutputBatchLabel(node);
    return SUCCESS;
  }

  output_op_list_.push_back(op_desc);
@@ -1146,8 +1110,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
    }
  }

  GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS,
                  GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;);
  GetAllGearsInfo(node);
  if (is_getnext_sink_dynamic_) {
    GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS,
@@ -1343,121 +1305,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
  }
 }

 ///
 /// @ingroup ge
 /// @brief Record batch labels for the ops related to the inputs of a NetOutput op.
 ///        For each connected input the producing node is collected. When the producer is a
 ///        MERGE node, the nodes feeding that MERGE are collected as well; otherwise every
 ///        other non-NETOUTPUT consumer of the same producer output is collected.
 /// @param [in] node: netoutput Op.
 /// @return SUCCESS once all collected nodes were processed; a collected node without an
 ///         op desc is rejected through GE_CHECK_NOTNULL.
 ///
 Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) {
  set<NodePtr> record_nodes;
  for (auto &net_in_anchor : node->GetAllInDataAnchors()) {
    auto producer_anchor = net_in_anchor->GetPeerOutAnchor();
    if (producer_anchor == nullptr) {
      continue;
    }
    auto producer = producer_anchor->GetOwnerNode();
    record_nodes.insert(producer);

    if (producer->GetType() != MERGE) {
      // Collect every other consumer of this producer output, skipping NetOutput nodes.
      for (const auto &sibling_anchor : producer_anchor->GetPeerInDataAnchors()) {
        auto sibling = sibling_anchor->GetOwnerNode();
        if (sibling->GetType() == NETOUTPUT) {
          continue;
        }
        record_nodes.insert(sibling);
      }
      continue;
    }

    // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios
    for (const auto &merge_in_anchor : producer->GetAllInDataAnchors()) {
      auto upstream_anchor = merge_in_anchor->GetPeerOutAnchor();
      if (upstream_anchor != nullptr) {
        record_nodes.insert(upstream_anchor->GetOwnerNode());
      }
    }
  }

  for (const auto &record_node : record_nodes) {
    auto op_desc = record_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    if (batch_label.empty()) {
      batch_label = kDefaultBatchLable;
    }
    // An op that already has a recorded label keeps it.
    if (zero_copy_op_id_batch_label_.count(op_desc->GetId()) != 0) {
      continue;
    }
    zero_copy_op_id_batch_label_.emplace(op_desc->GetId(), batch_label);
    GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
           op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str());
  }
  return SUCCESS;
 }

 ///
 /// @ingroup ge
 /// @brief Propagate the batch label of a subgraph Data op to the ops consuming its output.
 ///        Nothing is recorded when the Data op carries no ATTR_NAME_BATCH_LABEL attribute.
 /// @param [in] node: Data Op.
 /// @return SUCCESS, or the status produced by GE_CHECK_NOTNULL when the output anchor or
 ///         a consumer op desc is nullptr.
 ///
 Status DavinciModel::InitInputBatchLabel(const NodePtr &node) {
  string batch_label;
  const bool has_label = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  if (!has_label) {
    return SUCCESS;  // Not Multi-batch.
  }

  const auto &data_out_anchor = node->GetOutDataAnchor(kDataIndex);
  GE_CHECK_NOTNULL(data_out_anchor);

  for (const auto &consumer_anchor : data_out_anchor->GetPeerInDataAnchors()) {
    const auto &consumer = consumer_anchor->GetOwnerNode();
    const auto &op_desc = consumer->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // Keep the label of an op that was recorded earlier.
    if (zero_copy_op_id_batch_label_.count(op_desc->GetId()) != 0) {
      continue;
    }
    zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label;
    GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(),
           op_desc->GetId(), batch_label.c_str());
  }

  return SUCCESS;
 }

 ///
 /// @ingroup ge
 /// @brief Propagate the batch label of a subgraph NetOutput op to the ops producing its inputs.
 ///        Nothing is recorded when the NetOutput op carries no ATTR_NAME_BATCH_LABEL attribute.
 /// @param [in] node: netoutput Op.
 /// @return SUCCESS, or the status produced by GE_CHECK_NOTNULL when a producer op desc is nullptr.
 ///
 Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) {
  string batch_label;
  const bool has_label = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  if (!has_label) {
    return SUCCESS;  // Not Multi-batch.
  }

  for (const auto &net_in_anchor : node->GetAllInDataAnchors()) {
    const auto &producer_anchor = net_in_anchor->GetPeerOutAnchor();
    if (producer_anchor == nullptr) {
      continue;  // Unconnected input.
    }

    const auto &producer = producer_anchor->GetOwnerNode();
    const auto &op_desc = producer->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // Keep the label of an op that was recorded earlier.
    if (zero_copy_op_id_batch_label_.count(op_desc->GetId()) != 0) {
      continue;
    }
    zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label;
    GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s",
           op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str());
  }

  return SUCCESS;
 }

 /// @ingroup ge
 /// @brief LabelSet Op Initialize.
 /// @param [in] op_desc: LabelSet Op descriptor.
@@ -2240,12 +2087,61 @@ Status DavinciModel::SyncVarData() {
  return ret;
 }

 inline int64_t SumSize(const vector<int64_t> &size_list) {
  int64_t sum_size = 0;
  for (const int64_t &size : size_list) {
    sum_size += size;
 ///
 /// @ingroup ge
 /// @brief Build op_id_map_ (fused op index -> task ids) and profile_list_ (one entry per
 ///        fused op: fusion info, memory sizes, task count) from task_list_.
 /// @return SUCCESS on completion; FAILED when a fusion op index has no op in the model.
 ///         A nullptr task is rejected through GE_CHECK_NOTNULL.
 ///
 Status DavinciModel::InitModelProfile() {
  // First pass: remember which task ids were generated for each fused op index.
  for (const auto &task : task_list_) {
    GE_CHECK_NOTNULL(task);
    const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
    // when type is RT_MODEL_TASK_KERNEL, ctx is not null
    if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
      continue;
    }

    GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size());
    op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID()));
  }

  // Second pass: emit one ProfileInfo per fused op; task_id_set marks tasks already covered
  // by the profile of an op handled earlier.
  std::set<uint32_t> task_id_set;
  using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
  using Range = std::pair<CIT, CIT>;
  // std::accumulate sums in the type of its seed; an int seed would truncate int64_t sizes.
  const int64_t zero_size = 0;
  for (const auto &task : task_list_) {
    GE_CHECK_NOTNULL(task);
    const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
    if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
      continue;
    }

    if (task_id_set.count(task->GetTaskID()) > 0) {
      continue;
    }

    const auto &op_desc = GetOpByIndex(fusion_op_info->op_index);
    GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index);

    ProfileInfo profile;
    profile.fusion_info = *fusion_op_info;
    Range range = op_id_map_.equal_range(fusion_op_info->op_index);
    for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
      profile.task_count++;
      task_id_set.insert(range_idx->second);
    }

    // memory info
    TaskMemInfo &mem_info = profile.memory_info;
    const auto input_size = ModelUtils::GetInputSize(op_desc);
    const auto output_size = ModelUtils::GetOutputSize(op_desc);
    const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc);
    const auto weight_size = ModelUtils::GetWeightSize(op_desc);
    mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), zero_size);
    mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), zero_size);
    mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), zero_size);
    mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), zero_size);
    mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size;

    profile_list_.emplace_back(profile);
  }

  GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size());
  return SUCCESS;
 }

 Status DavinciModel::SinkModelProfile() {
@@ -2253,18 +2149,12 @@ Status DavinciModel::SinkModelProfile() {
  auto &prof_mgr = ProfilingManager::Instance();
  ReporterData reporter_data{};
  // report model data tag name
  std::string tag_name;
  tag_name.append("model_load_info_").append(std::to_string(this->Id()));
  std::string tag_name("model_load_info_" + std::to_string(this->Id()));
  GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
                   return FAILED, "Sink model tag memcpy error.");

  // Model Header
  string name;
  if (!om_name_.empty()) {
    name = om_name_;
  } else {
    name = name_;
  }
  std::string name = om_name_.empty() ? name_ : om_name_;
  size_t name_len = name.size();
  reporter_data.deviceId = device_id_;
  reporter_data.data = (unsigned char *)&name_len;
@@ -2296,128 +2186,71 @@ Status DavinciModel::SinkModelProfile() {
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  int32_t task_num = task_list_.size();
  std::multimap<uint32_t, uint32_t> op_id_map;
  std::set<uint32_t> task_id_set;
  for (int32_t i = 0; i < task_num; i++) {
    auto task = task_list_[i];
    GE_CHECK_NOTNULL(task);
    auto fusion_op_info = task->GetFusionOpInfo();
    // when type is RT_MODEL_TASK_KERNEL, ctx is not null
    if (fusion_op_info != nullptr) {
      uint32_t op_num = fusion_op_info->original_op_names.size();
      uint32_t task_id = task->GetTaskID();
      if (op_num > 0) {
        GELOGI("task.id = %u, opNum = %u", task_id, op_num);
        op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id));
      }
    }
  }

  struct memoryInfo {
    int64_t input_size;
    int64_t output_size;
    int64_t weight_size;
    int64_t workspace_size;
    int64_t total_size;

    memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {}
  };

  using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
  using Range = std::pair<CIT, CIT>;
  for (int32_t i = 0; i < task_num; i++) {
    auto task = task_list_[i];
    GE_CHECK_NOTNULL(task);
    auto fusion_op_info = task->GetFusionOpInfo();
    if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) {
      uint32_t task_id = task->GetTaskID();
      uint32_t op_num = fusion_op_info->original_op_names.size();
      uint32_t task_count = 0;
      if (task_id_set.count(task_id) != 0) {
        continue;
      }

      uint32_t op_id = fusion_op_info->op_index;
      Range range = op_id_map.equal_range(op_id);
      for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
        task_count++;
        uint32_t task_id = range_idx->second;
        task_id_set.insert(task_id);
      }

      // op name after fusion
      string fusion_op_name = fusion_op_info->op_name;
      int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
      reporter_data.data = (unsigned char *)&fusion_op_name_len;
  for (const ProfileInfo &profile : profile_list_) {
    // op name after fusion
    string fusion_op_name = profile.fusion_info.op_name;
    int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
    reporter_data.data = (unsigned char *)&fusion_op_name_len;
    reporter_data.dataLen = sizeof(int32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    reporter_data.data = (unsigned char *)fusion_op_name.c_str();
    reporter_data.dataLen = fusion_op_name_len;
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    // original op name before fusion
    uint32_t op_num = profile.fusion_info.original_op_names.size();
    reporter_data.data = (unsigned char *)&op_num;
    reporter_data.dataLen = sizeof(int32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    for (uint32_t k = 0; k < op_num; k++) {
      std::string op_name = profile.fusion_info.original_op_names[k];
      int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
      reporter_data.data = (unsigned char *)&op_name_len;
      reporter_data.dataLen = sizeof(int32_t);
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());

      reporter_data.data = (unsigned char *)fusion_op_name.c_str();
      reporter_data.dataLen = fusion_op_name_len;
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());

      // original op name before fusion
      reporter_data.data = (unsigned char *)&op_num;
      reporter_data.dataLen = sizeof(int32_t);
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());

      for (uint32_t k = 0; k < op_num; k++) {
        std::string op_name = fusion_op_info->original_op_names[k];
        int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
        reporter_data.data = (unsigned char *)&op_name_len;
        reporter_data.dataLen = sizeof(int32_t);
        GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                         "Reporter data fail, model id:%u.", this->Id());
        reporter_data.data = (unsigned char *)op_name.c_str();
        reporter_data.dataLen = op_name_len;
        GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                         "Reporter data fail, model id:%u.", this->Id());
      }

      // stream id info
      uint32_t streamId = task->GetStreamId();
      reporter_data.data = (unsigned char *)&streamId;
      reporter_data.dataLen = sizeof(int32_t);
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());

      // memory info
      struct memoryInfo memory_info;
      uint32_t op_index = fusion_op_info->op_index;
      auto iter = op_list_.find(op_index);
      GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index);
      auto op_desc = iter->second;
      memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc));
      memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc));
      memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc));
      memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc));
      memory_info.total_size =
          memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size;
      reporter_data.data = (unsigned char *)&memory_info;
      reporter_data.dataLen = sizeof(struct memoryInfo);
      reporter_data.data = (unsigned char *)op_name.c_str();
      reporter_data.dataLen = op_name_len;
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());
    }

      // task info
      reporter_data.data = (unsigned char *)&task_count;
    // stream id info
    uint32_t streamId = profile.fusion_info.stream_id;
    reporter_data.data = (unsigned char *)&streamId;
    reporter_data.dataLen = sizeof(int32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    // memory info
    reporter_data.data = (unsigned char *)&profile.memory_info;
    reporter_data.dataLen = sizeof(profile.memory_info);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    // task info
    reporter_data.data = (unsigned char *)&profile.task_count;
    reporter_data.dataLen = sizeof(uint32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
    for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
      uint32_t task_id = idx->second;
      reporter_data.data = (unsigned char *)&task_id;
      reporter_data.dataLen = sizeof(uint32_t);
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());

      Range task_range = op_id_map.equal_range(op_id);
      for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
        uint32_t task_id = idx->second;
        reporter_data.data = (unsigned char *)&task_id;
        reporter_data.dataLen = sizeof(uint32_t);
        GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                         "Reporter data fail, model id:%u.", this->Id());
      }
    }
  }

  return SUCCESS;
 }

@@ -2991,19 +2824,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
  return SUCCESS;
 }

 Status DavinciModel::UpdateKnownZeroCopyAddr() {
  for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
    auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
 Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
  for (size_t i = 0; i < total_io_addrs.size(); ++i) {
    auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
    if (it_in != knonw_input_data_info_.end()) {
      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
             knonw_input_data_info_.at(total_io_addrs_[i]));
      total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
             knonw_input_data_info_.at(total_io_addrs[i]));
      total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
    }
    auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
    auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
    if (it_out != knonw_output_data_info_.end()) {
      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
             knonw_output_data_info_.at(total_io_addrs_[i]));
      total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
             knonw_output_data_info_.at(total_io_addrs[i]));
      total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
    }
  }
  GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -3032,7 +2865,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
  } else {
    total_io_addrs_ = orig_total_io_addrs_;
  }
  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

  if (total_args_size_ == 0) {
    GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -3099,7 +2932,14 @@ Status DavinciModel::MallocKnownArgs() {
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  // malloc dynamic and static hybrid memory
  if (total_hybrid_args_size_ != 0) {
    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  }
  // malloc fixed addr memory, eg: rts op
  if (total_fixed_addr_size_ != 0) {
    GELOGI("Begin to allocate fixed addr.");
@@ -3257,27 +3097,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v

    for (auto &input_outside_addrs : new_input_outside_addrs_) {
      ZeroCopyOffset &input_outside = input_outside_addrs.second;
      bool ret = input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
      if (ret) {
        void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen;
        SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val));
      }
      input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
    }

    for (auto &output_outside_addrs : new_output_outside_addrs_) {
      ZeroCopyOffset &output_outside = output_outside_addrs.second;
      bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
      if (ret) {
        void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen;
        SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val));
      }
      output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
    }
  }
  auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId());
  if (it == zero_copy_op_id_batch_label_.end()) {

  string batch_label;
  if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) {
    zero_copy_task.SetBatchLabel(kDefaultBatchLable);
  } else {
    zero_copy_task.SetBatchLabel(it->second);
    zero_copy_task.SetBatchLabel(batch_label);
  }

  std::lock_guard<std::mutex> lock(outside_addrs_mutex_);
@@ -3287,27 +3120,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
  }
 }

 void DavinciModel::SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr) {
  // Establish a mapping between batch label and zero copy address for multi-batch scenes
  auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId());
  if (it == zero_copy_op_id_batch_label_.end()) {
    return;
  }

  const string &batch_label = it->second;
  auto iter = zero_copy_batch_label_addrs_.find(batch_label);
  if (iter != zero_copy_batch_label_addrs_.end()) {
    iter->second.insert(addr);
    GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(),
           op_desc->GetName().c_str());
  } else {
    set<uintptr_t> addrs = {addr};
    zero_copy_batch_label_addrs_.emplace(pair<string, set<uintptr_t>>(batch_label, addrs));
    GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.",
           batch_label.c_str(), op_desc->GetName().c_str());
  }
 }

 ///
 /// @ingroup ge
 /// @brief Copy Check input size and model op size.
@@ -3441,15 +3253,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &
      void *addr = data.second.GetDataInfo().at(count).second;
      void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data) +
                                                   data.second.GetRelativeOffset().at(count));
      GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(),
             data.first, addr, size, buffer_addr);
      GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s",
             input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str());
      // For input data, just copy for rts task.
      for (ZeroCopyTask &task : zero_copy_tasks_) {
        if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) {
          continue;
        }
        uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr);
        if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) {
        if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) {
          return FAILED;
        }
      }
@@ -3811,9 +3623,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
  GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_);
  GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed.");
  is_dynamic_ = input_data.is_dynamic_batch;
  if (!is_dynamic_) {
    zero_copy_batch_label_addrs_.clear();
  }

  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START));
  Status ret = CopyModelData(input_data, output_data, is_dynamic_);
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -76,6 +76,20 @@ struct timeInfo {
  int64_t dumpEndTime;
 };

 ///
 /// @brief Plain record of memory sizes for one task; every field starts at 0.
 ///        NOTE(review): units are presumably bytes - confirm against the code that fills these in.
 ///
 struct TaskMemInfo {
  int64_t input_size = 0;
  int64_t output_size = 0;
  int64_t weight_size = 0;
  int64_t workspace_size = 0;
  int64_t total_size = 0;
 };

 ///
 /// @brief One profiling record: a FusionOpInfo, a TaskMemInfo and a task counter that starts at 0.
 ///
 struct ProfileInfo {
  FusionOpInfo fusion_info;
  TaskMemInfo memory_info;
  uint32_t task_count = 0;
 };

 enum ExecuteMode {
  INITIALIZATION,
  SYNCHRONIZATION,
@@ -226,8 +240,6 @@ class DavinciModel {
  // Read-only access to data_op_list_.
  const vector<OpDescPtr> &GetDataList() const {
    return data_op_list_;
  }

  // get Op
  const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }

  OpDescPtr GetOpByIndex(uint32_t index) const {
    if (op_list_.find(index) == op_list_.end()) {
      return nullptr;
@@ -436,10 +448,6 @@ class DavinciModel {

  // Returns the stored load_end_time_ value.
  int64_t GetLoadEndTime() {
    return load_end_time_;
  }

  Status SinkModelProfile();

  Status SinkTimeProfile(const InputData &current_data);

  Status ReportProfilingData();

  void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
@@ -476,6 +484,14 @@ class DavinciModel {
  // Despite the "Set" name this appends: every element of io_addrs is added, in order,
  // to the end of total_io_addrs_. Existing entries are kept.
  void SetTotalIOAddrs(vector<void *> &io_addrs) {
    auto tail = total_io_addrs_.end();
    total_io_addrs_.insert(tail, io_addrs.begin(), io_addrs.end());
  }
  // Despite the "Set" name this accumulates: args_size is added to the running total
  // kept in total_hybrid_args_size_.
  void SetHybridArgsSize(uint32_t args_size) {
    total_hybrid_args_size_ = total_hybrid_args_size_ + args_size;
  }
  // Returns the total accumulated so far in total_hybrid_args_size_.
  uint32_t GetHybridArgsSize() { return total_hybrid_args_size_; }
  // Returns hybrid_addrs_ advanced by `offset` bytes. No null or bounds check is done here.
  void *GetCurrentHybridArgsAddr(uint32_t offset) {
    char *const base = static_cast<char *>(hybrid_addrs_);
    return base + offset;
  }
  void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
  int64_t GetFixedAddrsSize(string tensor_name);
  void *GetCurrentFixedAddr(int64_t offset) const {
@@ -494,7 +510,7 @@ class DavinciModel {
  Status MallocKnownArgs();
  Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
  Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
  Status UpdateKnownZeroCopyAddr();
  Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
  // Stores the flag in base_addr_not_changed_.
  void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) {
    base_addr_not_changed_ = base_addr_not_changed;
  }

  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -531,15 +547,6 @@ class DavinciModel {

  ///
  /// @ingroup ge
  /// @brief Save Batch label Info.
  /// @param [in] const OpDescPtr &op_desc
  /// @param [in] uintptr_t addr: address value in args block.
  /// @return None.
  ///
  void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr);

  ///
  /// @ingroup ge
  /// @brief Check input size and model op size.
  /// @param [in] const int64_t &input_size: input size.
  /// @param [in] const int64_t &op_size: model op size.
@@ -651,14 +658,6 @@ class DavinciModel {

  ///
  /// @ingroup ge
  /// @brief input zero copy node Initialize.
  /// @param [in] NodePtr: Data Op.
  /// @return Status
  ///
  Status InitInputZeroCopyNodes(const NodePtr &node);

  ///
  /// @ingroup ge
  /// @brief NetOutput Op Initialize.
  /// @param [in] NodePtr: NetOutput Op.
  /// @return Status
@@ -667,30 +666,6 @@ class DavinciModel {

  ///
  /// @ingroup ge
  /// @brief output zero copy node Initialize.
  /// @param [in] NodePtr: Data Op.
  /// @return Status
  ///
  Status InitOutputZeroCopyNodes(const NodePtr &node);

  ///
  /// @ingroup ge
  /// @brief input zero copy node Initialize for Case.
  /// @param [in] NodePtr: Data Op.
  /// @return Status
  ///
  Status InitInputBatchLabel(const NodePtr &node);

  ///
  /// @ingroup ge
  /// @brief output zero copy node Initialize for Case.
  /// @param [in] NodePtr: netoutput Op.
  /// @return Status
  ///
  Status InitOutputBatchLabel(const NodePtr &node);

  ///
  /// @ingroup ge
  /// @brief Constant Op Init.
  /// @return Status
  ///
@@ -837,6 +812,11 @@ class DavinciModel {

  void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

  Status InitModelProfile();
  Status SinkModelProfile();

  Status SinkTimeProfile(const InputData &current_data);

  Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
                             std::vector<ge::OutputTensorInfo> &outputs);

@@ -914,11 +894,6 @@ class DavinciModel {
  std::vector<ZeroCopyTask> zero_copy_tasks_;  // Task used Data or NetOutput addr.
  std::set<const void *> copy_only_addrs_;     // Address need copy to original place.

  // {op_id, batch_label}
  std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
  // {batch_label, addrs}
  std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_;

  std::vector<TaskInfoPtr> task_list_;
  // rt_model_handle
  rtModel_t rt_model_handle_;
@@ -977,6 +952,8 @@ class DavinciModel {
  void *args_ = nullptr;
  void *args_host_ = nullptr;
  void *fixed_addrs_ = nullptr;
  void *hybrid_addrs_ = nullptr;
  uint32_t total_hybrid_args_size_ = 0;
  int64_t total_fixed_addr_size_ = 0;
  std::map<const void *, void *> knonw_input_data_info_;
  std::map<const void *, void *> knonw_output_data_info_;
@@ -1016,6 +993,9 @@ class DavinciModel {
  // key: input_index: input is merge node; value: each gear info and each output shape
  std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
  std::vector<std::vector<int64_t>> all_gears_info_;

  std::multimap<uint32_t, uint32_t> op_id_map_;
  std::vector<ProfileInfo> profile_list_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -89,6 +89,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
  if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
    std::vector<uint64_t> v_aicpu_kernel;
    std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
    std::lock_guard<std::recursive_mutex> lock(map_mutex_);
    auto iter = model_aicpu_kernel_.find(model_key);
    if (iter != model_aicpu_kernel_.end()) {
      GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
@@ -176,7 +177,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
 }

 void ModelManager::DestroyAicpuSession(uint64_t session_id) {
  std::lock_guard<std::mutex> lock(sess_ids_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  auto it = sess_ids_.find(session_id);
  if (it == sess_ids_.end()) {
    GELOGI("The session: %lu not created.", session_id);
@@ -205,7 +206,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
 }

 ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
  if (hybrid_davinci_model != hybrid_model_map_.end()) {
    uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@@ -215,8 +216,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

  auto it = model_map_.find(model_id);
  if (it == model_map_.end()) {
    GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
    return GE_EXEC_MODEL_ID_INVALID;
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
    return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
  }
  uint64_t session_id = it->second->GetSessionId();
  DestroyAicpuSession(session_id);
@@ -225,7 +226,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

 ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
  GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
  if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
    Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
@@ -238,7 +239,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
 }

 ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  std::vector<uint64_t> v_aicpu_kernel;
  std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
  if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
@@ -250,7 +251,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
 }

 ModelManager::~ModelManager() {
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  model_map_.clear();
  model_aicpu_kernel_.clear();
  cust_aicpu_so_.clear();
@@ -358,18 +359,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

 void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  model_map_[id] = davinci_model;
 }

 void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  hybrid_model_map_[id] = hybrid_model;
 }

 Status ModelManager::DeleteModel(uint32_t id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);

  auto it = model_map_.find(id);
  auto hybrid_model_it = hybrid_model_map_.find(id);
@@ -384,22 +385,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
  } else if (hybrid_model_it != hybrid_model_map_.end()) {
    (void)hybrid_model_map_.erase(hybrid_model_it);
  } else {
    GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
    return GE_EXEC_MODEL_ID_INVALID;
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
    return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
  }

  return SUCCESS;
 }

 std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);

  auto it = model_map_.find(id);
  return (it == model_map_.end()) ? nullptr : it->second;
 }

 std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);

  auto it = hybrid_model_map_.find(id);
  return (it == hybrid_model_map_.end()) ? nullptr : it->second;
@@ -902,7 +903,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
  }

  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);

  davinci_model->SetModelDescVersion(new_model_desc);
@@ -970,8 +971,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
 }

 Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHECK_NOTNULL(davinci_model);
  auto davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetCurShape Failed, Invalid Model ID %u!", model_id);
  davinci_model->GetCurShape(batch_info, dynamic_type);
  return SUCCESS;
 }
@@ -984,7 +986,8 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
  }

  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHECK_NOTNULL(davinci_model);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetModelAttr Failed, Invalid Model ID %u!", model_id);
  davinci_model->GetModelAttr(dynamic_output_shape_info);
  return SUCCESS;
 }
@@ -994,9 +997,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
                                                       std::vector<uint32_t> &inputFormats,
                                                       std::vector<uint32_t> &outputFormats) {
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
                         model_id);

  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
      "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
  return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
 }

@@ -1011,18 +1013,14 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
 Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetAIPPInfo failed, invalid model_id is %u.",
                         model_id);

      "GetAIPPInfo failed, invalid model_id is %u.", model_id);
  return davinci_model->GetAIPPInfo(index, aipp_info);
 }

 Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetAIPPInfo failed, invalid model_id is %u.",
                         model_id);

      "GetAIPPInfo failed, invalid model_id is %u.", model_id);
  return davinci_model->GetAippType(index, type, aipp_index);
 }

@@ -1055,7 +1053,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
  mmTimespec timespec = mmGetTickCount();

  ModelHelper model_helper;
  Status ret = model_helper.LoadModel(model);
  Status ret = model_helper.LoadRootModel(model);
  if (model_helper.GetModelType()) {
    bool is_shape_unknown = false;
    GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
                      "CheckIsUnknownShape failed, model id:%u", model_id);
    if (is_shape_unknown || GetContext().GetHostExecFlag()) {
      return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
    }
  }
  if (ret != SUCCESS) {
    GELOGE(ret, "load model failed.");
    return ret;
@@ -1069,8 +1075,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    } catch (...) {
      GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
      return INTERNAL_ERROR;
      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    }
    ret = davinci_model->Assign(ge_model);
    if (ret != SUCCESS) {
@@ -1082,7 +1088,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
    int32_t device_id = 0;
    rtError_t rt_ret = rtGetDevice(&device_id);
    if (rt_ret != RT_ERROR_NONE || device_id < 0) {
      GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
      GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    davinci_model->SetDeviceId(device_id);
@@ -1214,7 +1220,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "Invalid model id %u, check weather model has been loaded or not.", model_id);
                         "Invalid model id %u, check whether model has been loaded or not.", model_id);

  if (davinci_model->NeedDestroyAicpuKernel()) {
    GELOGI("Start to destroy specified aicpu kernel.");
@@ -1237,7 +1243,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
 }

 Status ModelManager::CreateAicpuSession(uint64_t session_id) {
  std::lock_guard<std::mutex> lock(sess_ids_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  auto it = sess_ids_.find(session_id);
  // never been created by any model
  if (it == sess_ids_.end()) {
@@ -1456,8 +1462,7 @@ void ModelManager::GenModelId(uint32_t *id) {
  if (id == nullptr) {
    return;
  }

  std::lock_guard<std::mutex> lock(map_mutex_);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  *id = ++max_model_id_;
 }

--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -353,8 +353,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
  std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
  std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_;
  uint32_t max_model_id_;
  std::mutex map_mutex_;
  std::mutex sess_ids_mutex_;
  std::recursive_mutex map_mutex_;
  std::mutex session_id_create_mutex_;
  static::std::mutex exeception_infos_mutex_;
  uint64_t session_id_bias_;
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -90,20 +90,18 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
                  fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names;
                  fusion_op_info_.op_name = op_desc_->GetName());

  string session_graph_model_id;
  davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
  // get bin_file_key
  const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
  // new aicpu kernel(rtCpuKernelLaunch) no need to check function
  if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
    rtError_t rt_ret;
    rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
    rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
                                                    kernel_def.stub_func().c_str());
                    return RT_ERROR_TO_GE_STATUS(rt_ret););
  } else if (kernel_type_ == ccKernelType::TE) {
    rtError_t rt_ret;
    rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
    // get bin_file_key
    string session_graph_model_id;
    davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
    const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
    rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
                    GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
                    return RT_ERROR_TO_GE_STATUS(rt_ret););
@@ -372,7 +370,11 @@ Status KernelTaskInfo::SuperKernelDistribute() {
 Status KernelTaskInfo::Distribute() {
  GELOGD("KernelTaskInfo Distribute Start.");
  if (davinci_model_->IsKnownNode()) {
    args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
    if (kernel_type_ == ccKernelType::TE) {
      args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
    } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
      args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
    }
    GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
  }
  rtError_t rt_ret = RT_ERROR_NONE;
@@ -428,36 +430,31 @@ Status KernelTaskInfo::UpdateArgs() {
  const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
  vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
  vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
  vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

  vector<void *> io_addrs;
  if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
    io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
    io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
  io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
  io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
  if (kernel_type_ == ccKernelType::TE) {
    vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
    io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
  } else {
    string peer_input_name;
    if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
      uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
      if (output_index > output_data_addrs.size()) {
        GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
               output_data_addrs.size(), output_index);
        return FAILED;
      }
      io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
      for (size_t i = 0; i < output_data_addrs.size(); ++i) {
        if (i == output_index) {
          void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
          io_addrs.emplace_back(fixed_addr);
          continue;
        }
        io_addrs.emplace_back(output_data_addrs[i]);
      }
      io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
    davinci_model_->SetTotalIOAddrs(io_addrs);
  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
    davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
    uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
    auto addrs_size = sizeof(uint64_t) * io_addrs.size();
    errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
    if (sec_ret != EOK) {
      GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
      return FAILED;
    }
    // copy args to device
    rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  }

  davinci_model_->SetTotalIOAddrs(io_addrs);
  GELOGI("KernelTaskInfo::UpdateArgs success.");
  return SUCCESS;
 }
@@ -533,33 +530,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
 }

 Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  domi::KernelDef kernel_def = task_def.kernel();
  uint32_t args_size = kernel_def.args_size();
  args_offset_ = davinci_model->GetTotalArgsSize();
  davinci_model->SetTotalArgsSize(args_size);
  GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

  // get opcontext stored in model
  const domi::KernelDef &kernel_def = task_def.kernel();
  const domi::KernelContext &context = kernel_def.context();
  // get opdesc
  op_desc_ = davinci_model->GetOpByIndex(context.op_index());
  GE_CHECK_NOTNULL(op_desc_);
  // alloc fixed addr
  string peer_input_name;
  if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
    if (output_index > op_desc_->GetOutputsSize()) {
      GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
             output_index);
      return FAILED;
    }
    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
    auto tensor_desc = op_desc_->GetOutputDesc(output_index);
    int64_t tensor_size = 0;
    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
    GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
           fixed_addr_offset_);
  kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
  if (kernel_type_ == ccKernelType::TE) {
    uint32_t args_size = kernel_def.args_size();
    args_offset_ = davinci_model->GetTotalArgsSize();
    davinci_model->SetTotalArgsSize(args_size);
    GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
    hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
    davinci_model->SetHybridArgsSize(kernel_def.args_size());
    GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
  }
  return SUCCESS;
 }
@@ -888,7 +870,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
  }

  // copy args to new host memory
  std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
  args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
  GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
  errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
  if (sec_ret != EOK) {
@@ -896,8 +878,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
    return FAILED;
  }

  const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
  auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
  const auto &ext_info = kernel_def.kernel_ext_info();
  auto init_ret = InitAicpuTaskExtInfo(ext_info);
  if (init_ret != SUCCESS) {
    GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
    return init_ret;
  }
  GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
         op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

  aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
  aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

  if (davinci_model_->IsKnownNode()) {
    return SUCCESS;
  }
  const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
  vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
  vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
  vector<void *> io_addrs;
@@ -914,19 +911,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
    }
  }

  auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
  const auto &ext_info = kernel_def.kernel_ext_info();
  auto init_ret = InitAicpuTaskExtInfo(ext_info);
  if (init_ret != SUCCESS) {
    GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
    return init_ret;
  }
  GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
         op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

  aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
  aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

  // malloc device memory for args
  rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
@@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo {
  OpDescPtr op_desc_;
  DavinciModel *davinci_model_;
  uint32_t args_offset_ = 0;
  uint32_t hybrid_args_offset_ = 0;
  int64_t fixed_addr_offset_ = 0;
  std::unique_ptr<uint8_t[]> args_addr = nullptr;
  bool call_save_dump_ = false;

  // aicpu ext_info device mem
--- a/ge/graph/load/new_model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc
@@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
  addr_count_ = out_count;
 }

 bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
 void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
  const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
  bool set_batch_label_flag = false;
  for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
    auto &addrs_mapping_list = GetOutsideAddrs();
    auto args_addrs = addrs_mapping_list[out_count].find(outside_addr);
    if (args_addrs != addrs_mapping_list[out_count].end()) {
    auto args_addrs = outside_addrs_[out_count].find(outside_addr);
    if (args_addrs != outside_addrs_[out_count].end()) {
      GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid.");
      void *args_val = static_cast<uint8_t *>(args) + offset;
      args_addrs->second.push_back(args_val);
      GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
             args, offset);
      set_batch_label_flag = true;
    }
  }
  return set_batch_label_flag;
 }

 }  // namespace ge
--- a/ge/graph/load/new_model_manager/zero_copy_offset.h
+++ b/ge/graph/load/new_model_manager/zero_copy_offset.h
@@ -51,7 +51,7 @@ class ZeroCopyOffset {
                            const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag);
  void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
                             std::vector<void *> &tensor_addrs);
  bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);
  void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);

  // basic_addr of l2-fusion
  void *GetBasicAddr() const { return basic_addr_; }
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_task.cc
@@ -22,8 +22,6 @@
 #include "common/ge_compiler_options.h"

 namespace ge {
 const char *const kDefaultBatchLable = "Batch_default";

 // Creates a task called `name` over the args buffer `args` of `size` bytes; the updated flag starts as false.
 ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size)
    : name_(name),
      args_addr_(args),
      args_size_(size),
      is_updated_(false) {
 }

@@ -66,68 +64,23 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) {
  const uint8_t *data = static_cast<const uint8_t *>(info);
  args_info_.assign(data, data + size);

  GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
  GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
         args_addr_, args_size_, size);
 }

 /**
 * @ingroup ge
 * @brief Check whether an address is accepted for the given batch label.
 *        The address is accepted when nothing is recorded for batch_label, when it is recorded
 *        under batch_label, or when it is recorded under the default batch label.
 * @param [in] batch_addrs: dynamic batch addr info, keyed by batch label.
 * @param [in] batch_label: batch label.
 * @param [in] addr: address value to look up (UpdateTaskParam passes args_addr_ + offset).
 * @return: true / false
 */
 bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label,
                                     uintptr_t addr) {
  // Used for dynamic batch / resolution scene
  // Look the sets up in place: copying them on every call is wasted work, this runs once per offset.
  const auto dynamic_input_iter = batch_addrs.find(batch_label);
  if ((dynamic_input_iter == batch_addrs.end()) || dynamic_input_iter->second.empty()) {
    // Nothing recorded for this label, so there is nothing to filter on.
    return true;
  }
  if (dynamic_input_iter->second.count(addr) > 0) {
    return true;
  }

  // Not recorded under this label: still accepted if it belongs to the default (fixed) label.
  const auto fix_input_iter = batch_addrs.find(kDefaultBatchLable);
  return (fix_input_iter != batch_addrs.end()) && (fix_input_iter->second.count(addr) > 0);
 }

 /**
 * @ingroup ge
 * @brief Set user data addr to Task param.
 * @param [in] addr: virtual address value from Op.
 * @param [in] buffer_addr: real_data_buffer_addr from user.
 * @param [in] batch_addrs: dynamic batch addr info.
 * @param [in] batch_label: batch label.
 * @return: void
 */
 Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs,
                                     const string &batch_label) {
 Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) {
  auto iter = task_addr_offset_.find(addr);
  if (iter != task_addr_offset_.end()) {
    auto &cur_pair = *iter;
    uint8_t *args_info = args_info_.data();
    for (auto offset : cur_pair.second) {
      if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast<uintptr_t>(args_addr_ + offset))) {
        continue;
      }

      auto dst_addr = static_cast<uint8_t *>(buffer_addr);
      GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p",
             name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr);
--- a/ge/graph/load/new_model_manager/zero_copy_task.h
+++ b/ge/graph/load/new_model_manager/zero_copy_task.h
@@ -67,12 +67,9 @@ class ZeroCopyTask {
   * @brief Set user data addr to Task param.
   * @param [in] addr: virtual address value from Op.
   * @param [in] buffer_addr: data buffer_addr from user.
   * @param [in] batch_addrs: dynamic batch addr info.
   * @param [in] batch_label: batch label.
   * @return: 0 SUCCESS / others FAILED
   */
  ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs,
                             const string &batch_label);
  ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr);

  /**
   * @ingroup ge
@@ -91,9 +88,6 @@ class ZeroCopyTask {
    return batch_label_;
  }

 protected:
  bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr);

 private:
  const string name_;

--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -23,25 +23,15 @@
 #include <sstream>
 #include <string>
 #include <thread>
 #include <utility>

 #include "common/ge/ge_util.h"
 #include "common/math/math_util.h"
 #include "common/thread_pool.h"
 #include "common/util.h"
 #include "external/graph/types.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/ge_inner_error_codes.h"
 #include "framework/common/ge_types.h"
 #include "analyzer/analyzer.h"
 #include "graph/common/ge_call_wrapper.h"
 #include "graph/common/local_context.h"
 #include "graph/common/transop_util.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/ge_context.h"
 #include "graph/ge_global_options.h"
 #include "graph/ge_local_context.h"
 #include "graph/manager/graph_mem_allocator.h"
 #include "graph/manager/util/rt_context_util.h"
 #include "graph/partition/dynamic_shape_partition.h"
 #include "graph/passes/enter_pass.h"
@@ -61,8 +51,6 @@
 #include "graph/passes/dimension_adjust_pass.h"
 #include "graph/passes/dimension_compute_pass.h"
 #include "graph/passes/flow_ctrl_pass.h"
 #include "graph/passes/hccl_group_pass.h"
 #include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/passes/identity_pass.h"
 #include "graph/passes/input_output_connection_identify_pass.h"
 #include "graph/passes/iterator_op_pass.h"
@@ -77,7 +65,6 @@
 #include "graph/passes/permute_pass.h"
 #include "graph/passes/prune_pass.h"
 #include "graph/passes/ref_identity_delete_op_pass.h"
 #include "graph/passes/replace_with_empty_const_pass.h"
 #include "graph/passes/reshape_recovery_pass.h"
 #include "graph/passes/reshape_remove_pass.h"
 #include "graph/passes/same_transdata_breadth_fusion_pass.h"
@@ -87,13 +74,11 @@
 #include "graph/passes/switch_logic_remove_pass.h"
 #include "graph/passes/switch_to_stream_switch_pass.h"
 #include "graph/passes/transop_breadth_fusion_pass.h"
 #include "graph/passes/transop_depth_fusion_pass.h"
 #include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
 #include "graph/passes/transop_symmetry_elimination_pass.h"
 #include "graph/passes/transop_without_reshape_fusion_pass.h"
 #include "graph/passes/transpose_transdata_pass.h"
 #include "graph/passes/variable_op_pass.h"
 #include "graph/passes/variable_prepare_op_pass.h"
 #include "graph/passes/variable_ref_delete_op_pass.h"
 #include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
 #include "graph/passes/end_of_sequence_add_control_pass.h"
@@ -104,9 +89,6 @@
 #include "graph/passes/memcpy_addr_async_pass.h"
 #include "graph/build/label_allocator.h"
 #include "graph/utils/tensor_adapter.h"
 #include "graph/utils/type_utils.h"
 #include "graph/graph_util.h"
 #include "graph/types.h"
 #include "inc/pass_manager.h"
 #include "init/gelib.h"
 #include "ir_build/atc_ir_common.h"
@@ -550,7 +532,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
      (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
    }
    std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
                                            compute_graph->GetGraphID(), subgraph, compute_graph, session_id,
                                            compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id,
                                            GetThreadLocalContext());
    if (!f.valid()) {
      GELOGE(FAILED, "Future is invalid");
@@ -565,7 +547,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
        (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
      }
      std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
                                              compute_graph->GetGraphID(), subgraph, compute_graph, session_id,
                                              compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id,
                                              GetThreadLocalContext());
      if (!f.valid()) {
        GELOGE(FAILED, "Future is invalid");
@@ -2471,7 +2453,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra

 Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id,
                                                     const SubGraphInfoPtr &sub_graph_info_ptr,
                                                     const ComputeGraphPtr &compute_graph, uint64_t session_id,
                                                     const std::string &root_graph_name,
                                                     uint64_t session_id,
                                                     const GEThreadLocalContext &ge_context) {
  if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) {
    GetContext().SetSessionId(session_id);
@@ -2488,9 +2471,13 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
      GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id);
      return FAILED;
    }
    if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) {
      GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \
             root_graph_name: %s.", root_graph_name.c_str());
      return FAILED;
    }
    compute_graph_tmp->SetSessionID(session_id);
    Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp,
                                                                                            compute_graph,
                                                                                            engine_name);
    if (ret != SUCCESS) {
      GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str());
--- a/ge/graph/manager/graph_manager.h
+++ b/ge/graph/manager/graph_manager.h
@@ -219,7 +219,8 @@ class GraphManager {

  static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id,
                                                const SubGraphInfoPtr &sub_graph_info_ptr,
                                                const ComputeGraphPtr &compute_graph, uint64_t session_id,
                                                const std::string &root_graph_name,
                                                uint64_t session_id,
                                                const GEThreadLocalContext &ge_context);
  Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor);
  void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);
--- a/ge/graph/manager/graph_mem_allocator.cc
+++ b/ge/graph/manager/graph_mem_allocator.cc
@@ -16,10 +16,7 @@

 #include "graph/manager/graph_mem_allocator.h"

 #include <set>
 #include <string>

 #include "framework/common/debug/ge_log.h"
 #include "graph/manager/graph_caching_allocator.h"
 #include "graph/manager/rdma_pool_allocator.h"

--- a/ge/graph/manager/memory_api.cc
+++ b/ge/graph/manager/memory_api.cc
@@ -63,7 +63,7 @@ Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t
  });

  auto hcom_remote_mem_register =
      (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register");
      (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "HcomRegRemoteAccessMem");
  if (hcom_remote_mem_register == nullptr) {
    GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function.");
    return FAILED;
--- a/ge/graph/optimize/graph_optimize.cc
+++ b/ge/graph/optimize/graph_optimize.cc
@@ -76,7 +76,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {
  }
 }

 Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph,
 Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph,
                                       const std::string &engine_name) {
  if (compute_graph == nullptr) {
    GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr.");
@@ -106,10 +106,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const Com
      for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) {
        Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph));
        if (ret != SUCCESS) {
          auto root_graph = ge::GraphUtils::FindRootGraph(parent_graph);
          if (root_graph != nullptr) {
            ErrorManager::GetInstance().SaveMstuneCompileFailedMsg(root_graph->GetName());
          }
          GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret);
          return ret;
        }
--- a/ge/graph/optimize/graph_optimize.h
+++ b/ge/graph/optimize/graph_optimize.h
@@ -42,8 +42,7 @@ class GraphOptimize {
  ~GraphOptimize() = default;

  // subgraph optimize
  Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph,
                          const std::string &engine_name);
  Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name);

  // original graph optimize
  Status OptimizeOriginalGraph(ComputeGraphPtr &compute_graph);
--- a/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc
+++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc
@@ -113,6 +113,17 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) {
  GE_CHECK_NOTNULL(op_desc);
  std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim};
  GeShape dynamic_shape(dynamic_shape_dims);
  bool reset_shape_flag = false;
  if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) {
    (void)ResetOutputTensorShape(op_desc, dynamic_shape);
  }
  return SUCCESS;
 }

 Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape,
                                                            bool &reset_shape_flag) {
  reset_shape_flag = false;
  GE_CHECK_NOTNULL(op_desc);
  for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) {
    auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
    GE_CHECK_NOTNULL(input_desc);
@@ -125,8 +136,14 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) {
    if (CheckIfConstInput(input_desc)) {
      continue;
    }
    reset_shape_flag = true;
    input_desc->SetShape(dynamic_shape);
  }
  return SUCCESS;
 }

 Status DynamicSingleOpResetShapePass::ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape) {
  GE_CHECK_NOTNULL(op_desc);
  for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) {
    auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
    GE_CHECK_NOTNULL(output_desc);
--- a/ge/graph/passes/dynamic_single_op_reset_shape_pass.h
+++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.h
@@ -27,6 +27,8 @@ class DynamicSingleOpResetShapePass : public GraphPass {

 private:
  Status ResetOpShape(OpDescPtr &op_desc);
  Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag);
  Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
  Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu);
  bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc);
 };
--- a/ge/graph/passes/switch_to_stream_switch_pass.cc
+++ b/ge/graph/passes/switch_to_stream_switch_pass.cc
@@ -17,13 +17,8 @@
 #include "graph/passes/switch_to_stream_switch_pass.h"
 #include <stack>
 #include "common/ge/ge_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/debug/log.h"
 #include "framework/common/ge_inner_error_codes.h"
 #include "framework/common/types.h"
 #include "ge/ge_api_types.h"
 #include "graph/common/omg_util.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/ge_context.h"
 #include "graph/utils/type_utils.h"

@@ -125,12 +120,13 @@ void SwitchToStreamSwitchPass::MarkCycleDependence(
      if (visited.count(tmp_node) > 0) {
        continue;
      }
      GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str());
      for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) {
        if (switch_nodes.find(out_node) == switch_nodes.end()) {
          out_nodes.push(out_node);
          continue;
        }
        GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.",
               tmp_node->GetName().c_str(), out_node->GetName().c_str());
        GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS,
                        GELOGW("set cyclic dependence attr failed."); return );
        auto map_iter = switch_cyclic_map_.find(out_node);
@@ -602,7 +598,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons
 ///
 Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node,
                                                        const std::set<NodePtr> &same_cond_switch) {
  GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
  GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
         cast_node->GetName().c_str());
  std::string orig_switch_name = switch_node->GetName();
  OpDescPtr switch_desc = switch_node->GetOpDesc();
@@ -653,7 +649,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no
 ///
 Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch,
                                                         const NodePtr &active_node) {
  GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
  GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
         stream_switch->GetName().c_str(), active_node->GetName().c_str());
  auto find_res = switch_node_map_.find(switch_node);
  GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), {
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -18,7 +18,6 @@
 #include <map>
 #include <set>
 #include <string>
 #include <utility>
 #include "common/formats/format_transfers/format_transfer_fractal_nz.h"
 #include "common/formats/format_transfers/format_transfer_fractal_z.h"
 #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
@@ -28,13 +27,9 @@
 #include "common/helper/model_helper.h"
 #include "common/math/math_util.h"
 #include "common/op/ge_op_utils.h"
 #include "common/util/error_manager/error_manager.h"
 #include "common/formats/utils/formats_trans_utils.h"
 #include "framework/common/debug/ge_log.h"
 #include "graph/common/ge_call_wrapper.h"
 #include "graph/common/local_context.h"
 #include "graph/common/transop_util.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/ge_context.h"
 #include "graph/shape_refiner.h"
 #include "graph/manager/graph_var_manager.h"
@@ -44,29 +39,21 @@
 #include "graph/passes/aicpu_constant_folding_pass.h"
 #include "graph/passes/assert_pass.h"
 #include "graph/passes/assign_pass.h"
 #include "graph/passes/base_pass.h"
 #include "graph/passes/common_subexpression_elimination_pass.h"
 #include "graph/passes/cond_pass.h"
 #include "graph/passes/cond_remove_pass.h"
 #include "graph/passes/constant_folding_pass.h"
 #include "graph/passes/constant_fuse_same_pass.h"
 #include "graph/passes/control_trigger_pass.h"
 #include "graph/passes/dimension_adjust_pass.h"
 #include "graph/passes/dimension_compute_pass.h"
 #include "graph/passes/dropout_pass.h"
 #include "graph/passes/enter_pass.h"
 #include "graph/passes/flow_ctrl_pass.h"
 #include "graph/passes/for_pass.h"
 #include "graph/passes/get_original_format_pass.h"
 #include "graph/passes/guarantee_const_pass.h"
 #include "graph/passes/hccl_group_pass.h"
 #include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/passes/identity_pass.h"
 #include "graph/passes/infershape_pass.h"
 #include "graph/passes/iterator_op_pass.h"
 #include "graph/passes/merge_pass.h"
 #include "graph/passes/net_output_pass.h"
 #include "graph/passes/next_iteration_pass.h"
 #include "graph/passes/no_use_reshape_remove_pass.h"
 #include "graph/passes/parallel_concat_start_op_pass.h"
 #include "graph/passes/placeholder_with_default_pass.h"
@@ -81,45 +68,18 @@
 #include "graph/passes/shape_operate_op_remove_pass.h"
 #include "graph/passes/snapshot_pass.h"
 #include "graph/passes/stop_gradient_pass.h"
 #include "graph/passes/subgraph_pass.h"
 #include "graph/passes/switch_data_edges_bypass.h"
 #include "graph/passes/switch_dead_branch_elimination.h"
 #include "graph/passes/switch_logic_remove_pass.h"
 #include "graph/passes/merge_to_stream_merge_pass.h"
 #include "graph/passes/switch_to_stream_switch_pass.h"
 #include "graph/passes/attach_stream_label_pass.h"
 #include "graph/passes/unused_const_pass.h"
 #include "graph/passes/unused_op_remove_pass.h"
 #include "graph/passes/var_is_initialized_op_pass.h"
 #include "graph/passes/variable_prepare_op_pass.h"
 #include "graph/preprocess/insert_op/util_insert_aipp_op.h"
 #include "graph/types.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/type_utils.h"
 #include "inc/pass_manager.h"
 #include "init/gelib.h"
 #include "multi_batch_copy_graph.h"
 #include "runtime/dev.h"

 #include "graph/passes/dimension_adjust_pass.h"
 #include "graph/passes/link_gen_mask_nodes_pass.h"
 #include "graph/passes/permute_pass.h"
 #include "graph/passes/reshape_remove_pass.h"
 #include "graph/passes/same_transdata_breadth_fusion_pass.h"
 #include "graph/passes/transop_breadth_fusion_pass.h"
 #include "graph/passes/transop_depth_fusion_pass.h"
 #include "graph/passes/transop_nearby_allreduce_fusion_pass.h"

 #include "graph/passes/cast_remove_pass.h"
 #include "graph/passes/data_pass.h"
 #include "graph/passes/transop_without_reshape_fusion_pass.h"
 #include "graph/passes/transpose_transdata_pass.h"
 #include "graph/passes/variable_op_pass.h"
 #include "graph/passes/variable_prepare_op_pass.h"
 #include "graph/passes/variable_ref_delete_op_pass.h"
 #include "graph/passes/mark_agnostic_pass.h"


 namespace ge {
 namespace {
 static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
--- a/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -1407,11 +1407,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() {
 }

 Status ProcessMultiBatch(ComputeGraphPtr &graph) {
  const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE");
  if (multi_batch_with_case != nullptr) {
    PassManager pass_manager;
    GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
    return pass_manager.Run(graph);
  if (GetLocalOmgContext().dynamic_node_type.empty()) {
    const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
    if (multi_batch_with_switchn == nullptr) {
      PassManager pass_manager;
      GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
      return pass_manager.Run(graph);
    }
  }
  if (!GetLocalOmgContext().need_multi_batch) {
    GELOGI("No need to process_multi for no_train graph.");
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -18,6 +18,7 @@
 #include <chrono>
 #include "framework/common/debug/log.h"
 #include "graph/compute_graph.h"
 #include "graph/utils/tensor_utils.h"
 #include "hybrid_execution_context.h"
 #include "subgraph_context.h"

@@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
         this->num_pending_shapes_);
 }

 Status ShapeInferenceState::UpdateInputShape(int idx,
                                             const GeShape &ori_shape,
                                             const GeShape &shape) {
 Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
  if (node_item.IsInputShapeStatic(idx)) {
    GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
           node_item.NodeName().c_str(),
           idx,
           node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
           shape.ToString().c_str());
           target.GetShape().ToString().c_str());
    return SUCCESS;
  }

  GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
  int64_t tensor_size = -1;
  (void) TensorUtils::GetSize(target, tensor_size);
  GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
         node_item.NodeName().c_str(),
         idx,
         shape.ToString().c_str(),
         ori_shape.ToString().c_str());
         target.GetShape().ToString().c_str(),
         target.GetOriginShape().ToString().c_str(),
         tensor_size);

  std::lock_guard<std::mutex> lk(mu_);
  auto tensor_desc = node_item.MutableInputDesc(idx);
  GE_CHECK_NOTNULL(tensor_desc);
  tensor_desc->SetShape(shape);
  tensor_desc->SetOriginShape(ori_shape);
  tensor_desc->SetShape(target.GetShape());
  tensor_desc->SetOriginShape(target.GetOriginShape());
  (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
  if (--num_pending_shapes_ == 0) {
    ready_cv_.notify_all();
  }
@@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
  for (auto &p : shape_futures) {
    auto idx = p.first;
    auto &future = p.second;
    GeShape shape;
    GeShape ori_shape;
    RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
    GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
                      "[%s] Get shape failed. index = %u",
                      node_item.NodeName().c_str(),
                      idx);
    auto src_tensor_desc = future.GetTensorDesc();
    GE_CHECK_NOTNULL(src_tensor_desc);
    RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);

    GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
           node_item.NodeName().c_str(),
           idx,
           shape.ToString().c_str(),
           ori_shape.ToString().c_str());
    auto input_desc = node_item.MutableInputDesc(idx);
    GE_CHECK_NOTNULL(input_desc);
    input_desc->SetShape(std::move(shape));
    input_desc->SetOriginShape(ori_shape);
    int64_t tensor_size = -1;
    (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
    // tensor_size is an int64_t holding the tensor size: print it with %ld and label it as a size,
    // the same way the log in UpdateInputShape does (%zu with a signed 64-bit argument is a format mismatch).
    GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], size = %ld",
           node_item.NodeName().c_str(),
           idx,
           src_tensor_desc->GetShape().ToString().c_str(),
           src_tensor_desc->GetOriginShape().ToString().c_str(),
           tensor_size);
    input_desc->SetShape(src_tensor_desc->GetShape());
    input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
    (void) TensorUtils::SetSize(*input_desc, tensor_size);
  }

  return SUCCESS;
@@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
  GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
  return SUCCESS;
 }

 // Awaits the source node through the subgraph context, then returns the descriptor of its output
 // `src_index_`. Returns nullptr when the wait fails or when the source node has no OpDesc, so callers
 // must null-check the result.
 GeTensorDescPtr ShapeFuture::GetTensorDesc() {
  GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
  if (!subgraph_context_->Await(src_node_)) {
    GELOGE(INTERNAL_ERROR, "cancelled");
    return nullptr;
  }
  // Guard the dereference: fall back to the same nullptr failure path instead of crashing.
  const auto op_desc = src_node_->GetOpDesc();
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "OpDesc of node %s is null", src_node_->GetName().c_str());
    return nullptr;
  }
  return op_desc->MutableOutputDesc(src_index_);
 }
 }  // namespace hybrid
 }  // namespace ge
--- a/ge/hybrid/executor/node_state.h
+++ b/ge/hybrid/executor/node_state.h
@@ -35,6 +35,7 @@ class ShapeFuture {
  ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
  ~ShapeFuture() = default;
  Status Get(GeShape &ori_shape, GeShape &shape);
  GeTensorDescPtr GetTensorDesc();

 private:
  NodePtr src_node_;
@@ -45,7 +46,7 @@ class ShapeFuture {
 struct ShapeInferenceState {
  explicit ShapeInferenceState(const NodeItem &node_item);

  Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);
  Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);

  void UpdateInputShapeFuture(int idx, ShapeFuture &&future);

--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
      GE_CHECK_NOTNULL(tensor_desc);
      auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
      GE_CHECK_NOTNULL(node_state);
      node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
      node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
    }
  }

@@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
  } else {
    node_state.SetKernelTask(node_item.kernel_task);
  }

  GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
  GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
                    "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
  GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
  return SUCCESS;
 }

--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -20,12 +20,9 @@
 #include "graph/utils/tensor_adapter.h"
 #include "graph/debug/ge_attr_define.h"
 #include "hybrid/node_executor/node_executor.h"
 #include "common/dump/dump_manager.h"
 #include "hybrid/executor//worker//shape_inference_engine.h"
 #include "common/dump/dump_op.h"
 #include "common/types.h"
 #include "common/ge_types.h"
 #include "common/profiling/profiling_manager.h"
 #include "runtime/base.h"

 namespace ge {
 namespace hybrid {
@@ -348,6 +345,10 @@ Status NodeDoneCallback::OnNodeDone() {
  }

  GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
  if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
    // update output tensor sizes
    GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
  }
  // PropagateOutputs for type == DEPEND_COMPUTE
  if (node_item.shape_inference_type == DEPEND_COMPUTE) {
    if (graph_context_->trace_enabled) {
--- a/ge/hybrid/executor/worker/shape_inference_engine.cc
+++ b/ge/hybrid/executor/worker/shape_inference_engine.cc
@@ -17,9 +17,15 @@
 #include "hybrid/executor/worker/shape_inference_engine.h"
 #include "graph/shape_refiner.h"
 #include "graph/utils/node_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/type_utils.h"
 #include "common/math/math_util.h"
 #include "hybrid/node_executor/node_executor.h"

 namespace ge {
 namespace {
 // NOTE(review): presumably the byte alignment applied when computing tensor sizes -- confirm at the use sites.
 constexpr int kAlignment = 32;
 }  // namespace
 namespace hybrid {
 ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
    : execution_context_(execution_context),
@@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
  }

  if (node_item.fused_subgraph != nullptr) {
    return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
    GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
    GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
    return SUCCESS;
  }

  // Skip shape inference for node of type DEPEND_COMPUTE
@@ -63,21 +71,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
    std::lock_guard<std::mutex> lk(mu_);
    RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
    GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
        "Invoke InferShapeAndType failed.");
                      "Invoke InferShapeAndType failed.");
    RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
  }
  // Check again to make sure shape is valid after shape inference
  if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
    bool is_unknown_shape = false;
    GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
                      "Failed to get shape status. node = %s",
                      node_item.NodeName().c_str());

    GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
                           INTERNAL_ERROR,
                           "[%s] Shape is still unknown after shape inference.",
                           node_item.NodeName().c_str());
  }
  // update output tensor sizes after shape inference
  // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
  RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
  GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
  RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");

  GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
         node_item.NodeName().c_str(),
@@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
  // propagate each output
  for (int i = 0; i < node_item.num_outputs; ++i) {
    auto output_desc = node_item.op_desc->MutableOutputDesc(i);
    const auto &shape = output_desc->MutableShape();
    const auto &ori_shape = output_desc->GetOriginShape();
    auto &output_nodes = node_item.outputs[i];

    // propagate output to all sub-inputs
@@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
        infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
                                           std::move(future));
      } else {
        GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
                                                             ori_shape,
                                                             shape));
        GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
      }
    }
  }
@@ -230,5 +228,92 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
  }
  return SUCCESS;
 }

 // Makes `shape` usable for size calculation when the shape of `tensor_desc` is unknown.
 //
 // If the tensor's shape is not unknown, `shape` is left untouched and SUCCESS is returned.
 // If it is unknown and `fallback_with_range` is false, INTERNAL_ERROR is returned.
 // Otherwise every entry of `shape` equal to ge::UNKNOWN_DIM is overwritten with the `second`
 // member of the matching shape-range pair (presumably the upper bound of the range -- confirm).
 // INTERNAL_ERROR is also returned when the number of ranges differs from the number of dims.
 Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
                                                std::vector<int64_t> &shape,
                                                bool fallback_with_range) {
  const auto &tensor_shape = tensor_desc.MutableShape();
  // Nothing to canonicalize for a fully known shape.
  if (!tensor_shape.IsUnknownShape()) {
    return SUCCESS;
  }

  if (!fallback_with_range) {
    GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
           tensor_shape.ToString().c_str());
    return INTERNAL_ERROR;
  }

  GELOGD("Calc output size by range");
  std::vector<std::pair<int64_t, int64_t>> shape_range;
  GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
  const size_t num_dims = shape.size();
  if (shape_range.size() != num_dims) {
    GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
           shape_range.size(),
           num_dims);
    return INTERNAL_ERROR;
  }

  // Substitute each unknown dim with the value taken from its range.
  for (size_t i = 0; i < num_dims; ++i) {
    int64_t &dim = shape[i];
    if (dim == ge::UNKNOWN_DIM) {
      dim = shape_range[i].second;
    }
  }

  GELOGD("After canonicalization, shape = [%s], before = [%s]",
         GeShape(shape).ToString().c_str(),
         tensor_shape.ToString().c_str());
  return SUCCESS;
 }

 // Computes the size of a tensor with element type `data_type` and dims `shape`, rounded up to a
 // multiple of kAlignment, and stores it in `tensor_size`.
 //
 // Returns INTERNAL_ERROR when the length of `data_type` cannot be obtained. A negative dim or an
 // overflow in the multiplication / alignment addition makes the corresponding check macro
 // return early. `tensor_size` is used as the running accumulator, so it may hold a partial
 // product when the function fails.
 Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
                                             const std::vector<int64_t> &shape,
                                             int64_t &tensor_size) {
  GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
  uint32_t element_size;
  const bool has_type_length = TypeUtils::GetDataTypeLength(data_type, element_size);
  if (!has_type_length) {
    GELOGE(INTERNAL_ERROR, "Failed to get data type size");
    return INTERNAL_ERROR;
  }

  // Start from the element length and multiply in one dim at a time, checking before each step.
  tensor_size = element_size;
  for (size_t i = 0; i < shape.size(); ++i) {
    const int64_t dim = shape[i];
    GE_CHECK_GE(dim, 0);
    GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
                      "Shape size overflow, shape = [%s]",
                      GeShape(shape).ToString().c_str());
    tensor_size *= dim;
  }

  // Round up to the next multiple of kAlignment; the addition is checked for overflow first.
  GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
                    "Tensor size is too large: %ld, shape = [%s]",
                    tensor_size,
                    GeShape(shape).ToString().c_str());
  const int64_t num_aligned_units = (tensor_size + kAlignment - 1) / kAlignment;
  tensor_size = num_aligned_units * kAlignment;
  return SUCCESS;
 }

 // Recomputes and stores the tensor size of every output descriptor of `node_item`.
 //
 // For each output, a copy of its dims is canonicalized (unknown dims may be replaced using the
 // shape range when `fallback_with_range` is true), the aligned tensor size is calculated from
 // that copy, and the result is written to the descriptor via TensorUtils::SetSize. The shape
 // stored in the descriptor itself is not modified here. The first failure is returned.
 Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
  auto op_desc = node_item.GetOpDesc();
  for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) {
    auto tensor_desc = op_desc->MutableOutputDesc(i);
    GE_CHECK_NOTNULL(tensor_desc);

    // Work on a copy of the dims so canonicalization does not touch the descriptor's shape.
    std::vector<int64_t> dims = tensor_desc->MutableShape().GetDims();
    GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
                      "[%s] Failed to canonicalize shape for output %zu",
                      node_item.NodeName().c_str(),
                      i);

    int64_t aligned_size = 0;
    GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, aligned_size),
                      "[%s] Failed to calc tensor size for output %zu",
                      node_item.NodeName().c_str(),
                      i);
    GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), i, aligned_size);
    // The result of SetSize is deliberately ignored.
    (void) TensorUtils::SetSize(*tensor_desc, aligned_size);
  }

  return SUCCESS;
 }
 }  // namespace hybrid
 }  // namespace ge
--- a/ge/hybrid/executor/worker/shape_inference_engine.h
+++ b/ge/hybrid/executor/worker/shape_inference_engine.h
@@ -34,7 +34,11 @@ class ShapeInferenceEngine {

  Status PropagateOutputShapes(const NodeItem &node_item);

  static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);

 private:
  static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
  static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
  static Status UpdatePeerNodeShape(const Node &node);
  Status AwaitDependentNodes(NodeState &node_state);

--- a/ge/hybrid/model/node_item.cc
+++ b/ge/hybrid/model/node_item.cc
@@ -22,6 +22,7 @@
 #include "graph/debug/ge_attr_define.h"
 #include "graph/utils/node_utils.h"
 #include "hybrid/node_executor/node_executor.h"
 #include "hybrid/executor/worker/shape_inference_engine.h"

 namespace ge {
 namespace hybrid {
@@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
    GE_CHECK_NOTNULL(dst_op_desc);
    auto in_idx = node_and_anchor.second->GetIdx();
    auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
    fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
    fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
    GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
  }

@@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
    return FAILED;
  }

  fused_subgraph.output_mapping.emplace(parent_index, op_desc);
  fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
  return SUCCESS;
 }

@@ -126,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
  return SUCCESS;
 }

 Status NodeItem::Init() {
  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
  num_inputs = static_cast<int>(op_desc->GetInputsSize());
  num_outputs = static_cast<int>(op_desc->GetOutputsSize());

 void NodeItem::ResolveOptionalInputs() {
  if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
    has_optional_inputs = true;
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -143,7 +139,18 @@ Status NodeItem::Init() {
      }
    }
  }
 }

 // Caches the input/output counts of op_desc in num_inputs / num_outputs and resolves optional
 // inputs. Returns SUCCESS unless one of the range checks below returns early.
 Status NodeItem::InitInputsAndOutputs() {
  // The counts are stored as int, so make sure they fit before narrowing.
  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
  num_inputs = static_cast<int>(op_desc->GetInputsSize());
  num_outputs = static_cast<int>(op_desc->GetOutputsSize());
  ResolveOptionalInputs();
  return SUCCESS;
 }

 Status NodeItem::ResolveDynamicState() {
  (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
  GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
  if (!is_dynamic) {
@@ -151,38 +158,54 @@ Status NodeItem::Init() {
                      "[%s] Failed to get shape status.",
                      node->GetName().c_str());
  }
  return SUCCESS;
 }

  if (is_dynamic) {
    for (int i = 0; i < num_inputs; ++i) {
      const auto &input_desc = MutableInputDesc(i);
      GE_CHECK_NOTNULL(input_desc);
      if (input_desc->MutableShape().IsUnknownShape()) {
        is_input_shape_static_.push_back(false);
      } else {
        num_static_input_shapes++;
        is_input_shape_static_.push_back(true);
        GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
               NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
      }
 Status NodeItem::ResolveStaticInputsAndOutputs() {
  for (int i = 0; i < num_inputs; ++i) {
    const auto &input_desc = MutableInputDesc(i);
    GE_CHECK_NOTNULL(input_desc);
    if (input_desc->MutableShape().IsUnknownShape()) {
      is_input_shape_static_.push_back(false);
    } else {
      num_static_input_shapes++;
      is_input_shape_static_.push_back(true);
      GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
             NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
    }
  }

    for (int i = 0; i < num_outputs; ++i) {
      const auto &output_desc = op_desc->MutableOutputDesc(i);
      GE_CHECK_NOTNULL(output_desc);
      if (output_desc->MutableShape().IsUnknownShape()) {
        is_output_shape_static = false;
        break;
      }
  for (int i = 0; i < num_outputs; ++i) {
    const auto &output_desc = op_desc->MutableOutputDesc(i);
    GE_CHECK_NOTNULL(output_desc);
    if (output_desc->MutableShape().IsUnknownShape()) {
      is_output_shape_static = false;
      break;
    }
  }

    if (IsControlOp() || node_type == PARTITIONEDCALL) {
      shape_inference_type = DEPEND_COMPUTE;
    } else {
      int32_t unknown_shape_type_val = 0;
      (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
      shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
    }
  if (is_output_shape_static) {
    GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
  }
  return SUCCESS;
 }

 // Determines shape_inference_type for this node.
 // Control ops and PARTITIONEDCALL nodes are always DEPEND_COMPUTE; any other node takes the value
 // of its ATTR_NAME_UNKNOWN_SHAPE_TYPE attribute, falling back to 0 when the attribute is not read.
 void NodeItem::ResolveUnknownShapeType() {
  if (IsControlOp() || node_type == PARTITIONEDCALL) {
    shape_inference_type = DEPEND_COMPUTE;
    return;
  }

  // The result of GetInt is ignored on purpose: the default of 0 is kept if the lookup fails.
  int32_t attr_value = 0;
  (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, attr_value);
  shape_inference_type = static_cast<UnknowShapeOpType>(attr_value);
 }

 Status NodeItem::Init() {
  GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
  GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
  if (is_dynamic) {
    ResolveUnknownShapeType();
    GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
    GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
  }

--- a/ge/hybrid/model/node_item.h
+++ b/ge/hybrid/model/node_item.h
@@ -103,6 +103,11 @@ struct NodeItem {
 private:
  explicit NodeItem(NodePtr node);
  Status Init();
  Status InitInputsAndOutputs();
  void ResolveOptionalInputs();
  Status ResolveDynamicState();
  Status ResolveStaticInputsAndOutputs();
  void ResolveUnknownShapeType();

  std::vector<bool> is_input_shape_static_;
  std::vector<uint32_t> input_desc_indices_;
--- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc
+++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc
@@ -42,10 +42,10 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
    GELOGE(FAILED, "hccl handle is nullptr! ");
    return FAILED;
  }
  auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function<void(HcclResult status)>))dlsym(
      context.handle_, "EnqueueHcomOpertion");
  if (EnqueueHcomOpertion == nullptr) {
    GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function.");
  auto HcomExecEnqueueOperation = (HcclResult(*)(HcomOperation, std::function<void(HcclResult status)>))dlsym(
      context.handle_, "HcomExecEnqueueOperation");
  if (HcomExecEnqueueOperation == nullptr) {
    GELOGE(FAILED, "Failed to invoke HcomExecEnqueueOperation hcom unknown node function.");
    if (dlclose(context.handle_) != 0) {
      GELOGW("Failed to close handle %s", dlerror());
    }
@@ -70,7 +70,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
  const OpDescPtr op_desc = node_item.GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);

  HcomOpertion op_info;
  HcomOperation op_info;
  op_info.hcclType = op_desc->GetType();
  op_info.inputPtr = inputs.empty() ? nullptr : inputs[0];
  op_info.outputPtr = outputs.empty() ? nullptr : outputs[0];
@@ -96,7 +96,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
  op_info.root = root_id;
  auto callback = [this, op_desc](HcclResult status) {
    if (status != HCCL_SUCCESS) {
      GELOGE(HCCL_E_INTERNAL, "node %s call EnqueueHcomOpertion failed, ret: 0x%X", op_desc->GetName().c_str(), status);
      GELOGE(HCCL_E_INTERNAL, "node %s call HcomExecEnqueueOperation failed, ret: 0x%X", op_desc->GetName().c_str(), status);
    }
    std::lock_guard<std::mutex> lock(this->hccl_mutex_);
    this->cond_.notify_all();
@@ -110,9 +110,9 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
         context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root);
  op_info.count = count;

  HcclResult hccl_ret = EnqueueHcomOpertion(op_info, callback);
  HcclResult hccl_ret = HcomExecEnqueueOperation(op_info, callback);
  if (hccl_ret != HCCL_SUCCESS) {
    GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret);
    GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret);
    return HCCL_E_INTERNAL;
  }

@@ -213,11 +213,11 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector<HcomRemoteAccess

 Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
  GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName());
  auto EnqueueRemoteAccess =
  auto HcomExecEnqueueRemoteAccess =
      (HcclResult(*)(const string &, const vector<HcomRemoteAccessAddrInfo> &,
                     std::function<void(HcclResult status)>))dlsym(context.handle_, "EnqueueRemoteAccess");
  if (EnqueueRemoteAccess == nullptr) {
    GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function.");
                     std::function<void(HcclResult status)>))dlsym(context.handle_, "HcomExecEnqueueRemoteAccess");
  if (HcomExecEnqueueRemoteAccess == nullptr) {
    GELOGE(FAILED, "Failed to invoke HcomExecEnqueueRemoteAccess hcom unknown node function.");
    if (dlclose(context.handle_) != 0) {
      GELOGW("Failed to close handle %s", dlerror());
    }
@@ -228,15 +228,15 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do

  auto callback = [this](HcclResult status) {
    if (status != HCCL_SUCCESS) {
      GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", status);
      GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", status);
    }
    std::lock_guard<std::mutex> lock(this->hccl_mutex_);
    this->cond_.notify_all();
    GELOGI("rdma callback success.");
  };
  HcclResult hccl_ret = EnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback);
  HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback);
  if (hccl_ret != HCCL_SUCCESS) {
    GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret);
    GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret);
    return HCCL_E_INTERNAL;
  }

@@ -307,32 +307,32 @@ Status HcclNodeExecutor::Initialize() {
    GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror());
    return FAILED;
  }
  auto HcomExcutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize");
  if (HcomExcutorInitialize == nullptr) {
    GELOGE(FAILED, "Failed to invoke HcomExcutorInitialize hcom unknown node function.");
  auto HcomExecInitialize = (HcclResult(*)())dlsym(handle_, "HcomExecInitialize");
  if (HcomExecInitialize == nullptr) {
    GELOGE(FAILED, "Failed to invoke HcomExecInitialize hcom unknown node function.");
    return FAILED;
  }
  HcclResult hccl_ret = HcomExcutorInitialize();
  HcclResult hccl_ret = HcomExecInitialize();
  if (hccl_ret == HCCL_E_PTR) {
    GELOGI("Hccl comm is null, hcom executor initialize is not required.");
  } else if (hccl_ret == HCCL_SUCCESS) {
    GELOGI("Hcom executor initialize success.");
  } else {
    GELOGE(FAILED, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret);
    GELOGE(FAILED, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret);
    return FAILED;
  }
  return SUCCESS;
 }

 Status HcclNodeExecutor::Finalize() {
  auto HcomExcutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize");
  if (HcomExcutorFinalize == nullptr) {
    GELOGE(FAILED, "Failed to invoke HcomExcutorFinalize hcom unknown node function.");
  auto HcomExecFinalize = (HcclResult(*)())dlsym(handle_, "HcomExecFinalize");
  if (HcomExecFinalize == nullptr) {
    GELOGE(FAILED, "Failed to invoke HcomExecFinalize hcom unknown node function.");
    return FAILED;
  }
  HcclResult hccl_ret = HcomExcutorFinalize();
  HcclResult hccl_ret = HcomExecFinalize();
  if (hccl_ret != HCCL_SUCCESS) {
    GELOGE(FAILED, "Call HcomExcutorFinalize failed, ret: 0x%X", hccl_ret);
    GELOGE(FAILED, "Call HcomExecFinalize failed, ret: 0x%X", hccl_ret);
    return FAILED;
  }
  // dlclose file handle
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() {
 }

 Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
  if (callback_fun == nullptr) {
    GELOGW("[%s] Callback is NULL", GetNodeName());
    return SUCCESS;
  }
  auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
  if (ret != SUCCESS) {
    GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const {
  return node_item_->NodeName().c_str();
 }

 // Calls Destroy() on every input tensor and then on every output tensor of this task,
 // logging each release. The counts come from node_item_.
 void TaskContext::ReleaseInputsAndOutputs() {
  const int input_count = node_item_->num_inputs;
  for (int idx = 0; idx < input_count; ++idx) {
    (inputs_start_ + idx)->Destroy();
    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), idx);
  }

  const int output_count = node_item_->num_outputs;
  for (int idx = 0; idx < output_count; ++idx) {
    (outputs_start_ + idx)->Destroy();
    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), idx);
  }
 }

 void TaskContext::ReleaseInput(int index) {
  auto input_tensor = MutableInput(index);
  if (input_tensor != nullptr) {
@@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
 const DumpProperties &TaskContext::GetDumpProperties() const {
  return execution_context_->dump_properties;
 }

 // Returns true when the node has an observer, dumping is enabled, or the profiling level of the
 // execution context is greater than zero. The conditions are tested in that order.
 bool TaskContext::NeedCallback() {
  if (node_item_->has_observer) {
    return true;
  }
  if (IsDumpEnabled()) {
    return true;
  }
  return execution_context_->profiling_level > 0;
 }
 }  // namespace hybrid
 }  // namespace ge
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -50,6 +50,8 @@ class TaskContext {
  ConstGeTensorDescPtr GetOutputDesc(int index) const;
  GeTensorDescPtr MutableInputDesc(int index) const;
  GeTensorDescPtr MutableOutputDesc(int index) const;
  void ReleaseInputsAndOutputs();
  bool NeedCallback();
  void ReleaseInput(int index);
  const TensorValue *GetInput(int index) const;
  const TensorValue *GetOutput(int index) const;
--- a/ge/ir_build/atc_ir_common.cc
+++ b/ge/ir_build/atc_ir_common.cc
@@ -63,6 +63,19 @@ vector<string> SplitInputShape(const std::string &input_shape) {
 }
 }  // namespace

 // Validates the value of the --input_format option.
 // An empty string is accepted without further checks. A non-empty value must be recognised by
 // TypeUtils::IsFormatValid; otherwise error E10001 is reported and PARAM_INVALID is returned.
 Status CheckInputFormat(const string &input_format) {
  // Short-circuit keeps IsFormatValid from being called for an empty value.
  const bool accepted = input_format.empty() || ge::TypeUtils::IsFormatValid(input_format.c_str());
  if (accepted) {
    return ge::SUCCESS;
  }

  ErrorManager::GetInstance().ATCReportErrMessage(
    "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"});
  GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str());
  return ge::PARAM_INVALID;
 }

 bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
                                          std::string &dynamic_batch_size) {
  int32_t size = 0;
--- a/ge/ir_build/atc_ir_common.h
+++ b/ge/ir_build/atc_ir_common.h
@@ -75,6 +75,7 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf);
 Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory);
 Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream);
 Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode);
 Status CheckInputFormat(const string &input_format);
 void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips);
 void EraseEndSemicolon(std::string &param);
 }
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -227,7 +227,6 @@ class Impl {
  ~Impl() { (void)generator_.Finalize(); };
  graphStatus CheckOptions(const std::map<std::string, std::string> &options);
  graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
  graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape);
  graphStatus UpdateDataOpAttr(const Graph &graph);
  graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
  graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
@@ -318,42 +317,10 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options
  if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) {
    return GRAPH_PARAM_INVALID;
  }
  return GRAPH_SUCCESS;
 }

 graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) {
  auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);
  for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(input_node);
    ge::OpDescPtr op = input_node->GetOpDesc();
    GE_CHECK_NOTNULL(op);
    if (op->GetType() == DATA) {
      string data_op_name = op->GetName();
      GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size());
      ge::GeTensorDesc tensor = op->GetInputDesc(0);
      ge::GeShape data_shape = tensor.GetShape();
      GELOGD("Data op get shape from InputDesc in ge ir graph.");

      string tmp_shape_str;
      const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
      if (tmp_shape.empty()) {
        GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str());
      } else {
        tmp_shape_str += data_op_name + ":";
        for (auto tmp_dim : tmp_shape) {
          tmp_shape_str += to_string((long)tmp_dim) + ",";
        }
        tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1);
        tmp_shape_str += ";";
        default_shape += tmp_shape_str;
      }

      GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
    }
  // Check Input Format
  if (options_.find(kInputFormat) != options_.end()) {
    return CheckInputFormat(options_[kInputFormat]);
  }
  default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1));
  GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str());
  return GRAPH_SUCCESS;
 }

@@ -378,13 +345,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri
  GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID);
  options_[ge::ir_option::LOG_LEVEL] = log;

  string input_shape;
  if (options_.find("input_shape") == options_.end()) {
    GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS,
                     return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!");
  } else {
    input_shape = options_["input_shape"];
  }
  string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"];
  string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"];
  string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"];
  string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end()
--- a/ge/proto/op_mapping_info.proto
+++ b/ge/proto/op_mapping_info.proto
@@ -15,6 +15,7 @@ message Output {
    int32 original_output_data_type = 7;
    int32 original_output_format = 8;
    uint64 size = 9;
    Shape origin_shape = 10;
 }

 message Input {
@@ -23,6 +24,7 @@ message Input {
    Shape shape = 3;
    uint64 address = 4;
    uint64 size = 5;
    Shape origin_shape = 6;
 }

 enum BufferType {
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -39,7 +39,7 @@ size_t GetAlignedSize(size_t size) {
 }

 Status ProfilingTaskInfo(OpTask *op_task) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
  if (!ProfilingManager::Instance().ProfilingModelLoadOn()) {
    return SUCCESS;
  }

--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -112,8 +112,9 @@ Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, u
 Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
  return UNSUPPORTED;
 }
 Status OpTask::UpdateArgTable(const SingleOpModelParam &param) {
  auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param);

 Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace) {
  auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, keep_workspace);
  auto all_addresses = BuildTaskUtils::JoinAddresses(addresses);
  uintptr_t *arg_base = nullptr;
  size_t arg_num = 0;
@@ -132,6 +133,10 @@ Status OpTask::UpdateArgTable(const SingleOpModelParam &param) {
  return SUCCESS;
 }

 // Default argument-table update: delegates to DoUpdateArgTable with keep_workspace set to true.
 Status OpTask::UpdateArgTable(const SingleOpModelParam &param) {
  const bool keep_workspace = true;
  return DoUpdateArgTable(param, keep_workspace);
 }

 Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
                            const vector<DataBuffer> &input_buffers,
                            vector<GeTensorDesc> &output_desc,
@@ -792,10 +797,9 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
  return SUCCESS;
 }

 Status AiCpuTask::UpdateArgTable(const SingleOpModelParam &param) {
  auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, false);
  io_addr_host_ = BuildTaskUtils::JoinAddresses(addresses);
  return SUCCESS;
 // Argument-table update shared by AICPU tasks: delegates to DoUpdateArgTable with
 // keep_workspace set to false.
 Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
  // AICPU tasks do not have a workspace for now, so keep_workspace is false.
  return DoUpdateArgTable(param, false);
 }

 void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
--- a/ge/single_op/task/op_task.h
+++ b/ge/single_op/task/op_task.h
@@ -54,6 +54,8 @@ class OpTask {
                              rtStream_t stream);

 protected:
  Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);

  DumpProperties dump_properties_;
  DumpOp dump_op_;
  OpDescPtr op_desc_;
@@ -110,7 +112,7 @@ class AiCpuBaseTask : public OpTask {
  AiCpuBaseTask() = default;
  ~AiCpuBaseTask() override;
  UnknowShapeOpType GetUnknownType() const { return unknown_type_; }

  Status UpdateArgTable(const SingleOpModelParam &param) override;
 protected:
  Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
  Status SetInputConst();
@@ -137,7 +139,6 @@ class AiCpuTask : public AiCpuBaseTask {
  ~AiCpuTask() override;

  Status LaunchKernel(rtStream_t stream) override;
  Status UpdateArgTable(const SingleOpModelParam &param) override;
  void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override;

  Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -293,6 +293,7 @@ const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path";

 // Configure op bank path
 const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path";
 const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update";

 // Graph run mode
 enum GraphRunMode { PREDICTION = 0, TRAIN };
@@ -366,6 +367,7 @@ static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR;
 static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE;
 static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str();
 static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
 static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str();
 static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();

 // for interface: aclgrphBuildModel
@@ -389,22 +391,13 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
                                                             OP_COMPILER_CACHE_DIR,
                                                             OP_COMPILER_CACHE_MODE,
                                                             MDL_BANK_PATH,
                                                             OP_BANK_PATH};
                                                             OP_BANK_PATH,
                                                             OP_BANK_UPDATE};

 // for interface: aclgrphParse
 const std::set<std::string> ir_parser_suppported_options = {INPUT_FORMAT,
                                                            INPUT_SHAPE,
                                                            OP_NAME_MAP,
                                                            IS_DYNAMIC_INPUT,
                                                            INPUT_FP16_NODES,
                                                            IS_INPUT_ADJUST_HW_LAYOUT,
                                                            IS_OUTPUT_ADJUST_HW_LAYOUT,
                                                            OUTPUT,
                                                            OUTPUT_TYPE,
                                                            OUT_NODES,
                                                            COMPRESS_WEIGHT_CONF,
                                                            ENABLE_SCOPE_FUSION_PASSES,
                                                            LOG_LEVEL};
 const std::set<std::string> ir_parser_suppported_options = {
  INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT,
  OUT_NODES,        COMPRESS_WEIGHT_CONF,      ENABLE_SCOPE_FUSION_PASSES};

 // for interface: aclgrphBuildInitialize
 const std::set<std::string> global_options = {CORE_TYPE,
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -37,7 +37,9 @@ enum FrameworkType {
  MINDSPORE = 1,
  TENSORFLOW = 3,
  ANDROID_NN,
 #ifndef ONLY_COMPILE_OPEN_SRC
  ONNX,
 #endif
  FRAMEWORK_RESERVED,
 };

--- a/inc/framework/common/profiling/ge_profiling.h
+++ b/inc/framework/common/profiling/ge_profiling.h
@@ -20,7 +20,8 @@
 #include "ge/ge_api_error_codes.h"
 #include "toolchain/prof_callback.h"

 #define MAX_DEV_NUM (64)
 const int MAX_DEV_NUM = 64;

 enum ProfCommandHandleType {
  kProfCommandhandleInit = 0,
  kProfCommandhandleStart,
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -30,8 +30,6 @@
 #include "runtime/base.h"

 namespace ge {
 class ModelListenerAdapter;

 class SingleOp;
 class DynamicSingleOp;

@@ -55,14 +53,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
  ge::Status Initialize();
  ge::Status Finalize();

  // Load model
  ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority,
                              std::shared_ptr<ge::ModelListener> listener);

  ge::Status UnloadModel(uint32_t modelId);

  ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data);

  // Get input and output descriptor
  ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
                              std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);
@@ -168,9 +160,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
  ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
                                         std::vector<ge::TensorDesc> &output_desc);

  ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data,
                       std::shared_ptr<ge::ModelListener> listener);

  ge::Status CommandHandle(const ge::Command &command);

  ge::Status SetDump(const DumpConfig &dump_config);
@@ -297,8 +286,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
 private:
  static bool isInit_;
 };

 ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info);
 }  // namespace ge

 #endif  // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_
--- a/inc/framework/omg/parser/model_parser.h
+++ b/inc/framework/omg/parser/model_parser.h
@@ -36,7 +36,7 @@ using Status = domi::Status;

 namespace domi {
 using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>(
    const google::protobuf::Message *root_proto, const std::string &graph)>;
  const google::protobuf::Message *root_proto, const std::string &graph)>;
 class ModelParser {
 public:
  ModelParser() {}
@@ -44,19 +44,20 @@ class ModelParser {
  virtual ~ModelParser() {}

  /**
  * @ingroup domi_omg
  * @brief Analyze network model data
  * @param [in] file  Network model file path
  * @param [in|out]  graph Save the network information after analysis
  * @return SUCCESS
  * @return Others failed
  */
   * @ingroup domi_omg
   * @brief Analyze network model data
   * @param [in] file  Network model file path
   * @param [in|out]  graph Save the network information after analysis
   * @return SUCCESS
   * @return Others failed
   */
  virtual Status Parse(const char *file, ge::Graph &graph) = 0;

  /**
   * @ingroup domi_omg
   * @brief Parse relevant data from memory and save it to graph
   * @param [in] data Model file memory data
   * @param [in] size Model file memory size
   * @param [in|out] graph A graph for saving the model information after analysis
   * @return SUCCESS
   * @return FAILED
@@ -64,36 +65,49 @@ class ModelParser {
   */
  virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;

 #ifndef ONLY_COMPILE_OPEN_SRC
  /**
   * @ingroup domi_omg
   * @brief Parse relevant data from memory and save it to graph
   *        (overload that writes into a ge::Graph)
   * @param [in] data Model file memory data
   * @param [in] size Model file memory size
   * @param [in|out] graph A graph for saving the model information after analysis
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
 #endif

  /**
  * @ingroup domi_omg
  * @brief Analyze network model data
  * @param [in] proto  network model
  * @param [in|out]  graph Save the network information after analysis
  * @return SUCCESS
  * @return Others failed
  */
   * @ingroup domi_omg
   * @brief Analyze network model data
   * @param [in] proto  network model
   * @param [in|out]  graph Save the network information after analysis
   * @return SUCCESS
   * @return Others failed
   */
  virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;

  /**
  * @ingroup domi_omg
  * @brief Analyze callback model data in subgraph
  * @param [in] proto network model
  * @param [in] callback callback of subgraph
  * @param [in|out] graph Save the network information after analysis
  * @return SUCCESS
  * @return Others failed
  */
  virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto,
                                        GetGraphCallback callback,
   * @ingroup domi_omg
   * @brief Analyze callback model data in subgraph
   * @param [in] proto network model
   * @param [in] callback callback of subgraph
   * @param [in|out] graph Save the network information after analysis
   * @return SUCCESS
   * @return Others failed
   */
  virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
                                        ge::ComputeGraphPtr &graph) = 0;
  /**
  * @ingroup domi_omg
  * @brief Convert model files to JSON format
  * @param [in] model_file  Model file path to be converted
  * @param [out] json_file Converted JSON file path
  * @return SUCCESS
  * @return Others failed
  */
   * @ingroup domi_omg
   * @brief Convert model files to JSON format
   * @param [in] model_file  Model file path to be converted
   * @param [out] json_file Converted JSON file path
   * @return SUCCESS
   * @return Others failed
   */
  virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; }

  /*
--- a/inc/framework/omg/parser/parser_inner_ctx.h
+++ b/inc/framework/omg/parser/parser_inner_ctx.h
@@ -59,7 +59,7 @@ struct ParserContext {
  bool train_flag = false;
  domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND;
  domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
  RunMode run_mode = ONLY_PRE_CHECK;
  RunMode run_mode = GEN_OM_MODEL;
  // save caffe custom proto path, used by caffe parse
  std::string custom_proto_path;
  // save caffe proto path, used by caffe parse
--- a/metadef/graph/ge_attr_define.cc
+++ b/metadef/graph/ge_attr_define.cc
@@ -167,6 +167,7 @@ const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims";
 const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size";

 const std::string ATTR_NAME_ROOT_GRAPH_ID = "_root_graph_id";
 const std::string ATTR_NAME_ROOT_GRAPH_NAME = "_root_graph_name";

 // Identify node connecting to input and output
 const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data";
--- a/metadef/graph/proto/op_mapping_info.proto
+++ b/metadef/graph/proto/op_mapping_info.proto
@@ -15,6 +15,7 @@ message Output {
    int32 original_output_data_type = 7;
    int32 original_output_format = 8;
    uint64 size = 9;
    Shape origin_shape = 10;
 }

 message Input {
@@ -23,6 +24,7 @@ message Input {
    Shape shape = 3;
    uint64 address = 4;
    uint64 size = 5;
    Shape origin_shape = 6;
 }

 enum BufferType {
--- a/metadef/graph/utils/type_utils.cc
+++ b/metadef/graph/utils/type_utils.cc
@@ -118,8 +118,7 @@ const std::map<std::string, Format> kDataFormatMap = {
  {"NCDHW", FORMAT_NCDHW},
  {"ND",   FORMAT_ND}};

 const std::map<std::string, Format> kStringToFormatMap =
  {
 const std::map<std::string, Format> kStringToFormatMap = {
    {"NCHW", FORMAT_NCHW},
    {"NHWC", FORMAT_NHWC},
    {"ND", FORMAT_ND},
@@ -164,7 +163,7 @@ const std::map<std::string, Format> kStringToFormatMap =
    {"NULL", FORMAT_NULL},
    // add for json input
    {"RESERVED", FORMAT_RESERVED},
    {"UNDEFINED", FORMAT_RESERVED},
    {"UNDEFINED", FORMAT_RESERVED}
  };

 const std::map<DataType, std::string> kDataTypeToStringMap = {
--- a/metadef/inc/common/proto/op_mapping_info.proto
+++ b/metadef/inc/common/proto/op_mapping_info.proto
@@ -15,6 +15,7 @@ message Output {
    int32 original_output_data_type = 7;
    int32 original_output_format = 8;
    uint64 size = 9;
    Shape origin_shape = 10;
 }

 message Input {
@@ -23,6 +24,7 @@ message Input {
    Shape shape = 3;
    uint64 address = 4;
    uint64 size = 5;
    Shape origin_shape = 6;
 }

 enum BufferType {
--- a/metadef/inc/common/util/platform_info.h
+++ b/metadef/inc/common/util/platform_info.h
@@ -19,12 +19,8 @@

 #include <map>
 #include <string>
 #include <vector>
 #include "platform_info_def.h"

 using std::map;
 using std::vector;
 using std::string;
 #include "platform_infos_def.h"

 namespace fe {
 class PlatformInfoManager {
@@ -36,66 +32,143 @@ class PlatformInfoManager {
  uint32_t InitializePlatformInfo();
  uint32_t Finalize();

  uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platform_info, OptionalInfo &opti_compilation_info);
  uint32_t GetPlatformInfo(const std::string SoCVersion,
                           PlatformInfo &platform_info,
                           OptionalInfo &opti_compilation_info);

  uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platform_info, OptionalInfo &opti_compilation_info);

  void SetOptionalCompilationInfo(OptionalInfo &opti_compilation_info);

  uint32_t GetPlatformInfos(const std::string SoCVersion,
                            PlatFormInfos &platform_info,
                            OptionalInfos &opti_compilation_info);

  uint32_t GetPlatformInfoWithOutSocVersion(PlatFormInfos &platform_info, OptionalInfos &opti_compilation_info);

  void SetOptionalCompilationInfo(OptionalInfos &opti_compilation_info);

 private:
  PlatformInfoManager();
  ~PlatformInfoManager();

  uint32_t LoadIniFile(string ini_file_real_path);
  uint32_t LoadIniFile(std::string ini_file_real_path);

  void Trim(string &str);
  void Trim(std::string &str);

  uint32_t LoadConfigFile(string real_path);
  uint32_t LoadConfigFile(std::string real_path);

  string RealPath(const std::string &path);
  std::string RealPath(const std::string &path);

  string GetSoFilePath();
  std::string GetSoFilePath();

  void ParseVersion(map<string, string> &version_map, string &soc_version, PlatformInfo &platform_info_temp);
  void ParseVersion(std::map<std::string, std::string> &version_map,
                    std::string &soc_version,
                    PlatformInfo &platform_info_temp);

  void ParseSocInfo(map<string, string> &soc_info_map, PlatformInfo &platform_info_temp);
  void ParseSocInfo(std::map<std::string, std::string> &soc_info_map,
                    PlatformInfo &platform_info_temp);

  void ParseCubeOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp);
  void ParseCubeOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                             PlatformInfo &platform_info_temp);

  void ParseBufferOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp);
  void ParseBufferOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                               PlatformInfo &platform_info_temp);

  void ParseUBOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp);
  void ParseUBOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                           PlatformInfo &platform_info_temp);

  void ParseUnzipOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp);
  void ParseUnzipOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                              PlatformInfo &platform_info_temp);

  void ParseAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp);
  void ParseAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                       PlatformInfo &platform_info_temp);

  void ParseBufferOfAICoreMemoryRates(map<string, string> &ai_core_memory_rates_map, PlatformInfo &platform_info_temp);
  void ParseBufferOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map,
                                      PlatformInfo &platform_info_temp);

  void ParseAICoreMemoryRates(map<string, string> &ai_core_memory_rates_map, PlatformInfo &platform_info_temp);
  void ParseAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map,
                              PlatformInfo &platform_info_temp);

  void ParseUBOfAICoreMemoryRates(map<string, string> &ai_core_memory_rates_map, PlatformInfo &platform_info_temp);
  void ParseUBOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map,
                                  PlatformInfo &platform_info_temp);

  void ParseAICoreintrinsicDtypeMap(map<string, string> &ai_coreintrinsic_dtype_map, PlatformInfo &platform_info_temp);
  void ParseAICoreintrinsicDtypeMap(std::map<std::string, std::string> &ai_coreintrinsic_dtype_map,
                                    PlatformInfo &platform_info_temp);

  void ParseVectorCoreSpec(map<string, string> &vector_core_spec_map, PlatformInfo &platform_info_temp);
  void ParseVectorCoreSpec(std::map<std::string, std::string> &vector_core_spec_map,
                           PlatformInfo &platform_info_temp);

  void ParseVectorCoreMemoryRates(map<string, string> &vector_core_memory_rates_map, PlatformInfo &platform_info_temp);
  void ParseVectorCoreMemoryRates(std::map<std::string, std::string> &vector_core_memory_rates_map,
                                  PlatformInfo &platform_info_temp);

  void ParseCPUCache(map<string, string> &CPUCacheMap, PlatformInfo &platform_info_temp);
  void ParseCPUCache(std::map<std::string, std::string> &CPUCacheMap,
                     PlatformInfo &platform_info_temp);

  void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vector_coreintrinsic_dtype_map,
  void ParseVectorCoreintrinsicDtypeMap(std::map<std::string, std::string> &vector_coreintrinsic_dtype_map,
                                        PlatformInfo &platform_info_temp);

  uint32_t ParsePlatformInfoFromStrToStruct(map<string, map<string, string>> &content_info_map, string &soc_version,
  uint32_t ParsePlatformInfoFromStrToStruct(std::map<std::string, std::map<std::string, std::string>> &content_info_map,
                                            std::string &soc_version,
                                            PlatformInfo &platform_info_temp);

  uint32_t AssemblePlatformInfoVector(map<string, map<string, string>> &content_info_map);
  void ParseVersion(std::map<std::string, std::string> &version_map,
                    std::string &soc_version,
                    PlatFormInfos &platform_info_temp);

  void ParseSocInfo(std::map<std::string, std::string> &soc_info_map, PlatFormInfos &platform_info_temp);

  void ParseCubeOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                             PlatFormInfos &platform_info_temp);

  void ParseBufferOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                               PlatFormInfos &platform_info_temp);

  void ParseUBOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                           PlatFormInfos &platform_info_temp);

  void ParseUnzipOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                              PlatFormInfos &platform_info_temp);

  void ParseAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map,
                       PlatFormInfos &platform_info_temp);

  void ParseBufferOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map,
                                      PlatFormInfos &platform_info_temp);

  void ParseAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map,
                              PlatFormInfos &platform_info_temp);

  void ParseUBOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map,
                                  PlatFormInfos &platform_info_temp);

  void ParseAICoreintrinsicDtypeMap(std::map<std::string, std::string> &ai_coreintrinsic_dtype_map,
                                    PlatFormInfos &platform_info_temp);

  void ParseVectorCoreSpec(std::map<std::string, std::string> &vector_core_spec_map,
                           PlatFormInfos &platform_info_temp);

  void ParseVectorCoreMemoryRates(std::map<std::string, std::string> &vector_core_memory_rates_map,
                                  PlatFormInfos &platform_info_temp);

  void ParseCPUCache(std::map<std::string, std::string> &CPUCacheMap,
                     PlatFormInfos &platform_info_temp);

  void ParseVectorCoreintrinsicDtypeMap(std::map<std::string, std::string> &vector_coreintrinsic_dtype_map,
                                        PlatFormInfos &platform_info_temp);

  uint32_t ParsePlatformInfo(std::map<std::string, std::map<std::string, std::string>> &content_info_map,
                             std::string &soc_version,
                             PlatFormInfos &platform_info_temp);

  uint32_t AssemblePlatformInfoVector(std::map<std::string, std::map<std::string, std::string>> &content_info_map);

 private:
  bool init_flag_;
  map<string, PlatformInfo> platform_info_map_;
  std::map<std::string, PlatformInfo> platform_info_map_;
  OptionalInfo opti_compilation_info_;
  std::map<std::string, PlatFormInfos> platform_infos_map_;
  OptionalInfos opti_compilation_infos_;
 };
 }  // namespace fe
 #endif
--- a/metadef/inc/common/util/platform_infos_def.h
+++ b/metadef/inc/common/util/platform_infos_def.h
@@ -0,0 +1,283 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef PLATFORM_INFOS_DEF_H
 #define PLATFORM_INFOS_DEF_H

 #include <map>
 #include <string>
 #include <vector>
 #include <memory>
 #include "platform_info_def.h"

 namespace fe {
 // Implementation type behind StrInfos; only forward-declared in this header.
 class StrInfoImpl;
 using StrInfoImplPtr = std::shared_ptr<StrInfoImpl>;

 // Accessor wrapper for the string-valued platform attributes.
 // The wrapper stores nothing but a shared pointer to StrInfoImpl, which starts
 // out null. No copy operations are declared, so copying a StrInfos copies the
 // pointer and both copies refer to the same impl object.
 // All member functions are declared here only; none is defined in this header.
 class StrInfos {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded; whether accessors may be used before Init() is not visible
  // here - confirm in the implementation.
  bool Init();

  // Each attribute is returned by value. The setters take a non-const lvalue
  // reference, so callers must pass a named std::string (a literal or a
  // temporary will not bind).
  std::string GetAIcVersion();
  void SetAIcVersion(std::string &aic_version);

  std::string GetCcecAIcVersion();
  void SetCcecAIcVersion(std::string &ccec_aic_version);

  std::string GetCcecAIvVersion();
  void SetCcecAIvVersion(std::string &ccec_aiv_version);

  // Note: this flag is carried as a string, not as a bool.
  std::string IsSupportAICpuCompiler();
  void SetIsSupportAICpuCompiler(std::string &is_support_ai_cpu_compiler);

 private:
  StrInfoImplPtr str_info_impl_ = nullptr;
 };

 // Implementation type behind SoCInfos; only forward-declared in this header.
 class SoCInfoImpl;
 using SoCInfoImplPtr = std::shared_ptr<SoCInfoImpl>;

 // Accessor wrapper for SoC-level attributes (core counts, memory, L2).
 // The wrapper stores nothing but a shared pointer to SoCInfoImpl, which starts
 // out null. No copy operations are declared, so copying a SoCInfos copies the
 // pointer and both copies refer to the same impl object.
 // All member functions are declared here only; none is defined in this header.
 class SoCInfos {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  // Core counts.
  uint32_t GetAICoreCnt();
  void SetAICoreCnt(uint32_t ai_core_cnt);
  uint32_t GetVectorCoreCnt();
  void SetVectorCoreCnt(uint32_t vector_core_cnt);
  uint32_t GetAICpuCnt();
  void SetAICpuCnt(uint32_t ai_cpu_cnt);

  // Memory type and size. MemoryType is not declared in this header
  // (presumably it comes from platform_info_def.h - confirm).
  MemoryType GetMemType();
  void SetMemType(MemoryType memory_type);
  uint64_t GetMemSize();
  void SetMemSize(uint64_t memory_size);

  // L2 type, size and page count. L2Type is not declared in this header either.
  L2Type GetL2Type();
  void SetL2Type(L2Type l2_type);
  uint64_t GetL2Size();
  void SetL2Size(uint64_t l2_size);
  uint32_t GetL2PageNum();
  void SetL2PageNum(uint32_t l2_page_num);

 private:
  SoCInfoImplPtr soc_info_impl_ = nullptr;
 };

 // Implementation type behind AICoreSpecs; only forward-declared in this header.
 class AICoreSpecImpl;
 using AICoreSpecImplPtr = std::shared_ptr<AICoreSpecImpl>;

 // Accessor wrapper for the AI Core specification values.
 // The wrapper stores nothing but a shared pointer to AICoreSpecImpl, which
 // starts out null. No copy operations are declared, so copying an AICoreSpecs
 // copies the pointer and both copies refer to the same impl object.
 // All member functions are declared here only; none is defined in this header.
 // Every value is passed and returned by value; units are not stated here.
 class AICoreSpecs {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  // Cube parameters.
  double GetCubeFreq();
  void SetCubeFreq(double cube_freq);
  uint64_t GetCubeMSize();
  void SetCubeMSize(uint64_t cube_m_size);
  uint64_t GetCubeNSize();
  void SetCubeNSize(uint64_t cube_n_size);
  uint64_t GetCubeKSize();
  void SetCubeKSize(uint64_t cube_k_size);

  // Vector calculation size.
  uint64_t GetVecCalcSize();
  void SetVecCalcSize(uint64_t vec_calc_size);

  // L0a / L0b / L0c / L1 sizes and the smask buffer.
  uint64_t GetL0aSize();
  void SetL0aSize(uint64_t l0_a_size);
  uint64_t GetL0bSize();
  void SetL0bSize(uint64_t l0_b_size);
  uint64_t GetL0cSize();
  void SetL0cSize(uint64_t l0_c_size);
  uint64_t GetL1Size();
  void SetL1Size(uint64_t l1_size);
  uint64_t GetSmaskBuffer();
  void SetSmaskBuffer(uint64_t smask_buffer);

  // UB parameters.
  uint64_t GetUBSize();
  void SetUBSize(uint64_t ub_size);
  uint64_t GetUBBlockSize();
  void SetUBBlockSize(uint64_t ubblock_size);
  uint64_t GetUBBankSize();
  void SetUBBankSize(uint64_t ubbank_size);
  uint64_t GetUBBankNum();
  void SetUBBankNum(uint64_t ubbank_num);
  uint64_t GetUBBurstInOneBlock();
  void SetUBBurstInOneBlock(uint64_t ubburst_in_one_block);
  uint64_t GetUBBankGroupNum();
  void SetUBBankGroupNum(uint64_t ubbank_group_num);

  // Unzip parameters.
  uint32_t GetUnzipEngines();
  void SetUnzipEngines(uint32_t unzip_engines);
  uint32_t GetUnzipMaxRatios();
  void SetUnzipMaxRatios(uint32_t unzip_max_ratios);
  uint32_t GetUnzipChannels();
  void SetUnzipChannels(uint32_t unzip_channels);
  uint8_t GetUnzipIsTight();
  void SetUnzipIsTight(uint8_t unzip_is_tight);

  // Cube/vector split value (carried as uint8_t).
  uint8_t GetCubeVectorSplit();
  void SetCubeVectorSplit(uint8_t cube_vector_split);

 private:
  AICoreSpecImplPtr aicore_spec_impl_ = nullptr;
 };

 // Implementation type behind AICoreMemRates; only forward-declared in this header.
 class AICoreMemRateImpl;
 using AICoreMemRateImplPtr = std::shared_ptr<AICoreMemRateImpl>;

 // Accessor wrapper for the AI Core memory-rate values.
 // The wrapper stores nothing but a shared pointer to AICoreMemRateImpl, which
 // starts out null. No copy operations are declared, so copying an
 // AICoreMemRates copies the pointer and both copies refer to the same impl.
 // All member functions are declared here only; none is defined in this header.
 // Every rate is a double passed and returned by value; units are not stated here.
 class AICoreMemRates {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  // DDR rates.
  double GetDdrRate();
  void SetDdrRate(double ddr_rate);
  double GetDdrReadRate();
  void SetDdrReadRate(double ddr_read_rate);
  double GetDdrWriteRate();
  void SetDdrWriteRate(double ddr_write_rate);

  // L2 rates.
  double GetL2Rate();
  void SetL2Rate(double l2_rate);
  double GetL2ReadRate();
  void SetL2ReadRate(double l2_read_rate);
  double GetL2WriteRate();
  void SetL2WriteRate(double l2_write_rate);

  // Rates out of L1.
  double GetL1ToL0aRate();
  void SetL1ToL0aRate(double l1_to_l0_a_rate);
  double GetL1ToL0bRate();
  void SetL1ToL0bRate(double l1_to_l0_b_rate);
  double GetL1ToUBRate();
  void SetL1ToUBRate(double l1_to_ub_rate);

  // Rate from L0c to UB.
  double GetL0cToUBRate();
  void SetL0cToUBRate(double l0_c_to_ub_rate);

  // Rates out of UB.
  double GetUBToL2Rate();
  void SetUBToL2Rate(double ub_to_l2_rate);
  double GetUBToDdrRate();
  void SetUBToDdrRate(double ub_to_ddr_rate);
  double GetUBToL1Rate();
  void SetUBToL1Rate(double ub_to_l1_rate);

 private:
  AICoreMemRateImplPtr aicore_mem_rate_impl_ = nullptr;
 };

 // Implementation type behind VectorCoreSpecs; only forward-declared in this header.
 class VectorCoreSpecImpl;
 using VectorCoreSpecImplPtr = std::shared_ptr<VectorCoreSpecImpl>;

 // Accessor wrapper for the Vector Core specification values.
 // The wrapper stores nothing but a shared pointer to VectorCoreSpecImpl, which
 // starts out null. No copy operations are declared, so copying a
 // VectorCoreSpecs copies the pointer and both copies refer to the same impl.
 // All member functions are declared here only; none is defined in this header.
 // Every value is passed and returned by value; units are not stated here.
 class VectorCoreSpecs {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  // Vector frequency and calculation size.
  double GetVecFreq();
  void SetVecFreq(double vec_freq);
  uint64_t GetVecCalcSize();
  void SetVecCalcSize(uint64_t vec_calc_size);

  // Smask buffer.
  uint64_t GetSmaskBuffer();
  void SetSmaskBuffer(uint64_t smask_buffer);

  // UB parameters.
  uint64_t GetUBSize();
  void SetUBSize(uint64_t ub_size);
  uint64_t GetUBBlockSize();
  void SetUBBlockSize(uint64_t ubblock_size);
  uint64_t GetUBBankSize();
  void SetUBBankSize(uint64_t ubbank_size);
  uint64_t GetUBBankNum();
  void SetUBBankNum(uint64_t ubbank_num);
  uint64_t GetUBBurstInOneBlock();
  void SetUBBurstInOneBlock(uint64_t ubburst_in_one_block);
  uint64_t GetUBBankGroupNum();
  void SetUBBankGroupNum(uint64_t ubbank_group_num);

  // Register sizes.
  uint64_t GetVectorRegSize();
  void SetVectorRegSize(uint64_t vector_reg_size);
  uint64_t GetPredicateRegSize();
  void SetPredicateRegSize(uint64_t predicate_reg_size);
  uint64_t GetAddressRegSize();
  void SetAddressRegSize(uint64_t address_reg_size);
  uint64_t GetAlignmentRegSize();
  void SetAlignmentRegSize(uint64_t alignment_reg_size);

 private:
  VectorCoreSpecImplPtr vector_core_spec_impl_ = nullptr;
 };

 // Implementation type behind VectorCoreMemRates; only forward-declared in this header.
 class VectorCoreMemRateImpl;
 using VectorCoreMemRateImplPtr = std::shared_ptr<VectorCoreMemRateImpl>;

 // Accessor wrapper for the Vector Core memory-rate values.
 // The wrapper stores nothing but a shared pointer to VectorCoreMemRateImpl,
 // which starts out null. No copy operations are declared, so copying a
 // VectorCoreMemRates copies the pointer and both copies refer to the same impl.
 // All member functions are declared here only; none is defined in this header.
 // Every rate is a double passed and returned by value; units are not stated here.
 class VectorCoreMemRates {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  // DDR rates.
  double GetDdrRate();
  void SetDdrRate(double ddr_rate);
  double GetDdrReadRate();
  void SetDdrReadRate(double ddr_read_rate);
  double GetDdrWriteRate();
  void SetDdrWriteRate(double ddr_write_rate);

  // L2 rates.
  double GetL2Rate();
  void SetL2Rate(double l2_rate);
  double GetL2ReadRate();
  void SetL2ReadRate(double l2_read_rate);
  double GetL2WriteRate();
  void SetL2WriteRate(double l2_write_rate);

  // Rates out of UB.
  double GetUBToL2Rate();
  void SetUBToL2Rate(double ub_to_l2_rate);
  double GetUBToDdrRate();
  void SetUBToDdrRate(double ub_to_ddr_rate);

 private:
  VectorCoreMemRateImplPtr vector_core_mem_rate_impl_ = nullptr;
 };

 // Implementation type behind CPUCaches; only forward-declared in this header.
 class CPUCacheImpl;
 using CPUCacheImplPtr = std::shared_ptr<CPUCacheImpl>;

 // Accessor wrapper for the two CPU-cache "sync by SW" values.
 // The wrapper stores nothing but a shared pointer to CPUCacheImpl, which starts
 // out null. No copy operations are declared, so copying a CPUCaches copies the
 // pointer and both copies refer to the same impl object.
 // All member functions are declared here only; none is defined in this header.
 class CPUCaches {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  // Both values are plain uint32_t; the header does not say which values are valid.
  uint32_t GetAICPUSyncBySW();
  void SetAICPUSyncBySW(uint32_t AICPUSyncBySW);

  uint32_t GetTSCPUSyncBySW();
  void SetTSCPUSyncBySW(uint32_t TSCPUSyncBySW);

 private:
  CPUCacheImplPtr cpu_cache_impl_ = nullptr;
 };

 // Implementation type behind PlatFormInfos; only forward-declared in this header.
 class PlatFormInfosImpl;
 using PlatFormInfosImplPtr = std::shared_ptr<PlatFormInfosImpl>;

 // Top-level accessor wrapper that bundles the per-area wrappers (StrInfos,
 // SoCInfos, AICoreSpecs, ...) plus two intrinsic-dtype maps.
 // The wrapper stores nothing but a shared pointer to PlatFormInfosImpl, which
 // starts out null. No copy operations are declared, so copying a PlatFormInfos
 // copies the pointer and both copies refer to the same impl object.
 // All member functions are declared here only; none is defined in this header.
 //
 // Getters return by value. Setters take a non-const lvalue reference, so
 // callers must pass a named object (a temporary will not bind).
 class PlatFormInfos {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  StrInfos GetStrInfo();
  void SetStrInfo(StrInfos &str_infos);

  SoCInfos GetSocInfo();
  void SetSocInfo(SoCInfos &SoC_infos);

  // AI Core: spec, memory rates and intrinsic-dtype map (string -> list of strings).
  AICoreSpecs GetAICoreSpec();
  void SetAICoreSpec(AICoreSpecs &AICore_specs);
  AICoreMemRates GetAICoreMemRates();
  void SetAICoreMemRates(AICoreMemRates &AICore_mem_rates);
  std::map<std::string, std::vector<std::string>> GetAICoreIntrinsicDtype();
  void SetAICoreIntrinsicDtype(std::map<std::string, std::vector<std::string>> &intrinsic_dtypes);

  // Vector Core: spec, memory rates and intrinsic-dtype map (string -> list of strings).
  VectorCoreSpecs GetVectorCoreSpec();
  void SetVectorCoreSpec(VectorCoreSpecs &vector_core_specs);
  VectorCoreMemRates GetVectorCoreMemRates();
  void SetVectorCoreMemRates(VectorCoreMemRates &vectorcore_mem_rates);
  std::map<std::string, std::vector<std::string>> GetVectorCoreIntrinsicDtype();
  void SetVectorCoreIntrinsicDtype(std::map<std::string, std::vector<std::string>> &intrinsic_dtypes);

  CPUCaches GetCPUCache();
  void SetCPUCache(CPUCaches &CPU_caches);

 private:
  PlatFormInfosImplPtr platform_infos_impl_ = nullptr;
 };

 // Implementation type behind OptionalInfos; only forward-declared in this header.
 class OptionalInfosImpl;
 using OptionalInfosImplPtr = std::shared_ptr<OptionalInfosImpl>;

 // Accessor wrapper for the optional compilation attributes.
 // The wrapper stores nothing but a shared pointer to OptionalInfosImpl, which
 // starts out null. No copy operations are declared, so copying an
 // OptionalInfos copies the pointer and both copies refer to the same impl.
 // All member functions are declared here only; none is defined in this header.
 // Unlike the string setters on StrInfos, these setters take std::string by value.
 class OptionalInfos {
 public:
  // NOTE(review): presumably creates the impl object and reports whether that
  // succeeded - confirm in the implementation.
  bool Init();

  std::string GetSocVersion();
  void SetSocVersion(std::string soc_version);

  std::string GetCoreType();
  void SetCoreType(std::string core_type);

  uint32_t GetAICoreNum();
  void SetAICoreNum(uint32_t ai_core_num);

  // Note: this flag is carried as a string, not as a bool.
  std::string GetL1FusionFlag();
  void SetL1FusionFlag(std::string l1_fusion_flag);

 private:
  OptionalInfosImplPtr optional_infos_impl_ = nullptr;
 };

 }
 #endif
--- a/metadef/inc/graph/debug/ge_attr_define.h
+++ b/metadef/inc/graph/debug/ge_attr_define.h
@@ -188,6 +188,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE;

 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ROOT_GRAPH_ID;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ROOT_GRAPH_NAME;

 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT;
--- a/metadef/inc/register/proto/op_mapping_info.proto
+++ b/metadef/inc/register/proto/op_mapping_info.proto
@@ -15,6 +15,7 @@ message Output {
    int32 original_output_data_type = 7;
    int32 original_output_format = 8;
    uint64 size = 9;
    Shape origin_shape = 10;
 }

 message Input {
@@ -23,6 +24,7 @@ message Input {
    Shape shape = 3;
    uint64 address = 4;
    uint64 size = 5;
    Shape origin_shape = 6;
 }

 enum BufferType {