!1022 Synchronization code 0125

From: @changzherui Reviewed-by: @ljl0711,@guoqi1024 Signed-off-by: @guoqi1024
4 years ago · 7183c03452
--- a/build.sh
+++ b/build.sh
@@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
 #     fi

 #     if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
         echo "Generating coverage statistics, please wait..."
         cd ${BASEPATH}
         rm -rf ${BASEPATH}/cov
         mkdir ${BASEPATH}/cov
         lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
 	 lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
 	 cd ${BASEPATH}/cov
 	 genhtml coverage.info
        echo "Generating coverage statistics, please wait..."
        cd ${BASEPATH}
        rm -rf ${BASEPATH}/cov
        mkdir ${BASEPATH}/cov
        lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
        lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '*/ge/common/*' '*/ge/executor/*' '*/ge/graph/*' '*/ge/host_kernels/*' '/usr/local/*' -o cov/coverage.info
        cd ${BASEPATH}/cov
        genhtml coverage.info
 fi

 # generate output package in tar form, including ut/st libraries/executables
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -129,38 +129,38 @@ set(TRAIN_SRC_LIST
    "graph/label/partitioned_call_label_maker.cc"
    "graph/label/while_label_maker.cc"
    "graph/load/graph_loader.cc"
    "graph/load/new_model_manager/cpu_queue_schedule.cc"
    "graph/load/new_model_manager/data_dumper.cc"
    "graph/load/new_model_manager/data_inputer.cc"
    "graph/load/new_model_manager/davinci_model.cc"
    "graph/load/new_model_manager/davinci_model_parser.cc"
    "graph/load/new_model_manager/model_manager.cc"
    "graph/load/new_model_manager/model_utils.cc"
    "graph/load/new_model_manager/aipp_utils.cc"
    "graph/load/new_model_manager/task_info/end_graph_task_info.cc"
    "graph/load/new_model_manager/task_info/model_exit_task_info.cc"
    "graph/load/new_model_manager/task_info/event_record_task_info.cc"
    "graph/load/new_model_manager/task_info/event_wait_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
    "graph/load/new_model_manager/task_info/hccl_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_task_info.cc"
    "graph/load/new_model_manager/task_info/label_set_task_info.cc"
    "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
    "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
    "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_active_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/new_model_manager/task_info/task_info.cc"
    "graph/load/new_model_manager/tbe_handle_store.cc"
    "graph/load/new_model_manager/zero_copy_task.cc"
    "graph/load/new_model_manager/zero_copy_offset.cc"
    "graph/load/model_manager/cpu_queue_schedule.cc"
    "graph/load/model_manager/data_dumper.cc"
    "graph/load/model_manager/data_inputer.cc"
    "graph/load/model_manager/davinci_model.cc"
    "graph/load/model_manager/davinci_model_parser.cc"
    "graph/load/model_manager/model_manager.cc"
    "graph/load/model_manager/model_utils.cc"
    "graph/load/model_manager/aipp_utils.cc"
    "graph/load/model_manager/task_info/end_graph_task_info.cc"
    "graph/load/model_manager/task_info/model_exit_task_info.cc"
    "graph/load/model_manager/task_info/event_record_task_info.cc"
    "graph/load/model_manager/task_info/event_wait_task_info.cc"
    "graph/load/model_manager/task_info/fusion_start_task_info.cc"
    "graph/load/model_manager/task_info/fusion_stop_task_info.cc"
    "graph/load/model_manager/task_info/hccl_task_info.cc"
    "graph/load/model_manager/task_info/kernel_ex_task_info.cc"
    "graph/load/model_manager/task_info/kernel_task_info.cc"
    "graph/load/model_manager/task_info/label_set_task_info.cc"
    "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
    "graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
    "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
    "graph/load/model_manager/task_info/memcpy_async_task_info.cc"
    "graph/load/model_manager/task_info/profiler_trace_task_info.cc"
    "graph/load/model_manager/task_info/stream_active_task_info.cc"
    "graph/load/model_manager/task_info/stream_switch_task_info.cc"
    "graph/load/model_manager/task_info/stream_switchn_task_info.cc"
    "graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
    "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/model_manager/task_info/task_info.cc"
    "graph/load/model_manager/tbe_handle_store.cc"
    "graph/load/model_manager/zero_copy_task.cc"
    "graph/load/model_manager/zero_copy_offset.cc"
    "graph/manager/graph_context.cc"
    "graph/manager/graph_manager.cc"
    "graph/manager/graph_manager_utils.cc"
@@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST
    "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
    "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
    "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
    "hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
    "hybrid/node_executor/controlop/control_op_executor.cc"
    "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
    "hybrid/node_executor/hccl/hccl_node_executor.cc"
@@ -605,37 +606,37 @@ set(INFER_SRC_LIST
    "graph/manager/util/rt_context_util.cc"
    "graph/manager/util/variable_accelerate_ctrl.cc"
    "graph/manager/util/debug.cc"
    "graph/load/new_model_manager/model_manager.cc"
    "graph/load/new_model_manager/data_inputer.cc"
    "graph/load/new_model_manager/davinci_model.cc"
    "graph/load/new_model_manager/davinci_model_parser.cc"
    "graph/load/new_model_manager/model_utils.cc"
    "graph/load/new_model_manager/aipp_utils.cc"
    "graph/load/new_model_manager/tbe_handle_store.cc"
    "graph/load/new_model_manager/cpu_queue_schedule.cc"
    "graph/load/new_model_manager/zero_copy_task.cc"
    "graph/load/new_model_manager/zero_copy_offset.cc"
    "graph/load/new_model_manager/data_dumper.cc"
    "graph/load/new_model_manager/task_info/task_info.cc"
    "graph/load/new_model_manager/task_info/event_record_task_info.cc"
    "graph/load/new_model_manager/task_info/event_wait_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_task_info.cc"
    "graph/load/new_model_manager/task_info/label_set_task_info.cc"
    "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
    "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
    "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_active_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
    "graph/load/new_model_manager/task_info/end_graph_task_info.cc"
    "graph/load/new_model_manager/task_info/model_exit_task_info.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
    "graph/load/model_manager/model_manager.cc"
    "graph/load/model_manager/data_inputer.cc"
    "graph/load/model_manager/davinci_model.cc"
    "graph/load/model_manager/davinci_model_parser.cc"
    "graph/load/model_manager/model_utils.cc"
    "graph/load/model_manager/aipp_utils.cc"
    "graph/load/model_manager/tbe_handle_store.cc"
    "graph/load/model_manager/cpu_queue_schedule.cc"
    "graph/load/model_manager/zero_copy_task.cc"
    "graph/load/model_manager/zero_copy_offset.cc"
    "graph/load/model_manager/data_dumper.cc"
    "graph/load/model_manager/task_info/task_info.cc"
    "graph/load/model_manager/task_info/event_record_task_info.cc"
    "graph/load/model_manager/task_info/event_wait_task_info.cc"
    "graph/load/model_manager/task_info/fusion_start_task_info.cc"
    "graph/load/model_manager/task_info/fusion_stop_task_info.cc"
    "graph/load/model_manager/task_info/kernel_ex_task_info.cc"
    "graph/load/model_manager/task_info/kernel_task_info.cc"
    "graph/load/model_manager/task_info/label_set_task_info.cc"
    "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
    "graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
    "graph/load/model_manager/task_info/memcpy_async_task_info.cc"
    "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
    "graph/load/model_manager/task_info/profiler_trace_task_info.cc"
    "graph/load/model_manager/task_info/stream_active_task_info.cc"
    "graph/load/model_manager/task_info/stream_switch_task_info.cc"
    "graph/load/model_manager/task_info/stream_switchn_task_info.cc"
    "graph/load/model_manager/task_info/end_graph_task_info.cc"
    "graph/load/model_manager/task_info/model_exit_task_info.cc"
    "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
    "single_op/task/op_task.cc"
    "single_op/task/build_task_utils.cc"
    "single_op/task/tbe_task_builder.cc"
--- a/ge/client/proto/task.proto
+++ b/ge/client/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
    LabelSetDef label_set = 37;
    LabelGotoExDef label_goto_ex = 38;
    LabelSwitchByIndexDef label_switch_by_index = 39;
    KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
    uint32 kernel_ext_info_size = 19;
 }

 // Kernel task description that carries a handle in addition to the kernel
 // arguments. It is referenced from TaskDef as field `kernel_with_handle` (tag 40).
 message KernelDefWithHandle {
    KernelContext context = 1;  // kernel context; see the KernelContext message

    // Tags 2-9 are not used by this message; the remaining fields start at 10.
    uint64 handle = 10;  // NOTE(review): presumably an opaque handle to the registered kernel binary - confirm
    string dev_func = 11;  // NOTE(review): presumably the device-side function name - confirm
    uint32 block_dim = 12;  // NOTE(review): presumably the launch block dimension - confirm
    uint32 args_size = 13;  // NOTE(review): presumably the byte length of `args` - confirm
    bytes args = 14;  // raw argument bytes
    bytes sm_desc = 15;  // raw descriptor bytes; exact layout is not visible here
    string original_kernel_key = 16;  // NOTE(review): looks like a lookup key for the original kernel - confirm
    string node_info = 17;  // NOTE(review): presumably identifies the owning graph node - confirm
 }

 message KernelContext {
    uint32 kernel_type = 1;
    uint32 op_id = 2;                              // OP type in CCE
--- a/ge/common/auth/file_saver.cc
+++ b/ge/common/auth/file_saver.cc
@@ -62,7 +62,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {
    while (size > size_1g) {
      write_count = mmWrite(fd, reinterpret_cast<void *>(seek), size_1g);
      if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
        GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
        GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
        return FAILED;
      }
      size -= size_1g;
@@ -75,7 +75,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {

  // -1: Failed to write to file; -2: Illegal parameter
  if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
    GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
    GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
    return FAILED;
  }

@@ -133,7 +133,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
        WriteData(static_cast<const void *>(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break);
    // Write partition data
    for (const auto &partitionData : partition_datas) {
      GELOGI("GC:size[%zu]", partitionData.size);
      GELOGI("GC:size[%u]", partitionData.size);
      GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
          WriteData(static_cast<const void *>(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED;
          break);
@@ -305,7 +305,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
      // Write partition data
      auto &cur_partition_datas = all_partition_datas[index];
      for (const auto &partition_data : cur_partition_datas) {
        GELOGI("GC:size[%zu]", partition_data.size);
        GELOGI("GC:size[%u]", partition_data.size);
        GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
            WriteData(static_cast<const void *>(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED;
            break);
--- a/ge/common/formats/utils/formats_trans_utils.cc
+++ b/ge/common/formats/utils/formats_trans_utils.cc
@@ -32,7 +32,7 @@ int64_t GetCubeSizeByDataType(DataType data_type) {
  if (size <= 0) {
    std::string error = "Failed to get cube size, the data type " +
        FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid";
    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
    return -1;
  } else if (size == 1) {
    return kCubeSize * 2;  // 32 bytes cube size
@@ -61,7 +61,7 @@ bool CheckShapeValid(const std::vector<int64_t> &shape, const int64_t expect_dim
  if (expect_dims <= 0 || shape.size() != static_cast<size_t>(expect_dims)) {
    std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) +
        ", expect " + FmtToStr(expect_dims);
    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
    return false;
  }
  return IsShapeValid(shape);
@@ -75,12 +75,12 @@ bool IsShapeValid(const std::vector<int64_t> &shape) {
  for (auto dim : shape) {
    if (dim < 0) {
      std::string error = "Invalid negative dims in the shape " +  FmtToStr(ShapeToString(shape));
      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
      GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
      return false;
    }
    if (dim != 0 && kShapeItemNumMAX / dim < num) {
      std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX);
      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
      GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
      return false;
    }
    num *= dim;
@@ -108,7 +108,7 @@ bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector<int64_t> &expect_
        FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", invalid relationship between src shape " +
        FmtToStr(ShapeToString(args.src_shape)) + " and dst " +
        FmtToStr(ShapeToString(args.dst_shape));
    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
    return false;
  }
  return true;
@@ -121,7 +121,7 @@ bool IsTransShapeDstCorrect(const TransArgs &args, std::vector<int64_t> &expect_
        FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" +
        FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" +
        FmtToStr(ShapeToString(expect_shape));
    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
    return false;
  }
  return true;
--- a/ge/common/helper/model_cache_helper.cc
+++ b/ge/common/helper/model_cache_helper.cc
@@ -28,7 +28,7 @@
 #include "framework/common/util.h"
 #include "graph/detail/attributes_holder.h"
 #include "graph/detail/model_serialize_imp.h"
 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/tensor_utils.h"
@@ -1000,8 +1000,8 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
      auto offset = (tensor_addr_mgr.offset);
      // Check logic address and offset
      if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
        GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address,
               offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
        GELOGW("Check logic_address[%lu] and offset [%lu] of %s failed, var mem logic base is %lu, abandon",
               logic_address, offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
        return PARAM_INVALID;
      }
      // Offset is needed by SaveVarVddr instead of logic address
--- a/ge/common/helper/model_helper.cc
+++ b/ge/common/helper/model_helper.cc
@@ -23,7 +23,7 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/omg/version.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/graph_utils.h"

@@ -537,7 +537,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
  // model version 1.0 file header does not have model_num member
  is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION &&
                            file_header_->model_num > kStatiOmFileModelNum;
  GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version);
  GELOGD("cur om model is ge root model or no %d, model version %u", is_unknown_shape_model_, file_header_->version);

  OmFileLoadHelper om_load_helper;
  if (is_unknown_shape_model_) {
@@ -746,7 +746,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om
      GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed.");
      return INTERNAL_ERROR;
    }
    GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num());
    GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num());
  }
  cur_model->SetModelTaskDef(task);
  return SUCCESS;
--- a/ge/common/helper/om_file_helper.cc
+++ b/ge/common/helper/om_file_helper.cc
@@ -203,7 +203,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
    auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data + cur_offset);
    size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
    cur_offset += partition_table_size;
    GELOGD("Cur model index %zu: ModelPartitionTable num :%u, "
    GELOGD("Cur model index %u: ModelPartitionTable num :%u, "
           "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
           index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
    if (model_data_size <= cur_offset) {
@@ -219,7 +219,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
      partition.type = partition_table->partition[i].type;
      if (index >= model_contexts_.size()) {
        if (index != model_contexts_.size()) {
          GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index);
          GELOGE(FAILED, "cur index is %u make model_contexts_ overflow", index);
          return FAILED;
        }

@@ -231,16 +231,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
      }

      if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) {
        GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
        GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %u is greater than the model data size %u.",
               partition.size + cur_offset, model_data_size);
        return GE_EXEC_MODEL_DATA_SIZE_INVALID;
      }
      cur_offset += partition.size;
      GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index);
      GELOGD("Partition, type:%d, size:%u, model_index:%u", static_cast<int>(partition.type), partition.size, index);
    }
  }
  if (cur_offset != model_data_size) {
    GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size);
    GELOGE(FAILED, "do not get the complete model, read end offset:%u, all size:%u", cur_offset, model_data_size);
    return FAILED;
  }
  return SUCCESS;
--- a/ge/common/kernel_store.cc
+++ b/ge/common/kernel_store.cc
@@ -51,7 +51,7 @@ bool KernelStore::Build() {
    kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length());
    kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize());

    GELOGD("get kernel bin name %s, addr %p, size %u",
    GELOGD("get kernel bin name %s, addr %p, size %zu",
           kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize());
    mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head));
    GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false);
--- a/ge/common/math/math_util.h
+++ b/ge/common/math/math_util.h
@@ -878,11 +878,11 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) {
    return INTERNAL_ERROR;                                                                              \
  }

 #define FMK_INT64_UINT32_MULCHECK(a, b)                                                                                \
  if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) {                                                          \
    GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast<uint32_t>(a), \
           static_cast<uint32_t>(b));                                                                                  \
    return INTERNAL_ERROR;                                                                                             \
 #define FMK_INT64_UINT32_MULCHECK(a, b)                                                                 \
  if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) {                                           \
    GELOGW("Int64 %ld and Uint32 %u multiplication can result in overflow!", static_cast<int64_t>(a),   \
           static_cast<uint32_t>(b));                                                                   \
    return INTERNAL_ERROR;                                                                              \
  }

 #define FMK_FP16_ZEROCHECK(a)                                                                                          \
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -21,7 +21,7 @@
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace {
 const char *const kTrainingTrace = "training_trace";
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
    uint32_t stream_id = task.stream_id;
    std::string shape_type = task.shape_type;
    int64_t cur_iter_num = task.cur_iter_num;
    uint32_t task_type = task.task_type;
    data = model_name.append(" ")
                     .append(op_name).append(" ")
                     .append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
                     .append(std::to_string(stream_id)).append(" ")
                     .append(std::to_string(model_id)).append(" ")
                     .append(shape_type).append(" ")
                     .append(std::to_string(cur_iter_num)).append("\n");
                     .append(std::to_string(cur_iter_num)).append(" ")
                     .append(std::to_string(task_type)).append("\n");

    ReporterData reporter_data{};
    reporter_data.deviceId = device_id;
--- a/ge/common/proto/task.proto
+++ b/ge/common/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
    LabelSetDef label_set = 37;
    LabelGotoExDef label_goto_ex = 38;
    LabelSwitchByIndexDef label_switch_by_index = 39;
    KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
    uint32 kernel_ext_info_size = 19;
 }

 // Kernel task description that carries a handle in addition to the kernel
 // arguments. It is referenced from TaskDef as field `kernel_with_handle` (tag 40).
 message KernelDefWithHandle {
    KernelContext context = 1;  // kernel context; see the KernelContext message

    // Tags 2-9 are not used by this message; the remaining fields start at 10.
    uint64 handle = 10;  // NOTE(review): presumably an opaque handle to the registered kernel binary - confirm
    string dev_func = 11;  // NOTE(review): presumably the device-side function name - confirm
    uint32 block_dim = 12;  // NOTE(review): presumably the launch block dimension - confirm
    uint32 args_size = 13;  // NOTE(review): presumably the byte length of `args` - confirm
    bytes args = 14;  // raw argument bytes
    bytes sm_desc = 15;  // raw descriptor bytes; exact layout is not visible here
    string original_kernel_key = 16;  // NOTE(review): looks like a lookup key for the original kernel - confirm
    string node_info = 17;  // NOTE(review): presumably identifies the owning graph node - confirm
 }

 message KernelContext {
    uint32 kernel_type = 1;
    uint32 op_id = 2;                              // OP type in CCE
--- a/ge/common/types.cc
+++ b/ge/common/types.cc
@@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite");

 REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign");
 REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp");
--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@@ -32,37 +32,37 @@ set(SRC_LIST
    "../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
    "../model/ge_model.cc"
    "../model/ge_root_model.cc"
    "../graph/load/new_model_manager/davinci_model.cc"
    "../graph/load/new_model_manager/davinci_model_parser.cc"
    "../graph/load/new_model_manager/model_manager.cc"
    "../graph/load/new_model_manager/tbe_handle_store.cc"
    "../graph/load/new_model_manager/cpu_queue_schedule.cc"
    "../graph/load/new_model_manager/model_utils.cc"
    "../graph/load/new_model_manager/aipp_utils.cc"
    "../graph/load/new_model_manager/data_inputer.cc"
    "../graph/load/new_model_manager/data_dumper.cc"
    "../graph/load/new_model_manager/zero_copy_task.cc"
    "../graph/load/new_model_manager/zero_copy_offset.cc"
    "../graph/load/new_model_manager/task_info/task_info.cc"
    "../graph/load/new_model_manager/task_info/event_record_task_info.cc"
    "../graph/load/new_model_manager/task_info/event_wait_task_info.cc"
    "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
    "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
    "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
    "../graph/load/new_model_manager/task_info/kernel_task_info.cc"
    "../graph/load/new_model_manager/task_info/label_set_task_info.cc"
    "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
    "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
    "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
    "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
    "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
    "../graph/load/new_model_manager/task_info/stream_active_task_info.cc"
    "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
    "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
    "../graph/load/new_model_manager/task_info/end_graph_task_info.cc"
    "../graph/load/new_model_manager/task_info/model_exit_task_info.cc"
    "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
    "../graph/load/model_manager/davinci_model.cc"
    "../graph/load/model_manager/davinci_model_parser.cc"
    "../graph/load/model_manager/model_manager.cc"
    "../graph/load/model_manager/tbe_handle_store.cc"
    "../graph/load/model_manager/cpu_queue_schedule.cc"
    "../graph/load/model_manager/model_utils.cc"
    "../graph/load/model_manager/aipp_utils.cc"
    "../graph/load/model_manager/data_inputer.cc"
    "../graph/load/model_manager/data_dumper.cc"
    "../graph/load/model_manager/zero_copy_task.cc"
    "../graph/load/model_manager/zero_copy_offset.cc"
    "../graph/load/model_manager/task_info/task_info.cc"
    "../graph/load/model_manager/task_info/event_record_task_info.cc"
    "../graph/load/model_manager/task_info/event_wait_task_info.cc"
    "../graph/load/model_manager/task_info/fusion_start_task_info.cc"
    "../graph/load/model_manager/task_info/fusion_stop_task_info.cc"
    "../graph/load/model_manager/task_info/kernel_ex_task_info.cc"
    "../graph/load/model_manager/task_info/kernel_task_info.cc"
    "../graph/load/model_manager/task_info/label_set_task_info.cc"
    "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
    "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
    "../graph/load/model_manager/task_info/memcpy_async_task_info.cc"
    "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
    "../graph/load/model_manager/task_info/profiler_trace_task_info.cc"
    "../graph/load/model_manager/task_info/stream_active_task_info.cc"
    "../graph/load/model_manager/task_info/stream_switch_task_info.cc"
    "../graph/load/model_manager/task_info/stream_switchn_task_info.cc"
    "../graph/load/model_manager/task_info/end_graph_task_info.cc"
    "../graph/load/model_manager/task_info/model_exit_task_info.cc"
    "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
    "../graph/common/local_context.cc"
    "../opskernel_manager/ops_kernel_builder_manager.cc"
    "../single_op/single_op_manager.cc"
@@ -104,6 +104,7 @@ set(SRC_LIST
    "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
    "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
    "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
    "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
    "../hybrid/node_executor/controlop/control_op_executor.cc"
    "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
    "../hybrid/node_executor/rts/rts_node_executor.cc"
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -29,15 +29,15 @@
 #include "framework/common/util.h"
 #include "graph/execute/graph_execute.h"
 #include "graph/load/graph_loader.h"
 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/load/model_manager/model_manager.h"
 #include "graph/manager/graph_mem_allocator.h"
 #include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "mmpa/mmpa_api.h"
 #include "single_op/single_op_manager.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"

 using std::string;
@@ -454,7 +454,7 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &
    if (all_data_dims[i] < 0) {
      cur_dynamic_dims.push_back(dynamic_dims[i]);
    } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) {
      GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %d should be %d",
      GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld",
             i, dynamic_dims[i], all_data_dims[i]);
      return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
    }
--- a/ge/executor/module.mk
+++ b/ge/executor/module.mk
@@ -22,37 +22,37 @@ local_ge_executor_src_files :=  \
    ../graph/manager/util/debug.cc \
    ../model/ge_model.cc \
    ../model/ge_root_model.cc \
    ../graph/load/new_model_manager/davinci_model.cc \
    ../graph/load/new_model_manager/davinci_model_parser.cc \
    ../graph/load/new_model_manager/model_manager.cc \
    ../graph/load/new_model_manager/tbe_handle_store.cc \
    ../graph/load/new_model_manager/cpu_queue_schedule.cc \
    ../graph/load/new_model_manager/model_utils.cc \
    ../graph/load/new_model_manager/aipp_utils.cc \
    ../graph/load/new_model_manager/data_inputer.cc \
    ../graph/load/new_model_manager/data_dumper.cc \
    ../graph/load/new_model_manager/zero_copy_task.cc \
    ../graph/load/new_model_manager/zero_copy_offset.cc \
    ../graph/load/new_model_manager/task_info/task_info.cc                  \
    ../graph/load/new_model_manager/task_info/event_record_task_info.cc     \
    ../graph/load/new_model_manager/task_info/event_wait_task_info.cc       \
    ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc     \
    ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc      \
    ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc        \
    ../graph/load/new_model_manager/task_info/kernel_task_info.cc           \
    ../graph/load/new_model_manager/task_info/label_set_task_info.cc        \
    ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
    ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc    \
    ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc     \
    ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
    ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc   \
    ../graph/load/new_model_manager/task_info/stream_active_task_info.cc    \
    ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc    \
    ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc   \
    ../graph/load/new_model_manager/task_info/end_graph_task_info.cc        \
    ../graph/load/new_model_manager/task_info/model_exit_task_info.cc       \
    ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc   \
    ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc  \
    ../graph/load/model_manager/davinci_model.cc \
    ../graph/load/model_manager/davinci_model_parser.cc \
    ../graph/load/model_manager/model_manager.cc \
    ../graph/load/model_manager/tbe_handle_store.cc \
    ../graph/load/model_manager/cpu_queue_schedule.cc \
    ../graph/load/model_manager/model_utils.cc \
    ../graph/load/model_manager/aipp_utils.cc \
    ../graph/load/model_manager/data_inputer.cc \
    ../graph/load/model_manager/data_dumper.cc \
    ../graph/load/model_manager/zero_copy_task.cc \
    ../graph/load/model_manager/zero_copy_offset.cc \
    ../graph/load/model_manager/task_info/task_info.cc                  \
    ../graph/load/model_manager/task_info/event_record_task_info.cc     \
    ../graph/load/model_manager/task_info/event_wait_task_info.cc       \
    ../graph/load/model_manager/task_info/fusion_start_task_info.cc     \
    ../graph/load/model_manager/task_info/fusion_stop_task_info.cc      \
    ../graph/load/model_manager/task_info/kernel_ex_task_info.cc        \
    ../graph/load/model_manager/task_info/kernel_task_info.cc           \
    ../graph/load/model_manager/task_info/label_set_task_info.cc        \
    ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
    ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc    \
    ../graph/load/model_manager/task_info/memcpy_async_task_info.cc     \
    ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
    ../graph/load/model_manager/task_info/profiler_trace_task_info.cc   \
    ../graph/load/model_manager/task_info/stream_active_task_info.cc    \
    ../graph/load/model_manager/task_info/stream_switch_task_info.cc    \
    ../graph/load/model_manager/task_info/stream_switchn_task_info.cc   \
    ../graph/load/model_manager/task_info/end_graph_task_info.cc        \
    ../graph/load/model_manager/task_info/model_exit_task_info.cc       \
    ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc   \
    ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc  \
    ../opskernel_manager/ops_kernel_builder_manager.cc \
    ../single_op/single_op_manager.cc \
    ../single_op/single_op_model.cc \
@@ -95,6 +95,7 @@ local_ge_executor_src_files :=  \
    ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc              \
    ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc                \
    ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc        \
    ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc                  \
    ../hybrid/node_executor/controlop/control_op_executor.cc                \
    ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
    ../hybrid/node_executor/rts/rts_node_executor.cc                        \
--- a/ge/executor/proto/task.proto
+++ b/ge/executor/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
    LabelSetDef label_set = 37;
    LabelGotoExDef label_goto_ex = 38;
    LabelSwitchByIndexDef label_switch_by_index = 39;
    KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
    uint32 kernel_ext_info_size = 19;
 }

 // Kernel task description that carries a `handle` and a `dev_func` name.
 // TaskDef references this message through its `kernel_with_handle` field (tag 40).
 // NOTE(review): the per-field notes below are inferred from the field names only;
 // confirm them against the runtime code that fills and consumes this message.
 message KernelDefWithHandle {
    KernelContext context = 1;

    // Tags 2-9 are not used by this message; numbering resumes at 10.
    uint64 handle = 10;
    string dev_func = 11;
    uint32 block_dim = 12;
    // Presumably the byte length of `args` below -- TODO confirm.
    uint32 args_size = 13;
    bytes args = 14;
    bytes sm_desc = 15;
    string original_kernel_key = 16;
    string node_info = 17;
 }

 message KernelContext {
    uint32 kernel_type = 1;
    uint32 op_id = 2;                              // OP type in CCE
--- a/ge/ge_inference.mk
+++ b/ge/ge_inference.mk
@@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \
    graph/manager/util/rt_context_util.cc               \
    graph/manager/util/variable_accelerate_ctrl.cc       \
    graph/manager/util/debug.cc  \
    graph/load/new_model_manager/model_manager.cc                        \
    graph/load/new_model_manager/data_inputer.cc                         \
    graph/load/new_model_manager/davinci_model.cc                        \
    graph/load/new_model_manager/davinci_model_parser.cc                 \
    graph/load/new_model_manager/model_utils.cc                          \
    graph/load/new_model_manager/aipp_utils.cc                           \
    graph/load/new_model_manager/tbe_handle_store.cc                     \
    graph/load/new_model_manager/cpu_queue_schedule.cc                   \
    graph/load/new_model_manager/zero_copy_task.cc                       \
    graph/load/new_model_manager/zero_copy_offset.cc                     \
    graph/load/new_model_manager/data_dumper.cc                          \
    graph/load/new_model_manager/task_info/task_info.cc                  \
    graph/load/new_model_manager/task_info/event_record_task_info.cc     \
    graph/load/new_model_manager/task_info/event_wait_task_info.cc       \
    graph/load/new_model_manager/task_info/fusion_start_task_info.cc     \
    graph/load/new_model_manager/task_info/fusion_stop_task_info.cc      \
    graph/load/new_model_manager/task_info/kernel_ex_task_info.cc        \
    graph/load/new_model_manager/task_info/kernel_task_info.cc           \
    graph/load/new_model_manager/task_info/label_set_task_info.cc        \
    graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
    graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc    \
    graph/load/new_model_manager/task_info/memcpy_async_task_info.cc     \
    graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
    graph/load/new_model_manager/task_info/profiler_trace_task_info.cc   \
    graph/load/new_model_manager/task_info/stream_active_task_info.cc    \
    graph/load/new_model_manager/task_info/stream_switch_task_info.cc    \
    graph/load/new_model_manager/task_info/stream_switchn_task_info.cc   \
    graph/load/new_model_manager/task_info/end_graph_task_info.cc        \
    graph/load/new_model_manager/task_info/model_exit_task_info.cc       \
    graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc   \
    graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc  \
    graph/load/model_manager/model_manager.cc                        \
    graph/load/model_manager/data_inputer.cc                         \
    graph/load/model_manager/davinci_model.cc                        \
    graph/load/model_manager/davinci_model_parser.cc                 \
    graph/load/model_manager/model_utils.cc                          \
    graph/load/model_manager/aipp_utils.cc                           \
    graph/load/model_manager/tbe_handle_store.cc                     \
    graph/load/model_manager/cpu_queue_schedule.cc                   \
    graph/load/model_manager/zero_copy_task.cc                       \
    graph/load/model_manager/zero_copy_offset.cc                     \
    graph/load/model_manager/data_dumper.cc                          \
    graph/load/model_manager/task_info/task_info.cc                  \
    graph/load/model_manager/task_info/event_record_task_info.cc     \
    graph/load/model_manager/task_info/event_wait_task_info.cc       \
    graph/load/model_manager/task_info/fusion_start_task_info.cc     \
    graph/load/model_manager/task_info/fusion_stop_task_info.cc      \
    graph/load/model_manager/task_info/kernel_ex_task_info.cc        \
    graph/load/model_manager/task_info/kernel_task_info.cc           \
    graph/load/model_manager/task_info/label_set_task_info.cc        \
    graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
    graph/load/model_manager/task_info/label_goto_ex_task_info.cc    \
    graph/load/model_manager/task_info/memcpy_async_task_info.cc     \
    graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
    graph/load/model_manager/task_info/profiler_trace_task_info.cc   \
    graph/load/model_manager/task_info/stream_active_task_info.cc    \
    graph/load/model_manager/task_info/stream_switch_task_info.cc    \
    graph/load/model_manager/task_info/stream_switchn_task_info.cc   \
    graph/load/model_manager/task_info/end_graph_task_info.cc        \
    graph/load/model_manager/task_info/model_exit_task_info.cc       \
    graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc   \
    graph/load/model_manager/task_info/super_kernel/super_kernel.cc  \
    single_op/task/op_task.cc                                            \
    single_op/task/build_task_utils.cc                                   \
    single_op/task/tbe_task_builder.cc                                   \
@@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \
    single_op/single_op_manager.cc                                       \
    hybrid/hybrid_davinci_model_stub.cc                                  \
    hybrid/node_executor/aicpu/aicpu_ext_info.cc                         \
    # graph/load/new_model_manager/task_info/hccl_task_info.cc
    # graph/load/model_manager/task_info/hccl_task_info.cc

 OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES)

--- a/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -33,7 +33,7 @@ namespace {
      uint64_t size = data_num * sizeof(TYPE);                                                                         \
      ge_tensor = MakeShared<GeTensor>(out_desc, size);                                                                \
      GE_CHECK_NOTNULL(ge_tensor);                                                                                     \
      GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size);                   \
      GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size);                    \
      ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType());                                              \
      ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape());                                                    \
    } else {                                                                                                           \
@@ -72,7 +72,7 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {
    num_size = max_range_size;
  }
  if (num_size < 0) {
    GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size);
    GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size);
    return INTERNAL_ERROR;
  }
  data_num = static_cast<uint64_t>(num_size);
--- a/ge/ge_local_engine/proto/task.proto
+++ b/ge/ge_local_engine/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
    LabelSetDef label_set = 37;
    LabelGotoExDef label_goto_ex = 38;
    LabelSwitchByIndexDef label_switch_by_index = 39;
    KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
    uint32 kernel_ext_info_size = 19;
 }

 // Kernel task description that carries a `handle` and a `dev_func` name.
 // TaskDef references this message through its `kernel_with_handle` field (tag 40).
 // NOTE(review): the per-field notes below are inferred from the field names only;
 // confirm them against the runtime code that fills and consumes this message.
 message KernelDefWithHandle {
    KernelContext context = 1;

    // Tags 2-9 are not used by this message; numbering resumes at 10.
    uint64 handle = 10;
    string dev_func = 11;
    uint32 block_dim = 12;
    // Presumably the byte length of `args` below -- TODO confirm.
    uint32 args_size = 13;
    bytes args = 14;
    bytes sm_desc = 15;
    string original_kernel_key = 16;
    string node_info = 17;
 }

 message KernelContext {
    uint32 kernel_type = 1;
    uint32 op_id = 2;                              // OP type in CCE
--- a/ge/ge_runner.mk
+++ b/ge/ge_runner.mk
@@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \
    graph/label/partitioned_call_label_maker.cc \
    graph/label/while_label_maker.cc \
    graph/load/graph_loader.cc \
    graph/load/new_model_manager/cpu_queue_schedule.cc \
    graph/load/new_model_manager/data_dumper.cc \
    graph/load/new_model_manager/data_inputer.cc \
    graph/load/new_model_manager/davinci_model.cc \
    graph/load/new_model_manager/davinci_model_parser.cc \
    graph/load/new_model_manager/model_manager.cc \
    graph/load/new_model_manager/model_utils.cc \
    graph/load/new_model_manager/aipp_utils.cc \
    graph/load/new_model_manager/task_info/end_graph_task_info.cc \
    graph/load/new_model_manager/task_info/model_exit_task_info.cc \
    graph/load/new_model_manager/task_info/event_record_task_info.cc \
    graph/load/new_model_manager/task_info/event_wait_task_info.cc \
    graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
    graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
    graph/load/new_model_manager/task_info/hccl_task_info.cc \
    graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
    graph/load/new_model_manager/task_info/kernel_task_info.cc \
    graph/load/new_model_manager/task_info/label_set_task_info.cc \
    graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
    graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
    graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
    graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
    graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
    graph/load/new_model_manager/task_info/stream_active_task_info.cc \
    graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
    graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
    graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
    graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc   \
    graph/load/new_model_manager/task_info/task_info.cc \
    graph/load/new_model_manager/tbe_handle_store.cc \
    graph/load/new_model_manager/zero_copy_task.cc \
    graph/load/new_model_manager/zero_copy_offset.cc    \
    graph/load/model_manager/cpu_queue_schedule.cc \
    graph/load/model_manager/data_dumper.cc \
    graph/load/model_manager/data_inputer.cc \
    graph/load/model_manager/davinci_model.cc \
    graph/load/model_manager/davinci_model_parser.cc \
    graph/load/model_manager/model_manager.cc \
    graph/load/model_manager/model_utils.cc \
    graph/load/model_manager/aipp_utils.cc \
    graph/load/model_manager/task_info/end_graph_task_info.cc \
    graph/load/model_manager/task_info/model_exit_task_info.cc \
    graph/load/model_manager/task_info/event_record_task_info.cc \
    graph/load/model_manager/task_info/event_wait_task_info.cc \
    graph/load/model_manager/task_info/fusion_start_task_info.cc \
    graph/load/model_manager/task_info/fusion_stop_task_info.cc \
    graph/load/model_manager/task_info/hccl_task_info.cc \
    graph/load/model_manager/task_info/kernel_ex_task_info.cc \
    graph/load/model_manager/task_info/kernel_task_info.cc \
    graph/load/model_manager/task_info/label_set_task_info.cc \
    graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
    graph/load/model_manager/task_info/label_goto_ex_task_info.cc \
    graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
    graph/load/model_manager/task_info/memcpy_async_task_info.cc \
    graph/load/model_manager/task_info/profiler_trace_task_info.cc \
    graph/load/model_manager/task_info/stream_active_task_info.cc \
    graph/load/model_manager/task_info/stream_switch_task_info.cc \
    graph/load/model_manager/task_info/stream_switchn_task_info.cc \
    graph/load/model_manager/task_info/super_kernel/super_kernel.cc \
    graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc   \
    graph/load/model_manager/task_info/task_info.cc \
    graph/load/model_manager/tbe_handle_store.cc \
    graph/load/model_manager/zero_copy_task.cc \
    graph/load/model_manager/zero_copy_offset.cc    \
    graph/manager/graph_context.cc \
    graph/manager/graph_manager.cc \
    graph/manager/graph_manager_utils.cc \
@@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \
    hybrid/node_executor/host_cpu/kernel/variable_kernel.cc              \
    hybrid/node_executor/host_cpu/kernel/assign_kernel.cc                \
    hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc        \
    hybrid/node_executor/host_cpu/kernel/data_kernel.cc                  \
    hybrid/node_executor/controlop/control_op_executor.cc                \
    hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
    hybrid/node_executor/hccl/hccl_node_executor.cc                      \
--- a/ge/graph/build/graph_builder.cc
+++ b/ge/graph/build/graph_builder.cc
@@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph
  return SUCCESS;
 }

 Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                           GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
 Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
  if (comp_graph == nullptr) {
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null.");
    return GE_GRAPH_PARAM_NULLPTR;
@@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo
  (void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
  if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) {
    GE_CHK_STATUS_RET(
        BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id),
        BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id),
        "Build for dynamic shape graph failed.");
    return SUCCESS;
  }

  GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id),
  GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id),
                    "Build for known shape graph failed.");
  ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr);
  return SUCCESS;
 }

 Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list,
 Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph,
                                             GeModelPtr &ge_model_ptr, uint64_t session_id) {
  if (ge::GetContext().GetHostExecFlag()) {
    GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed.");
@@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v
  }

  GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str());
  Status ret = SecondPartition(comp_graph, subgraph_list);
  Status ret = SecondPartition(comp_graph);
  GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str());
  auto subgraph_map = graph_partitioner_.GetSubGraphMap();

@@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
 }

 Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                               std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                                               GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
                                               uint64_t session_id) {
  GELOGI("Start to build BuildForDynamicShape for dynamic shape.");
@@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
        }
      }
      // known shape build flow
      GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id),
      GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id),
                        "Build for known shape graph failed.");
    }
    ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr);
@@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc)
  return SUCCESS;
 }

 Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) {
 Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) {
  GE_TIMESTAMP_START(GraphPartition2);
  auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning);
  if (ret != SUCCESS) {
@@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge:
    return ret;
  }
  GE_CHK_STATUS_RET(ret, "Graph partition Failed.");
  auto graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
  if (graph_2_subgraphlist.find(comp_graph) != graph_2_subgraphlist.end()) {
    subgraph_ptr_list = graph_2_subgraphlist[comp_graph];
  } else {
  const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
  if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) {
    GELOGE(FAILED, "Find subgraph failed.");
    return FAILED;
  }
@@ -745,7 +741,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) {
  if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) {
    return SUCCESS;
  }
  GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type);
  GELOGD("[%s] has attr input_memory_type %u", op_desc->GetName().c_str(), mem_type);
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
@@ -755,7 +751,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) {
    while (true) {
      const auto &src_desc = src_node->GetOpDesc();
      GE_IF_BOOL_EXEC(src_desc == nullptr, continue);
      GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(),
      GELOGD("[%s:%u] set attr output_memory_type %d", src_desc->GetName().c_str(), src_out_anchor->GetIdx(),
             mem_type);
      if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE,
                             mem_type)) {
--- a/ge/graph/build/graph_builder.h
+++ b/ge/graph/build/graph_builder.h
@@ -47,8 +47,7 @@ class GraphBuilder {
  GraphBuilder(const GraphBuilder &in) = delete;
  GraphBuilder &operator=(const GraphBuilder &in) = delete;
  virtual ~GraphBuilder() = default;
  Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
               GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
  Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
  void SetOptions(const GraphManagerOptions &options);

 private:
@@ -59,12 +58,12 @@ class GraphBuilder {
  Status UpdateDataInputSize(const ge::NodePtr &node_ptr);
  Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr);
  Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc);
  Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list);
  Status SecondPartition(ge::ComputeGraphPtr &comp_graph);
  Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph);
  Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
  Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                   GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
                                   uint64_t session_id = INVALID_SESSION_ID);
  Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list,
  Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph,
                                 GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
  Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
                                   uint64_t session_id = INVALID_SESSION_ID);
--- a/ge/graph/build/memory/binary_block_mem_assigner.cc
+++ b/ge/graph/build/memory/binary_block_mem_assigner.cc
@@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
    GELOGW("Vector all_memory_size is empty!");
    return SUCCESS;
  }
  if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
    GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front());
  if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) {
    GELOGE(FAILED, "dividend is 0!");
    return FAILED;
  }
  // Memory size is 512 aligned, so it is not necessary to take less than 512
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -24,6 +24,7 @@
 #include "graph/buffer.h"
 #include "graph/ge_attr_value.h"
 #include "graph/ge_context.h"
 #include "graph/types.h"
 #include "graph/node.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/node_utils.h"
@@ -65,7 +66,10 @@ void AlignMemOffset(size_t &mem_align_size) {
 }

 static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
  if (left.GetLifeBegin() < right.GetLifeBegin()) {
  auto left_node_op_desc = left.node->GetOpDesc();
  auto right_node_op_desc = right.node->GetOpDesc();
  if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)
      && (left_node_op_desc->GetId() < right_node_op_desc->GetId())) {
    return true;
  }
  return false;
@@ -97,14 +101,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
  auto left_node_op_desc = left.node->GetOpDesc();
  auto right_node_op_desc = right.node->GetOpDesc();
  if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) {
    if (left.GetLifeBegin() < right.GetLifeBegin()) {
      if (left.life_time_end >= right.GetLifeBegin()) {
    if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) {
      if (left.life_time_end >= static_cast<size_t>(right_node_op_desc->GetId())) {
        return true;
      }
    } else if (left.GetLifeBegin() == right.GetLifeBegin()) {
    } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) {
      return true;
    } else {
      if (right.life_time_end >= left.GetLifeBegin()) {
      if (right.life_time_end >= static_cast<size_t>(left_node_op_desc->GetId())) {
        return true;
      }
    }
@@ -322,7 +326,12 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_
 size_t MemoryBlock::GetLifeBegin() {
  size_t life_time = 0;
  if (!node_type_index_list_.empty()) {
      life_time = node_type_index_list_.front().GetLifeBegin();
    if (node_type_index_list_.front().node != nullptr) {
      auto node_op_desc = node_type_index_list_.front().node->GetOpDesc();
      if (node_op_desc != nullptr) {
        life_time = node_op_desc->GetId();
      }
    }
  }
  return life_time;
 }
@@ -409,7 +418,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_
  depend_stream_life_[stream_id_] = GetLifeBegin();
 }

 size_t MemoryBlock::GetLifeEnd() const {
 size_t MemoryBlock::GetLifeEnd() {
  if (!node_type_index_list_.empty()) {
    return node_type_index_list_.back().life_time_end;
  }
@@ -542,11 +551,31 @@ void GetMaxBatchAllMemorySize(std::map<std::string, vector<int64_t>> &batch_all_
  }
 }

 void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) {
  auto node_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(node_op_desc == nullptr, return);
  // if input size just one and from variable, no need to reassign continuous memory
  bool is_input_continuous = false;
  (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
  if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) {
    auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return);
    auto in_node = peer_out_anchor->GetOwnerNode();
    GE_IF_BOOL_EXEC(in_node == nullptr, return);
    if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) {
      GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str());
      (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
    }
  }
 }

 void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
  vector<int64_t> temp;
  std::map<std::string, vector<int64_t>> batch_all_memory_size;
  std::map<std::string, int64_t> batch_total_size;
  for (const NodePtr &n : compute_graph_->GetAllNodes()) {
    MarkContinuousAllocedForOneInputFromVariable(n);

    auto node_op_desc = n->GetOpDesc();
    GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);

@@ -563,29 +592,32 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {

    for (auto &out_anchor : n->GetAllOutDataAnchors()) {
      GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
      int64_t size = 0;
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
      GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.",
                                       node_op_desc->GetName().c_str(), size);
                      return;);
      batch_all_memory_size[batch_label].emplace_back(size);
      if (batch_total_size.find(batch_label) == batch_total_size.end()) {
        batch_total_size[batch_label] = size;
      } else {
        batch_total_size[batch_label] += size;
      }

      if (!anchor_to_symbol_.empty()) {
        auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString());
        if (iter1 == anchor_to_symbol_.end()) {
          continue;
      bool reuse_input = false;
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS,
                      GELOGI("Get reuse_input failed"));

      if (!reuse_input) {
        int64_t size = 0;
        GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
        batch_all_memory_size[batch_label].emplace_back(size);
        if (batch_total_size.find(batch_label) == batch_total_size.end()) {
          batch_total_size[batch_label] = size;
        } else {
          batch_total_size[batch_label] += size;
        }
        const std::string &symbol = iter1->second;
        auto iter2 = symbol_size_.find(symbol);
        if (iter2 == symbol_size_.end()) {
          symbol_size_[symbol] = size;
        } else if (size > static_cast<int64_t>(iter2->second)) {
          iter2->second = size;

        if (!anchor_to_symbol_.empty()) {
          auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString());
          if (iter1 == anchor_to_symbol_.end()) {
            continue;
          }
          const std::string &symbol = iter1->second;
          auto iter2 = symbol_size_.find(symbol);
          if (iter2 == symbol_size_.end()) {
            symbol_size_[symbol] = size;
          } else if (size > static_cast<int64_t>(iter2->second)) {
            iter2->second = size;
          }
        }
      }
    }
@@ -626,17 +658,35 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) {
  return false;
 }

 bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) {
 void AddReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) {
  string key = std::to_string(mem_block.Size());
  key += "_" + std::to_string(mem_block.stream_id_);
  key += "_" + std::to_string(mem_block.memory_type_);
  auto it = reusable_block_counts.find(key);
  if (it != reusable_block_counts.end()) {
    it->second++;
  } else {
    reusable_block_counts[key] = 1;
  }
 }

 void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) {
  string key = std::to_string(mem_block.Size());
  key += "_" + std::to_string(mem_block.stream_id_);
  key += "_" + std::to_string(mem_block.memory_type_);
  auto it = reusable_block_counts.find(key);
  if (it != reusable_block_counts.end()) {
    if (it->second > 0) {
      it->second--;
    }
  }
 }

 bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block,
                    size_t block_size, size_t real_size, bool continuous) {
  bool can_reuse = false;
  if (reusable_block.Size() == block_size) {
    // in some continuous input case, continuous first input node's is not same as topo first node.
    if (continuous_life_begin > 0) {
      if (continuous_life_begin > reusable_block.GetLifeEnd()) {
        can_reuse = true;
      }
    } else {
      can_reuse = true;
    }
    can_reuse = true;
  }
  return can_reuse;
 }
@@ -647,13 +697,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
  if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) {
    return false;
  }
  auto node_desc = n->GetOpDesc();
  GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str());
                  return false;);
  std::vector<int64_t> offsets_for_fusion = {};
  bool has_lx_fusion_attr =
      AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);

  if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) {
    auto out_anchor = n->GetOutDataAnchor(out_index);
    GE_IF_BOOL_EXEC(out_anchor == nullptr,
@@ -676,17 +719,16 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
                      return false;);

      // If GetBool fail, is_input_continuous is false.
      (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous);
      if (is_input_continuous) {
      bool is_input_continuous_no_padding = false;
      (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT,
                                   is_input_continuous_no_padding);
      if (is_input_continuous_no_padding) {
        reset_zero_copy_flag = true;
        has_lx_fusion_attr = true;
      } else {
        (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
        return false;
      }
      (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);

      // lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later
      GE_IF_BOOL_EXEC(is_input_continuous &&
          (CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))),
      GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()),
                      GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index);
                      no_need_assign_memory = true;
                      return false;);
@@ -700,10 +742,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
          // Only set attr one times.
          if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) {
            (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
            // lx fusion case assign max size for first block, so reuse as none continuous
            GE_IF_BOOL_EXEC(has_lx_fusion_attr,
                            is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index);
                            return false;);
            node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize();
          }
          peer_input_index = peer_in_anchor->GetIdx();
@@ -716,95 +754,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
  return false;
 }

 // Returns true when, among all consumers of output `out_index` of node `n`, the consumer with the
 // largest op id is also the (last visited) consumer flagged with a continuous-input attribute.
 // Returns false for a null node, an out-of-range index, or any null anchor/node/op desc on the way.
 bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) {
  if ((n == nullptr) || (static_cast<size_t>(out_index) >= n->GetAllOutDataAnchors().size())) {
    return false;
  }
  const auto out_anchor = n->GetOutDataAnchor(out_index);
  if (out_anchor == nullptr) {
    return false;
  }

  int64_t latest_consumer_id = 0;
  int64_t continuous_consumer_id = 0;
  // continuous input node's life time should be max
  for (const auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
    if (peer_in_anchor == nullptr) {
      return false;
    }
    const auto consumer = peer_in_anchor->GetOwnerNode();
    if (consumer == nullptr) {
      return false;
    }
    const auto consumer_desc = consumer->GetOpDesc();
    GE_IF_BOOL_EXEC(consumer_desc == nullptr,
                    GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index);
                    return false;);

    const int64_t consumer_id = consumer_desc->GetId();
    latest_consumer_id = (consumer_id > latest_consumer_id) ? consumer_id : latest_consumer_id;

    // The flag stays false when neither attribute can be read.
    bool is_input_continuous = false;
    (void)ge::AttrUtils::GetBool(consumer_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous);
    if (!is_input_continuous) {
      (void)ge::AttrUtils::GetBool(consumer_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
    }
    if (is_input_continuous) {
      continuous_consumer_id = consumer_id;
    }
  }
  return (latest_consumer_id != 0) && (continuous_consumer_id == latest_consumer_id);
 }

 ///
 /// @ingroup GE
 /// @brief Check whether continuous memory is reusable for the output of n that feeds peer_node.
 ///        As a side effect, continuous_life_begin_ is set to the smallest op id among n and all
 ///        producers of peer_node's data inputs visited before the function returns.
 /// @param [in] n producer node (its op desc is dereferenced without a null check)
 /// @param [in] peer_node continuous input node consuming n's output
 /// @param [in] out_index output index of n, used for logging only
 /// @return true if every input of peer_node has a valid producer whose output passes
 ///         IsContinuousInputNodeMaxLife; false otherwise
 ///
 bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) {
  // n and peer_node's op descs are expected to have been null-checked by the caller
  auto node_desc = n->GetOpDesc();
  auto peer_node_desc = peer_node->GetOpDesc();
  // start from n's own id; lowered below if another input producer has a smaller id
  continuous_life_begin_ = static_cast<size_t>(node_desc->GetId());
  // lx fusion case: check all continuous input nodes, the first input node's life time should be the minimum
  for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) {
    if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) ||
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) ||
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) {
      GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index);
      return false;
    }
    auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc();
    ///
    ///  node2 node1  node3
    ///      |   /   / |
    ///      node5    node6
    /// first input node's life time is not the minimum:
    /// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse
    ///
    if (static_cast<size_t>(peer_out_node_desc->GetId()) < continuous_life_begin_) {
      continuous_life_begin_ = static_cast<size_t>(peer_out_node_desc->GetId());
      GELOGI(
        "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time,"
        "min is node[%s] life[%zu]",
        n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(),
        peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_);
    }
    // when node3's output node5's life time is not max(node6 > node5), do not reuse
    if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(),
                                      in_anchor->GetPeerOutAnchor()->GetIdx())) {
      GELOGI(
        "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s "
        "max life node",
        n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(),
        peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx());
      return false;
    }
  }
  return true;
 }

 ///
 /// @ingroup GE
 /// @brief Check pre_reuse flag & post_reuse flag for each symbol
@@ -1090,9 +1039,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
        GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue);

        // A node can reuse blocks of the same stream and preorder streams
        if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) {
          reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_},
                                           real_size, no_align_size);
        if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) {
          reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size);
          if (mem_type == kOutput) {
            auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString());
            if (iter != anchor_to_symbol_.end()) {
@@ -1101,6 +1049,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
          }
          reusable_block->continuous_block_ = continuous;
          reusable_block->ref_count_++;
          ReduceReusableBlockCount(*reusable_block, reusable_block_counts_);
          reusable_blocks_[memory_type][stream_id].erase((++it).base());
          return reusable_block;
        }
@@ -1113,7 +1062,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,

  // Data and netoutput need zero copy block
  block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
  block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size);

  block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId());
  block->stream_id_ = node_op_desc->GetStreamId();
  block->ref_count_++;
  block->continuous_block_ = continuous;
@@ -1131,18 +1081,73 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
  return block;
 }

 MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges,
                                                     const bool is_op_reuse_mem) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
 bool IsOutputIndexRef(const OpDescPtr &op_desc, uint32_t index) {
  auto output_tensor = op_desc->GetOutputDescPtr(index);
  bool dst_reuse_input = false;
  (void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input);
  if (dst_reuse_input) {
    return true;
  }

  bool is_ref = false;
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref);
  if (is_ref) {
    string output_name = op_desc->GetOutputNameByIndex(index);
    for (const auto &input_name : op_desc->GetAllInputNames()) {
      if (output_name == input_name) {
        return true;;
      }
    }
  }
  return false;
 }

 // Scans the outputs of n in index order. Each output that references an input is appended to
 // zero_memory_list_ and sets isOutputHasRef; the first output that does not reference an input
 // clears isAllOutputRef and stops the scan. Neither flag is touched otherwise.
 void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef,
                                              const NodePtr &n) {
  const auto op_desc = n->GetOpDesc();
  uint32_t out_idx = 0;
  while (out_idx < static_cast<uint32_t>(op_desc->GetOutputsSize())) {
    if (!IsOutputIndexRef(op_desc, out_idx)) {
      isAllOutputRef = false;
      return;
    }
    zero_memory_list_.emplace_back(n, kOutput, out_idx);
    isOutputHasRef = true;
    ++out_idx;
  }
 }


 Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges,
                                               const bool is_op_reuse_mem) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null.");
  auto node_op_desc = n->GetOpDesc();
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null.");

  // continuous output support ref only when all output ref input
  bool isAllOutputRef = true;
  bool isOutputHasRef = false;

  ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n);

  if (isAllOutputRef) {
    GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str());
    return SUCCESS;
  }

  if (!isAllOutputRef && isOutputHasRef) {
    GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s",
           n->GetName().c_str());
    return INTERNAL_ERROR;
  }

  MemoryBlock *block = nullptr;
  int64_t total_size = 0;
  int64_t memory_type = RT_MEMORY_HBM;
  for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) {
    auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
    if (output_op_desc == nullptr) {
      return nullptr;
      GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return INTERNAL_ERROR;
    }

    if (CheckIsZeroMemNodeType(n->GetType())) {
@@ -1152,8 +1157,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec

    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
      return nullptr;
      GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return INTERNAL_ERROR;
    }
    size_t align_size = static_cast<size_t>(size);
    AlignMemOffset(align_size);
@@ -1176,7 +1181,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
  }

  if (total_size == 0) {
    return nullptr;
    return SUCCESS;
  }

  auto block_size = GetBlockSize(total_size, ranges);
@@ -1190,8 +1195,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
    // hccl task need align header and tail
    block->first_continuous_block_ = true;
    block->last_continuous_block_ = true;
  } else {
    GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str());
    return INTERNAL_ERROR;
  }
  return block;
  return SUCCESS;
 }

 MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges,
@@ -1203,9 +1211,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
  NodeIndexIO node_index_io(n, index, kOut);
  int64_t size = 0;
  auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
  if (output_op_desc != nullptr) {
    GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
  }
  GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr);
  GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
  size_t no_align_size = 0;
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
                                 return nullptr, "Get no align size failed");
@@ -1213,24 +1220,16 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
  std::string symbol;
  if (IsSymbolExist(node_index_io, symbol)) {
    block = symbol_blocks_[symbol];
    GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
        return nullptr);
    // reduce old size
    size_t align_size = block->Size();
    AlignMemOffset(align_size);
    theory_memory_size_ -= align_size;

    auto block_size = GetBlockSize(size, ranges);
    block->SetSize(block_size);
    block->SetLifeTimeEnd(life_time_);
    block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size);
    block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size);
    block->ref_count_++;

    // add new size
    align_size = block_size;
    AlignMemOffset(align_size);
    theory_memory_size_ += align_size;
  } else {
    // if ref input is variable, can not find symbol, must judge alone
    if (IsOutputIndexRef(node_op_desc, index)) {
      zero_memory_list_.emplace_back(n, kOutput, index, false);
      GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index);
      return nullptr;
    }

    int64_t max_size = size;
    int64_t memory_type = RT_MEMORY_HBM;
    auto iter1 = anchor_to_symbol_.find(node_index_io.ToString());
@@ -1282,6 +1281,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
                      GELOGI("Get dst_reuse_input_index failed"));
      if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
        block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size());
        out_count_reuse_input += 1;
        reuse_input = true;
      }
@@ -1322,7 +1322,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i
      if (static_cast<uint32_t>(index) == output_index) {
        if (node->GetOwnerComputeGraph() != nullptr) {
          string graph_name = node->GetOwnerComputeGraph()->GetName();
          GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(),
          GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(),
                 op_desc->GetName().c_str(), index, op_desc->GetStreamId());
        }
        return true;
@@ -1360,6 +1360,7 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock
    if (to_release->same_stream_) {
      to_release->SetLifeTimeEnd(life_time_);
      reusable_memory.emplace_back(to_release);
      AddReusableBlockCount(*to_release, reusable_block_counts_);
    }
  }
 }
@@ -1459,7 +1460,6 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
  }

  is_op_reuse_mem_ = true;
  continuous_life_begin_ = 0;
  if (op_reuse_env_valid_ == true) {
    vector<string>::iterator it_name =
      std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName());
@@ -1477,8 +1477,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
                  for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end();
                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); });
  if (IsContinuousOutput(node)) {
    (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
    return SUCCESS;
    return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
  }
  for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
    int64_t size = 0;
@@ -1486,6 +1485,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
    if (output_op_desc != nullptr) {
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
    }

    // fusion: other type's size not means malloc HBM memory
    bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1;
    if (l1_flag) {
@@ -1493,6 +1493,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
             op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
      size = 0;
    }

    int32_t calc_type = 0;
    bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
    GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;);

    std::string peer_name;
    uint32_t peer_input_index = 0;
    bool out_node_set_continuous_input = false;
@@ -1511,7 +1516,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
      continue;
    }
    // atomic can't be reused
    bool need_change = is_op_reuse_mem_ && is_atomic;
    bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic;
    if (need_change) {
      is_op_reuse_mem_ = false;
    }
@@ -1904,12 +1909,11 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
    }
    op_desc->SetWorkspace(workspace_list);
  }
  GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] "
         "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]",
         graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(),
         node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_,
         block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level,
         block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input,
  GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] "
         "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(),
         op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),
         block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_,
         block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input,
         block->batch_label_.c_str());
 }

@@ -1973,9 +1977,8 @@ Status BlockMemAssigner::Assign() {

 bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
  return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
         (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) ||
         (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
         (node_type == HVDCALLBACKBROADCAST);
         (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) ||
         (node_type == ASSIGN) || (node_type == HVDWAIT);
 }

 bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {
--- a/ge/graph/build/memory/block_mem_assigner.h
+++ b/ge/graph/build/memory/block_mem_assigner.h
@@ -39,15 +39,14 @@ using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>;
 enum OpMemoryType { kOutput, kWorkspace };

 struct NodeTypeIndex {
  NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0)
      : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {}
  NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false)
      : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {}

  ge::NodePtr node = nullptr;
  OpMemoryType mem_type = kOutput;
  uint32_t index = 0;
  bool ref_input = false;
  size_t life_time_begin = 0;
  size_t life_time_end = kMaxLifeTime;
  bool ref_input = false;
  const string GetMemType() const {
    if (mem_type == kOutput) {
      return "output";
@@ -56,34 +55,6 @@ struct NodeTypeIndex {
    }
    return "unknown";
  }

  // Life-time begin of this entry: life_time_begin when it is non-zero and smaller than the
  // node's op id, otherwise the node's op id. Returns 0 when the node or its op desc is missing.
  size_t GetLifeBegin() const {
    if (node == nullptr) {
      return 0;
    }
    const auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
      return 0;
    }

    const size_t node_id = static_cast<size_t>(op_desc->GetId());
    const bool begins_earlier = (life_time_begin > 0) && (life_time_begin < node_id);
    return begins_earlier ? life_time_begin : node_id;
  }

  // Printable life-time begin: "<begin>-<op id>" when GetLifeBegin() differs from the node's op id,
  // otherwise just "<op id>". Empty string when the node or its op desc is missing.
  std::string GetLifeBeginDesc() const {
    if (node == nullptr) {
      return "";
    }
    const auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
      return "";
    }

    const size_t begin = GetLifeBegin();
    const std::string id_text = std::to_string(op_desc->GetId());
    if (begin == static_cast<size_t>(op_desc->GetId())) {
      return id_text;
    }
    return std::to_string(begin) + "-" + id_text;
  }
 };

 class MemoryBlock {
@@ -115,13 +86,16 @@ class MemoryBlock {
    symbol_list_.clear();
  }

  size_t Size() const { return block_size_; }

  void SetSize(size_t size) {
    if (size > block_size_) {
      block_size_ = size;
  void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size,
            int64_t stream_id) {
    real_size_list_.emplace_back(real_size);
    no_align_size_list_.emplace_back(no_align_size);
    node_type_index_list_.emplace_back(node, type, out_index, false);
    if (stream_id != stream_id_) {
        same_stream_ = false;
    }
  }
  size_t Size() const { return block_size_; }

  size_t AlignSize() const;

@@ -169,7 +143,7 @@ class MemoryBlock {

  size_t GetLifeBegin();

  size_t GetLifeEnd() const;
  size_t GetLifeEnd();

  void AddDependLifeBegin(DependStreamLife &node_depend_stream_life);

@@ -432,7 +406,6 @@ class BlockMemAssigner : public MemAssigner {
  bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name,
                                   uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag);

  bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index);
  ///
  /// @ingroup GE
  /// @|+++++++++block1++++++++|                               |+++++++++block1++++++++|
@@ -448,10 +421,16 @@ class BlockMemAssigner : public MemAssigner {

  bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type);

  MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);
  void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n);

  Status ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);

  void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node);

  std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;

  std::map<std::string, uint64_t> reusable_block_counts_;

  std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_;

  std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_;
@@ -481,7 +460,6 @@ class BlockMemAssigner : public MemAssigner {

  std::string max_batch_label_;

  size_t continuous_life_begin_ = 0;
  ///
  /// @          [stream1][nodeid]
  /// @[nodeid]  [stream2][nodeid]
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
--- a/ge/graph/build/memory/graph_mem_assigner.h
+++ b/ge/graph/build/memory/graph_mem_assigner.h
@@ -119,15 +119,31 @@ class GraphMemoryAssigner {
  ///
  ge::Status ReAssignContinuousMemory(bool is_loop_graph);

  ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory();

  ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory();

  ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse);

  ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse);

  ge::Status ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map, int32_t mem_reuse_model);

  ge::Status GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                              int32_t mem_reuse_model, string &max_batch_label);

  ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index,
                                               int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size);

  ge::Status ReAssignAtomicMemory(bool is_loop_graph);

  ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
                                              map<string, vector<NodePtr>> &connecting_output_atomic_nodes);

  ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                         int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type);
                                         int64_t &continuous_mem_size, int64_t memory_type);

  ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type);
  ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node);

  ///
  /// @brief check the input of node whether support atomic attr
@@ -153,10 +169,10 @@ class GraphMemoryAssigner {
  ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes);

  ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                      const std::vector<int64_t> &mem_offset_end, int64_t memory_type);
                                      const std::vector<int64_t> &mem_offset_end);

  ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start,
                                const std::vector<int64_t> &atomic_mem_size, int64_t memory_type);
                                const std::vector<int64_t> &atomic_mem_size);

  ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node);

--- a/ge/graph/build/memory/var_mem_assign_util.cc
+++ b/ge/graph/build/memory/var_mem_assign_util.cc
@@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr
                    return FAILED);
    ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0);
    GE_CHECK_NOTNULL(tensor_desc);
    rtMemType_t memory_type = RT_MEMORY_HBM;
    uint32_t mem_type = 0;
    if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) {
      memory_type = RT_MEMORY_RDMA_HBM;
    }
    if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) {
      GE_CHK_STATUS_RET(
          VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM));
          VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type));
      GE_IF_BOOL_EXEC(n->GetType() == VARIABLE,
                      GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID())));
      GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID())
@@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr
    }

    uint8_t *dev_ptr = nullptr;
    rtMemType_t memory_type = RT_MEMORY_HBM;
    GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID())
                          ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type));
    vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset();
--- a/ge/graph/build/stream_allocator.cc
+++ b/ge/graph/build/stream_allocator.cc
@@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const {
  return false;
 }

 // Iterator loop:
 //   StreamSwitch  ->  StreamActive
 // FpBp loop:
 //   StreamSwitch  ->  AssignAdd  ->  StreamActive
 //
 // Looks for the StreamSwitch node that precedes a loop-active node.
 // The search follows control-input edges only and goes at most two hops back,
 // which covers both chains listed above: a direct control predecessor, or a
 // control predecessor of a control predecessor.
 //
 // @param active_node  loop-active node to start from; it is dereferenced
 //                     without a null check, so callers must pass a valid node.
 // @return the first node of type STREAMSWITCH found in that search order,
 //         or nullptr when none exists within two hops.
 NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) {
  // Bind by const reference: NodePtr is a smart pointer and copying it on every
  // iteration costs a reference-count update for nothing.
  for (const auto &pre_node : active_node->GetInControlNodes()) {
    if (pre_node->GetType() == STREAMSWITCH) {
      return pre_node;
    }
    // One hop further back, for the "StreamSwitch -> AssignAdd -> StreamActive" shape.
    for (const auto &pre_pre_node : pre_node->GetInControlNodes()) {
      if (pre_pre_node->GetType() == STREAMSWITCH) {
        return pre_pre_node;
      }
    }
  }
  return nullptr;
 }

 Status StreamAllocator::SetActiveStreamsForLoop() {
  vector<uint32_t> loop_active_streams;
  for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) {
@@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
    bool is_loop_active = false;
    if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) {
      vector<string> activated_label_list;

      NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node);
      if (pre_switch_node == nullptr) {
        GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str());
        return FAILED;
      }

      if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) ||
          activated_label_list.empty()) {
        GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams),
@@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
        // it may cause some stream to be activated by the iterator's next step while this stream is still alive.
        // If that happens, the active message will be lost, causing the process to block in the next iteration.
        // In order to avoid this abnormal happen,
        // add event between each last node and iterator active node in target active stream
        // add event between each last node and iterator switch node
        GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size());
        for (auto iter : stream_id_to_last_node) {
          if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) {
@@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
            continue;
          }
          AddSendEventId(iter.second, event_num_);
          AddRecvEventId(node, event_num_);
          AddRecvEventId(pre_switch_node, event_num_);
          event_num_++;
        }

--- a/ge/graph/build/task_generator.cc
+++ b/ge/graph/build/task_generator.cc
@@ -466,11 +466,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
        task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr));
      }

      GELOGI(
          "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]"
          " task finished, generate %u task(s).",
          op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id,
          task_list_size_after - task_list_size_before);
      GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]"
             " task finished, generate %zu task(s).",
             op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id,
             task_list_size_after - task_list_size_before);

      // record nodes which have call generate task successfully
      fusion_nodes_seen.insert(fusion_node.get());
@@ -527,13 +526,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {
    return GE_GRAPH_GRAPH_NODE_NULL;
  }

  int64_t node_index = 0;
  for (auto &node : all_nodes) {
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    op_desc->SetId(node_index++);
  }

  map<int64_t, vector<OpDescPtr>> all_stream_ops;
  for (auto &node : all_nodes) {
    OpDescPtr op_desc = node->GetOpDesc();
--- a/ge/graph/execute/graph_execute.cc
+++ b/ge/graph/execute/graph_execute.cc
@@ -21,7 +21,7 @@

 #include "common/ge_inner_error_codes.h"
 #include "common/model_parser/base.h"
 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/model_manager/model_manager.h"
 #include "omm/csa_interact.h"
 #include "runtime/dev.h"
 #include "runtime/mem.h"
--- a/ge/graph/load/graph_loader.cc
+++ b/ge/graph/load/graph_loader.cc
@@ -22,8 +22,8 @@
 #include "common/helper/model_helper.h"
 #include "common/util.h"
 #include "graph/ge_context.h"
 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/load/model_manager/model_manager.h"
 #include "graph/manager/graph_var_manager.h"
 #include "omm/csa_interact.h"
 #include "runtime/dev.h"
--- a/ge/graph/load/new_model_manager/aipp_utils.cc
+++ b/ge/graph/load/new_model_manager/aipp_utils.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/aipp_utils.h"
 #include "graph/load/model_manager/aipp_utils.h"

 #include <string>

--- a/ge/graph/load/new_model_manager/aipp_utils.h
+++ b/ge/graph/load/new_model_manager/aipp_utils.h
--- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc
+++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/cpu_queue_schedule.h"
 #include "graph/load/model_manager/cpu_queue_schedule.h"
 #include "common/debug/ge_log.h"
 #include "common/debug/log.h"

--- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h
+++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.h
@@ -20,8 +20,8 @@
 #include <vector>

 #include "common/ge_inner_error_codes.h"
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/new_model_manager/zero_copy_offset.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/zero_copy_offset.h"
 #include "runtime/kernel.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/data_dumper.h"
 #include "graph/load/model_manager/data_dumper.h"

 #include <cstdlib>
 #include <ctime>
@@ -29,7 +29,7 @@
 #include "framework/common/util.h"
 #include "graph/anchor.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/model_manager/model_utils.h"
 #include "graph/manager/util/debug.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
--- a/ge/graph/load/new_model_manager/data_dumper.h
+++ b/ge/graph/load/new_model_manager/data_dumper.h
--- a/ge/graph/load/new_model_manager/data_inputer.cc
+++ b/ge/graph/load/new_model_manager/data_inputer.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/data_inputer.h"
 #include "graph/load/model_manager/data_inputer.h"

 #include <securec.h>

--- a/ge/graph/load/new_model_manager/data_inputer.h
+++ b/ge/graph/load/new_model_manager/data_inputer.h
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 #include <graph/utils/node_utils.h>
 #include <algorithm>
@@ -36,9 +36,9 @@
 #include "graph/debug/ge_attr_define.h"
 #include "graph/ge_context.h"
 #include "graph/graph.h"
 #include "graph/load/new_model_manager/cpu_queue_schedule.h"
 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/new_model_manager/tbe_handle_store.h"
 #include "graph/load/model_manager/cpu_queue_schedule.h"
 #include "graph/load/model_manager/model_manager.h"
 #include "graph/load/model_manager/tbe_handle_store.h"
 #include "graph/manager/graph_mem_allocator.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/manager/trans_var_data_utils.h"
@@ -446,23 +446,20 @@ void DavinciModel::InitRuntimeParams() {
      runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size);
 }

 void DavinciModel::CheckHasHcomOp() {
  Graph graph = ge_model_->GetGraph();
  auto compute_graph = GraphUtils::GetComputeGraph(graph);
  if (compute_graph == nullptr) {
    return;
  }
 void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) {
  const set<string> hcom_opp_types({
      HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER,
      HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE
  });

  for (const auto &node : compute_graph->GetAllNodes()) {
    OpDescPtr op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue);
    GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) ||
                     (op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) ||
                     (op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) ||
                     (op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) ||
                     (op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) ||
                     (op_desc->GetType() == HCOMREDUCE)),
                    uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId());
                    (void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue);
    if (hcom_opp_types.count(op_desc->GetType()) > 0) {
      uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId());
      hcom_streams_.emplace(stream_id);
      GELOGD("hcom stream: %u.", stream_id);
    }
  }
 }

@@ -641,7 +638,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
  name_ = ge_model_->GetName();
  (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_);
  GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_);
  CheckHasHcomOp();
  CheckHasHcomOp(compute_graph);

  vector<int64_t> huge_stream_list;
  (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list);
@@ -722,7 +719,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
  /// the aicpu operator needs to destroy history record, and update operator memory address.
  /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel().
  need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer();
  (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_);

  string fp_ceiling_mode;
  if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
@@ -1028,7 +1024,7 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
                                        const vector<OpDescPtr> &output_op_list) {
  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size());
  for (auto &item : data_by_index) {
    auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
    const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
    GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
    input_addrs_list_.emplace_back(output_addrs);

@@ -1036,14 +1032,18 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
    GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed");
    GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed");
    GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed");
    GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed");
    if (item.second->GetType() == AIPP_DATA_TYPE) {
      GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str());
      is_dynamic_aipp_ = true;
    }
  }

  vector<string> out_node_name;
  (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name);
  GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size());
  for (const auto &op_desc : output_op_list) {
    auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc);
    const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc);
    GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size());
    output_addrs_list_.emplace_back(input_addrs);

@@ -1061,10 +1061,11 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
    if (InitOutputTensorInfo(op_desc) != SUCCESS) {
      return INTERNAL_ERROR;
    }

    GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed");
  }

  GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed");
  return InitOutputDescInfo(output_op_list);
  return SUCCESS;
 }

 bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
@@ -1809,7 +1810,7 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp
 ///
 Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
  if (!op_desc->HasAttr(ATTR_NAME_AIPP)) {
    GELOGW("There is not AIPP related with index %u.", index);
    GELOGW("there is not AIPP related with index %u.", index);
    return SUCCESS;
  }

@@ -1818,7 +1819,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
                         "Data node do not contain param aipp!");
  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed");
  GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u",
  GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank());

  AippConfigInfo aipp_info;
@@ -1875,7 +1876,7 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons
    (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
    for (const auto item : data_list) {
      if (item.second->GetName() == releated_name) {
        GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index);
        GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index);
        aipp_index = item.first;
      }
    }
@@ -1980,27 +1981,24 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format,
  }
 }

 Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) {
  for (const auto &item : data_by_index) {
    const auto op_desc = item.second;
    GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));
 Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) {
  GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));

    InputOutputDescInfo input;
    ShapeDescription dims_info;
    Format format = op_desc->GetInputDescPtr(0)->GetFormat();
    CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info);
  InputOutputDescInfo input;
  ShapeDescription dims_info;
  Format format = op_desc->GetInputDescPtr(0)->GetFormat();
  CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info);

    input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
    input.name = op_desc->GetName();
    int64_t input_size = 0;
    GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");
    input.size = input_size;
    input_formats_.push_back(format);
    input_descs_.push_back(input);
  input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
  input.name = op_desc->GetName();
  int64_t input_size = 0;
  GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");
  input.size = input_size;
  input_formats_.push_back(format);
  input_descs_.push_back(input);

    input.shape_info = dims_info;
    input_descs_dims_.push_back(input);
  }
  input.shape_info = dims_info;
  input_descs_dims_.push_back(input);
  return SUCCESS;
 }

@@ -2066,35 +2064,31 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO
  output.data_type = op_desc->GetInputDescPtr(index)->GetDataType();
 }

 Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) {
  GELOGD("Output node size: %zu", output_op_list.size());
  for (const auto &op_desc : output_op_list) {
    uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
    for (uint32_t index = 0; index < out_size; index++) {
      string output_name;
      InputOutputDescInfo output;
      uint32_t format_result;
      CreateOutput(index, op_desc, output, format_result);

      std::vector<std::string> src_name = op_desc->GetSrcName();
      std::vector<int64_t> src_index = op_desc->GetSrcIndex();
      GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR,
                             "construct output_name failed.");
      // forward compatibility, if old om has no out_node_name, need to return output following the original way
      if (out_size == out_node_name_.size()) {
        // newest plan: the index is added to the name during model generation.
        bool contains_colon = out_node_name_[index].find(":") != std::string::npos;
        output_name =
            contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]);
      } else {
        output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" +
                      std::to_string(src_index[index]);
      }
      output.name = output_name;
      output_descs_.push_back(output);
      output_formats_.push_back(format_result);
 Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name) {
  uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
  for (uint32_t i = 0; i < out_size; ++i) {
    string output_name;
    InputOutputDescInfo output;
    uint32_t format_result;
    CreateOutput(i, op_desc, output, format_result);

    std::vector<std::string> src_name = op_desc->GetSrcName();
    std::vector<int64_t> src_index = op_desc->GetSrcIndex();
    GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR,
                           "construct output_name failed.");
    // forward compatibility, if old om has no out_node_name, need to return output following the original way
    if (out_size == out_node_name.size()) {
      // newest plan: the index is added to the name during model generation.
      bool contains_colon = out_node_name[i].find(":") != std::string::npos;
      output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]);
    } else {
      output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]);
    }
    output.name = output_name;
    output_descs_.push_back(output);
    output_formats_.push_back(format_result);
  }

  return SUCCESS;
 }

@@ -2147,11 +2141,6 @@ Status DavinciModel::SyncVarData() {
                           RT_MEMCPY_HOST_TO_DEVICE));
  }

  for (const auto &item : broadcast_variable_) {
    ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_);
    GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
                     item.first.c_str());
  }
  return ret;
 }

@@ -2481,7 +2470,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r
    uint64_t buffer_length = buffer.length;
    void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data));

    GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]",
    GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]",
           runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length);
    GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind));
    idx++;
@@ -2635,12 +2624,6 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b
 ///
 Status DavinciModel::ReturnNoOutput(uint32_t data_id) {
  GELOGI("ReturnNoOutput model id:%u", model_id_);
  for (const auto item : broadcast_variable_) {
    Status ret = VarManager::Instance(session_id_)
                     ->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_);
    GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
                     item.first.c_str());
  }

  GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!");
  std::vector<ge::OutputTensorInfo> outputs;
@@ -3064,6 +3047,64 @@ Status DavinciModel::MallocKnownArgs() {
  return SUCCESS;
 }

 ///
 /// @brief Save the profiling description of one distributed task.
 ///        Appends a TaskDescInfo for the task to task_desc_info_ and records its
 ///        (task id, stream id) pair in profiler_report_op_info_ under the op name.
 ///        When super-kernel reporting is on and the task carries a super-kernel
 ///        task id, a second "super_kernel_<task_index>" entry is recorded as well.
 /// @param [in] op          op the task belongs to; supplies the op name
 /// @param [in] task        distributed task; supplies task id, stream id and super-kernel task id
 /// @param [in] task_def    task definition; supplies block dim, task type and kernel type
 /// @param [in] task_index  index of the task, used only to name the super-kernel entry
 ///
 void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
                                              const domi::TaskDef &task_def, size_t task_index) {
  // Super-kernel entries are reported when the L1 fusion option is enabled or
  // when the SKT_ENABLE environment variable parses to a non-zero number.
  bool flag = GetL1FusionEnableOption();
  char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
  INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
  int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
  if (env_flag != 0) {
    flag = true;
  }

  TaskDescInfo task_desc_info;
  // Prefer the om name when one is set, otherwise fall back to the model name.
  if (!om_name_.empty()) {
    task_desc_info.model_name = om_name_;
  } else {
    task_desc_info.model_name = name_;
  }
  task_desc_info.op_name = op->GetName();
  task_desc_info.block_dim = task_def.kernel().block_dim();
  task_desc_info.task_id = task->GetTaskID();
  task_desc_info.stream_id = task->GetStreamId();
  task_desc_info.shape_type = "static";
  task_desc_info.cur_iter_num = 0;
  // task type: stays kTaskTypeInvalid unless the task is a recognised aicore/aicpu kernel
  task_desc_info.task_type = kTaskTypeInvalid;
  auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
  if (model_task_type == RT_MODEL_TASK_KERNEL) {
    const domi::KernelDef &kernel_def = task_def.kernel();
    const auto &context = kernel_def.context();
    auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
    if (kernel_type == ccKernelType::TE) {
      task_desc_info.task_type = kTaskTypeAicore;
    } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
      task_desc_info.task_type = kTaskTypeAicpu;
    } else {
      GELOGD("Other kernel type: %u", context.kernel_type());
    }
  } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) {
    task_desc_info.task_type = kTaskTypeAicpu;
  } else {
    GELOGD("Skip task type: %d", static_cast<int>(model_task_type));
  }
  profiler_report_op_info_[task_desc_info.op_name] =
    std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
  task_desc_info_.emplace_back(task_desc_info);

  // 0xFFFFFFFF is presumably the "no super-kernel task" marker returned by
  // GetSktTaskID() -- TODO confirm against TaskInfo.
  if (flag && (task->GetSktTaskID() != 0xFFFFFFFF)) {
    // Separate, value-initialized record: it no longer shadows task_desc_info,
    // and the fields not assigned below (stream_id in particular, which is read
    // when building the pair) have a defined value instead of being read unset.
    // NOTE(review): only op_name and task_id are filled in for the super-kernel
    // entry, as before; confirm whether stream_id should be task->GetStreamId().
    TaskDescInfo skt_desc_info{};
    skt_desc_info.op_name = "super_kernel_" + to_string(task_index);
    skt_desc_info.task_id = task->GetSktTaskID();
    profiler_report_op_info_[skt_desc_info.op_name] =
      std::pair<uint32_t, uint32_t>(skt_desc_info.task_id, skt_desc_info.stream_id);
    task_desc_info_.emplace_back(skt_desc_info);
  }
 }

 Status DavinciModel::DistributeTask() {
  GELOGI("do Distribute.");
  for (auto &task : cpu_task_list_) {
@@ -3075,18 +3116,11 @@ Status DavinciModel::DistributeTask() {
  }

  task_desc_info_.clear();
  bool flag = GetL1FusionEnableOption();
  char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
  INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
  int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
  if (env_flag != 0) {
    flag = true;
  }

  const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
  for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
    auto &task_def = model_task_def->task(task_index);
    auto &task = task_list_.at(task_index);
    GE_CHECK_NOTNULL(task);
    GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
    // for data dump
    auto op_index = std::max(task_def.kernel().context().op_index(),
@@ -3106,33 +3140,9 @@ Status DavinciModel::DistributeTask() {
    GE_IF_BOOL_EXEC(no_need_profiling, continue);

    SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
    // Load task info for profiling
    TaskDescInfo task_desc_info;
    if (!om_name_.empty()) {
      task_desc_info.model_name = om_name_;
    } else {
      task_desc_info.model_name = name_;
    }
    task_desc_info.op_name = op->GetName();
    task_desc_info.block_dim = task_def.kernel().block_dim();
    task_desc_info.task_id = task->GetTaskID();
    task_desc_info.stream_id = task->GetStreamId();
    task_desc_info.shape_type = "static";
    task_desc_info.cur_iter_num = 0;
    profiler_report_op_info_[task_desc_info.op_name] =
      std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
    task_desc_info_.emplace_back(task_desc_info);
    if (flag) {
      if (task->GetSktTaskID() != 0xFFFFFFFF) {
        TaskDescInfo task_desc_info;
        string op_name = "super_kernel_" + to_string(task_index);
        task_desc_info.op_name = op_name;
        task_desc_info.task_id = task->GetSktTaskID();
        profiler_report_op_info_[task_desc_info.op_name] =
          std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
        task_desc_info_.emplace_back(task_desc_info);
      }
    }

    // save task info for profiling
    SaveProfilingTaskDescInfo(op, task, task_def, task_index);
  }
  // launch dump kernel to aicpu
  GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed.");
@@ -3993,14 +4003,18 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des
    } else {
      compute_graph_info.model_name = name_;
    }

    std::vector<Format> format =  { FORMAT_NULL };
    std::vector<std::vector<int64_t>> shape = { {0} };
    std::vector<DataType> data_type = { DT_UNDEFINED };
    compute_graph_info.op_name = op_desc.op_name;
    compute_graph_info.op_type = op_desc.op_type;
    compute_graph_info.input_format = op_desc.input_format;
    compute_graph_info.input_shape = op_desc.input_shape;
    compute_graph_info.input_data_type = op_desc.input_data_type;
    compute_graph_info.output_format = op_desc.output_format;
    compute_graph_info.output_shape = op_desc.output_shape;
    compute_graph_info.output_data_type = op_desc.output_data_type;
    compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format;
    compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape;
    compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type;
    compute_graph_info.output_format = op_desc.output_format.empty() ? format :  op_desc.output_format;
    compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape;
    compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    auto iter = profiler_report_op_info_.find(op_desc.op_name);
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -32,12 +32,12 @@
 #include "common/types.h"
 #include "framework/common/util.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/aipp_utils.h"
 #include "graph/load/new_model_manager/data_dumper.h"
 #include "graph/load/new_model_manager/data_inputer.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/new_model_manager/zero_copy_offset.h"
 #include "graph/load/new_model_manager/zero_copy_task.h"
 #include "graph/load/model_manager/aipp_utils.h"
 #include "graph/load/model_manager/data_dumper.h"
 #include "graph/load/model_manager/data_inputer.h"
 #include "graph/load/model_manager/model_utils.h"
 #include "graph/load/model_manager/zero_copy_offset.h"
 #include "graph/load/model_manager/zero_copy_task.h"
 #include "graph/model.h"
 #include "graph/node.h"
 #include "graph/op_desc.h"
@@ -623,6 +623,9 @@ class DavinciModel {

  Status DistributeTask();

  void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
                                 const domi::TaskDef &task_def, size_t task_index);

  uint8_t *MallocFeatureMapMem(size_t data_size);

  uint8_t *MallocWeightsMem(size_t weights_size);
@@ -824,7 +827,7 @@ class DavinciModel {

  void OpDebugUnRegister();

  void CheckHasHcomOp();
  void CheckHasHcomOp(const ComputeGraphPtr &graph);

  Status DoTaskSink();

@@ -847,8 +850,8 @@ class DavinciModel {
  Status InitOutputTensorInfo(const OpDescPtr &op_desc);
  Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs);

  Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index);
  Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list);
  Status InitInputDescInfo(const OpDescPtr &op_desc);
  Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name);

  Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc);
  Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc);
@@ -883,7 +886,6 @@ class DavinciModel {
  GeModelPtr ge_model_;  // release after DavinciModel::Init

  bool need_destroy_aicpu_kernel_{false};
  vector<string> out_node_name_;

  map<uint32_t, OpDescPtr> op_list_;  // release after DavinciModel::Init

--- a/ge/graph/load/new_model_manager/davinci_model_parser.cc
+++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "graph/load/model_manager/davinci_model_parser.h"

 namespace ge {
 DavinciModelParser::DavinciModelParser() {}
--- a/ge/graph/load/new_model_manager/davinci_model_parser.h
+++ b/ge/graph/load/new_model_manager/davinci_model_parser.h
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/model_manager/model_manager.h"

 #include <string>

@@ -28,8 +28,8 @@
 #include "framework/common/util.h"
 #include "graph/common/ge_call_wrapper.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model_parser.h"
 #include "model/ge_root_model.h"
 #include "graph/common/local_context.h"
 #include "graph/utils/attr_utils.h"
@@ -527,6 +527,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
    DataBuffer data;
    data.data = inputs[i].data;
    data.length = inputs[i].length;
    input_data.shapes.emplace_back(inputs[i].dims);
    input_data.blobs.push_back(data);
  }
  if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) {
@@ -1703,7 +1704,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
    for (uint32_t i = 0; i < res_op_nums; i++) {
      ReturnCode ret_code = res_ret_code_list.at(i);
      SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType,
      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%lu, ret_code:%d", aicpu_info.opType,
             aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
      std::vector<char> op_name;
      op_name.clear();
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
--- a/ge/graph/load/new_model_manager/model_utils.cc
+++ b/ge/graph/load/new_model_manager/model_utils.cc
@@ -14,20 +14,13 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/model_utils.h"

 #include "graph/load/model_manager/model_utils.h"
 #include <string>

 #include "common/debug/log.h"
 #include "common/op/ge_op_utils.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "runtime/base.h"
 #include "runtime/kernel.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/types.h"

 #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET)                                                                 \
  do {                                                                                                       \
@@ -342,13 +335,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
    int64_t input_offset = v_input_offset[non_const_index];
    non_const_index++;
    GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset),
                    VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base);
                    uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base;
                    uint8_t *variable_addr = nullptr;
                    GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {});
                    v_input_data_addr.push_back(variable_addr);
                    GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]",
                           model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
                    continue);
    

    int64_t mem_type;
    bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
    // feature maps
@@ -382,6 +375,34 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co

 ///
 /// @ingroup ge
 /// @brief Get variable address. Resolves a variable offset to an address according to the memory type that
 ///        VarManager reports for that offset.
 /// @param [in] model_param: runtime parameters; session_id, var_size, var_base and logic_var_base are read.
 /// @param [in] op_desc: op descriptor, passed to VALIDATE_MEM_RANGE for the HBM range check.
 /// @param [in] offset: variable offset; for RT_MEMORY_RDMA_HBM this value is itself used as the address.
 /// @param [out] var_addr: receives the resolved variable address.
 /// @return Status: SUCCESS once an address has been resolved; PARAM_INVALID for a negative RDMA address or
 ///         a memory type other than RT_MEMORY_RDMA_HBM / RT_MEMORY_HBM.
 ///
 Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset,
                              uint8_t *&var_addr) {
  // Ask the session's VarManager which memory type backs this offset.
  rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset);
  switch (mem_type) {
    case RT_MEMORY_RDMA_HBM:
      // The offset is reinterpreted directly as the address, so only negative values are rejected here.
      if (offset < 0) {
        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset));
        return PARAM_INVALID;
      }
      var_addr = reinterpret_cast<uint8_t *>(offset);
      break;
    case RT_MEMORY_HBM:
      // Check the offset relative to logic_var_base against var_size, then rebase it onto var_base.
      // NOTE(review): the body of VALIDATE_MEM_RANGE is not visible here - confirm that its failure path
      // yields a non-SUCCESS Status in this Status-returning function.
      VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base);
      var_addr = model_param.var_base + offset - model_param.logic_var_base;
      break;
    default:
      GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type);
      return PARAM_INVALID;
  }
  // GE_CHECK_NOTNULL is defined elsewhere; presumably it returns an error when var_addr is null - confirm.
  GE_CHECK_NOTNULL(var_addr);
  return SUCCESS;
 }

 ///
 /// @ingroup ge
 /// @brief Get output data address.
 /// @return vector<void*>
 ///
@@ -404,19 +425,26 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
    return v_output_data_addr;
  }
  for (size_t i = 0; i < outputs_size; ++i) {
    GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
                    VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base);
                    uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base;
                    v_output_data_addr.push_back(variable_addr);
                    GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
                           model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
                    continue);
    const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
    if (tensor_desc == nullptr) {
      GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
      continue;
    }

    int32_t calc_type = 0;
    bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
    if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
      GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str());
      continue;
    }
    GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
                    uint8_t *variable_addr = nullptr;
                    GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {});
                    v_output_data_addr.push_back(variable_addr);
                    GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
                           model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
                    continue);

    int64_t mem_type;
    bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
    // feature maps
--- a/ge/graph/load/new_model_manager/model_utils.h
+++ b/ge/graph/load/new_model_manager/model_utils.h
@@ -21,7 +21,7 @@

 #include "common/ge_inner_error_codes.h"
 #include "common/types.h"
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 #include "graph/utils/tensor_adapter.h"

@@ -107,6 +107,15 @@ class ModelUtils {
  /// @return Status
  ///
  static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr);

 private:
  ///
  /// @ingroup ge
  /// @brief Get variable address.
  /// @param [in] model_param: runtime parameters of the model.
  /// @param [in] op_desc: descriptor of the op the variable belongs to.
  /// @param [in] offset: offset of the variable to resolve.
  /// @param [out] var_addr: receives the resolved variable address.
  /// @return Status
  ///
  static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset,
                           uint8_t *&var_addr);
 };
 }  // namespace ge

--- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
@@ -14,11 +14,11 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/end_graph_task_info.h"
 #include "graph/load/model_manager/task_info/end_graph_task_info.h"

 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace {
 const uint32_t kDumpFlag = 2;
--- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class EndGraphTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/event_record_task_info.h"
 #include "graph/load/model_manager/task_info/event_record_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class EventRecordTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/event_wait_task_info.h"
 #include "graph/load/model_manager/task_info/event_wait_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class EventWaitTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/fusion_start_task_info.h"
 #include "graph/load/model_manager/task_info/fusion_start_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class FusionStartTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h"
 #include "graph/load/model_manager/task_info/fusion_stop_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class FusionStopTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
@@ -14,14 +14,14 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/hccl_task_info.h"
 #include "graph/load/model_manager/task_info/hccl_task_info.h"

 #include <utility>

 #include "common/opskernel/ops_kernel_info_store.h"
 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_utils.h"

 namespace ge {
 std::mutex HcclTaskInfo::hccl_follow_stream_mutex_;
--- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h
@@ -23,7 +23,7 @@
 #include <vector>

 #include "common/opskernel/ge_task_info.h"
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/manager/util/hcom_util.h"
 namespace ge {
 class HcclTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h"
 #include "graph/load/model_manager/task_info/kernel_ex_task_info.h"

 #include <vector>

@@ -24,8 +24,8 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/fmk_error_codes.h"
 #include "graph/attr_value.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_manager.h"

 namespace ge {
 Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/kernel_task_info.h"
 #include "graph/load/model_manager/task_info/kernel_task_info.h"
 #include <map>
 #include <memory>
 #include <string>
@@ -25,9 +25,9 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/l2_cache_optimize.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/new_model_manager/model_manager.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_manager.h"
 #include "graph/load/model_manager/model_utils.h"
 #include "runtime/kernel.h"
 #include "super_kernel/super_kernel.h"
 #include "super_kernel/super_kernel_factory.h"
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
@@ -22,7 +22,7 @@
 #include <string>
 #include <vector>

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
 class KernelTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
@@ -14,9 +14,9 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h"
 #include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"

 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class LabelGotoExTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
@@ -14,9 +14,9 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/label_set_task_info.h"
 #include "graph/load/model_manager/task_info/label_set_task_info.h"

 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class LabelSetTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h"
 #include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"

 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 constexpr uint8_t kLabelSwitchIndexNum = 1;
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class LabelSwitchByIndexTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h"
 #include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace {
 const uint32_t kAlignBytes = 64;
--- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class MemcpyAddrAsyncTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h"
 #include "graph/load/model_manager/task_info/memcpy_async_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc
@@ -14,11 +14,11 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/model_exit_task_info.h"
 #include "graph/load/model_manager/task_info/model_exit_task_info.h"

 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class ModelExitTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc
@@ -14,10 +14,10 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h"
 #include "graph/load/model_manager/task_info/profiler_trace_task_info.h"

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
--- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class ProfilerTraceTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc
@@ -14,12 +14,12 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/stream_active_task_info.h"
 #include "graph/load/model_manager/task_info/stream_active_task_info.h"

 #include <vector>

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class StreamActiveTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
@@ -14,13 +14,13 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/stream_switch_task_info.h"
 #include "graph/load/model_manager/task_info/stream_switch_task_info.h"

 #include <vector>

 #include "framework/common/debug/ge_log.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_utils.h"
 #include "graph/debug/ge_attr_define.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
@@ -16,7 +16,7 @@

 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class StreamSwitchTaskInfo : public TaskInfo {
--- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
@@ -13,12 +13,12 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h"
 #include "graph/load/model_manager/task_info/stream_switchn_task_info.h"
 #include <vector>
 #include "framework/common/debug/ge_log.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_utils.h"

 namespace {
 const uint8_t kStreamSwitchnInputNum = 1;
--- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h
--- a/ge/graph/load/new_model_manager/task_info/task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/task_info.cc
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/task_info/task_info.h"
 #include "graph/load/model_manager/task_info/task_info.h"

 #include <vector>

--- a/ge/graph/load/new_model_manager/task_info/task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/task_info.h
@@ -22,8 +22,8 @@
 #include "cce/customize.h"
 #include "framework/common/taskdown_common.h"
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/load/new_model_manager/ts_mem_mall.h"
 #include "graph/load/new_model_manager/task_info/task_info_factory.h"
 #include "graph/load/model_manager/ts_mem_mall.h"
 #include "graph/load/model_manager/task_info/task_info_factory.h"
 #include "proto/task.pb.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/task_info/task_info_factory.h
+++ b/ge/graph/load/new_model_manager/task_info/task_info_factory.h
--- a/ge/graph/load/new_model_manager/tbe_handle_store.cc
+++ b/ge/graph/load/new_model_manager/tbe_handle_store.cc
--- a/ge/graph/load/new_model_manager/tbe_handle_store.h
+++ b/ge/graph/load/new_model_manager/tbe_handle_store.h
--- a/ge/graph/load/new_model_manager/ts_mem_mall.h
+++ b/ge/graph/load/new_model_manager/ts_mem_mall.h
--- a/ge/graph/load/new_model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc
@@ -14,12 +14,12 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/zero_copy_offset.h"
 #include "graph/load/model_manager/zero_copy_offset.h"

 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/new_model_manager/zero_copy_task.h"
 #include "graph/load/model_manager/model_utils.h"
 #include "graph/load/model_manager/zero_copy_task.h"

 namespace ge {
 namespace {
--- a/ge/graph/load/new_model_manager/zero_copy_offset.h
+++ b/ge/graph/load/new_model_manager/zero_copy_offset.h
@@ -25,7 +25,7 @@
 #include "external/ge/ge_api_error_codes.h"
 #include "framework/common/ge_types.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/zero_copy_task.h"
 #include "graph/load/model_manager/zero_copy_task.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "runtime/mem.h"
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_task.cc
@@ -14,11 +14,11 @@
 * limitations under the License.
 */

 #include "graph/load/new_model_manager/zero_copy_task.h"
 #include "graph/load/model_manager/zero_copy_task.h"

 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/load/model_manager/model_utils.h"
 #include "common/ge_compiler_options.h"

 namespace ge {
--- a/ge/graph/load/new_model_manager/zero_copy_task.h
+++ b/ge/graph/load/new_model_manager/zero_copy_task.h
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -92,6 +92,7 @@
 #include "graph/passes/unused_args_clean_pass.h"
 #include "graph/passes/global_step_insert_pass.h"
 #include "graph/passes/memcpy_addr_async_pass.h"
 #include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/build/label_allocator.h"
 #include "graph/utils/tensor_adapter.h"
 #include "inc/pass_manager.h"
@@ -729,9 +730,7 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
  CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId());
  GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph);
  GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph);
  GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts",
                       GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts,
                       compute_graph);
  GM_RUN_AND_DUMP_PERF("OptimizeBeforeBuildForRts", stages.optimizer.OptimizeGraphBeforeBuildForRts, compute_graph);

  Status ret = compute_graph->TopologicalSorting();
  if (ret != SUCCESS) {
@@ -2150,6 +2149,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
                                               new (std::nothrow) TransOpWithoutReshapeFusionPass))
  GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
                                               new (std::nothrow) TransOpBreadthFusionPass))
  GE_CHK_STATUS_RET(
      after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));

  GE_TIMESTAMP_START(after_merge_passes);
  auto ret = after_merge_passes.Run(compute_graph);
@@ -2776,7 +2777,7 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector<NodePtr>
    }

    GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor.at(index).dims);
    GELOGI("Shape dims of %d data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
    GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
  }
  return SUCCESS;
 }
@@ -3121,9 +3122,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp
    graph_name.append(std::to_string(graph_node->GetGraphId()));
    compute_graph->SetName(graph_name);
  }
  std::vector<SubGraphInfoPtr> sub_graph_list;
  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model,
                                                                      session_id);

  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "SubGraph build Failed.");
    return ret;