Compare commits

...

108 Commits

Author SHA1 Message Date
  i-robot d1e3232154 !2038 bugfix for oms taskdef random change 3 years ago
  gengchao4@huawei.com 9d94a5fef5 bugfix for om's taskdef random change 3 years ago
  mindspore-ci-bot 59361d83a9 !1470 LinkToPotentialPrecedenceNode c76 4 years ago
  lianghao 1993675125 LinkToPotentialPrecedenceNode 4 years ago
  mindspore-ci-bot a205af43fc !1324 Fix bug of single_op ageing. 4 years ago
  unknown 63e8d1e291 Fix bug of single_op ageing. 4 years ago
  mindspore-ci-bot 5d094c7f72 !1289 modify static depends 4 years ago
  wxl 11b77d9daf modify static depends 4 years ago
  mindspore-ci-bot a8c137e5eb !1058 kTaskNumPerHcclNode 4 years ago
  lianghao 3dd23c428e kTaskNumPerHcclNode 4 years ago
  mindspore-ci-bot d98d77371e !1045 move end_task after assignadd on iterator_loop case 4 years ago
  wangxiaotian22 2b0464ad81 move end_task before active stream when bpfp set by env 4 years ago
  mindspore-ci-bot 145d6a97d6 !1011 move end_task after assignadd on iterator_loop case 4 years ago
  wangxiaotian22 c72ba8ad66 parser update 4 years ago
  wangxiaotian22 d8db67d06f add metadef update 4 years ago
  wangxiaotian22 9712b297b7 move end_task after assignadd on iterator_loop case 4 years ago
  mindspore-ci-bot 885b3a6b7f !951 gensessionid add pid prefix 4 years ago
  wangxiaotian22 ee5c71962d gensessionid add pid prefix 4 years ago
  mindspore-ci-bot c14c6a87cb !947 change mult batch to switchn 4 years ago
  wjm 81cd9527aa fix error 4 years ago
  wjm d3515b1624 change mult batch to switchn 4 years ago
  mindspore-ci-bot cb019693b1 !945 Add cc_task task_info log. 4 years ago
  mindspore-ci-bot 6529491d90 !943 Add keep_dtype attribute 4 years ago
  unknown 4318596b9a Add cc_task task_info log. 4 years ago
  lwx897429 970745eedf Add keep_dtype attribute 4 years ago
  mindspore-ci-bot 89cccaeb3c !907 modify dump task proto in c76 4 years ago
  zhou_chao1993 7f341ab53a modify dump_task proto in c76 4 years ago
  mindspore-ci-bot ae91b07e6a !917 Migration subgraph Const Node 4 years ago
  mindspore-ci-bot 267ddd9801 !916 Bugfix: check precision loss when cast from int64 to bool 4 years ago
  wjm 5bc603e52d fix error 4 years ago
  zhaoxinxin a608eee4e2 modified: ge/graph/common/transop_util.cc 4 years ago
  mindspore-ci-bot 1218a757c5 !898 Add submodelId in dynamic shape 4 years ago
  taoxiangdong fadd5d1874 Add submodelId in dynamic shape 4 years ago
  mindspore-ci-bot 1cd83211d6 !891 fix l2 buffer error 4 years ago
  mindspore-ci-bot 3eeb8a9c97 !893 remove interface aclgrphInfershapeAndType 4 years ago
  mindspore-ci-bot 0126007d89 !888 add SwitchDeadBranchElimination & MergePass in graph prepare 4 years ago
  wxl a69806eee1 remove interface aclgrphInfershapeAndType 4 years ago
  wjm aac7897a44 fix l2 buffer error 4 years ago
  chenyemeng 2b90729519 add SwitchDeadBranchElimination & MergePass in graph prepare 4 years ago
  mindspore-ci-bot d5c6e8146b !872 Log macro error in windows 4 years ago
  taoxiangdong 850f6efb29 Log print macro error 4 years ago
  mindspore-ci-bot 65509ee0f0 !853 add whole graph optimize 4 years ago
  gengchao4@huawei.com 8ad6d4b463 add whole graph optimize 4 years ago
  mindspore-ci-bot 103aa22616 !839 fixed issue of repeated profile subscription 4 years ago
  mindspore-ci-bot 11b6f47be6 !811 modify p2p addr assigner bug in c76 4 years ago
  mindspore-ci-bot 501e184095 !828 Free mem before return 4 years ago
  taoxiangdong c8cc205f33 Free memory before return 4 years ago
  lwx897429 d30dd18e09 fixed issue of repeated profile subscription 4 years ago
  mindspore-ci-bot 63cc95c5e5 !837 Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  l00444296 b6aa9c0e4d Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  mindspore-ci-bot 032f9d1f07 !795 Parse training trace switch in profstart func 4 years ago
  mindspore-ci-bot dfd2314793 !782 Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  mindspore-ci-bot 69e7d5bf64 !789 fix question that release all loaded model memory when memory is not enough 4 years ago
  mindspore-ci-bot c451c30026 !797 fix dynamic aipp error 4 years ago
  mindspore-ci-bot 61b2de9c38 !786 dump 4 years ago
  mindspore-ci-bot a552edfd11 !779 Feature: delete is_load_profiling_ reset to false 4 years ago
  wjm ba745a12d3 fix 4 years ago
  zhou_chao1993 0554dd5942 modify p2p addr assigner bug 4 years ago
  weiyang 628162c7b0 dump 4 years ago
  wjm a9b4cf400a fix dynamic aipp error 4 years ago
  taoxiangdong 5e85506711 Parse training trace on profstart 4 years ago
  mindspore-ci-bot 5a8206cf7d !793 license update, mentioning usage of tensorflow and caffe code 4 years ago
  yanghaoran fba4643a47 license update, mentioning usage of tensorflow and caffe code 4 years ago
  wxl 2504b6b7b9 bugfix 4 years ago
  l00444296 50fdb59274 Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  l00444296 d1eb560616 Feature: delete is_load_profiling_ reset to false 4 years ago
  mindspore-ci-bot 72eef81746 !767 for performance 4 years ago
  mindspore-ci-bot 856fb4419a !724 Feature: repair dynamic_stitch_kernel folding bug 4 years ago
  mindspore-ci-bot b3cfade65e !771 op_compiler_cache_dir 4 years ago
  mindspore-ci-bot 35983c7c38 !761 Check aicpu op type 4 years ago
  mindspore-ci-bot 4fe214984d !755 errorcode 4 years ago
  lianghao bb6de73c97 op_compiler_cache_dir 4 years ago
  weiyang 43c1e02265 perf 4 years ago
  taoxiangdong 546e9f7cf9 Check aicpu op type 4 years ago
  mindspore-ci-bot c2fb4adbce !760 device os log missing 4 years ago
  taoxiangdong c485a99932 device os log missing 4 years ago
  mindspore-ci-bot 5de4cd5479 !733 decrease om size 4 years ago
  mindspore-ci-bot 30000dd4e7 !747 fix case plugin error 4 years ago
  mindspore-ci-bot 60ceda422f !752 Fix storage bug. 4 years ago
  weiyang 48585c78f0 errorcode 4 years ago
  mindspore-ci-bot 51314c970b !751 Fix bug of modify output shape to -2. 4 years ago
  unknown 17428ef7a8 Fix storage bug. 4 years ago
  unknown 4a315e1d4f Fix bug of modify output shape to -2. 4 years ago
  wjm c694a907e2 fix case plugin 4 years ago
  mindspore-ci-bot 8bb847b429 !746 delete invalid comment 4 years ago
  wqtshg acae7cfaea delete invalid comment 4 years ago
  mindspore-ci-bot 3efd05e1c4 !744 update c76 code 4 years ago
  wqtshg 7f542c2b68 update c76 code ut 4 years ago
  wqtshg 0b6354215a update c76 ut 4 years ago
  wqtshg 8a8a42cf03 update c76 code ut 4 years ago
  wqtshg 8e87e4b7a5 update ge ut 4 years ago
  wqtshg ea477de6eb update test 4 years ago
  wqtshg 0f36063e8c update ut 4 years ago
  wqtshg 1ac3bff4af update c76 submodule 4 years ago
  lianghao 2898b2d83c decrease om size 4 years ago
  wqtshg 455f21252f update c76 log_cpp 4 years ago
  l00444296 48973d4ea1 Feature: repair dynamic_stitch_kernel folding bug 4 years ago
  wqtshg 4ac0f69204 add c76 LOG_CPP 4 years ago
  wqtshg d662b5e84e update c76 submodule 4 years ago
  wqtshg e38e5e06a2 update c76 cmake 4 years ago
  wqtshg eea696c45b update c76 code 4 years ago
  wqtshg eaeaec68ff update slog to alog 4 years ago
  wqtshg 86bb779cee update c76 submodule 4 years ago
  计晨 fe5db33358 !712 update c76 code 4 years ago
  wqtshg 5cc51efc74 update c76 code and submodule 4 years ago
  计晨 ca855a5bf7 !707 update c76 code 4 years ago
  wqtshg 16758ee2b1 update c76 code 4 years ago
  wqtshg 5d043adbca update c76 code 4 years ago
100 changed files with 1944 additions and 1291 deletions
Split View
  1. +2
    -2
      .gitmodules
  2. +6
    -6
      CMakeLists.txt
  3. +73
    -0
      Third_Party_Open_Source_Software_Notice
  4. +6
    -8
      build.sh
  5. +1
    -1
      cmake/FindModule.cmake
  6. +0
    -1
      cmake/external_libs/gflags.cmake
  7. +2
    -6
      cmake/external_libs/gtest.cmake
  8. +7
    -12
      cmake/external_libs/json.cmake
  9. +1
    -5
      cmake/external_libs/onnx.cmake
  10. +0
    -1
      cmake/external_libs/protobuf_shared.cmake
  11. +6
    -1
      cmake/external_libs/protobuf_static.cmake
  12. +0
    -1
      cmake/external_libs/protoc.cmake
  13. +2
    -11
      cmake/external_libs/securec.cmake
  14. +1
    -0
      cmake/intf_pub_linux.cmake
  15. +2
    -0
      ge/CMakeLists.txt
  16. +19
    -0
      ge/client/ge_api.cc
  17. +2
    -2
      ge/common/CMakeLists.txt
  18. +1
    -14
      ge/common/dump/dump_op.cc
  19. +2
    -2
      ge/common/ge/tbe_plugin_manager.cc
  20. +1
    -1
      ge/common/profiling/ge_profiling.cc
  21. +12
    -12
      ge/common/profiling/profiling_manager.cc
  22. +0
    -2
      ge/common/proto/op_mapping_info.proto
  23. +8
    -0
      ge/common/proto/tensorflow/attr_value.proto
  24. +8
    -0
      ge/common/proto/tensorflow/function.proto
  25. +8
    -0
      ge/common/proto/tensorflow/graph.proto
  26. +8
    -0
      ge/common/proto/tensorflow/graph_library.proto
  27. +8
    -0
      ge/common/proto/tensorflow/node_def.proto
  28. +8
    -0
      ge/common/proto/tensorflow/op_def.proto
  29. +8
    -0
      ge/common/proto/tensorflow/resource_handle.proto
  30. +8
    -0
      ge/common/proto/tensorflow/tensor.proto
  31. +8
    -0
      ge/common/proto/tensorflow/tensor_shape.proto
  32. +8
    -0
      ge/common/proto/tensorflow/types.proto
  33. +8
    -0
      ge/common/proto/tensorflow/versions.proto
  34. +1
    -1
      ge/executor/CMakeLists.txt
  35. +95
    -40
      ge/executor/ge_executor.cc
  36. +1
    -0
      ge/executor/proto/dump_task.proto
  37. +0
    -2
      ge/executor/proto/op_mapping_info.proto
  38. +3
    -2
      ge/ge_local_engine/engine/host_cpu_engine.cc
  39. +40
    -20
      ge/generator/ge_generator.cc
  40. +2
    -1
      ge/graph/build/memory/graph_mem_assigner.cc
  41. +53
    -0
      ge/graph/build/model_builder.cc
  42. +6
    -0
      ge/graph/build/model_builder.h
  43. +1
    -1
      ge/graph/build/stream_allocator.cc
  44. +8
    -6
      ge/graph/build/stream_graph_optimizer.cc
  45. +9
    -7
      ge/graph/build/task_generator.cc
  46. +4
    -1
      ge/graph/common/transop_util.cc
  47. +69
    -12
      ge/graph/load/graph_loader.cc
  48. +7
    -1
      ge/graph/load/graph_loader.h
  49. +1
    -6
      ge/graph/load/new_model_manager/data_dumper.cc
  50. +151
    -144
      ge/graph/load/new_model_manager/davinci_model.cc
  51. +22
    -33
      ge/graph/load/new_model_manager/davinci_model.h
  52. +267
    -54
      ge/graph/load/new_model_manager/model_manager.cc
  53. +11
    -4
      ge/graph/load/new_model_manager/model_manager.h
  54. +16
    -9
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  55. +2
    -0
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
  56. +90
    -77
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  57. +3
    -3
      ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  58. +30
    -2
      ge/graph/manager/graph_manager.cc
  59. +3
    -0
      ge/graph/manager/graph_mem_allocator.cc
  60. +33
    -0
      ge/graph/optimize/graph_optimize.cc
  61. +3
    -0
      ge/graph/optimize/graph_optimize.h
  62. +33
    -0
      ge/graph/passes/atomic_addr_clean_pass.cc
  63. +8
    -0
      ge/graph/passes/atomic_addr_clean_pass.h
  64. +4
    -8
      ge/graph/passes/dynamic_single_op_reset_shape_pass.cc
  65. +1
    -1
      ge/graph/passes/dynamic_single_op_reset_shape_pass.h
  66. +53
    -45
      ge/graph/passes/multi_batch_clone_pass.cc
  67. +11
    -11
      ge/graph/passes/multi_batch_clone_pass.h
  68. +231
    -315
      ge/graph/passes/subgraph_const_migration_pass.cc
  69. +42
    -42
      ge/graph/passes/subgraph_const_migration_pass.h
  70. +8
    -4
      ge/graph/passes/switch_to_stream_switch_pass.cc
  71. +44
    -0
      ge/graph/preprocess/graph_preprocess.cc
  72. +6
    -7
      ge/graph/preprocess/multi_batch_copy_graph.cc
  73. +10
    -1
      ge/host_kernels/dynamic_stitch_kernel.cc
  74. +22
    -34
      ge/hybrid/executor/node_state.cc
  75. +1
    -2
      ge/hybrid/executor/node_state.h
  76. +8
    -1
      ge/hybrid/executor/subgraph_executor.cc
  77. +12
    -16
      ge/hybrid/executor/worker/execution_engine.cc
  78. +18
    -103
      ge/hybrid/executor/worker/shape_inference_engine.cc
  79. +0
    -4
      ge/hybrid/executor/worker/shape_inference_engine.h
  80. +27
    -4
      ge/hybrid/model/hybrid_model_builder.cc
  81. +1
    -0
      ge/hybrid/model/hybrid_model_builder.h
  82. +34
    -57
      ge/hybrid/model/node_item.cc
  83. +0
    -6
      ge/hybrid/model/node_item.h
  84. +0
    -10
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  85. +0
    -11
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  86. +14
    -2
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
  87. +0
    -38
      ge/hybrid/node_executor/task_context.cc
  88. +0
    -10
      ge/hybrid/node_executor/task_context.h
  89. +15
    -4
      ge/ir_build/atc_ir_common.cc
  90. +2
    -1
      ge/ir_build/atc_ir_common.h
  91. +0
    -36
      ge/ir_build/ge_ir_build.cc
  92. +1
    -0
      ge/offline/CMakeLists.txt
  93. +116
    -0
      ge/offline/keep_dtype_option.cc
  94. +26
    -0
      ge/offline/keep_dtype_option.h
  95. +16
    -0
      ge/offline/main.cc
  96. +3
    -0
      ge/offline/module.mk
  97. +2
    -2
      ge/omm/csa_interact.cc
  98. +8
    -0
      ge/proto/caffe/caffe.proto
  99. +2
    -0
      ge/proto/dump_task.proto
  100. +0
    -2
      ge/proto/op_mapping_info.proto

+ 2
- 2
.gitmodules View File

@@ -1,8 +1,8 @@
[submodule "parser"]
path = parser
url = https://gitee.com/ascend/parser.git
branch = development
branch = r1.2.0
[submodule "metadef"]
path = metadef
url = https://gitee.com/ascend/metadef.git
branch = development
branch = r1.2.0

+ 6
- 6
CMakeLists.txt View File

@@ -52,10 +52,10 @@ if (ENABLE_OPEN_SRC)
include(cmake/FindModule.cmake)
include(cmake/intf_pub_linux.cmake)

# for CPU/GPU mode, find c_sec and slog from local prebuild
# for CPU/GPU mode, find c_sec and alog from local prebuild
#if(NOT ENABLE_D AND NOT GE_ONLY)
# set(GE_PREBUILD_PATH ${GE_CODE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
# find_module(slog libslog.so ${GE_PREBUILD_PATH})
# find_module(slog libalog.so ${GE_PREBUILD_PATH})
# if D_LINK_PATH is set in environment variables, search libraries in given path
if(DEFINED ENV{D_LINK_PATH})
# D_LINK_PATH is set
@@ -72,7 +72,7 @@ if (ENABLE_OPEN_SRC)
endif()
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(slog libalog.so ${GE_LIB_PATH})
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
@@ -88,7 +88,7 @@ if (ENABLE_OPEN_SRC)
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
else()
find_module(slog libslog.so ${ASCEND_ATC_DIR})
find_module(slog libalog.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
@@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

@@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})



+ 73
- 0
Third_Party_Open_Source_Software_Notice View File

@@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation.

License: MIT License
Please see above.



Software: caffe 1.0

License: BSD 2-Clause License

Open Source Software Licensed Under the BSD 2-Clause License

GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model.
Please see below for the full list of source code files from caffe that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. caffe.proto master
All contributions by the University of California:
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
All rights reserved.


Terms of the BSD 2-Clause License:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



Software: tensorflow 1.15.0

License: Apache-2.0 License

Open Source Software Licensed Under the Apache-2.0 License


GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model.
Please see below for the full list of source code files from tensorflow that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. attr_value.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

2. function.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

3. graph.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

4. node_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

5. op_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

6. resource_handle.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

7. tensor.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

8. tensor_shape.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

9. types.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

10. versions.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Terms of the Apache-2.0 License:
Please see above.

+ 6
- 8
build.sh View File

@@ -224,14 +224,12 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
# fi

# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
# echo "Generating coverage statistics, please wait..."
# cd ${BASEPATH}
# rm -rf ${BASEPATH}/cov
# mkdir ${BASEPATH}/cov
# gcovr -r ./ --exclude 'third_party' --exclude 'build' --exclude 'tests' --exclude 'prebuild' --exclude 'inc' --print-summary --html --html-details -d -o cov/index.html
# fi
fi

# generate output package in tar form, including ut/st libraries/executables


+ 1
- 1
cmake/FindModule.cmake View File

@@ -21,7 +21,7 @@ function(find_module module name)
if ("${${module}_LIBRARY_DIR}" STREQUAL "${module}_LIBRARY_DIR-NOTFOUND")
message(FATAL_ERROR "${name} not found in ${path}")
endif()
add_library(${module} SHARED IMPORTED)
set_target_properties(${module} PROPERTIES
IMPORTED_LOCATION ${${module}_LIBRARY_DIR}


+ 0
- 1
cmake/external_libs/gflags.cmake View File

@@ -23,7 +23,6 @@ ExternalProject_Add(gflags_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+ 2
- 6
cmake/external_libs/gtest.cmake View File

@@ -10,10 +10,7 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_gtest/release-1.8.0.tar.gz")
set(MD5 "")
elseif (ENABLE_GITEE)
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
set(MD5 "")
else()
@@ -25,9 +22,8 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(gtest_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR>
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE


+ 7
- 12
cmake/external_libs/json.cmake View File

@@ -5,24 +5,19 @@ endif()
include(ExternalProject)

set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#elseif (ENABLE_GITEE)
#if (ENABLE_GITEE)
# set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
endif ()
# set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
#else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#endif ()
ExternalProject_Add(json_build
URL ${REQ_URL}
#URL /home/txd/workspace/cloud_code/pkg/include.zip
SOURCE_DIR ${JSON_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""


+ 1
- 5
cmake/external_libs/onnx.cmake View File

@@ -6,10 +6,7 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
elseif (ENABLE_GITEE)
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
set(MD5 "1bdbcecdd68ea8392630467646776e02")
else()
@@ -22,7 +19,6 @@ ExternalProject_Add(onnx
#URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
#URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
#SOURCE_DIR ${ONNX_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
#INSTALL_COMMAND ""


+ 0
- 1
cmake/external_libs/protobuf_shared.cmake View File

@@ -26,7 +26,6 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protobuf_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-Dprotobuf_WITH_ZLIB=OFF
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}


+ 6
- 1
cmake/external_libs/protobuf_static.cmake View File

@@ -1,3 +1,7 @@
if (HAVE_PROTOBUF_STATIC)
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
@@ -27,7 +31,6 @@ ExternalProject_Add(protobuf_static_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -58,3 +61,5 @@ include_directories(${PROTOBUF_STATIC_PKG_DIR}/include)
endif ()

add_dependencies(ascend_protobuf_static protobuf_static_build)

set(HAVE_PROTOBUF_STATIC TRUE)

+ 0
- 1
cmake/external_libs/protoc.cmake View File

@@ -30,7 +30,6 @@ ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+ 2
- 11
cmake/external_libs/securec.cmake View File

@@ -10,20 +10,11 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
set(MD5 "")
else()
set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
set(MD5 "")
endif ()

ExternalProject_Add(c_sec_build
URL ${REQ_URL}
#URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+ 1
- 0
cmake/intf_pub_linux.cmake View File

@@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
LOG_CPP
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro


+ 2
- 0
ge/CMakeLists.txt View File

@@ -620,6 +620,7 @@ target_compile_definitions(ge_runner PRIVATE
FMK_SUPPORT_DUMP
DAVINCI_CLOUD
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_runner PRIVATE
@@ -687,6 +688,7 @@ target_compile_definitions(ge_compiler PRIVATE
FMK_HOST_INFER
COMPILE_OMG_PACKAGE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_compiler PRIVATE


+ 19
- 0
ge/client/ge_api.cc View File

@@ -32,6 +32,9 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "toolchain/plog.h"
#endif

using domi::OpRegistry;
using std::map;
@@ -129,6 +132,11 @@ Status GEInitializeImpl(const std::map<string, string> &options) {

// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitialize(const std::map<string, string> &options) {
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(options);
}

@@ -143,6 +151,11 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
std::string val = option.second.GetString();
str_options[key] = val;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(str_options);
}

@@ -187,6 +200,12 @@ Status GEFinalize() {
// to avoid memory fragment, use malloc_trim to back free stack to system
malloc_trim(0);

#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportFinalize() != SUCCESS) {
GELOGW("Dlog report device log finalize failed.");
}
#endif

GELOGT(TRACE_STOP, "GEFinalize finished");
return ret;
}


+ 2
- 2
ge/common/CMakeLists.txt View File

@@ -12,7 +12,7 @@ set(PROTO_LIST
"${METADEF_DIR}/proto/tensorflow/tensor.proto"
"${METADEF_DIR}/proto/tensorflow/tensor_shape.proto"
"${METADEF_DIR}/proto/tensorflow/types.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
@@ -163,7 +163,7 @@ target_include_directories(ge_common_static PRIVATE

target_link_libraries(ge_common_static PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
ascend_protobuf_static
json
c_sec
$<$<NOT:$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-lrt>


+ 1
- 14
ge/common/dump/dump_op.cc View File

@@ -94,9 +94,6 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -121,9 +118,6 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -220,15 +214,8 @@ Status DumpOp::LaunchDumpOp() {
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());
uint32_t task_id = 0;
uint32_t stream_id = 0;
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
}

aicpu::dump::Task task;
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(op_desc_->GetName());
task.mutable_op()->set_op_type(op_desc_->GetType());
if (dump_properties_.GetDumpMode() == kDumpOutput) {


+ 2
- 2
ge/common/ge/tbe_plugin_manager.cc View File

@@ -184,7 +184,7 @@ void TBEPluginManager::LoadCustomOpLib() {
std::string fmk_type = std::to_string(domi::TENSORFLOW);
auto it = options_.find(ge::FRAMEWORK_TYPE);
if (it != options_.end()) {
fmk_type = it->second;
fmk_type = it->second;
}
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
@@ -192,7 +192,7 @@ void TBEPluginManager::LoadCustomOpLib() {
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
(void)domi::OpRegistry::Instance()->Register(reg_data);
domi::OpRegistry::Instance()->Register(reg_data);
}
}
}


+ 1
- 1
ge/common/profiling/ge_profiling.cc View File

@@ -182,7 +182,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
command.module_index = prof_config_param->profSwitch;
}
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
command.module_index);
command.module_index);
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}


+ 12
- 12
ge/common/profiling/profiling_manager.cc View File

@@ -89,13 +89,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling by env
char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 };
is_load_profiling_ = false; // Change in ProfInit
is_execute_profiling_ = false;

if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
// enable profiling by ge option
if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
options.profiling_options.size()) != EOK) {
if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy profiling_options failed.");
return INTERNAL_ERROR;
}
@@ -125,11 +124,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return ge::PARAM_INVALID;
}

if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(),
sizeof(options.job_id.c_str())) != EOK) {
if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy job_id failed.");
return INTERNAL_ERROR;
}
GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
#endif
return ge::SUCCESS;
}
@@ -159,6 +159,7 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
is_training_trace_ = true;
} catch (...) {
GELOGE(FAILED, "Json prof_conf options is invalid.");
return ge::PARAM_INVALID;
@@ -212,16 +213,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(block_dim).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append("\n");
.append(std::to_string(model_id)).append("\n"));

ReporterData reporter_data{};
reporter_data.deviceId = device_id;
@@ -632,6 +629,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
uint64_t module, const std::map<std::string, std::string> &config_para) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK;
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) {
is_training_trace_ = true;
}
int32_t device_num = 0;
vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
@@ -846,7 +847,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
return;
}
}
return;
}



+ 0
- 2
ge/common/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 8
- 0
ge/common/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/common/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 1
- 1
ge/executor/CMakeLists.txt View File

@@ -197,7 +197,7 @@ target_include_directories(ge_executor PRIVATE
target_link_libraries(ge_executor PRIVATE
$<BUILD_INTERFACE:intf_pub>
json
ascend_protobuf
ascend_protobuf_static
c_sec
$<$<NOT:$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-lrt>
-ldl


+ 95
- 40
ge/executor/ge_executor.cc View File

@@ -209,33 +209,19 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,

namespace ge {
bool GeExecutor::isInit_ = false;

static void InitOpsProtoManger() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "File path %s is invalid.", path.c_str());
return;
class ModelListenerAdapter : public ModelListener {
public:
domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
std::vector<ge::OutputTensorInfo> &outputs) {
if (listener == nullptr) {
GELOGE(ge::FAILED, "listener is null.");
return FAILED;
}
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
(void)manager->Initialize(option_tmp);
}
return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
}

std::shared_ptr<ge::ModelListener> listener;
};

GeExecutor::GeExecutor() {}

@@ -246,16 +232,6 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
if (initHostCpuEngineStatus != SUCCESS) {
GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
return initHostCpuEngineStatus;
}

InitOpsProtoManger();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);
@@ -560,6 +536,60 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
return SUCCESS;
}

// Load model
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
"File path is invalid. please check your text file '%s'.", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
return ACL_ERROR_GE_LOAD_MODEL;
}
return SUCCESS;
}

Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModel failed.");
return ACL_ERROR_GE_LOAD_MODEL;
}
return ret;
}

Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
@@ -569,7 +599,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_INTERNAL_ERROR;
return ret;
}

std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model =
@@ -587,11 +617,26 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
ret = GraphLoader::UnloadModel(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_UNLOAD_MODEL;
return ret;
}
return SUCCESS;
}

Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

InputData inputs;
GetDomiInputData(input_data, inputs);
OutputData outputs;
GetDomiOutputData(output_data, outputs);

return GraphExecutor::DataInput(inputs, outputs);
}

// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {
@@ -1006,12 +1051,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size

Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op);
return LoadSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op);
return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,


+ 1
- 0
ge/executor/proto/dump_task.proto View File

@@ -108,4 +108,5 @@ message DumpData{
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
string op_name = 6;
}

+ 0
- 2
ge/executor/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 3
- 2
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -39,7 +39,7 @@ namespace {
} \
ge_tensor = MakeShared<GeTensor>(out_desc); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
return MEMALLOC_FAILED; \
@@ -50,7 +50,8 @@ namespace {
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \


+ 40
- 20
ge/generator/ge_generator.cc View File

@@ -262,10 +262,19 @@ static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag)
change_shape_flag = true;
}
}
for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) {
auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
GE_CHECK_NOTNULL(output_desc);
// pass scalar output desc
auto dims = output_desc->GetShape().GetDims();
if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) {
change_shape_flag = true;
}
}
return SUCCESS;
}

static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
for (auto input : inputs) {
auto input_desc = input.GetTensorDesc();
GeShape shape_ori = input_desc.GetShape();
@@ -280,6 +289,12 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
int64_t storage_format = FORMAT_NCHW;
if (ge::AttrUtils::GetInt(desc, ge::ATTR_NAME_STORAGE_FORMAT, storage_format) &&
!ge::AttrUtils::SetListInt(desc, ge::ATTR_NAME_STORAGE_SHAPE, dynamic_shape_dims)) {
GELOGE(FAILED, "Set attr ATTR_NAME_STORAGE_SHAPE fail.");
return FAILED;
}
desc.SetShape(dynamic_shape);
desc.SetShapeRange(dynamic_shape_range);
}
@@ -287,6 +302,7 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
inputTensor.SetTensorDesc(desc);
inputs_dynamic.push_back(inputTensor);
}
return SUCCESS;
}

class GeGenerator::Impl {
@@ -530,6 +546,24 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
return true;
}

static Status SetModelNameForDump(GeRootModelPtr ge_root_model) {
ModelHelper model_helper;
string model_name = "";
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
return SUCCESS;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
@@ -538,7 +572,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
GELOGD("Current ctx is null.");
ctx = nullptr;
}

GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
@@ -562,22 +595,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
impl_->build_step_.c_str());
return SUCCESS;
}

GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
ModelHelper model_helper;
string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
ret = SetModelNameForDump(ge_root_model);
if (ret != SUCCESS) {
return ret;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
if (ret != SUCCESS) {
GELOGE(ret, "Save model failed");
@@ -586,11 +608,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
}
return ret;
}

if (ctx != nullptr) {
(void)rtCtxSetCurrent(ctx);
}

return SUCCESS;
}

@@ -684,8 +704,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
ResetTensorVecShape(inputs, inputs_dynamic);
ResetTensorVecShape(outputs, outputs_dynamic);
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else {


+ 2
- 1
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);

if (mem_assigner->GetP2PMemOffset() >= 0) {
if (mem_assigner->GetP2PMemOffset() > 0) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
}
@@ -402,6 +402,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
return FAILED;
}
continuous_mem_start = iter->second.mem_offset_;
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);


+ 53
- 0
ge/graph/build/model_builder.cc View File

@@ -582,9 +582,13 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add TBE Kernels and custom aicpu op bin
std::set<std::string> tbe_name_set;
std::set<std::string> aicpu_name_set;
std::set<std::string> aicpu_op_types;
std::set<std::string> aicpu_tf_op_types;
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
// check aicpu op type
CollectCheckAicpuAttr(node_op_desc, aicpu_op_types, aicpu_tf_op_types);
TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
std::string kernel_name;
@@ -606,6 +610,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
}

SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);

for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -797,4 +803,51 @@ Status ModelBuilder::CompileSingleOp() {
GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp");
return ge::SUCCESS;
}

void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::string aicpu_optype;
bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
std::vector<std::string> tf_optypes;
bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
if (has_attr_check_cpu && !aicpu_optype.empty()) {
aicpu_op_types.insert(aicpu_optype);
}

if (has_attr_check_tf && !tf_optypes.empty()) {
aicpu_tf_op_types.insert(tf_optypes.begin(), tf_optypes.end());
}

return;
}

void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
aicpu_op_types.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) {
GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
aicpu_tf_op_types.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}

// reset list with set
aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end());
aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end());
GELOGI(
"Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, "
"aicpu_tf_optype_list:%zu.",
compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(),
aicpu_tf_optype_list.size());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return,
"Set attr needCheckCpu fail.");

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return,
"Set attr needCheckTf fail.");
return;
}
} // namespace ge

+ 6
- 0
ge/graph/build/model_builder.h View File

@@ -83,6 +83,12 @@ class ModelBuilder {

Status CompileSingleOp();

void CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

uint64_t session_id_;

map<int64_t, size_t> mem_type_to_mem_offset_;


+ 1
- 1
ge/graph/build/stream_allocator.cc View File

@@ -36,7 +36,7 @@ using std::vector;
namespace {
const uint32_t kMaxSwitchStreamNum = 1;
const int64_t kTaskNumPerNormalNode = 3;
const int64_t kTaskNumPerHcclNode = 200;
const int64_t kTaskNumPerHcclNode = 245;
const char *const kTrueStr = "true";
const char *const kFalseStr = "false";



+ 8
- 6
ge/graph/build/stream_graph_optimizer.cc View File

@@ -66,13 +66,13 @@ bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &com
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
label_set.insert(batch_label);
} else {
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
GELOGD("Node %s[%s] has no batch_label, subgraph %s, stream id: %ld ", cur_node->GetName().c_str(),
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
continue;
}

GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
GELOGD("Node %s in subgraph %s stream id: %ld, batch_label: %s, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, batch_label.c_str(), comp_graph->GetDirectNodesSize());
}
if (stream_set.size() > 1 || label_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
@@ -126,12 +126,14 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
run_context.graphStreamList.size());
return FAILED;
}

run_context.stream = run_context.graphStreamList[stream_id];
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());

for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);


+ 9
- 7
ge/graph/build/task_generator.cc View File

@@ -54,9 +54,10 @@ const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
const uint64_t kProfilingArStartLogid = 3;
const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 255;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
const char *const kKernelInfoNameHccl = "ops_kernel_info_hccl";
} // namespace
namespace ge {
TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
@@ -348,14 +349,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
}

// Reset stream id to ge stream id, as graph load must use ge stream to reassign stream
void *ops_kernel_info_store_ptr = kernel_info_store.get();
for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) {
task_def_list[idx].set_stream_id(static_cast<uint32_t>(stream_id));
op_name_map[idx] = name;
// Set opsKernelInfoStorePtr and op_index, the two fields be use in DistributeTask and InitTaskInfo
TaskDef *task_def_ptr = &task_def_list[idx];
GE_CHECK_NOTNULL(task_def_ptr);
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr));
// Set opsKernelInfoStorePtr for hccl which will be use in DistributeTask and InitTaskInfo
if (op_kernel_lib_name == kKernelInfoNameHccl) {
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(kernel_info_store.get()));
}
}
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).",
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id,
@@ -567,7 +569,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
continue;
}
string op_type = op_desc->GetType();
if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) {
if ((!is_single_stream && !op_desc->GetSubgraphInstanceNames().empty()) || separator_types.count(op_type) != 0) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);
@@ -676,7 +678,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP
}
}
if (graph->GetNeedIteration()) {
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
profiling_point.end_index.insert(current_idx);
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
op_desc->GetName().c_str(), current_idx);
@@ -773,7 +775,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin
}

if (graph->GetNeedIteration()) {
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
profiling_point.end_index.insert(current_idx);
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
op_desc->GetName().c_str(), current_idx);


+ 4
- 1
ge/graph/common/transop_util.cc View File

@@ -23,7 +23,10 @@
namespace {
const int kInvalidTransopDataIndex = -1;
const int kTransOpOutIndex = 0;
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}};
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {
{ge::DT_FLOAT, ge::DT_BOOL},
{ge::DT_INT64, ge::DT_BOOL}
};
} // namespace

namespace ge {


+ 69
- 12
ge/graph/load/graph_loader.cc View File

@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
}

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,6 +144,63 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return SUCCESS;
}

Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
Status ret;
ModelData model_data;
ret = LoadDataFromFile(path, key_path, priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}

ret = LoadModel(model_data, listener, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
}

if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}

return ret;
}

Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id) {
GELOGI("Load model begin, model_id:%u.", model_id);

// For GeOp, Open Device 0 here.
GE_CHK_RT_RET(rtSetDevice(0));
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
if (ret != SUCCESS) {
GE_CHK_RT(rtDeviceReset(0));
GELOGE(ret, "LoadModel: Load failed.");
return ret;
}
ret = model_manager->Start(model_id);
if (ret != SUCCESS) {
if (model_manager->Unload(model_id) != SUCCESS) {
GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
}
GELOGE(ret, "LoadModel: Start failed.");
return ret;
}
GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
return SUCCESS;
}

Status GraphLoader::CommandHandle(const Command &command) {
try {
auto model_manager = ModelManager::GetInstance();
@@ -168,16 +225,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
}

Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
size_t mem_size, void *weight_ptr, size_t weight_size) {
size_t memsize, void *weight_ptr, size_t weightsize) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For ACL, Open Device from App.
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
GELOGE(ret, "Load model failed, model_id:%u.", model_id);
return ret;
}
GELOGI("Load model success, model_id:%u.", model_id);
return SUCCESS;
@@ -202,8 +259,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
return ret;
}

GELOGI("Load model with queue success, model_id:%u.", model_id);
@@ -263,10 +320,10 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) {
return SUCCESS;
}

Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id);
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id, sub_model_id);
if (ret != SUCCESS) {
GELOGE(ret, "Destroy aicpu kernel failed.");
return ret;


+ 7
- 1
ge/graph/load/graph_loader.h View File

@@ -44,6 +44,12 @@ class GraphLoader {

static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);

static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id);

static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);

static Status CommandHandle(const Command &command);

static Status GetMemoryInfo(int64_t &free);
@@ -62,7 +68,7 @@ class GraphLoader {
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc);

static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id);

static Status DestroyAicpuSessionForInfer(uint32_t model_id);



+ 1
- 6
ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -319,9 +319,6 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -479,9 +476,6 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
@@ -897,6 +891,7 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
toolkit::dumpdata::DumpData dump_data;
dump_data.set_version("2.0");
dump_data.set_dump_time(GetNowTime());
dump_data.set_op_name(op_desc_info.op_name);
for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
toolkit::dumpdata::OpInput input;
input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));


+ 151
- 144
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
if (weight_ptr == nullptr) {
weights_mem_base_ = MallocWeightsMem(weights_size);
if (weights_mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
}
is_inner_weight_base_ = true;
}
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh

Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (is_feature_map_mem_has_inited_) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once .");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(FAILED, "call InitFeatureMapMem more than once .");
return FAILED;
}
is_feature_map_mem_has_inited_ = true;

@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return FAILED;
}

mem_base_ = static_cast<uint8_t *>(dev_ptr);
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (TotalMemSize() && mem_base_ == nullptr) {
mem_base_ = MallocFeatureMapMem(data_size);
if (mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
runtime_param_.graph_id, mem_base_, data_size);
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (p2p_data_size != 0) {
p2p_mem_base_ = MallocP2PMem(p2p_data_size);
if (p2p_mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return GE_EXEC_ALLOC_P2P_MEM_FAILED;
}
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
p2p_mem_base_, p2p_data_size);
@@ -485,6 +485,8 @@ Status DavinciModel::DoTaskSink() {

GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");

GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
@@ -537,7 +539,7 @@ Status DavinciModel::OpDebugRegister() {

uint32_t op_debug_mode = 0;
(void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode);
GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode);
GELOGD("The value of op debug mode in ge_model is %u.", op_debug_mode);
uint32_t debug_task_id = 0;
uint32_t debug_stream_id = 0;
rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
@@ -605,7 +607,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
version_ = ge_model_->GetVersion();
name_ = ge_model_->GetName();
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_);
GELOGD("The value of ge.l1Fusion in ge_model_ is %d.", is_l1_fusion_enable_);
GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_);
CheckHasHcomOp();

vector<int64_t> huge_stream_list;
@@ -710,7 +712,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

// collect profiling for ge
GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed");
auto &profiling_manager = ProfilingManager::Instance();
if (profiling_manager.ProfilingModelLoadOn()) {
Status p_ret = ReportProfilingData();
@@ -733,7 +734,6 @@ Status DavinciModel::ReportProfilingData() {
}
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
op_list_.clear();

return SUCCESS;
}
@@ -2087,61 +2087,12 @@ Status DavinciModel::SyncVarData() {
return ret;
}

Status DavinciModel::InitModelProfile() {
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size());
op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID()));
}

std::set<uint32_t> task_id_set;
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

if (task_id_set.count(task->GetTaskID()) > 0) {
continue;
}

const auto &op_desc = GetOpByIndex(fusion_op_info->op_index);
GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index);

ProfileInfo profile;
profile.fusion_info = *fusion_op_info;
Range range = op_id_map_.equal_range(fusion_op_info->op_index);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
profile.task_count++;
task_id_set.insert(range_idx->second);
}

// memory info
TaskMemInfo &mem_info = profile.memory_info;
const auto input_size = ModelUtils::GetInputSize(op_desc);
const auto output_size = ModelUtils::GetOutputSize(op_desc);
const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc);
const auto weight_size = ModelUtils::GetWeightSize(op_desc);
mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0);
mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0);
mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0);
mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0);
mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size;

profile_list_.emplace_back(profile);
inline int64_t SumSize(const vector<int64_t> &size_list) {
int64_t sum_size = 0;
for (const int64_t &size : size_list) {
sum_size += size;
}

GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size());
return SUCCESS;
return sum_size;
}

Status DavinciModel::SinkModelProfile() {
@@ -2149,12 +2100,18 @@ Status DavinciModel::SinkModelProfile() {
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
std::string tag_name;
tag_name.append("model_load_info_").append(std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
std::string name = om_name_.empty() ? name_ : om_name_;
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
@@ -2186,71 +2143,128 @@ Status DavinciModel::SinkModelProfile() {
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int32_t task_num = task_list_.size();
std::multimap<uint32_t, uint32_t> op_id_map;
std::set<uint32_t> task_id_set;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if (fusion_op_info != nullptr) {
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_id = task->GetTaskID();
if (op_num > 0) {
GELOGI("task.id = %u, opNum = %u", task_id, op_num);
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id));
}
}
}

struct memoryInfo {
int64_t input_size;
int64_t output_size;
int64_t weight_size;
int64_t workspace_size;
int64_t total_size;

memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {}
};

using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) {
uint32_t task_id = task->GetTaskID();
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_count = 0;
if (task_id_set.count(task_id) != 0) {
continue;
}

uint32_t op_id = fusion_op_info->op_index;
Range range = op_id_map.equal_range(op_id);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
task_count++;
uint32_t task_id = range_idx->second;
task_id_set.insert(task_id);
}

// op name after fusion
string fusion_op_name = fusion_op_info->op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = fusion_op_info->original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = task->GetStreamId();
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
struct memoryInfo memory_info;
uint32_t op_index = fusion_op_info->op_index;
auto iter = op_list_.find(op_index);
GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index);
auto op_desc = iter->second;
memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc));
memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc));
memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc));
memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc));
memory_info.total_size =
memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size;
reporter_data.data = (unsigned char *)&memory_info;
reporter_data.dataLen = sizeof(struct memoryInfo);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = profile.fusion_info.stream_id;
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
// task info
reporter_data.data = (unsigned char *)&task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map.equal_range(op_id);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}
}
}

return SUCCESS;
}

@@ -2824,19 +2838,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
return SUCCESS;
}

Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
for (size_t i = 0; i < total_io_addrs.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
Status DavinciModel::UpdateKnownZeroCopyAddr() {
for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_input_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_output_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
}
}
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -2865,7 +2879,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
} else {
total_io_addrs_ = orig_total_io_addrs_;
}
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

if (total_args_size_ == 0) {
GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -2932,14 +2946,7 @@ Status DavinciModel::MallocKnownArgs() {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");
@@ -2993,7 +3000,9 @@ Status DavinciModel::DistributeTask() {
}

auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX);
bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL)
&& (task_type != RT_MODEL_TASK_KERNEL_EX)
&& (task_type != RT_MODEL_TASK_HCCL);
GE_IF_BOOL_EXEC(no_need_profiling, continue);

SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
@@ -3008,8 +3017,6 @@ Status DavinciModel::DistributeTask() {
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {


+ 22
- 33
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -76,20 +76,6 @@ struct timeInfo {
int64_t dumpEndTime;
};

struct TaskMemInfo {
int64_t input_size{0};
int64_t output_size{0};
int64_t weight_size{0};
int64_t workspace_size{0};
int64_t total_size{0};
};

struct ProfileInfo {
FusionOpInfo fusion_info;
TaskMemInfo memory_info;
uint32_t task_count{0};
};

enum ExecuteMode {
INITIALIZATION,
SYNCHRONIZATION,
@@ -150,6 +136,20 @@ class DavinciModel {
///
void SetId(uint32_t model_id) { model_id_ = model_id; }

///
/// @ingroup ge
/// @brief Get SubModelId
/// @return sub model ID
///
uint32_t SubModelId() const { return sub_model_id_; }

///
/// @ingroup ge
/// @brief Set SubModelId
/// @return sub model ID
///
void SetSubModelId(uint32_t sub_model_id) { sub_model_id_ = sub_model_id; }

static void *Run(DavinciModel *model_pointer);

///
@@ -240,6 +240,8 @@ class DavinciModel {
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

// get Op
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }

OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
return nullptr;
@@ -448,6 +450,10 @@ class DavinciModel {

int64_t GetLoadEndTime() { return load_end_time_; }

Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status ReportProfilingData();

void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
@@ -484,14 +490,6 @@ class DavinciModel {
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -510,7 +508,7 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
Status UpdateKnownZeroCopyAddr();
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -812,11 +810,6 @@ class DavinciModel {

void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

Status InitModelProfile();
Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);

@@ -836,6 +829,7 @@ class DavinciModel {

uint32_t model_id_;
uint32_t runtime_model_id_;
uint32_t sub_model_id_ = 0;
string name_;

// used for inference data dump
@@ -952,8 +946,6 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
@@ -993,9 +985,6 @@ class DavinciModel {
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;

std::multimap<uint32_t, uint32_t> op_id_map_;
std::vector<ProfileInfo> profile_list_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 267
- 54
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -18,6 +18,8 @@

#include <string>

#include "mmpa/mmpa_api.h"
#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
#include "common/profiling/profiling_manager.h"
@@ -30,6 +32,7 @@
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

@@ -51,7 +54,7 @@ const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kSessionMaxBias = 100;
const int kOpNameMaxSize = 100;
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
uint32_t kernelSoBufLen;
@@ -78,7 +81,8 @@ ModelManager::ModelManager() {
session_id_bias_ = 0;
}

Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) {
Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id,
uint32_t sub_model_id) {
STR_FWK_OP_KERNEL param_base = {};
void *devicebase = nullptr;
void *aicpu_kernel_addr = nullptr;
@@ -88,11 +92,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
param_base.fwkKernelBase.fwk_kernel.sessionID = session_id;
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
auto iter = model_aicpu_kernel_.find(model_key);
if (iter != model_aicpu_kernel_.end()) {
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
GELOGD("kernel destroy session_id %lu, model_id %u, sub_model_id %u..", session_id, model_id, sub_model_id);
v_aicpu_kernel = model_aicpu_kernel_.at(model_key);
// Insert size of aicpu kernel vector in the first element
v_aicpu_kernel.insert(v_aicpu_kernel.begin(), v_aicpu_kernel.size());
@@ -177,7 +181,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}

void ModelManager::DestroyAicpuSession(uint64_t session_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
auto it = sess_ids_.find(session_id);
if (it == sess_ids_.end()) {
GELOGI("The session: %lu not created.", session_id);
@@ -190,7 +194,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
GE_CHK_RT(rtSetDevice(static_cast<int32_t>(GetContext().DeviceId())));
}

Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0, 0);
if (ret != SUCCESS) {
GELOGW("The session: %lu destroy failed.", session_id);
} else {
@@ -206,7 +210,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@@ -224,12 +228,14 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
return SUCCESS;
}

ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id,
sub_model_id);
if (ret != SUCCESS) {
GELOGE(FAILED, "Destroy aicpu kernel failed.");
return FAILED;
@@ -238,10 +244,12 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
return SUCCESS;
}

ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id,
uint64_t kernel_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
v_aicpu_kernel = model_aicpu_kernel_.at(model_key);
}
@@ -251,7 +259,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
}

ModelManager::~ModelManager() {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();
@@ -359,24 +367,25 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
}

void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
}

Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = model_map_.find(id);
auto hybrid_model_it = hybrid_model_map_.find(id);
if (it != model_map_.end()) {
uint64_t session_id = it->second->GetSessionId();
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id) + "_" +
std::to_string(it->second->SubModelId());
auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key);
if (iter_aicpu_kernel != model_aicpu_kernel_.end()) {
(void)model_aicpu_kernel_.erase(iter_aicpu_kernel);
@@ -385,22 +394,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
}

return SUCCESS;
}

std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = model_map_.find(id);
return (it == model_map_.end()) ? nullptr : it->second;
}

std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = hybrid_model_map_.find(id);
return (it == hybrid_model_map_.end()) ? nullptr : it->second;
@@ -971,9 +980,8 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
}

Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetCurShape Failed, Invalid Model ID %u!", model_id);
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}
@@ -986,8 +994,7 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetModelAttr Failed, Invalid Model ID %u!", model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}
@@ -997,8 +1004,9 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

@@ -1013,18 +1021,28 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAIPPInfo(index, aipp_info);
}

Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAippType(index, type, aipp_index);
}

Status ModelManager::GenSessionId(uint64_t &session_id) {
const uint64_t kSessionTimeMask = 0xffffffffffff0000;
const uint64_t kSessionPidMask = 0x000000000000ff00;
const uint64_t kSessionBiasMask = 0x00000000000000ff;

const uint64_t kMaskPerOffset = 8;

std::lock_guard<std::mutex> lock(session_id_create_mutex_);

mmTimeval tv;
@@ -1032,12 +1050,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {
GELOGE(INTERNAL_ERROR, "Failed to get current time.");
return INTERNAL_ERROR;
}
session_id = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us
uint64_t timestamp = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us

static uint32_t pid = mmGetPid();

session_id_bias_++;
// max bais 100.
session_id_bias_ = session_id_bias_ % kSessionMaxBias;
session_id = session_id * kSessionMaxBias + session_id_bias_;
session_id = ((timestamp<<kMaskPerOffset<<kMaskPerOffset) & kSessionTimeMask) +
((pid<<kMaskPerOffset) & kSessionPidMask) + (session_id_bias_ & kSessionBiasMask);

GELOGD("Generate new session id: %lu.", session_id);
return SUCCESS;
@@ -1053,15 +1073,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
mmTimespec timespec = mmGetTickCount();

ModelHelper model_helper;
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u", model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
Status ret = model_helper.LoadModel(model);
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@@ -1075,8 +1087,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@@ -1088,7 +1100,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
@@ -1220,14 +1232,15 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check whether model has been loaded or not.", model_id);
"Invalid model id %u, check weather model has been loaded or not.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
// Zero copy is enabled by default, no need to judge.
uint64_t session_id_davinci = davinci_model->GetSessionId();
uint32_t model_id_davinci = davinci_model->GetModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci);
uint32_t sub_model_id = davinci_model->SubModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci, sub_model_id);
if (status != SUCCESS) {
GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci,
model_id_davinci);
@@ -1243,11 +1256,11 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}

Status ModelManager::CreateAicpuSession(uint64_t session_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
auto it = sess_ids_.find(session_id);
// never been created by any model
if (it == sess_ids_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0, 0);
if (ret == SUCCESS) {
(void)sess_ids_.insert(session_id);
GELOGI("The session: %lu create success.", session_id);
@@ -1462,7 +1475,8 @@ void ModelManager::GenModelId(uint32_t *id) {
if (id == nullptr) {
return;
}
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

std::lock_guard<std::mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

@@ -1534,4 +1548,203 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
return SUCCESS;
}

// Launches the device-side "checkOpType" aicpu kernel to verify that every op
// type in aicpu_optype_list (CPU_KERNEL) and aicpu_tf_optype_list (TF_KERNEL)
// is supported on the current device.
//
// Flow: stage a SysOpCheckInfo request (op names copied into device HBM),
// launch the kernel on a temporary stream, synchronize, then read back the
// SysOpCheckResp and per-op ReturnCodes.
//
// Returns SUCCESS when nothing needs checking or all op types pass,
// FAILED when the device reports unsupported op types (details logged),
// or a converted runtime status when any rt* call fails.
//
// All HBM buffers recorded in allocated_mem are freed by the scope guard on
// every exit path; the stream is destroyed explicitly on each path after its
// creation.
Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
                                              std::vector<std::string> &aicpu_tf_optype_list) {
  std::string kernel_name = "checkOpType";
  GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str());
  // Serializes all cust-aicpu kernel launches.
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  std::vector<SysOpInfo> req_aicpu_op_info_list;
  std::vector<SysOpInfo> res_aicpu_op_info_list;
  std::vector<ReturnCode> res_ret_code_list;

  // Nothing to validate: treat as success.
  if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
    GELOGI("No need to check aicpu op type.");
    return SUCCESS;
  }

  vector<void *> allocated_mem;
  rtError_t status;
  rtStream_t stream = nullptr;
  void *args = nullptr;

  // Device-side buffers: request op list, response op list, per-op return codes.
  void *d_req_op_list = nullptr;
  void *d_res_op_list = nullptr;
  void *d_ret_code_list = nullptr;

  size_t aicpu_op_nums = aicpu_optype_list.size();
  size_t tf_op_nums = aicpu_tf_optype_list.size();
  size_t op_nums = aicpu_op_nums + tf_op_nums;
  // Scope guard: frees every buffer recorded in allocated_mem on any return path.
  std::function<void()> callback = [&]() {
    for (auto mem : allocated_mem) {
      GE_CHK_RT(rtFree(mem));
    }
  };
  GE_MAKE_GUARD(release, callback);
  // malloc sysOpInfoList in SysOpCheckInfo
  status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_req_op_list);

  // malloc sysOpInfoList in SysOpCheckResp
  status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_res_op_list);

  // malloc returnCodeList in SysOpCheckResp
  status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_ret_code_list);

  // Stage CPU_KERNEL op types: each name is copied to device memory and
  // referenced from a SysOpInfo entry by device address.
  for (const auto &op_type : aicpu_optype_list) {
    SysOpInfo op_info;
    // malloc op_type name in SysOpInfo
    void *d_op_type_name = nullptr;
    status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
      return RT_ERROR_TO_GE_STATUS(status);
    }
    allocated_mem.push_back(d_op_type_name);
    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
    op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
    op_info.opLen = op_type.length();
    op_info.kernelsType = CPU_KERNEL;
    req_aicpu_op_info_list.emplace_back(op_info);
  }

  // Stage TF_KERNEL op types the same way.
  for (const auto &op_type : aicpu_tf_optype_list) {
    SysOpInfo op_info;
    // malloc op_type name in SysOpInfo
    void *d_op_type_name = nullptr;
    status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
      return RT_ERROR_TO_GE_STATUS(status);
    }
    allocated_mem.push_back(d_op_type_name);
    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
    op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
    op_info.opLen = op_type.size();
    op_info.kernelsType = TF_KERNEL;
    req_aicpu_op_info_list.emplace_back(op_info);
  }
  GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size());
  GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),
                     sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));

  // Build request/response headers; sysOpInfoList fields carry device addresses
  // encoded as uint64.
  SysOpCheckInfo op_check_info_req = { 0 };
  SysOpCheckResp op_check_info_res = { 0 };
  op_check_info_req.opListNum = op_nums;
  op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
  op_check_info_req.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));

  op_check_info_res.opListNum = 0;
  op_check_info_res.isWithoutJson = 0;
  op_check_info_res.returnCodeList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
  op_check_info_res.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));

  // Kernel args layout: [SysOpCheckInfo][SysOpCheckResp], both copied to device.
  uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp);
  status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(args);
  GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo),
                     RT_MEMCPY_HOST_TO_DEVICE));
  // The response block lives at args + offSetLen.
  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)),
                     sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT(rtStreamCreate(&stream, 0));
  GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));

  // Block until the check kernel finishes.
  status = rtStreamSynchronize(stream);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
    GE_CHK_RT(rtStreamDestroy(stream));
    return RT_ERROR_TO_GE_STATUS(status);
  }

  // Check the response
  SysOpCheckResp *d_op_check_info_res =
      reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(
          reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)));
  (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp));
  GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp),
                     RT_MEMCPY_DEVICE_TO_HOST));

  // Device signals "no json" when the check cannot/need not be performed.
  if (op_check_info_res.isWithoutJson) {
    GELOGI("No need to check aicpu in this scenoria.");
    GE_CHK_RT(rtStreamDestroy(stream));
    return SUCCESS;
  }
  // opListNum in the response counts UNSUPPORTED op types; zero means all pass.
  uint64_t res_op_nums = op_check_info_res.opListNum;
  GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums);
  if (res_op_nums != 0) {
    res_ret_code_list.clear();
    res_ret_code_list.resize(res_op_nums);
    res_aicpu_op_info_list.clear();
    res_aicpu_op_info_list.resize(res_op_nums);
    GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums,
                       reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.returnCodeList)),
                       sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
    GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums,
                       reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),
                       sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
    if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
      GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
      GE_CHK_RT(rtStreamDestroy(stream));
      return FAILED;
    }
    // Build a human-readable failure report from the unsupported op entries.
    std::string fail_reason;
    // NOTE(review): loop index is uint32_t while res_op_nums is uint64_t —
    // confirm the device can never report more than UINT32_MAX entries.
    for (uint32_t i = 0; i < res_op_nums; i++) {
      ReturnCode ret_code = res_ret_code_list.at(i);
      SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType,
             aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
      std::vector<char> op_name;
      op_name.clear();
      op_name.resize(kOpNameMaxSize);
      // NOTE(review): destination buffer is kOpNameMaxSize (100) bytes but both
      // the capacity argument and the copy length are aicpu_info.opLen taken
      // from the device response — confirm opLen can never exceed
      // kOpNameMaxSize, otherwise this copy can overflow op_name.
      GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType),
                         aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
      std::string kernel_type =
          (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
      string op_name_str(op_name.data());
      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type +
                     " ret code:" + std::to_string(static_cast<int>(ret_code)) +
                     "<0: op_type, 1: format, 2: datatype> \n";
    }
    fail_reason += "not support.";
    GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
    GE_CHK_RT(rtStreamDestroy(stream));
    return FAILED;
  }

  GE_CHK_RT(rtStreamDestroy(stream));
  GELOGI("Cpu kernel launch check optype task success.");
  return SUCCESS;
}

// Validates the aicpu op types recorded on the model, if any.
// The "needCheckCpu" / "needCheckTf" list attributes carry the CPU_KERNEL and
// TF_KERNEL op-type names to verify; when neither attribute is present the
// model requires no check and SUCCESS is returned immediately.
Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
  std::vector<std::string> cpu_op_types;
  std::vector<std::string> tf_op_types;
  const bool has_cpu_list = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", cpu_op_types);
  const bool has_tf_list = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", tf_op_types);
  if (has_cpu_list || has_tf_list) {
    // Launch the device-side check kernel; the macro logs and returns on failure.
    GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(cpu_op_types, tf_op_types),
                      "Launch check aicpu op type failed.");
    return SUCCESS;
  }
  GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
  return SUCCESS;
}

} // namespace ge

+ 11
- 4
ge/graph/load/new_model_manager/model_manager.h View File

@@ -273,7 +273,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

std::shared_ptr<hybrid::HybridDavinciModel> GetHybridModel(uint32_t id);

ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id);
ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id,
uint32_t sub_model_id);

ge::Status CreateAicpuSession(uint64_t session_id);

@@ -281,9 +282,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

void DestroyAicpuSession(uint64_t session_id);

ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id);

ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id);
ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id, uint64_t kernel_id);

ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);

@@ -295,6 +296,11 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

ge::Status LaunchKernelCustAicpuSo(const string &kernel_name);

ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
std::vector<std::string> &aicpu_tf_optype_list);

ge::Status CheckAicpuOpList(GeModelPtr ge_model);

ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);

ge::Status GenSessionId(uint64_t &session_id);
@@ -353,7 +359,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_;
uint32_t max_model_id_;
std::recursive_mutex map_mutex_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::mutex session_id_create_mutex_;
static::std::mutex exeception_infos_mutex_;
uint64_t session_id_bias_;


+ 16
- 9
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -97,14 +97,16 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin

// 2.2 Collect aicpu kernel
uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID;
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS,
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(),
davinci_model->SubModelId(), kernel_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuKernel error.");
return FAILED;)
// 2.3 Create session
GE_CHECK_NOTNULL(ModelManager::GetInstance());
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id);
return FAILED;)
ret = ModelManager::GetInstance()->CreateAicpuSession(session_id);
GE_IF_BOOL_EXEC(ret != SUCCESS,
GELOGE(ret, "CreateAicpuSession error. session id: %lu", session_id);
return ret;)

kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL);
if (davinci_model_->IsKnownNode()) {
@@ -132,6 +134,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

InitDumpTask(input_output_addr, op_desc);
GELOGI("KernelExTaskInfo knonw node Init Success.");
return SUCCESS;
}
@@ -166,11 +169,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = input_output_addr_;
}
InitDumpTask(input_output_addr_, op_desc);
if (davinci_model_->GetOpDugReg()) {
GELOGI("Op debug is open in kernel ex task info");
dump_args_ = input_output_addr_;
@@ -200,6 +199,14 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
return SUCCESS;
}

// Enables data dump for this task when the dump properties select its layer.
// @param addr     address block whose contents should be dumped; the known-node
//                 init path passes a buffer different from input_output_addr_.
// @param op_desc  op descriptor used to match the layer-name dump filter.
void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
  if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                          op_desc->GetName())) {
    dump_flag_ = RT_KERNEL_DUMPFLAG;
    // Fix: record the caller-supplied address. The previous code ignored the
    // `addr` parameter and always captured the member input_output_addr_,
    // which dumps the wrong buffer on the known-node path.
    dump_args_ = addr;
  }
}

Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
auto kernel_ex_def = task_def.kernel_ex();
uint32_t op_index = kernel_ex_def.op_index();


+ 2
- 0
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h View File

@@ -60,6 +60,8 @@ class KernelExTaskInfo : public TaskInfo {
private:
Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc);

void InitDumpTask(void *addr, const OpDescPtr &op_desc);

uint32_t task_id_;
uint32_t stream_id_;
uint32_t dump_flag_;


+ 90
- 77
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -90,18 +90,20 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names;
fusion_op_info_.op_name = op_desc_->GetName());

string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
// get bin_file_key
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
// new aicpu kernel(rtCpuKernelLaunch) no need to check function
if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret););
} else if (kernel_type_ == ccKernelType::TE) {
// get bin_file_key
string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
return RT_ERROR_TO_GE_STATUS(rt_ret););
@@ -370,11 +372,7 @@ Status KernelTaskInfo::SuperKernelDistribute() {
Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
@@ -430,31 +428,36 @@ Status KernelTaskInfo::UpdateArgs() {
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

vector<void *> io_addrs;
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (kernel_type_ == ccKernelType::TE) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
davinci_model_->SetTotalIOAddrs(io_addrs);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
}

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -530,18 +533,33 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
}

Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
const domi::KernelDef &kernel_def = task_def.kernel();
domi::KernelDef kernel_def = task_def.kernel();
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

// get opcontext stored in model
const domi::KernelContext &context = kernel_def.context();
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
}
return SUCCESS;
}
@@ -553,6 +571,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex);
GE_CHECK_NOTNULL(op_desc);
if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
InitDumpTask(offset);
return SUCCESS;
}

@@ -617,15 +637,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
return FAILED;
}
skt_dump_args_ = static_cast<char *>(args_) + offset;
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (IsL1FusionOp(op_desc)) {
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
} else {
dump_flag_ = RT_KERNEL_DUMPFLAG;
}
dump_args_ = static_cast<char *>(args_) + offset;
}
InitDumpTask(offset);

GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset,
"Op debug is open in TVM task info");
@@ -870,7 +882,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}

// copy args to new host memory
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
@@ -878,23 +890,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return FAILED;
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();

vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@@ -911,6 +908,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
@@ -925,16 +935,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (IsL1FusionOp(op_desc)) {
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
} else {
dump_flag_ = RT_KERNEL_DUMPFLAG;
}
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
}
InitDumpTask(sizeof(aicpu::AicpuParamHead));
if (davinci_model_->GetOpDugReg()) {
GELOGI("Op debug is open in aicpu task info");
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
@@ -948,6 +949,18 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return SUCCESS;
}

// Configure data dump for this kernel task when the dump configuration selects
// this layer: choose the fusion or plain kernel dump flag and point dump_args_
// at the argument buffer plus the given byte offset.
// @param offset  byte offset into args_ where the dump-relevant data begins.
void KernelTaskInfo::InitDumpTask(uint32_t offset) {
  const auto &dump_properties = davinci_model_->GetDumpProperties();
  // Nothing to do unless this op is in the dump layer list.
  if (!dump_properties.IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc_->GetName())) {
    return;
  }
  // L1-fusion ops require the fusion-specific dump flag.
  dump_flag_ = IsL1FusionOp(op_desc_) ? RT_FUSION_KERNEL_DUMPFLAG : RT_KERNEL_DUMPFLAG;
  dump_args_ = static_cast<char *>(args_) + offset;
}

Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
if (ext_info.empty()) {
return SUCCESS;


+ 3
- 3
ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -129,7 +129,9 @@ class KernelTaskInfo : public TaskInfo {
Status SuperKernelDistribute();
bool IsL1FusionOp(const OpDescPtr &op_desc);

// For super kernel
void InitDumpTask(uint32_t offset);

// For super kernel
Status SaveSKTDumpInfo();
void UpdateTaskId();
void UpdateSKTTaskId();
@@ -159,9 +161,7 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_;
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
bool call_save_dump_ = false;

// aicpu ext_info device mem


+ 30
- 2
ge/graph/manager/graph_manager.cc View File

@@ -23,15 +23,25 @@
#include <sstream>
#include <string>
#include <thread>
#include <utility>

#include "common/ge/ge_util.h"
#include "common/math/math_util.h"
#include "common/thread_pool.h"
#include "common/util.h"
#include "external/graph/types.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/ge_types.h"
#include "analyzer/analyzer.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/ge_global_options.h"
#include "graph/ge_local_context.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/passes/enter_pass.h"
@@ -51,6 +61,8 @@
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/input_output_connection_identify_pass.h"
#include "graph/passes/iterator_op_pass.h"
@@ -65,6 +77,7 @@
#include "graph/passes/permute_pass.h"
#include "graph/passes/prune_pass.h"
#include "graph/passes/ref_identity_delete_op_pass.h"
#include "graph/passes/replace_with_empty_const_pass.h"
#include "graph/passes/remove_same_const_pass.h"
#include "graph/passes/reshape_recovery_pass.h"
#include "graph/passes/reshape_remove_pass.h"
@@ -75,12 +88,14 @@
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
#include "graph/passes/transop_symmetry_elimination_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/useless_control_out_remove_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
#include "graph/passes/end_of_sequence_add_control_pass.h"
@@ -91,6 +106,9 @@
#include "graph/passes/memcpy_addr_async_pass.h"
#include "graph/build/label_allocator.h"
#include "graph/utils/tensor_adapter.h"
#include "graph/utils/type_utils.h"
#include "graph/graph_util.h"
#include "graph/types.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "ir_build/atc_ir_common.h"
@@ -518,7 +536,7 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr
return SUCCESS;
}

Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph,
Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph,
Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) {
GE_CHECK_NOTNULL(compute_graph);
// use default 16 multi thread
@@ -719,6 +737,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
GeRootModelPtr &ge_root_model, uint64_t session_id) {
GE_CHECK_NOTNULL(graph_node);
GE_CHECK_NOTNULL(compute_graph);

CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId());
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph);
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph);
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts",
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts,
@@ -2421,6 +2442,13 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
continue;
}
auto model_id = model->GetModelId();
// unknown model not release
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape));
if (is_unknown_shape) {
GELOGD("model_id[%u] graph_id[%u] is unknown model, not release memory", model_id, graph_id);
continue;
}
// not loaded,no need unload
if (!it.second->GetLoadFlag()) {
GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id);
@@ -2438,7 +2466,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id);
continue;
}
result = GraphLoader::DestroyAicpuKernel(session_id, model_id);
result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0);
if (result != SUCCESS) {
GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
graph_id);


+ 3
- 0
ge/graph/manager/graph_mem_allocator.cc View File

@@ -16,7 +16,10 @@

#include "graph/manager/graph_mem_allocator.h"

#include <set>
#include <string>

#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"



+ 33
- 0
ge/graph/optimize/graph_optimize.cc View File

@@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) {
}
return SUCCESS;
}
// Run every registered whole-graph optimizer, in priority order, over the
// merged graph — skipping the engine excluded by the configured core type.
// @param compute_graph  graph to optimize in place; must not be null.
// @return SUCCESS, or the first optimizer failure / initialization error code.
Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) {
  if (compute_graph == nullptr) {
    GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr.");
    return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
  }

  std::shared_ptr<GELib> gelib = ge::GELib::GetInstance();
  if (gelib == nullptr || !gelib->InitFlag()) {
    GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed.");
    return GE_CLI_GE_NOT_INITIALIZED;
  }

  auto optimizers = gelib->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
  GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", optimizers.size());
  // A vector-core build excludes the AI-core engine, and vice versa.
  string exclude_core_type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
  GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str());

  Status ret = SUCCESS;
  for (auto &optimizer : optimizers) {
    if (optimizer.first == exclude_core_type || optimizer.second == nullptr) {
      continue;
    }
    GELOGI("Begin to optimize whole graph by engine %s", optimizer.first.c_str());
    ret = optimizer.second->OptimizeWholeGraph(*compute_graph);
    // Dump the intermediate graph after each engine, regardless of its result.
    GE_DUMP(compute_graph, "OptimizeWholeGraph" + optimizer.first);
    if (ret != SUCCESS) {
      GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret);
      return ret;
    }
  }
  return ret;
}
} // namespace ge

+ 3
- 0
ge/graph/optimize/graph_optimize.h View File

@@ -52,6 +52,9 @@ class GraphOptimize {
// for fe prepare optimize in quantize scene
Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph);

// for engine to optimize merged whole graph before ge Optimize2
Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph);

// for rts optimize before build to add attr and insert memcpy op
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph);



+ 33
- 0
ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect
}
}
}
return LinkToPotentialPrecedenceNode(graph, clean_addr_node);
}

// Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean
// node. We hope that atomic clean node can execute with the highest priority in the entire graph. Because of stream
// concurrency mechanism, only placing it at the head can not ensure that priority. Therefore, we need to add control
// edges from atomic clean node to the nodes that may be the first node on each stream. Generally, the first nodes on
// each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the
// successors of Data/Variable.
// Give the atomic clean node the highest effective scheduling priority by
// adding control edges from it to every node that could be the first task on a
// stream: the direct successors of input-less Data/Variable nodes (Data and
// Variable themselves generate no task, so their successors are the real
// stream heads).
// @param graph             graph being processed.
// @param atomic_clean_node node that must run before those stream heads.
// @return SUCCESS, or a failure code from a null check or edge insertion.
Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) {
  GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.",
         atomic_clean_node->GetName().c_str());
  auto clean_out_ctrl = atomic_clean_node->GetOutControlAnchor();
  GE_CHECK_NOTNULL(clean_out_ctrl);

  for (const auto &first_node : graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(first_node);
    // Only input-less Data/Variable nodes mark the head of a stream here.
    const bool is_stream_head = (first_node->GetType() == DATA || first_node->GetType() == VARIABLE) &&
                                first_node->GetInAllNodes().empty();
    if (!is_stream_head) {
      continue;
    }
    for (const auto &successor : first_node->GetOutAllNodes()) {
      GE_CHECK_NOTNULL(successor);
      auto succ_in_ctrl = successor->GetInControlAnchor();
      GE_CHECK_NOTNULL(succ_in_ctrl);
      if (clean_out_ctrl->IsLinkedWith(succ_in_ctrl)) {
        continue;  // Edge already present; avoid duplicates.
      }
      GE_CHK_STATUS_RET(clean_out_ctrl->LinkTo(succ_in_ctrl));
      GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), successor->GetName().c_str());
    }
  }

  return SUCCESS;
}



+ 8
- 0
ge/graph/passes/atomic_addr_clean_pass.h View File

@@ -68,6 +68,14 @@ class AtomicAddrCleanPass : public GraphPass {
Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);

/**
* Link atomic clean node to all potential precedence nodes which may execute before atomic clean node
* @param graph
* @param atomic_clean_node
* @return
*/
Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);

/**
* Check if this node is atomic op.
* @param node
* @return


+ 4
- 8
ge/graph/passes/dynamic_single_op_reset_shape_pass.cc View File

@@ -113,16 +113,13 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) {
GE_CHECK_NOTNULL(op_desc);
std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim};
GeShape dynamic_shape(dynamic_shape_dims);
bool reset_shape_flag = false;
if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) {
(void)ResetOutputTensorShape(op_desc, dynamic_shape);
}
(void)ResetInputTensorShape(op_desc, dynamic_shape);
(void)ResetOutputTensorShape(op_desc, dynamic_shape);
return SUCCESS;
}

Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape,
bool &reset_shape_flag) {
reset_shape_flag = false;
Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc,
const GeShape &dynamic_shape) {
GE_CHECK_NOTNULL(op_desc);
for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) {
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
@@ -136,7 +133,6 @@ Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc,
if (CheckIfConstInput(input_desc)) {
continue;
}
reset_shape_flag = true;
input_desc->SetShape(dynamic_shape);
}
return SUCCESS;


+ 1
- 1
ge/graph/passes/dynamic_single_op_reset_shape_pass.h View File

@@ -27,7 +27,7 @@ class DynamicSingleOpResetShapePass : public GraphPass {

private:
Status ResetOpShape(OpDescPtr &op_desc);
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag);
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu);
bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc);


+ 53
- 45
ge/graph/passes/multi_batch_clone_pass.cc View File

@@ -22,6 +22,8 @@
#include "graph/preprocess/multi_batch_options.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "register/op_registry.h"

namespace ge {
@@ -478,8 +480,28 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
return SUCCESS;
}

(void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());

GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex));
std::vector<std::string> input_dims_str;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
auto shape = data_shape;
auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str());
return ret;
}
tensor.SetShape(shape);
int64_t tensor_size = 0;
(void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size);
string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" +
TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" +
std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" +
formats::JoinToString(tensor.GetShape().GetDims());
input_dims_str.emplace_back(input_str);
}
(void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);

size_t max_shape_index = 0;
int64_t max_size = 0;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
@@ -503,12 +525,24 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {

///
/// @ingroup ge
/// @brief Set shape to Data node in branch.
/// @param [in] const NodePtr &data: data in branch.
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) {
Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) {
int node_index = -1;
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) {
GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str());
return FAILED;
}

int parent_index = node_index + 1;
if (!AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Failed to set parent index for node %s", data->GetName().c_str());
return FAILED;
}

auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
const auto &dims = data_shape.GetDims();
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
@@ -580,13 +614,15 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
subgraph->SetParentGraph(graph);
graph->AddSubgraph(subgraph->GetName(), subgraph);
all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHK_STATUS_RET(UpdateSubgraphOutput(all_branch_output_[subgraph]),
"Update %s failed", all_branch_output_[subgraph]->GetName().c_str());

const string key_name = "branches" + std::to_string(i);
op_desc->AddSubgraphName(key_name);
op_desc->SetSubgraphInstanceName(i, subgraph->GetName());

for (const auto &data : input_nodes) {
GE_CHK_STATUS_RET(UpdateShapeToData(data, i), "Update %s failed", subgraph->GetName().c_str());
GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str());
}
}

@@ -595,55 +631,27 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
const auto &op_desc = n->GetOpDesc();
op_desc->SetName(n->GetName() + kMultiBatchNodePostfix + "0");
if (n->GetType() == DATA) {
GE_CHK_STATUS_RET(UpdateShapeToData(n, 0), "Update %s failed", branch->GetName().c_str());
GE_CHK_STATUS_RET(UpdateSubgraphData(n, 0), "Update %s failed", branch->GetName().c_str());
}
}

return PostProcSubgraph(graph);
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Assign parent index for branches.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @brief Update output_node in Subgraph.
/// @param [in] const NodePtr &output_node: output_node in Subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::PostProcSubgraph(const ComputeGraphPtr &graph) {
auto func_desc = case_node_->GetOpDesc();
domi::ParseSubgraphFuncV2 parse_func_v2 = nullptr;
auto post_func = domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType());
if (post_func == nullptr) {
GELOGW("The subgraph post func for node %s type %s is null.", case_node_->GetName().c_str(),
case_node_->GetType().c_str());
if (domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType(), parse_func_v2) != SUCCESS ||
parse_func_v2 == nullptr) {
GELOGW("The subgraph new post func v2 for node %s type %s is null", case_node_->GetName().c_str(),
case_node_->GetType().c_str());
return FAILED;
}
}

for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
const auto &subgraph = graph->GetSubgraph(name);
if (subgraph == nullptr) {
GELOGE(FAILED, "Subgraph not found, name: %s", name.c_str());
return FAILED;
}

std::string subgraph_name;
GE_CHK_STATUS_RET(func_desc->GetSubgraphNameByInstanceName(subgraph->GetName(), subgraph_name),
"Subgraph: %s get subgraph name failed.", subgraph->GetName().c_str());

auto graph = GraphUtils::CreateGraphFromComputeGraph(subgraph);
Status ret = FAILED;
if (post_func != nullptr) {
ret = post_func(subgraph_name, graph);
} else if (parse_func_v2 != nullptr) {
ret = parse_func_v2(subgraph_name.c_str(), graph);
}
if (ret != SUCCESS) {
GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", graph.GetName().c_str(),
case_node_->GetName().c_str(), case_node_->GetType().c_str());
Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) {
const auto &op_desc = output_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (size_t index = 0; index < op_desc->GetInputsSize(); ++index) {
GeTensorDescPtr tensor = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL(tensor);
if (!AttrUtils::SetInt(tensor, ATTR_NAME_PARENT_NODE_INDEX, index)) {
GELOGE(FAILED, "Failed to set parent index for node %s", output_node->GetName().c_str());
return FAILED;
}
}


+ 11
- 11
ge/graph/passes/multi_batch_clone_pass.h View File

@@ -105,12 +105,20 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Set shape to Data node in branch.
/// @param [in] const NodePtr &data: data in branch.
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateShapeToData(const NodePtr &data, size_t index);
Status UpdateSubgraphData(const NodePtr &data, size_t index);

///
/// @ingroup ge
/// @brief Update output_node in Subgraph.
/// @param [in] const NodePtr &output_node: output_node in Subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateSubgraphOutput(const NodePtr &output_node);

///
/// @ingroup ge
@@ -133,14 +141,6 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Assign parent index for branches.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status PostProcSubgraph(const ComputeGraphPtr &graph);

///
/// @ingroup ge
/// @brief Remove subgraph supend output anchor.
/// @param [in] ComputeGraphPtr &graph: Parent compute graph.
/// @return 0: SUCCESS / others: FAILED


+ 231
- 315
ge/graph/passes/subgraph_const_migration_pass.cc View File

@@ -20,11 +20,12 @@
#include "graph/passes/folding_pass.h"

namespace ge {
constexpr uint32_t kDataOutIndex = 0;
constexpr uint32_t kZeroIndex = 0;
constexpr uint32_t kCaseInputBase = 1;
constexpr uint32_t kInvalidParent = 0x7fffffffU;
const string kMbatchNodeNameMark = "_ascend_mbatch_batch_";

bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) {
bool IsSameConstNode(const NodePtr &src_node, const NodePtr &dst_node) {
if ((src_node == nullptr) && (dst_node == nullptr)) {
return true;
}
@@ -37,35 +38,9 @@ bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) {
return false;
}

if ((src_node->GetInControlNodes().size() != dst_node->GetInControlNodes().size()) ||
(src_node->GetOutDataNodesSize() != dst_node->GetOutDataNodesSize())) {
return false;
}

set<uint32_t> related_parent;
const auto in_nodes = src_node->GetInControlNodes();
for (uint32_t i = 0; i < in_nodes.size(); ++i) {
const auto owner_node = in_nodes.at(i);
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
return false;
}

related_parent.insert(parent_index);
}

for (const auto &in_node : dst_node->GetInControlNodes()) {
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
return false;
}

if (related_parent.count(parent_index) == 0) {
return false;
}
}

return true;
const GeTensorDesc &src_desc = src_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
const GeTensorDesc &dst_desc = dst_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
return (src_desc == dst_desc);
}

/***********************************************************************************************************************
@@ -89,12 +64,12 @@ bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) {
+-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+
| Data | | Data | | Data | | Data | | Data | | Data | | Conv2D |
+-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+
\ \ | / / | |
\ \ | / / | |
\ \ | / / | |
\ \ | / / | |
\ +-----------+ / | +-----------+
+---------------| Const |----------------+ | | Pooling |
\ \ | / / | | +-----------+
\ \ | / / | | | Const |
\ \ | / / | | +-----------+
\ \ | / / | | /
\ +-----------+ / | +-----------+ /
+---------------| Const |----------------+ | | Pooling |-----+
+-----------+ | +-----------+
\ | /
\ | /
@@ -126,28 +101,26 @@ Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) {
continue;
}

do {
migration_append_ = false;
map<ComputeGraphPtr, map<uint32_t, NodePtr>> graph_datas;
if (ClassifyDataNodes(graph, func_desc, graph_datas) != SUCCESS) {
return FAILED;
}
map<ComputeGraphPtr, map<string, NodePtr>> all_const_nodes;
map<ComputeGraphPtr, map<uint32_t, NodePtr>> all_data_nodes;
if (ClassifyGraphNodes(graph, func_desc, all_const_nodes, all_data_nodes) != SUCCESS) {
return FAILED;
}

if (graph_datas.empty()) {
GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str());
break;
}
if (all_const_nodes.empty()) {
GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str());
break;
}

// {subgraph0, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}}
// {subgraph1, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}}
// {subgraph2, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}}
const auto base_nodes = graph_datas.begin()->second; // Need copy.
for (const auto &node_item : base_nodes) {
if (GraphNodeMigration(graph, node, graph_datas, node_item.second, node_item.first) != SUCCESS) {
return FAILED;
}
// {subgraph0, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}}
// {subgraph1, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}}
// {subgraph2, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}}
const auto &const_nodes = all_const_nodes.begin()->second;
for (const auto &item : const_nodes) {
if (GraphNodeMigration(graph, node, all_const_nodes, all_data_nodes, item.second, item.first) != SUCCESS) {
return FAILED;
}
} while (migration_append_);
}
}

return SUCCESS;
@@ -155,14 +128,16 @@ Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) {

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @brief Get all Const/Data nodes for all subgraph.
/// @param [in] graph: Root compute graph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] graph_datas: Data groups of subgraph.
/// @param [out] all_const_nodes: Const groups of subgraph.
/// @param [out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas) {
Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes) {
for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
const auto &subgraph = graph->GetSubgraph(name);
if (subgraph == nullptr) {
@@ -170,32 +145,47 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap
return GE_GRAPH_EMPTY_SUBGRAPH;
}

auto &data_nodes = graph_datas[subgraph];
for (auto &data : subgraph->GetDirectNode()) {
if (data->GetType() != DATA) {
continue;
}
auto &data_nodes = all_data_nodes[subgraph];
auto &const_nodes = all_const_nodes[subgraph];
for (auto &node : subgraph->GetDirectNode()) {
if (node->GetType() == DATA) {
uint32_t parent_index = kInvalidParent;
if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
return FAILED;
}

uint32_t parent_index = 0;
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", data->GetName().c_str());
return FAILED;
}
data_nodes[parent_index] = node;
GELOGD("%s, index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, node->GetName().c_str());
} else if ((node->GetType() == CONSTANT) && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) {
set<string> peer_name_list;
const auto &out_anchor = node->GetOutDataAnchor(kZeroIndex);
for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
const auto &peer_node = in_anchor->GetOwnerNode();
// Trim subgraph node name prefix.
string node_full_name = peer_node->GetName();
size_t pos = node_full_name.find(kMbatchNodeNameMark);
if (pos == string::npos) {
GELOGE(FAILED, "find: %s of multi-batch in node: %s", kMbatchNodeNameMark.c_str(), node_full_name.c_str());
return FAILED;
}

string fixed_name = node_full_name.substr(0, pos);
pos = node_full_name.find("_", pos + kMbatchNodeNameMark.length());
if (pos != string::npos) {
fixed_name += node_full_name.substr(pos);
}

peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx()));
}

data_nodes[parent_index] = data;
GELOGD("%s, Parent index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, data->GetName().c_str());
}
}
string key_of_const;
for (const string &name : peer_name_list) {
key_of_const += (key_of_const.empty() ? name : "_" + name);
}

auto iter = graph_datas.begin();
if (iter == graph_datas.end()) {
return SUCCESS;
}
for (const auto &data_nodes : graph_datas) {
if (data_nodes.second.size() != iter->second.size()) {
GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]",
data_nodes.first->GetName().c_str(), data_nodes.second.size(), iter->second.size());
return FAILED;
const_nodes[key_of_const] = node;
GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str());
}
}
}

@@ -204,36 +194,27 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] node: Const node of subgraph.
/// @param [out] inputs: parent index to Const.
/// @param [out] outputs: Data groups of subgraph.
/// @brief Get parent_index for Const node migration.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const node will process.
/// @param [out] parent_index: parent index for replace Data.
/// @return true: SUCCESS / false: FAILED
///
bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map<uint32_t, uint32_t> &inputs,
map<uint32_t, uint32_t> &outputs) {
for (uint32_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) {
outputs[i] = kInvalidParent;
}

uint32_t out_index = 0;
const auto in_nodes = node->GetInAllNodes();
for (size_t i = 0; i < in_nodes.size(); ++i) {
const auto owner_node = in_nodes.at(i);
if (owner_node->GetType() != DATA) {
bool SubgraphConstMigrationPass::GetAssociatedNodes(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, uint32_t &parent_index) {
for (const auto in_node : const_node->GetInAllNodes()) {
if (in_node->GetType() != DATA) {
return false;
}

uint32_t parent_index = 0;
if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
uint32_t node_index = 0;
if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, node_index)) {
return false;
}

// Input Data feed other Node, need add new Data.
inputs[i] = parent_index;
if ((out_index == outputs.size()) && owner_node->GetOutDataNodes().empty()) {
outputs[out_index] = parent_index;
++out_index;
if ((parent_index == kInvalidParent) && in_node->GetOutDataNodes().empty()) {
parent_index = node_index;
}
}

@@ -242,43 +223,26 @@ bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map<uin

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] data_idx: Data groups of subgraph.
/// @brief Check parallel node is same for all subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node.
/// @return true: Same / false: not same
///
bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
const NodePtr &const_node, uint32_t parent_index, size_t index) {
auto it = graph_datas.begin();
for (++it; it != graph_datas.end(); ++it) {
const auto &data_nodes = it->second;
auto data_it = data_nodes.find(parent_index);
if (data_it == data_nodes.end()) {
GELOGE(FAILED, "Data: %s not fount, index: %u", const_node->GetName().c_str(), parent_index);
return false;
}

const auto &work_data = data_it->second;
const auto &out_anchor = work_data->GetOutControlAnchor();
const auto &in_anchors = out_anchor->GetPeerInControlAnchors();
if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) {
GELOGW("Node anchors not same, Data: %s -> %s anchor size: %zu, index: %zu",
work_data->GetName().c_str(), const_node->GetName().c_str(), in_anchors.size(), index);
return false;
}

const auto &in_anchor = in_anchors.at(index);
const auto &work_node = in_anchor->GetOwnerNode();
if (work_node == nullptr) {
GELOGE(FAILED, "Data: %s not found, parent: %u, index: %zu", const_node->GetName().c_str(), parent_index, index);
bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const NodePtr &const_node, const string &node_key) {
auto it = all_const_nodes.begin();
for (++it; it != all_const_nodes.end(); ++it) {
const auto &const_nodes = it->second;
auto node_it = const_nodes.find(node_key);
if (node_it == const_nodes.end()) {
GELOGW("Const node: %s not fount, key: %s", const_node->GetName().c_str(), node_key.c_str());
return false;
}

if (!IsSameOpNode(const_node, work_node)) {
GELOGI("OpDesc not same: %s %s, parent: %u, index: %zu",
const_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, index);
const auto &work_node = node_it->second;
if (!IsSameConstNode(const_node, work_node)) {
GELOGI("Not same: %s %s, key: %s", const_node->GetName().c_str(), work_node->GetName().c_str(), node_key.c_str());
return false;
}
}
@@ -291,51 +255,34 @@ bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, m
/// @brief Migration subgraph Node to Root
/// @param [in] graph: Root compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node for migration.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
const NodePtr &data_node, uint32_t parent_index) {
bool can_extrapolation = false;
do {
can_extrapolation = false;
const auto &out_anchor = data_node->GetOutControlAnchor();
const auto &in_anchors = out_anchor->GetPeerInControlAnchors();
for (size_t i = in_anchors.size(); i > 0; --i) {
const auto &in_anchor = in_anchors.at(i - 1);
const auto &work_node = in_anchor->GetOwnerNode();
GELOGD("Data: %s, node: %s, parent: %u, index: %zu",
data_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, i);
if (work_node->GetType() != CONSTANT) {
continue;
}

// Get associated Data, if Data feed other nodes, need append new Data.
map<uint32_t, uint32_t> inputs;
map<uint32_t, uint32_t> outputs;
if (!GetAssociatedNodes(work_node, inputs, outputs)) {
continue;
}
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, const string &node_key) {
if (!IsParallelNodeSame(all_const_nodes, const_node, node_key)) {
return SUCCESS;
}

if (!IsParallelNodeSame(graph_datas, work_node, parent_index, i - 1)) {
continue;
}
// Get associated Data, if Data feed other nodes, need append new Data.
uint32_t parent_index = kInvalidParent;
if (!GetAssociatedNodes(all_data_nodes, const_node, parent_index)) {
return SUCCESS;
}

GELOGI("Move node: %s, parent: %u, index: %zu", work_node->GetName().c_str(), parent_index, i);
if (AppendParallelNode(graph_datas, func_node, outputs) != SUCCESS) {
return FAILED;
}
GELOGI("Move node: %s, parent index: %u", const_node->GetName().c_str(), parent_index);
if (AppendParallelNode(func_node, parent_index, all_data_nodes) != SUCCESS) {
return FAILED;
}

if (MoveNodeToParent(graph, func_node, graph_datas, parent_index, i - 1, inputs, outputs) != SUCCESS) {
return FAILED;
}
can_extrapolation = true;
break;
}
} while (can_extrapolation);
if (MoveNodeToParent(graph, func_node, all_const_nodes, all_data_nodes, node_key, parent_index) != SUCCESS) {
return FAILED;
}

return SUCCESS;
}
@@ -343,114 +290,100 @@ Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra
///
/// @ingroup ge
/// @brief Append Input Tensor for functional node.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] outputs: Parent index of Node output.
/// @param [in/out] parent_index: Parent index for migration.
/// @param [in/out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::AppendParallelNode(map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
const NodePtr &func_node, map<uint32_t, uint32_t> &outputs) {
Status SubgraphConstMigrationPass::AppendParallelNode(const NodePtr &func_node, uint32_t &parent_index,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes) {
// If outputs index invalid, add Data and Input Tensor.
for (auto &item : outputs) {
if (item.second != kInvalidParent) {
continue;
}

// Add Data to subgraph.
map<ComputeGraphPtr, uint32_t> append_num;
for (auto &groups : graph_datas) {
const auto &subgraph = groups.first;
auto &data_nodes = groups.second;

item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index.
const auto data_name = subgraph->GetName() + "_data_" + std::to_string(item.second);

OpDescBuilder op_builder(data_name, DATA);
const OpDescPtr op_desc = op_builder.AddInput("x").AddOutput("y").Build();
if (op_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed");
return OUT_OF_MEMORY;
}
if (parent_index != kInvalidParent) {
return SUCCESS;
}

uint32_t data_index = item.second - kCaseInputBase;
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}
// Add Data to subgraph.
parent_index = func_node->GetAllInDataAnchorsSize(); // Update to valid parent index.
for (auto &item : all_data_nodes) {
const auto &subgraph = item.first;
const auto data_name = subgraph->GetName() + "_data_" + std::to_string(parent_index);
OpDescBuilder op_builder(data_name, DATA);
const auto op_desc = op_builder.AddInput("x").AddOutput("y").Build();
if (op_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed");
return OUT_OF_MEMORY;
}

if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, item.second)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}
uint32_t data_index = parent_index - kCaseInputBase;
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}

append_num[subgraph]++;
data_nodes[item.second] = subgraph->AddNode(op_desc);
GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second);
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}

// Add InputTensor to functional Node.
NodeUtils::AppendInputAnchor(func_node, item.second + 1);
item.second[parent_index] = subgraph->AddNode(op_desc);
GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), parent_index);
}

// Add InputTensor to functional Node.
NodeUtils::AppendInputAnchor(func_node, parent_index + 1);
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Delete Node from all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] detach: Node will move to parent.
/// @param [in] outputs: Parent index of Node output.
/// @brief Delete Node from subgraph.
/// @param [in] graph: subgraph for process.
/// @param [in] const_node: Node will move to parent.
/// @param [in] data_node: Place holder for Const.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::DetachParallelNode(const map<uint32_t, NodePtr> &graph_datas, const NodePtr &detach,
const map<uint32_t, uint32_t> &outputs) {
Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &graph, const NodePtr &const_node,
const NodePtr &data_node) {
// Break Data and Move node.
const auto &in_anchor = detach->GetInControlAnchor();
const auto &out_anchors = in_anchor->GetPeerOutControlAnchors();
for (size_t i = out_anchors.size(); i > 0; --i) {
const auto &out_anchor = out_anchors.at(i - 1);
const auto &in_anchor = const_node->GetInControlAnchor();
const auto out_anchors = in_anchor->GetPeerOutControlAnchors();
for (const auto out_anchor : out_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto &owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), detach->GetName().c_str());
}

// Break Move and follow, Link Data and follow.
for (uint32_t i = 0; i < detach->GetAllOutDataAnchorsSize(); ++i) {
auto it_idx = outputs.find(i);
if (it_idx == outputs.end()) {
GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i);
return FAILED;
}

auto it_data = graph_datas.find(it_idx->second);
if (it_data == graph_datas.end()) {
GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i);
return FAILED;
const auto owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str());
if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) {
graph->RemoveNode(owner_node);
}
}

const auto &data_node = it_data->second;
const auto &out_anchor = detach->GetOutDataAnchor(i);
const auto &ctrl_anchor = const_node->GetOutControlAnchor();
const auto ctrl_anchors = ctrl_anchor->GetPeerInControlAnchors();
for (const auto in_anchor : ctrl_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(ctrl_anchor, in_anchor), "Remove edge failed");
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());

const auto &out_desc = detach->GetOpDesc()->GetOutputDesc(i);
const auto &data_desc = data_node->GetOpDesc();
(void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node.
(void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node.
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_node->GetOutControlAnchor(), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
}

for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
if (in_anchor == nullptr) {
continue;
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto &owner_node = in_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", detach->GetName().c_str(), owner_node->GetName().c_str());
// Break Move and follow, Link Data and follow.
const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex);
const auto in_anchors =out_anchor->GetPeerInDataAnchors();
for (const auto in_anchor : in_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());

const auto &data_out_anchor = data_node->GetOutDataAnchor(kDataOutIndex);
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_out_anchor, in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), owner_node->GetName().c_str());
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
}

// Update Data op DataType.
const auto &const_desc = const_node->GetOpDesc();
const auto &tensor_desc = const_desc->GetOutputDesc(kZeroIndex);
const auto &data_desc = data_node->GetOpDesc();
(void)data_desc->UpdateInputDesc(kZeroIndex, tensor_desc); // Set Data Input to new connect Node.
(void)data_desc->UpdateOutputDesc(kZeroIndex, tensor_desc); // Set Data Output to new connect Node.

return SUCCESS;
}

@@ -459,47 +392,37 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const map<uint32_t, NodePt
/// @brief Move Node to Parent Graph.
/// @param [in] graph: Parent compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] attach: Node will move to parent.
/// @param [in] inputs: Parent index of Node input.
/// @param [in] outputs: Parent index of Node output.
/// @param [in] const_node: Node will move to parent.
/// @param [in] parent_index: Parent index of Node input.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node,
const NodePtr &attach, const map<uint32_t, uint32_t> &inputs,
const map<uint32_t, uint32_t> &outputs) {
GE_CHECK_NOTNULL(attach);
for (const auto item : inputs) {
if (item.second == kInvalidParent) { // Not connect, Skip.
continue;
}

const auto &in_anchor = func_node->GetInDataAnchor(item.second);
const auto &out_anchor = in_anchor->GetPeerOutAnchor();
const auto &owner_node = out_anchor->GetOwnerNode();
const auto &in_control = attach->GetInControlAnchor();
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(owner_node->GetOutControlAnchor(), in_control), "Add edge failed");
GELOGI("Add Edge: %s %s", owner_node->GetName().c_str(), attach->GetName().c_str());
const NodePtr &const_node, uint32_t parent_index) {
GE_CHECK_NOTNULL(const_node);
if (parent_index == kInvalidParent) {
return INTERNAL_ERROR;
}

for (const auto &item : outputs) {
const auto &func_desc = func_node->GetOpDesc();
const auto &out_desc = attach->GetOpDesc()->GetOutputDesc(item.second);
(void)func_desc->UpdateInputDesc(item.second, out_desc); // Set Data Input to new connect Node.

const auto &in_anchor = func_node->GetInDataAnchor(item.second);
const auto &out_anchor = in_anchor->GetPeerOutAnchor();
if (out_anchor != nullptr) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto &owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str());
const auto &func_desc = func_node->GetOpDesc();
const auto &tensor_desc = const_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
(void)func_desc->UpdateInputDesc(parent_index, tensor_desc); // Set Data Input to new connect Node.

const auto &in_anchor = func_node->GetInDataAnchor(parent_index);
const auto &out_anchor = in_anchor->GetPeerOutAnchor();
if (out_anchor != nullptr) { // Break useless old link.
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str());
if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) {
graph->RemoveNode(owner_node);
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(attach->GetOutDataAnchor(item.first), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", attach->GetName().c_str(), func_node->GetName().c_str());
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s, index: %u", const_node->GetName().c_str(), func_node->GetName().c_str(), parent_index);

(void)graph->AddNode(attach);
(void)attach->SetOwnerComputeGraph(graph);
GELOGI("Add Node: %s %s", graph->GetName().c_str(), attach->GetName().c_str());
(void)graph->AddNode(const_node);
(void)const_node->SetOwnerComputeGraph(graph);
GELOGI("Add Node: %s %s", graph->GetName().c_str(), const_node->GetName().c_str());
return SUCCESS;
}

@@ -515,43 +438,37 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
uint32_t parent_index, uint32_t index,
const map<uint32_t, uint32_t> &inputs,
const map<uint32_t, uint32_t> &outputs) {
if (inputs.empty()) {
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const string &node_key, uint32_t parent_index) {
if (node_key.empty() || parent_index == kInvalidParent) {
GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str());
return FAILED;
}

NodePtr move_node;
for (auto &groups : graph_datas) {
const auto &subgraph = groups.first;
const auto &data_nodes = groups.second;
auto it = data_nodes.find(parent_index);
if (it == data_nodes.end()) {
GELOGE(FAILED, "Graph: %s, Data: %u node not found", subgraph->GetName().c_str(), parent_index);
for (auto &item : all_const_nodes) {
const auto &subgraph = item.first;
const auto it_const = item.second.find(node_key);
if (it_const == item.second.end()) {
GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
return FAILED;
}
move_node = it_const->second;

const auto &base_data = it->second;
const auto &out_anchor = base_data->GetOutControlAnchor();
const auto &in_anchors = out_anchor->GetPeerInControlAnchors();
if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) {
GELOGE(FAILED, "Data: %s, anchor size: %zu, index: %u not found",
base_data->GetName().c_str(), in_anchors.size(), index);
const auto it_nodes = all_data_nodes.find(subgraph);
if (it_nodes == all_data_nodes.end()) {
GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
return FAILED;
}

const auto &in_anchor = in_anchors.at(index);
move_node = in_anchor->GetOwnerNode();
if (move_node == nullptr) {
GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index);
const auto it_data = it_nodes->second.find(parent_index);
if (it_data == it_nodes->second.end()) {
GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
return FAILED;
}

if (DetachParallelNode(data_nodes, move_node, outputs) != SUCCESS) {
GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index);
if (DetachParallelNode(subgraph, move_node, it_data->second) != SUCCESS) {
GELOGE(FAILED, "Data: %s not found, index: %u", move_node->GetName().c_str(), parent_index);
return FAILED;
}

@@ -559,11 +476,10 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph
GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str());
}

if (AttachParallelNode(graph, func_node, move_node, inputs, outputs) != SUCCESS) {
if (AttachParallelNode(graph, func_node, move_node, parent_index) != SUCCESS) {
return FAILED;
}

migration_append_ = true;
return SUCCESS;
}
} // namespace ge

+ 42
- 42
ge/graph/passes/subgraph_const_migration_pass.h View File

@@ -36,50 +36,54 @@ class SubgraphConstMigrationPass : public GraphPass {
private:
///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @brief Get all Const/Data nodes for all subgraph.
/// @param [in] graph: Root compute graph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] graph_datas: Data groups of subgraph.
/// @param [out] all_const_nodes: Const groups of subgraph.
/// @param [out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas);
Status ClassifyGraphNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes);

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] node: Const node of subgraph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] graph_nodes: Data groups of subgraph.
/// @brief Get parent_index for Const node migration.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const node will process.
/// @param [out] parent_index: parent index for replace Data.
/// @return true: SUCCESS / false: FAILED
///
bool GetAssociatedNodes(const NodePtr &node, map<uint32_t, uint32_t> &inputs, map<uint32_t, uint32_t> &outputs);
bool GetAssociatedNodes(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, uint32_t &parent_index);

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] data_idx: Data groups of subgraph.
/// @brief Check parallel node is same for all subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node.
/// @return true: Same / false: not same
///
bool IsParallelNodeSame(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
const NodePtr &const_node, uint32_t parent_index, size_t index);
bool IsParallelNodeSame(const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const NodePtr &const_node, const string &node_key);

///
/// @ingroup ge
/// @brief Migration subgraph Node to Root
/// @param [in] graph: Root compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node for migration.
/// @return 0: SUCCESS / others: FAILED
///
Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
const NodePtr &data_base, uint32_t data_idx);
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, const string &node_key);

///
/// @ingroup ge
@@ -93,46 +97,42 @@ class SubgraphConstMigrationPass : public GraphPass {
/// @return 0: SUCCESS / others: FAILED
///
Status MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
uint32_t parent_index, uint32_t anchor_idx,
const map<uint32_t, uint32_t> &inputs, const map<uint32_t, uint32_t> &outputs);
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const string &node_key, uint32_t parent_index);

///
/// @ingroup ge
/// @brief Append Input Tensor for functional node.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] outputs: Parent index of Node output.
/// @param [in] graph_nodes: Const groups of subgraph.
/// @param [in/out] parent_index: Parent index for migration.
/// @param [in/out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status AppendParallelNode(map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
const NodePtr &func_node, map<uint32_t, uint32_t> &outputs);
Status AppendParallelNode(const NodePtr &func_node, uint32_t &parent_index,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes);

///
/// @ingroup ge
/// @brief Delete Node from all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] detach: Node will move to parent.
/// @param [in] outputs: Parent index of Node output.
/// @brief Delete Node from subgraph.
/// @param [in] graph: subgraph for process.
/// @param [in] const_node: Node will move to parent.
/// @param [in] data_node: Place holder for Const.
/// @return 0: SUCCESS / others: FAILED
///
Status DetachParallelNode(const map<uint32_t, NodePtr> &graph_datas, const NodePtr &detach,
const map<uint32_t, uint32_t> &outputs);
Status DetachParallelNode(const ComputeGraphPtr &graph, const NodePtr &const_node, const NodePtr &data_node);

///
/// @ingroup ge
/// @brief Move Node to Parent Graph.
/// @param [in] graph: Parent compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] attach: Node will move to parent.
/// @param [in] inputs: Parent index of Node input.
/// @param [in] outputs: Parent index of Node output.
/// @param [in] const_node: Node will move to parent.
/// @param [in] parent_index: Parent index of Node input.
/// @return 0: SUCCESS / others: FAILED
///
Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, const NodePtr &attach,
const map<uint32_t, uint32_t> &inputs, const map<uint32_t, uint32_t> &outputs);

bool migration_append_{false};
Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node,
const NodePtr &const_node, uint32_t parent_index);
};
} // namespace ge
#endif // GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_

+ 8
- 4
ge/graph/passes/switch_to_stream_switch_pass.cc View File

@@ -17,8 +17,13 @@
#include "graph/passes/switch_to_stream_switch_pass.h"
#include <stack>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/types.h"
#include "ge/ge_api_types.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/utils/type_utils.h"

@@ -120,13 +125,12 @@ void SwitchToStreamSwitchPass::MarkCycleDependence(
if (visited.count(tmp_node) > 0) {
continue;
}
GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str());
for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) {
if (switch_nodes.find(out_node) == switch_nodes.end()) {
out_nodes.push(out_node);
continue;
}
GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.",
tmp_node->GetName().c_str(), out_node->GetName().c_str());
GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS,
GELOGW("set cyclic dependence attr failed."); return );
auto map_iter = switch_cyclic_map_.find(out_node);
@@ -598,7 +602,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons
///
Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node,
const std::set<NodePtr> &same_cond_switch) {
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
cast_node->GetName().c_str());
std::string orig_switch_name = switch_node->GetName();
OpDescPtr switch_desc = switch_node->GetOpDesc();
@@ -649,7 +653,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no
///
Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch,
const NodePtr &active_node) {
GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
stream_switch->GetName().c_str(), active_node->GetName().c_str());
auto find_res = switch_node_map_.find(switch_node);
GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), {


+ 44
- 0
ge/graph/preprocess/graph_preprocess.cc View File

@@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <string>
#include <utility>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
@@ -27,9 +28,13 @@
#include "common/helper/model_helper.h"
#include "common/math/math_util.h"
#include "common/op/ge_op_utils.h"
#include "common/util/error_manager/error_manager.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/shape_refiner.h"
#include "graph/manager/graph_var_manager.h"
@@ -39,21 +44,29 @@
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "graph/passes/assign_pass.h"
#include "graph/passes/base_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
#include "graph/passes/constant_fuse_same_pass.h"
#include "graph/passes/control_trigger_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/dropout_pass.h"
#include "graph/passes/enter_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/for_pass.h"
#include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/net_output_pass.h"
#include "graph/passes/next_iteration_pass.h"
#include "graph/passes/no_use_reshape_remove_pass.h"
#include "graph/passes/parallel_concat_start_op_pass.h"
#include "graph/passes/placeholder_with_default_pass.h"
@@ -68,18 +81,45 @@
#include "graph/passes/shape_operate_op_remove_pass.h"
#include "graph/passes/snapshot_pass.h"
#include "graph/passes/stop_gradient_pass.h"
#include "graph/passes/subgraph_pass.h"
#include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/unused_const_pass.h"
#include "graph/passes/unused_op_remove_pass.h"
#include "graph/passes/var_is_initialized_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
#include "graph/types.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "multi_batch_copy_graph.h"
#include "runtime/dev.h"

#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/permute_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"

#include "graph/passes/cast_remove_pass.h"
#include "graph/passes/data_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"


namespace ge {
namespace {
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
@@ -1606,6 +1646,10 @@ Status GraphPrepare::InferShapeForPreprocess() {
if (!options_.train_graph_flag) {
names_to_passes.emplace_back("AssertPass", &assert_pass);
}
SwitchDeadBranchElimination switch_dead_branch_elimination;
names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination);
MergePass merge_pass;
names_to_passes.emplace_back("MergePass", &merge_pass);
InferShapePass infer_shape_pass;
names_to_passes.emplace_back("InferShapePass", &infer_shape_pass);
ReplaceWithEmptyConstPass replace_with_empty_const_pass;


+ 6
- 7
ge/graph/preprocess/multi_batch_copy_graph.cc View File

@@ -1692,14 +1692,13 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) {
}

Status ProcessMultiBatch(ComputeGraphPtr &graph) {
if (GetLocalOmgContext().dynamic_node_type.empty()) {
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
if (multi_batch_with_switchn == nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}
const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE");
if (multi_batch_with_case != nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}

if (!GetLocalOmgContext().need_multi_batch) {
GELOGI("No need to process_multi for no_train graph.");
return SUCCESS;


+ 10
- 1
ge/host_kernels/dynamic_stitch_kernel.cc View File

@@ -33,6 +33,8 @@ namespace {
const int kDoubleAttrN = 2;
const int kFirstOutputDescIdx = 0;
const int kMergedShapeSecondDim = 1;
const size_t kNullTensorDimNum = 1;
const int64_t kNullTensorDimValue = 0;
const std::set<DataType> kSupportedTypeSet = {DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE};
} // namespace
@@ -177,7 +179,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec
int64_t src_offset = 0;
std::set<int32_t> indices_set;
for (int i = 0; i < n_; i++) {
auto indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
GeShape indices_shape = input[i]->GetTensorDesc().GetShape();
size_t indices_dim_num = indices_shape.GetDimNum();
// skip null indices tensor
if (indices_dim_num == kNullTensorDimNum && indices_shape.GetDim(0) == kNullTensorDimValue) {
GELOGD("Input indices[%d] has null tensor, skip it.", i);
continue;
}
auto indices_shape_size = indices_shape.GetShapeSize();
// to normalize logic, assume scalar as vector with shape of [1].
indices_shape_size = (indices_shape_size == 0) ? 1 : indices_shape_size;
// all index for input is less than size of input


+ 22
- 34
ge/hybrid/executor/node_state.cc View File

@@ -18,7 +18,6 @@
#include <chrono>
#include "framework/common/debug/log.h"
#include "graph/compute_graph.h"
#include "graph/utils/tensor_utils.h"
#include "hybrid_execution_context.h"
#include "subgraph_context.h"

@@ -36,31 +35,29 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
this->num_pending_shapes_);
}

Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
Status ShapeInferenceState::UpdateInputShape(int idx,
const GeShape &ori_shape,
const GeShape &shape) {
if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
node_item.NodeName().c_str(),
idx,
node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
target.GetShape().ToString().c_str());
shape.ToString().c_str());
return SUCCESS;
}

int64_t tensor_size = -1;
(void) TensorUtils::GetSize(target, tensor_size);
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
node_item.NodeName().c_str(),
idx,
target.GetShape().ToString().c_str(),
target.GetOriginShape().ToString().c_str(),
tensor_size);
shape.ToString().c_str(),
ori_shape.ToString().c_str());

std::lock_guard<std::mutex> lk(mu_);
auto tensor_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(tensor_desc);
tensor_desc->SetShape(target.GetShape());
tensor_desc->SetOriginShape(target.GetOriginShape());
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
tensor_desc->SetShape(shape);
tensor_desc->SetOriginShape(ori_shape);
if (--num_pending_shapes_ == 0) {
ready_cv_.notify_all();
}
@@ -113,24 +110,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
for (auto &p : shape_futures) {
auto idx = p.first;
auto &future = p.second;
GeShape shape;
GeShape ori_shape;
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
auto src_tensor_desc = future.GetTensorDesc();
GE_CHECK_NOTNULL(src_tensor_desc);
GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
"[%s] Get shape failed. index = %u",
node_item.NodeName().c_str(),
idx);
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);

auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu",
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
node_item.NodeName().c_str(),
idx,
src_tensor_desc->GetShape().ToString().c_str(),
src_tensor_desc->GetOriginShape().ToString().c_str(),
tensor_size);
input_desc->SetShape(src_tensor_desc->GetShape());
input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
(void) TensorUtils::SetSize(*input_desc, tensor_size);
shape.ToString().c_str(),
ori_shape.ToString().c_str());
auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
input_desc->SetShape(std::move(shape));
input_desc->SetOriginShape(ori_shape);
}

return SUCCESS;
@@ -193,14 +190,5 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
return SUCCESS;
}

GeTensorDescPtr ShapeFuture::GetTensorDesc() {
GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
if (!subgraph_context_->Await(src_node_)) {
GELOGE(INTERNAL_ERROR, "cancelled");
return nullptr;
}
return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
}
} // namespace hybrid
} // namespace ge

+ 1
- 2
ge/hybrid/executor/node_state.h View File

@@ -35,7 +35,6 @@ class ShapeFuture {
ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
~ShapeFuture() = default;
Status Get(GeShape &ori_shape, GeShape &shape);
GeTensorDescPtr GetTensorDesc();

private:
NodePtr src_node_;
@@ -46,7 +45,7 @@ class ShapeFuture {
struct ShapeInferenceState {
explicit ShapeInferenceState(const NodeItem &node_item);

Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);
Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);

void UpdateInputShapeFuture(int idx, ShapeFuture &&future);



+ 8
- 1
ge/hybrid/executor/subgraph_executor.cc View File

@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
GE_CHECK_NOTNULL(tensor_desc);
auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
GE_CHECK_NOTNULL(node_state);
node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
}
}

@@ -268,6 +268,13 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
} else {
node_state.SetKernelTask(node_item.kernel_task);
}

GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
"[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
return SUCCESS;
}



+ 12
- 16
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -20,9 +20,12 @@
#include "graph/utils/tensor_adapter.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor//worker//shape_inference_engine.h"
#include "common/dump/dump_manager.h"
#include "common/dump/dump_op.h"
#include "common/types.h"
#include "common/ge_types.h"
#include "common/profiling/profiling_manager.h"
#include "runtime/base.h"

namespace ge {
namespace hybrid {
@@ -151,19 +154,18 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(model);

// only report aicpu and aicore node
bool is_profiling_report = context_->GetNodeItem().is_profiling_report;
if (!is_profiling_report) {
GELOGD("Node[%s] is not aicore or aicpu, and no need to report data.", node->GetName().c_str());
return SUCCESS;
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto op_desc = node->GetOpDesc();
std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();
uint32_t task_id = context_->GetTaskId();
uint32_t stream_id = context_->GetStreamId();

uint32_t task_id = 0;
uint32_t stream_id = 0;
if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
GELOGE(PARAM_INVALID, "Get task_id and stream_id failed.");
return PARAM_INVALID;
}

TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
@@ -175,8 +177,6 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = "dynamic";
tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
node->GetName().c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);
@@ -348,10 +348,6 @@ Status NodeDoneCallback::OnNodeDone() {
}

GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
// update output tensor sizes
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
}
// PropagateOutputs for type == DEPEND_COMPUTE
if (node_item.shape_inference_type == DEPEND_COMPUTE) {
if (graph_context_->trace_enabled) {


+ 18
- 103
ge/hybrid/executor/worker/shape_inference_engine.cc View File

@@ -17,15 +17,9 @@
#include "hybrid/executor/worker/shape_inference_engine.h"
#include "graph/shape_refiner.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "common/math/math_util.h"
#include "hybrid/node_executor/node_executor.h"

namespace ge {
namespace {
const int kAlignment = 32;
}
namespace hybrid {
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
: execution_context_(execution_context),
@@ -46,9 +40,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
}

if (node_item.fused_subgraph != nullptr) {
GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
return SUCCESS;
return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
}

// Skip shape inference for node of type DEPEND_COMPUTE
@@ -71,15 +63,21 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
std::lock_guard<std::mutex> lk(mu_);
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
"Invoke InferShapeAndType failed.");
"Invoke InferShapeAndType failed.");
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
}
// Check again to make sure shape is valid after shape inference
if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
"Failed to get shape status. node = %s",
node_item.NodeName().c_str());

// update output tensor sizes after shape inference
// error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
INTERNAL_ERROR,
"[%s] Shape is still unknown after shape inference.",
node_item.NodeName().c_str());
}

GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
node_item.NodeName().c_str(),
@@ -129,6 +127,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
// propagate each output
for (int i = 0; i < node_item.num_outputs; ++i) {
auto output_desc = node_item.op_desc->MutableOutputDesc(i);
const auto &shape = output_desc->MutableShape();
const auto &ori_shape = output_desc->GetOriginShape();
auto &output_nodes = node_item.outputs[i];

// propagate output to all sub-inputs
@@ -149,7 +149,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
std::move(future));
} else {
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
ori_shape,
shape));
}
}
}
@@ -228,92 +230,5 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
}
return SUCCESS;
}

Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
std::vector<int64_t> &shape,
bool fallback_with_range) {
const auto &tensor_shape = tensor_desc.MutableShape();
if (tensor_shape.IsUnknownShape()) {
if (!fallback_with_range) {
GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
tensor_shape.ToString().c_str());
return INTERNAL_ERROR;
}

GELOGD("Calc output size by range");
std::vector<std::pair<int64_t, int64_t>> shape_range;
GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
if (shape_range.size() != shape.size()) {
GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
shape_range.size(),
shape.size());
return INTERNAL_ERROR;
}

for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) {
if (shape[dim_index] == ge::UNKNOWN_DIM) {
shape[dim_index] = shape_range[dim_index].second;
}
}

GELOGD("After canonicalization, shape = [%s], before = [%s]",
GeShape(shape).ToString().c_str(),
tensor_shape.ToString().c_str());
}

return SUCCESS;
}

Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
const std::vector<int64_t> &shape,
int64_t &tensor_size) {
GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
uint32_t type_size;
if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
GELOGE(INTERNAL_ERROR, "Failed to get data type size");
return INTERNAL_ERROR;
}

tensor_size = type_size;
for (const auto &dim : shape) {
GE_CHECK_GE(dim, 0);
GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
"Shape size overflow, shape = [%s]",
GeShape(shape).ToString().c_str());
tensor_size *= dim;
}

GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
"Tensor size is too large: %ld, shape = [%s]",
tensor_size,
GeShape(shape).ToString().c_str());
tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
return SUCCESS;
}

Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
auto op_desc = node_item.GetOpDesc();
for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) {
auto tensor_desc = op_desc->MutableOutputDesc(output_index);
GE_CHECK_NOTNULL(tensor_desc);
const auto &shape = tensor_desc->MutableShape();
// modify on copy
auto dims = shape.GetDims();
GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
"[%s] Failed to canonicalize shape for output %zu",
node_item.NodeName().c_str(),
output_index);

int64_t tensor_size;
GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
"[%s] Failed to calc tensor size for output %zu",
node_item.NodeName().c_str(),
output_index);
GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
}

return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 0
- 4
ge/hybrid/executor/worker/shape_inference_engine.h View File

@@ -34,11 +34,7 @@ class ShapeInferenceEngine {

Status PropagateOutputShapes(const NodeItem &node_item);

static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);

private:
static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
static Status UpdatePeerNodeShape(const Node &node);
Status AwaitDependentNodes(NodeState &node_state);



+ 27
- 4
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -21,6 +21,7 @@
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
@@ -226,10 +227,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
new_node->node_id = node_index;
new_node->op_desc->SetId(node_index);
node_index += 1;
NodeExecutorManager::ExecutorType executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
new_node->is_profiling_report = (executor_type == NodeExecutorManager::ExecutorType::AICORE) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_TF) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_CUSTOM);

*node_item = new_node.get();
node_items[node] = std::move(new_node);
return SUCCESS;
@@ -924,6 +922,7 @@ Status HybridModelBuilder::InitWeights() {
}

Status HybridModelBuilder::LoadTasks() {
GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
for (auto &it : hybrid_model_.node_items_) {
auto &node_item = it.second;
auto &node_ptr = node_item->node;
@@ -1560,5 +1559,29 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,

return SUCCESS;
}

Status HybridModelBuilder::CheckAicpuOpList() {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
std::set<std::string> aicpu_optype_set;
std::set<std::string> aicpu_tf_optype_set;
for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
auto &ge_model = it.second;
GE_CHECK_NOTNULL(ge_model);
if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) {
aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(*ge_model, "needCheckTf", aicpu_tf_optype_list)) {
aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}
}
// reset list with set
aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
"Launch check aicpu op type failed.");
return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 1
- 0
ge/hybrid/model/hybrid_model_builder.h View File

@@ -78,6 +78,7 @@ class HybridModelBuilder {
Status ParseVarOutputs(NodeItem &node_item);
Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
Status RecoverGraphUnknownFlag();
Status CheckAicpuOpList();

const char* GetGraphName() const {
return hybrid_model_.model_name_.c_str();


+ 34
- 57
ge/hybrid/model/node_item.cc View File

@@ -22,7 +22,6 @@
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/node_utils.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor/worker/shape_inference_engine.h"

namespace ge {
namespace hybrid {
@@ -48,7 +47,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
GE_CHECK_NOTNULL(dst_op_desc);
auto in_idx = node_and_anchor.second->GetIdx();
auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
}

@@ -65,7 +64,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
return FAILED;
}

fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
fused_subgraph.output_mapping.emplace(parent_index, op_desc);
return SUCCESS;
}

@@ -127,7 +126,12 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
return SUCCESS;
}

void NodeItem::ResolveOptionalInputs() {
Status NodeItem::Init() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());

if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
has_optional_inputs = true;
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -139,18 +143,7 @@ void NodeItem::ResolveOptionalInputs() {
}
}
}
}

Status NodeItem::InitInputsAndOutputs() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());
ResolveOptionalInputs();
return SUCCESS;
}

Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
@@ -158,54 +151,38 @@ Status NodeItem::ResolveDynamicState() {
"[%s] Failed to get shape status.",
node->GetName().c_str());
}
return SUCCESS;
}

Status NodeItem::ResolveStaticInputsAndOutputs() {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
if (is_dynamic) {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
}
}

for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
}
}

if (is_output_shape_static) {
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
}
return SUCCESS;
}

void NodeItem::ResolveUnknownShapeType() {
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}
}
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}

Status NodeItem::Init() {
GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
if (is_dynamic) {
ResolveUnknownShapeType();
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
}



+ 0
- 6
ge/hybrid/model/node_item.h View File

@@ -99,16 +99,10 @@ struct NodeItem {
std::map<int, int> reuse_inputs;
std::map<int, int> reuse_outputs;
int num_static_input_shapes = 0;
bool is_profiling_report = false;

private:
explicit NodeItem(NodePtr node);
Status Init();
Status InitInputsAndOutputs();
void ResolveOptionalInputs();
Status ResolveDynamicState();
Status ResolveStaticInputsAndOutputs();
void ResolveUnknownShapeType();

std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;


+ 0
- 10
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -165,16 +165,6 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
}
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 0
- 11
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -189,17 +189,6 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(

GE_CHK_STATUS_RET(LaunchTask(context));

uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);

auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 14
- 2
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -123,11 +123,22 @@ Status KnownNodeTask::Init(TaskContext &context) {
davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
}
if (!load_flag_) {
auto dump_properties = context.GetDumpProperties();
if (dump_properties.IsDumpOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
}
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model_->SetDeviceId(device_id);
GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel failed.");
load_flag_ = true;
} else {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
davinci_model_->Id()), "KnownNodeTask::Init destroy aicpu kernel failed.");
davinci_model_->Id(), davinci_model_->SubModelId()), "KnownNodeTask::Init destroy aicpu kernel failed.");
}
GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName());
return SUCCESS;
@@ -161,8 +172,9 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node

// set known node flag as true
davinci_model->SetKnownNode(true);
davinci_model->SetId(model.GetModelId());
// set model id as root node's node id
davinci_model->SetId(node->GetOpDesc()->GetId());
davinci_model->SetSubModelId(node->GetOpDesc()->GetId());
GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId());

GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed.");


+ 0
- 38
ge/hybrid/node_executor/task_context.cc View File

@@ -148,10 +148,6 @@ Status TaskContext::AllocateWorkspaces() {
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
if (callback_fun == nullptr) {
GELOGW("[%s] Callback is NULL", GetNodeName());
return SUCCESS;
}
auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
if (ret != SUCCESS) {
GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -319,22 +315,6 @@ void TaskContext::SetStatus(Status status) {
}
}

uint32_t TaskContext::GetTaskId() const {
return task_id_;
}

void TaskContext::SetTaskId(uint32_t task_id) {
task_id_ = task_id;
}

uint32_t TaskContext::GetStreamId() const {
return stream_id_;
}

void TaskContext::SetStreamId(uint32_t stream_id) {
stream_id_ = stream_id;
}

Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
GE_CHECK_NOTNULL(buffer);
if (ori_addr == nullptr) {
@@ -404,20 +384,6 @@ const char *TaskContext::GetNodeName() const {
return node_item_->NodeName().c_str();
}

void TaskContext::ReleaseInputsAndOutputs() {
for (int i = 0; i < node_item_->num_inputs; ++i) {
auto tensor = inputs_start_ + i;
tensor->Destroy();
GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i);
}

for (int i = 0; i < node_item_->num_outputs; ++i) {
auto tensor = outputs_start_ + i;
tensor->Destroy();
GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i);
}
}

void TaskContext::ReleaseInput(int index) {
auto input_tensor = MutableInput(index);
if (input_tensor != nullptr) {
@@ -490,9 +456,5 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
const DumpProperties &TaskContext::GetDumpProperties() const {
return execution_context_->dump_properties;
}

bool TaskContext::NeedCallback() {
return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0;
}
} // namespace hybrid
} // namespace ge

+ 0
- 10
ge/hybrid/node_executor/task_context.h View File

@@ -50,8 +50,6 @@ class TaskContext {
ConstGeTensorDescPtr GetOutputDesc(int index) const;
GeTensorDescPtr MutableInputDesc(int index) const;
GeTensorDescPtr MutableOutputDesc(int index) const;
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;
@@ -96,12 +94,6 @@ class TaskContext {

void SetStatus(Status status);

uint32_t GetTaskId() const;
void SetTaskId(uint32_t task_id);

uint32_t GetStreamId() const;
void SetStreamId(uint32_t stream_id);

bool IsForceInferShape() const;
void SetForceInferShape(bool force_infer_shape);
void *handle_ = nullptr;
@@ -123,8 +115,6 @@ class TaskContext {
Status status_ = SUCCESS;
std::vector<void *> workspaces_;
uint64_t iteration_ = 0;
uint32_t task_id_= 0;
uint32_t stream_id_ = 0;
};
} // namespace hybrid
} // namespace ge


+ 15
- 4
ge/ir_build/atc_ir_common.cc View File

@@ -51,6 +51,7 @@ const char *const kDigitError = "is not digit";
const char *const kCompressWeightError = "it must be appointed when appoint parameter[--optypelist_for_implmode]";
const char *const kSelectImplmodeError = "only support high_performance, high_precision";
const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \"";
const char *const kKeepDtypeError = "file not found";

vector<string> SplitInputShape(const std::string &input_shape) {
vector<string> shape_pair_vec;
@@ -63,19 +64,18 @@ vector<string> SplitInputShape(const std::string &input_shape) {
}
} // namespace

Status CheckInputFormat(const string &input_format) {
Status CheckInputFormat(const std::string &input_format) {
if (input_format.empty()) {
return ge::SUCCESS;
}
if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"});
GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str());
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format not found"});
GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str());
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
}

bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
std::string &dynamic_batch_size) {
int32_t size = 0;
@@ -439,6 +439,17 @@ Status CheckCompressWeightParamValid(const std::string enable_compress_weight, c
return ge::SUCCESS;
}

// Validates the --keep_dtype option: an empty value is accepted (feature off);
// a non-empty value must name a readable config file, otherwise an ATC error
// is reported and PARAM_INVALID is returned.
Status CheckKeepTypeParamValid(const std::string &keep_dtype) {
  if (keep_dtype.empty()) {
    return ge::SUCCESS;
  }
  if (CheckInputPathValid(keep_dtype, "--keep_dtype")) {
    return ge::SUCCESS;
  }
  ErrorManager::GetInstance().ATCReportErrMessage(
      "E10001", {"parameter", "value", "reason"}, {"--keep_dtype", keep_dtype, kKeepDtypeError});
  GELOGE(ge::PARAM_INVALID, "keep dtype config file not found, file_name:%s", keep_dtype.c_str());
  return ge::PARAM_INVALID;
}

int CheckLogParamValidAndSetLogLevel(const std::string log) {
int ret = -1;
if (log == "default") {


+ 2
- 1
ge/ir_build/atc_ir_common.h View File

@@ -75,7 +75,8 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf);
Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory);
Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream);
Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode);
Status CheckInputFormat(const string &input_format);
Status CheckInputFormat(const std::string &input_format);
Status CheckKeepTypeParamValid(const std::string &keep_dtype);
void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips);
void EraseEndSemicolon(std::string &param);
}


+ 0
- 36
ge/ir_build/ge_ir_build.cc View File

@@ -581,42 +581,6 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat
return GRAPH_SUCCESS;
}

// Runs format and shape/type inference over a user-provided root graph.
// Steps: reject subgraphs, topologically sort, infer origin formats, then
// infer shape/type node by node. Nodes without an infershape function are
// skipped with a warning; any other failure aborts.
// Fix: the per-node status used to shadow the outer `ret`, which is
// warning-prone (-Wshadow) and easy to misread — renamed to `infer_ret`.
graphStatus aclgrphInferShapeAndType(ge::Graph &graph) {
  auto compute_graph = GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);

  // Only a root graph is a valid input; a graph with a parent is a subgraph.
  auto root_graph = compute_graph->GetParentGraph();
  if (root_graph != nullptr) {
    GELOGE(GRAPH_PARAM_INVALID, "Input param should not be subgraph");
    return GRAPH_PARAM_INVALID;
  }

  auto ret = compute_graph->TopologicalSorting();
  if (ret != GRAPH_SUCCESS) {
    GELOGE(ret, "Acl topo logical sort failed.");
    return ret;
  }

  ret = compute_graph->InferOriginFormat();
  if (ret != GRAPH_SUCCESS) {
    GELOGE(ret, "Acl InferOriginFormat failed.");
    return ret;
  }

  for (auto &node : compute_graph->GetAllNodes()) {
    graphStatus infer_ret = ShapeRefiner::InferShapeAndType(node);
    if (infer_ret == GRAPH_PARAM_INVALID) {
      // No registered infershape function for this op type: best-effort skip.
      GELOGW("Can not find infershape func.");
      continue;
    } else if (infer_ret != GRAPH_SUCCESS) {
      GELOGE(infer_ret, "Acl infershape failed.");
      return infer_ret;
    }
  }

  return GRAPH_SUCCESS;
}

graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len) {
GE_CHECK_NOTNULL(file);



+ 1
- 0
ge/offline/CMakeLists.txt View File

@@ -10,6 +10,7 @@ protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
set(SRC_LIST
"main.cc"
"single_op_parser.cc"
"keep_dtype_option.cc"
"../session/omg.cc"
"../ir_build/atc_ir_common.cc"
)


+ 116
- 0
ge/offline/keep_dtype_option.cc View File

@@ -0,0 +1,116 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "keep_dtype_option.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include "graph/debug/ge_attr_define.h"
#include "framework/common/util.h"
#include "common/util/error_manager/error_manager.h"
namespace ge {
namespace {
const size_t kMaxOpsNum = 10;
} // namespace
// Returns true iff |op_name| appears in the op's recorded original op names
// (ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES); false when the attribute is absent.
bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) {
  std::vector<std::string> origin_names;
  const bool has_attr = AttrUtils::GetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, origin_names);
  if (!has_attr) {
    return false;
  }

  for (size_t i = 0; i < origin_names.size(); ++i) {
    if (origin_names[i] == op_name) {
      return true;
    }
  }
  return false;
}
// Builds a single error message listing the op names from the keep_dtype
// config file that matched no node in the graph (truncated after kMaxOpsNum
// entries), then reports it via ErrorManager and the error log.
void KeepDtypeReportError(const std::vector<std::string> &invalid_list) {
  const size_t count = invalid_list.size();
  std::stringstream msg;
  msg << "config file contains " << count;
  msg << (count == 1 ? " operator not in the graph, op name:"
                     : " operators not in the graph, op names:");
  for (size_t idx = 0; idx < count; idx++) {
    if (idx == kMaxOpsNum) {
      // Cap the listing so the message stays readable.
      msg << "..";
      break;
    }
    msg << invalid_list[idx];
    if (idx + 1 != count) {
      msg << " ";
    }
  }

  ErrorManager::GetInstance().ATCReportErrMessage(
      "E10042", {"parameter", "reason"}, {"keep_dtype", msg.str().c_str()});
  GELOGE(FAILED, "%s", msg.str().c_str());
}
// Applies the --keep_dtype option: reads the config file (one op name per
// line) and sets ATTR_NAME_KEEP_DTYPE=1 on every node of |graph| whose name
// (or recorded original op name) matches. Names that match nothing are
// collected and reported as a PARAM_INVALID error.
// Fix: trim each line BEFORE the empty check — previously a whitespace-only
// line survived the check, trimmed to "", matched no node, and was reported
// as an invalid empty op name.
Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) {
  GE_CHECK_NOTNULL(graph);
  if (keep_dtype.empty()) {
    return SUCCESS;
  }
  std::string real_path = RealPath(keep_dtype.c_str());
  if (real_path.empty()) {
    GELOGE(PARAM_INVALID, "Can not get real path for %s.", keep_dtype.c_str());
    return PARAM_INVALID;
  }
  std::ifstream ifs(real_path);
  if (!ifs.is_open()) {
    GELOGE(FAILED, "Open file %s failed", keep_dtype.c_str());
    return FAILED;
  }

  std::string op_name;
  std::vector<std::string> invalid_list;
  while (std::getline(ifs, op_name)) {
    // Trim first so whitespace-only lines are skipped, not flagged invalid.
    op_name = StringUtils::Trim(op_name);
    if (op_name.empty()) {
      continue;
    }
    bool is_find = false;
    for (auto &node_ptr : graph->GetDirectNode()) {
      auto op_desc = node_ptr->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      // A name may match several nodes (e.g. fused ops sharing an original
      // name); mark every match rather than stopping at the first.
      if ((op_desc->GetName() == op_name) || IsOriginalOpFind(op_desc, op_name)) {
        is_find = true;
        (void)AttrUtils::SetInt(op_desc, ATTR_NAME_KEEP_DTYPE, 1);
      }
    }
    if (!is_find) {
      invalid_list.push_back(op_name);
    }
  }
  ifs.close();

  if (!invalid_list.empty()) {
    KeepDtypeReportError(invalid_list);
    return PARAM_INVALID;
  }

  return SUCCESS;
}
} // namespace ge

+ 26
- 0
ge/offline/keep_dtype_option.h View File

@@ -0,0 +1,26 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef KEEP_DTYPE_OPTION_H_
#define KEEP_DTYPE_OPTION_H_
#include <string>
#include "graph/compute_graph.h"
#include "framework/common/ge_inner_error_codes.h"
namespace ge {
// Applies the ATC --keep_dtype option: |keep_dtype| is the path of a config
// file listing one op name per line; matching nodes in |graph| are tagged so
// their precision is kept during compilation. Empty |keep_dtype| is a no-op.
Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype);
} // namespace ge
#endif // KEEP_DTYPE_OPTION_H_

+ 16
- 0
ge/offline/main.cc View File

@@ -43,6 +43,7 @@
#include "parser/common/register_tbe.h"
#include "register/op_registry.h"
#include "single_op_parser.h"
#include "keep_dtype_option.h"

using domi::BuildMode;
using domi::OpRegistrationData;
@@ -109,6 +110,9 @@ DEFINE_string(precision_mode, "force_fp16",
"Optional; precision mode."
"Support force_fp16, allow_mix_precision, allow_fp32_to_fp16, must_keep_origin_dtype.");

DEFINE_string(keep_dtype, "",
"Optional; config file to specify the precision used by the operator during compilation.");

DEFINE_string(input_format, "",
"Optional; input_format, format of input data, NCHW;NHWC."
"Format:\"NHWC\"");
@@ -285,6 +289,8 @@ class GFlagUtils {
"\n[Operator Tuning]\n"
" --precision_mode precision mode, support force_fp16(default), allow_mix_precision, "
"allow_fp32_to_fp16, must_keep_origin_dtype.\n"
" --keep_dtype Retains the precision of certain operators in inference "
"scenarios by using a configuration file.\n"
" --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n"
" --op_select_implmode Set op select implmode. Support high_precision, high_performance. "
"default: high_performance\n"
@@ -421,6 +427,9 @@ class GFlagUtils {
FLAGS_enable_compress_weight, FLAGS_compress_weight_conf) == ge::SUCCESS,
ret = ge::FAILED, "check compress weight failed!");

GE_CHK_BOOL_EXEC(ge::CheckKeepTypeParamValid(FLAGS_keep_dtype) == ge::SUCCESS,
ret = ge::FAILED, "check keep dtype failed!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ge::CheckOutputPathValid(FLAGS_check_report, "--check_report"), ret = ge::FAILED,
"check_report file %s not found!!", FLAGS_check_report.c_str());
@@ -979,6 +988,13 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
}
}

Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype);
if (ret != SUCCESS) {
(void)ge_generator.Finalize();
(void)ge::GELib::GetInstance()->Finalize();
return ret;
}

geRet = ge_generator.GenerateOfflineModel(graph, output, inputs);
if (geRet != ge::SUCCESS) {
DOMI_LOGE("GE GenerateOfflineModel execute failed");


+ 3
- 0
ge/offline/module.mk View File

@@ -11,6 +11,7 @@ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dg
LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
keep_dtype_option.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \

@@ -64,6 +65,7 @@ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dg
LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
keep_dtype_option.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \

@@ -117,6 +119,7 @@ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dg
LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
keep_dtype_option.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \



+ 2
- 2
ge/omm/csa_interact.cc View File

@@ -78,8 +78,8 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) {
Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode,
ErrorModule error_module) {
if (!is_init_) {
GELOGE(INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState");
return INTERNAL_ERROR;
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState");
return ACL_ERROR_GE_INTERNAL_ERROR;
}
if ((curr_state_ == JOBSTATE_FAILED) || (curr_state_ == JOBSTATE_KILLED)) {
return SUCCESS;


+ 8
- 0
ge/proto/caffe/caffe.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe
*
* This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto2";

package domi.caffe;


+ 2
- 0
ge/proto/dump_task.proto View File

@@ -108,4 +108,6 @@ message DumpData{
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
string op_name = 6;

}

+ 0
- 2
ge/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save