Browse Source

synchronize with latest Ascend software suite 28 Jun 2020

tags/v0.6.0-beta
yanghaoran 5 years ago
parent
commit
aef44e1557
100 changed files with 4461 additions and 1153 deletions
  1. +9
    -0
      inc/external/ge/ge_api.h
  2. +15
    -7
      inc/external/ge/ge_api_types.h
  3. +1
    -0
      inc/external/graph/types.h
  4. +6
    -5
      inc/external/register/register_fmk_types.h
  5. +2
    -2
      inc/framework/common/debug/log.h
  6. +2
    -0
      inc/framework/common/ge_inner_error_codes.h
  7. +71
    -5
      inc/framework/common/ge_types.h
  8. +2
    -3
      inc/framework/common/helper/om_file_helper.h
  9. +1
    -0
      inc/framework/common/types.h
  10. +10
    -0
      inc/framework/executor/ge_executor.h
  11. +0
    -113
      inc/framework/ge_runtime_dummy/davinci_model.h
  12. +0
    -58
      inc/framework/ge_runtime_dummy/model_runner.h
  13. +0
    -72
      inc/framework/ge_runtime_dummy/op_info.h
  14. +0
    -394
      inc/framework/ge_runtime_dummy/task_info.h
  15. +6
    -0
      inc/framework/omg/omg.h
  16. +2
    -2
      inc/framework/omg/omg_inner_types.h
  17. +1
    -2
      inc/graph/compute_graph.h
  18. +4
    -0
      inc/graph/debug/ge_attr_define.h
  19. +3
    -0
      inc/graph/ge_tensor.h
  20. +24
    -19
      inc/graph/utils/graph_utils.h
  21. +2
    -0
      inc/graph/utils/type_utils.h
  22. +22
    -128
      src/common/graph/compute_graph.cc
  23. +1
    -1
      src/common/graph/format_refiner.cc
  24. +5
    -1
      src/common/graph/ge_attr_define.cc
  25. +5
    -0
      src/common/graph/ge_attr_value.cc
  26. +18
    -0
      src/common/graph/ge_tensor.cc
  27. +182
    -0
      src/common/graph/graph.mk
  28. +39
    -0
      src/common/graph/model_serialize.cc
  29. +3
    -0
      src/common/graph/module.mk
  30. +2
    -0
      src/common/graph/tensor.cc
  31. +18
    -18
      src/common/graph/utils/graph_utils.cc
  32. +0
    -1
      src/common/graph/utils/op_desc_utils.cc
  33. +2
    -0
      src/common/graph/utils/tensor_utils.cc
  34. +20
    -2
      src/common/graph/utils/type_utils.cc
  35. +12
    -9
      src/ge/CMakeLists.txt
  36. +26
    -3
      src/ge/client/ge_api.cc
  37. +111
    -0
      src/ge/client/module.mk
  38. +1
    -2
      src/ge/common/auth/file_saver.cc
  39. +25
    -19
      src/ge/common/ge/plugin_manager.cc
  40. +293
    -0
      src/ge/common/ge/tbe_plugin_manager.cc
  41. +73
    -0
      src/ge/common/ge/tbe_plugin_manager.h
  42. +241
    -0
      src/ge/common/ge_common.mk
  43. +3
    -3
      src/ge/common/helper/model_cache_helper.cc
  44. +1
    -1
      src/ge/common/helper/model_helper.cc
  45. +6
    -6
      src/ge/common/helper/om_file_helper.cc
  46. +8
    -2
      src/ge/common/model_saver.cc
  47. +3
    -0
      src/ge/common/module.mk
  48. +2
    -0
      src/ge/common/op/ge_op_utils.cc
  49. +16
    -8
      src/ge/common/profiling/profiling_manager.cc
  50. +1
    -0
      src/ge/common/types.cc
  51. +11
    -13
      src/ge/common/util.cc
  52. +2
    -2
      src/ge/engine_manager/dnnengine_manager.cc
  53. +1
    -0
      src/ge/executor/CMakeLists.txt
  54. +142
    -27
      src/ge/executor/ge_executor.cc
  55. +202
    -0
      src/ge/executor/module.mk
  56. +407
    -0
      src/ge/ge_inference.mk
  57. +3
    -3
      src/ge/ge_local_engine/engine/host_cpu_engine.cc
  58. +59
    -0
      src/ge/ge_local_engine/module.mk
  59. +1
    -1
      src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc
  60. +1
    -1
      src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc
  61. +429
    -0
      src/ge/ge_runner.mk
  62. +333
    -0
      src/ge/ge_train.mk
  63. +75
    -22
      src/ge/generator/ge_generator.cc
  64. +41
    -60
      src/ge/graph/build/memory/block_mem_assigner.cc
  65. +11
    -8
      src/ge/graph/build/memory/block_mem_assigner.h
  66. +98
    -0
      src/ge/graph/build/memory/module.mk
  67. +32
    -5
      src/ge/graph/build/model_builder.cc
  68. +16
    -6
      src/ge/graph/build/stream_allocator.cc
  69. +1
    -0
      src/ge/graph/build/stream_allocator.h
  70. +21
    -15
      src/ge/graph/build/task_generator.cc
  71. +71
    -0
      src/ge/graph/execute/graph_execute.cc
  72. +11
    -0
      src/ge/graph/execute/graph_execute.h
  73. +1
    -1
      src/ge/graph/label/while_label_maker.cc
  74. +90
    -0
      src/ge/graph/load/new_model_manager/aipp_utils.cc
  75. +48
    -0
      src/ge/graph/load/new_model_manager/aipp_utils.h
  76. +3
    -17
      src/ge/graph/load/new_model_manager/data_dumper.cc
  77. +265
    -10
      src/ge/graph/load/new_model_manager/davinci_model.cc
  78. +28
    -0
      src/ge/graph/load/new_model_manager/davinci_model.h
  79. +130
    -4
      src/ge/graph/load/new_model_manager/model_manager.cc
  80. +27
    -1
      src/ge/graph/load/new_model_manager/model_manager.h
  81. +1
    -0
      src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
  82. +28
    -5
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  83. +2
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
  84. +29
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  85. +5
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  86. +5
    -3
      src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
  87. +36
    -31
      src/ge/graph/manager/graph_manager.cc
  88. +2
    -1
      src/ge/graph/manager/graph_manager.h
  89. +1
    -0
      src/ge/graph/manager/graph_manager_utils.cc
  90. +7
    -1
      src/ge/graph/manager/graph_manager_utils.h
  91. +1
    -1
      src/ge/graph/manager/graph_var_manager.cc
  92. +1
    -1
      src/ge/graph/manager/graph_var_manager.h
  93. +5
    -1
      src/ge/graph/manager/util/rt_context_util.h
  94. +4
    -4
      src/ge/graph/optimize/graph_optimize.cc
  95. +397
    -0
      src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc
  96. +55
    -0
      src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h
  97. +3
    -2
      src/ge/graph/partition/dynamic_shape_partition.cc
  98. +12
    -17
      src/ge/graph/partition/graph_partition.cc
  99. +2
    -2
      src/ge/graph/passes/aicpu_constant_folding_pass.cc
  100. +5
    -2
      src/ge/graph/passes/atomic_addr_clean_pass.cc

+ 9
- 0
inc/external/ge/ge_api.h View File

@@ -79,6 +79,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session {

///
/// @ingroup ge_graph
/// @brief build graph in the session with specific session id
/// @param [in] graphId: graph id
/// @param [in] inputs: input data
/// @return Status result of function
///
Status BuildGraph(uint32_t graphId, const std::vector<InputTensorInfo> &inputs);

///
/// @ingroup ge_graph
/// @brief run graph in the session with specific session id asynchronously
/// @param [in] graphId: graph id
/// @param [in] inputs: input data


+ 15
- 7
inc/external/ge/ge_api_types.h View File

@@ -157,6 +157,9 @@ const std::string OUTPUT_DATATYPE = "ge.outputDatatype";
// configure opSelectImplmode to set the op select implmode
const std::string OP_SELECT_IMPL_MODE = "ge.opSelectImplmode";

// configure optypelist_for_implmode to set which op types use implmode
const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";

// configure whether to enable hcom parallel by session constructor options param,
// its value should be "0" or "1", default value is "0"
const std::string HCOM_PARALLEL = "ge.hcomParallel";
@@ -258,12 +261,12 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::OutputTensor
namespace ir_option {
static const char *const INPUT_FORMAT = "input_format";
static const char *const INPUT_SHAPE = "input_shape";
static const char *const OP_NAME_MAP = "op_name_map";
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
static const char *const HEAD_STREAM = ge::HEAD_STREAM.c_str();
static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
static const char *const CORE_TYPE = ge::CORE_TYPE.c_str();
static const char *const SOC_VERSION = ge::SOC_VERSION.c_str();
@@ -280,16 +283,20 @@ static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
static const char *const LOG_LEVEL = "log";
static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();

// for interface: aclgrphBuildModel
const std::set<std::string> ir_builder_suppported_options = {
INPUT_FORMAT, INPUT_SHAPE, DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE,
INSERT_OP_FILE, OUTPUT_TYPE, BUFFER_OPTIMIZE, ENABLE_COMPRESS_WEIGHT,
COMPRESS_WEIGHT_CONF, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL};
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE,
DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES,
LOG_LEVEL};
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {HEAD_STREAM,
CORE_TYPE,
const std::set<std::string> global_options = {CORE_TYPE,
SOC_VERSION,
BUFFER_OPTIMIZE,
ENABLE_COMPRESS_WEIGHT,
COMPRESS_WEIGHT_CONF,
PRECISION_MODE,
EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE,
@@ -298,7 +305,8 @@ const std::set<std::string> global_options = {HEAD_STREAM,
FUSION_SWITCH_FILE,
ENABLE_SMALL_CHANNEL,
QUANT_OPTIMIZE,
OP_SELECT_IMPL_MODE};
OP_SELECT_IMPL_MODE,
OPTYPELIST_FOR_IMPLMODE};
} // namespace ir_option
} // namespace ge



+ 1
- 0
inc/external/graph/types.h View File

@@ -143,6 +143,7 @@ enum Format {
FORMAT_DHWNC,
FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format
FORMAT_FRACTAL_ZN_LSTM,
FORMAT_FRACTAL_Z_G,
FORMAT_RESERVED,
FORMAT_ALL
};


+ 6
- 5
inc/external/register/register_fmk_types.h View File

@@ -25,11 +25,12 @@ namespace domi {
/// @brief AI framework types
///
enum FrameworkType {
FMK_TYPE_C = 0,
FMK_TYPE_MINDSPORE = 1,
FMK_TYPE_T = 3,
FMK_TYPE_A_NN,
FMK_TYPE_RESERVED,
CAFFE = 0,
MINDSPORE = 1,
TENSORFLOW = 3,
ANDROID_NN,
ONNX,
FRAMEWORK_RESERVED,
};
} // namespace domi



+ 2
- 2
inc/framework/common/debug/log.h View File

@@ -231,7 +231,7 @@ using cce::ccStatus_t;
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
exec_expr; \
} \
}
};

// If expr is not RT_ERROR_NONE, print the log and return
#define GE_CHK_RT_RET(expr) \
@@ -259,7 +259,7 @@ using cce::ccStatus_t;
if (expr) { \
exec_expr; \
} \
}
};

// If make_shared is abnormal, print the log and execute the statement
#define GE_MAKE_SHARED(exec_expr0, exec_expr1) \


+ 2
- 0
inc/framework/common/ge_inner_error_codes.h View File

@@ -280,6 +280,8 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r

// Executor module error code definition
GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized.");
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist.");
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily.");

// Generator module error code definition
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed.");


+ 71
- 5
inc/framework/common/ge_types.h View File

@@ -33,11 +33,11 @@ enum RuntimeType { HOST = 0, DEVICE = 1 };
enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 };

enum FrameworkType {
FMK_TYPE_C = 0,
FMK_TYPE_MINDSPORE = 1,
FMK_TYPE_T = 3,
FMK_TYPE_A_NN,
FMK_TYPE_RESERVED,
CAFFE = 0,
MINDSPORE = 1,
TENSORFLOW = 3,
ANDROID_NN,
FRAMEWORK_RESERVED,
};

enum OpEngineType {
@@ -111,6 +111,72 @@ struct InputOutputDescInfo {
ShapeDescription shape_info;
};

// Definition of model io dims
struct InputOutputDims {
std::string name;
size_t dim_num;
uint32_t size;
std::vector<int64_t> dims;
};

// Definition of origin input info
struct OriginInputInfo {
Format format;
DataType data_type;
uint32_t dim_num;
};

// The structure of AIPP info
struct AippConfigInfo {
int8_t input_format;
int32_t src_image_size_w;
int32_t src_image_size_h;
int8_t crop;
int32_t load_start_pos_w;
int32_t load_start_pos_h;
int32_t crop_size_w;
int32_t crop_size_h;
int8_t resize;
int32_t resize_output_w;
int32_t resize_output_h;
int8_t padding;
int32_t left_padding_size;
int32_t right_padding_size;
int32_t top_padding_size;
int32_t bottom_padding_size;
int8_t csc_switch;
int8_t rbuv_swap_switch;
int8_t ax_swap_switch;
int8_t single_line_mode;
int32_t matrix_r0c0;
int32_t matrix_r0c1;
int32_t matrix_r0c2;
int32_t matrix_r1c0;
int32_t matrix_r1c1;
int32_t matrix_r1c2;
int32_t matrix_r2c0;
int32_t matrix_r2c1;
int32_t matrix_r2c2;
int32_t output_bias_0;
int32_t output_bias_1;
int32_t output_bias_2;
int32_t input_bias_0;
int32_t input_bias_1;
int32_t input_bias_2;
int32_t mean_chn_0;
int32_t mean_chn_1;
int32_t mean_chn_2;
int32_t mean_chn_3;
float min_chn_0;
float min_chn_1;
float min_chn_2;
float min_chn_3;
float var_reci_chn_0;
float var_reci_chn_1;
float var_reci_chn_2;
float var_reci_chn_3;
};

// The structure of offline ModelData
struct ModelData {
void *model_data = nullptr; // Model binary data start addr


+ 2
- 3
inc/framework/common/helper/om_file_helper.h View File

@@ -59,15 +59,14 @@ class OmFileLoadHelper {

Status GetModelPartition(ModelPartitionType type, ModelPartition &partition);

OmFileContext context_;

private:
Status CheckModelValid(const ge::ModelData &model) const;

Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size);

bool is_inited_{false};

public:
OmFileContext context_;
};

class OmFileSaveHelper {


+ 1
- 0
inc/framework/common/types.h View File

@@ -160,6 +160,7 @@ REGISTER_OPTYPE_DECLARE(SLICE, "Slice");
REGISTER_OPTYPE_DECLARE(SLICED, "SliceD");
REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv");
REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze");
REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze");
REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice");
REGISTER_OPTYPE_DECLARE(RANGE, "Range");
REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals");


+ 10
- 0
inc/framework/executor/ge_executor.h View File

@@ -96,6 +96,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
///
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);

///
/// @ingroup ge
/// @brief Set dynamic image info
@@ -110,6 +112,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
const std::vector<kAippDynamicBatchPara> &aippBatchPara,
const kAippDynamicPara &aippParms);

ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);

ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc);

@@ -206,6 +211,11 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {

static ge::Status ReleaseSingleOpResource(void *stream);

ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
static bool isInit_;
};


+ 0
- 113
inc/framework/ge_runtime_dummy/davinci_model.h View File

@@ -1,113 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_
#define INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_

#include <memory>
#include <vector>

#include "ge_runtime/op_info.h"
#include "ge_runtime/task_info.h"

namespace ge {
namespace model_runner {
// In-memory description of a Davinci model handed to the runtime: the task
// list to execute, per-category op lists (data / output / constant / variable),
// stream bookkeeping and the logical memory layout. Purely a value holder —
// all data is copied in at construction and every accessor is read-only.
class DavinciModel {
 public:
  // All list parameters are copied; the numeric sizes/bases/counts default to
  // 0 so a partially specified model remains constructible.
  DavinciModel(const std::vector<std::shared_ptr<TaskInfo>> &task_info_list,
               const std::vector<std::shared_ptr<OpInfo>> &data_info_list,
               const std::vector<std::shared_ptr<OpInfo>> &output_info_list,
               const std::vector<std::shared_ptr<OpInfo>> &constant_info_list,
               const std::vector<model_runner::OpInfoPtr> &variable_info_list,
               const std::vector<uint32_t> &wait_active_stream_list,
               const std::vector<uint32_t> &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0,
               uint64_t var_size = 0, uintptr_t logic_mem_base = 0, uintptr_t logic_weight_base = 0,
               uintptr_t logic_var_base = 0, uint32_t stream_num = 0, uint32_t batch_num = 0, uint32_t event_num = 0,
               int32_t priority = 0)
      : task_info_list_(task_info_list),
        data_info_list_(data_info_list),
        output_info_list_(output_info_list),
        constant_info_list_(constant_info_list),
        variable_info_list_(variable_info_list),
        wait_active_stream_list_(wait_active_stream_list),
        force_copy_stream_list_(force_copy_stream_list),
        mem_size_(mem_size),
        weight_size_(weight_size),
        var_size_(var_size),
        logic_mem_base_(logic_mem_base),
        logic_weight_base_(logic_weight_base),
        logic_var_base_(logic_var_base),
        stream_num_(stream_num),
        batch_num_(batch_num),
        event_num_(event_num),
        priority_(priority) {}
  ~DavinciModel() {}

  // Sizes of the feature-map, weight and variable memory regions.
  uint64_t GetMemSize() const { return mem_size_; }
  uint64_t GetWeightSize() const { return weight_size_; }
  uint64_t GetVarSize() const { return var_size_; }

  // Logical base addresses the task addresses were generated against.
  uintptr_t GetLogicMemBase() const { return logic_mem_base_; }
  uintptr_t GetLogicWeightBase() const { return logic_weight_base_; }
  uintptr_t GetLogicVarBase() const { return logic_var_base_; }

  uint32_t GetStreamNum() const { return stream_num_; }
  uint32_t GetBatchNum() const { return batch_num_; }
  uint32_t GetEventNum() const { return event_num_; }

  const std::vector<uint32_t> &GetWaitActiveStreams() const { return wait_active_stream_list_; }
  const std::vector<uint32_t> &GetForceCopyStreams() const { return force_copy_stream_list_; }

  int32_t GetPriority() const { return priority_; }

  const std::vector<std::shared_ptr<TaskInfo>> &GetTaskInfoList() const { return task_info_list_; }
  const std::vector<std::shared_ptr<OpInfo>> &GetDataInfoList() const { return data_info_list_; }
  const std::vector<std::shared_ptr<OpInfo>> &GetOutputInfoList() const { return output_info_list_; }
  // BUGFIX: this accessor previously returned output_info_list_, so callers
  // asking for the constant ops silently received the output ops instead.
  const std::vector<std::shared_ptr<OpInfo>> &GetConstantInfoList() const { return constant_info_list_; }
  const std::vector<model_runner::OpInfoPtr> &GetVariableInfoList() const { return variable_info_list_; }

 private:
  std::vector<std::shared_ptr<TaskInfo>> task_info_list_;
  std::vector<std::shared_ptr<OpInfo>> data_info_list_;
  std::vector<std::shared_ptr<OpInfo>> output_info_list_;
  std::vector<std::shared_ptr<OpInfo>> constant_info_list_;
  std::vector<model_runner::OpInfoPtr> variable_info_list_;

  std::vector<uint32_t> wait_active_stream_list_;
  std::vector<uint32_t> force_copy_stream_list_;

  uint64_t mem_size_;
  uint64_t weight_size_;
  uint64_t var_size_;

  uintptr_t logic_mem_base_;
  uintptr_t logic_weight_base_;
  uintptr_t logic_var_base_;

  uint32_t stream_num_;
  uint32_t batch_num_;
  uint32_t event_num_;

  int32_t priority_;

  // Non-copyable: this object is shared via shared_ptr by the runner.
  DavinciModel &operator=(const DavinciModel &) = delete;
  DavinciModel(const DavinciModel &) = delete;
};
} // namespace model_runner
} // namespace ge

#endif // INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_

+ 0
- 58
inc/framework/ge_runtime_dummy/model_runner.h View File

@@ -1,58 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_
#define INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_

#include <memory>
#include <unordered_map>
#include <vector>

#include "common/ge_inner_error_codes.h"
#include "common/ge_types.h"
#include "ge_runtime/davinci_model.h"

namespace ge {
namespace model_runner {
class RuntimeModel;

// Process-wide singleton that owns every loaded runtime model, keyed by
// model_id. Declarations only — implementations live in the runtime library.
class ModelRunner {
 public:
  // Sole access point; construction and destruction are private.
  static ModelRunner &Instance();

  // Loads `davinci_model` on the given device/session under `model_id`.
  // NOTE(review): `listener` presumably receives run/completion callbacks —
  // confirm in the implementation; not visible from this header.
  bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id,
                        std::shared_ptr<DavinciModel> davinci_model, std::shared_ptr<ModelListener> listener);

  // Task ids generated for a previously loaded model.
  const std::vector<uint32_t> &GetTaskIdList(uint32_t model_id) const;

  bool UnloadModel(uint32_t model_id);

  bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data);

  // Fills input/output tensor descriptions and format codes for a loaded
  // model. NOTE(review): `zero_copy` looks like it selects the zero-copy
  // description variant — confirm against the implementation.
  bool GetInputOutputDescInfo(uint32_t model_id, bool zero_copy, std::vector<InputOutputDescInfo> *input_desc,
                              std::vector<InputOutputDescInfo> *output_desc, std::vector<uint32_t> *input_format,
                              std::vector<uint32_t> *output_format);

 private:
  ModelRunner() = default;
  ~ModelRunner() = default;

  // model_id -> loaded runtime model.
  std::unordered_map<uint32_t, std::shared_ptr<RuntimeModel>> runtime_models_;
};
} // namespace model_runner
} // namespace ge

#endif // INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_

+ 0
- 72
inc/framework/ge_runtime_dummy/op_info.h View File

@@ -1,72 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_
#define INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_

#include <memory>
#include <string>
#include <vector>

namespace ge {
namespace model_runner {
// Lightweight tensor descriptor: shape plus datatype/format codes and the
// buffer size. Plain aggregate; field order is part of the ABI.
struct TensorInfo {
  // Element count = product of all dims; returns 0 for an empty shape.
  // NOTE: no overflow check is performed on the product.
  int64_t GetShapeSize() const {
    if (dims.empty()) {
      return 0;
    }
    int64_t res = 1;
    for (auto dim : dims) {
      res *= dim;
    }
    return res;
  }

  // Extent of dimension `index`, or 0 when out of range.
  // Marked const (it is a pure read accessor) so it can be called on const
  // descriptors; previously non-const for no reason.
  int64_t GetDim(uint32_t index) const {
    if (index >= dims.size()) {
      return 0;
    }
    return dims[index];
  }

  std::vector<int64_t> dims;
  uint32_t datatype;      // enum code; semantics defined by the runtime, not visible here
  uint32_t format;        // enum code; semantics defined by the runtime, not visible here
  uint32_t real_dim_cnt;
  uint32_t size;          // buffer size (presumably bytes) — TODO confirm against producer
  bool is_output;
};

// Description of a single op instance: identity (index/name/type), its I/O
// buffer addresses and tensor descriptors, weights, and source-edge wiring.
// Plain aggregate; field order is part of the ABI.
struct OpInfo {
  uint32_t index;
  std::string name;
  std::string type;
  bool var_is_broadcast;
  std::vector<uintptr_t> input_addrs;    // device addresses as integers
  std::vector<uintptr_t> output_addrs;
  std::vector<TensorInfo> input_tensors;
  std::vector<TensorInfo> output_tensors;
  std::vector<TensorInfo> weight_tensors;
  // src_name/src_index run in parallel: producer op name and its output slot.
  // NOTE(review): parallelism inferred from naming — confirm against callers.
  std::vector<std::string> src_name;
  std::vector<int64_t> src_index;
  std::string weight_data;               // weight payload carried in a std::string buffer
};

using TensorInfoPtr = std::shared_ptr<TensorInfo>;
using OpInfoPtr = std::shared_ptr<OpInfo>;
} // namespace model_runner
} // namespace ge
#endif // INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_

+ 0
- 394
inc/framework/ge_runtime_dummy/task_info.h View File

@@ -1,394 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_
#define INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_

#include <stdint.h>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "cce/taskdown_api.h"

namespace ge {
namespace model_runner {
// Discriminator for TaskInfo subclasses. The sentinel is pinned to 23 so new
// task types can be inserted before it without renumbering existing values.
enum TaskInfoType {
  CCE = 0,
  TBE,
  AICPU,
  LABEL_SET,
  LABEL_SWITCH,
  LABEL_GOTO,
  EVENT_RECORD,
  EVENT_WAIT,
  FUSION_START,
  FUSION_END,
  HCCL,
  PROFILER_TRACE,
  MEMCPY_ASYNC,
  STREAM_SWITCH,
  STREAM_ACTIVE,
  // Insert new task type here
  REVSERVED = 23  // sic: misspelling of "RESERVED"; identifier kept — it is part of the API
};

// Abstract base for all task descriptors: every task records the id of the
// stream it runs on plus a TaskInfoType discriminator. Only constructible by
// subclasses (protected constructor); deletable through the base (virtual dtor).
class TaskInfo {
 public:
  virtual ~TaskInfo() {}
  uint32_t stream_id() const { return stream_id_; }
  TaskInfoType type() const { return type_; }

 protected:
  TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {}

 private:
  uint32_t stream_id_;
  TaskInfoType type_;
};

// Task descriptor for a CCE kernel launch. Pure value holder: the op context,
// stub function name, block dimension and the raw argument / flow-table
// buffers are copied in at construction; accessors return the stored copies.
class CceTaskInfo : public TaskInfo {
 public:
  CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim,
              const std::vector<uint8_t> &args, uint32_t args_size, const std::vector<uint8_t> &sm_desc,
              const std::vector<uint8_t> &flow_table, const std::vector<uint8_t> &args_offset, bool is_flowtable)
      : TaskInfo(stream_id, TaskInfoType::CCE),
        ctx_(ctx),
        stub_func_(stub_func),
        block_dim_(block_dim),
        args_(args),
        args_size_(args_size),
        sm_desc_(sm_desc),
        flow_table_(flow_table),
        args_offset_(args_offset),
        is_flowtable_(is_flowtable) {}
  ~CceTaskInfo() override {}

  // Returns the context BY VALUE (copies ctx_), unlike the other accessors.
  cce::ccOpContext cc_context() const { return ctx_; }
  std::string stub_func() const { return stub_func_; }
  uint32_t block_dim() const { return block_dim_; }
  const std::vector<uint8_t> &args() const { return args_; }
  uint32_t args_size() const { return args_size_; }
  const std::vector<uint8_t> &sm_desc() const { return sm_desc_; }
  const std::vector<uint8_t> &flow_table() const { return flow_table_; }
  const std::vector<uint8_t> &args_offset() const { return args_offset_; }
  bool is_flowtable() const { return is_flowtable_; }

 private:
  cce::ccOpContext ctx_;
  std::string stub_func_;
  uint32_t block_dim_;
  std::vector<uint8_t> args_;
  uint32_t args_size_;  // tracked separately from args_.size(); relation not visible here
  std::vector<uint8_t> sm_desc_;
  std::vector<uint8_t> flow_table_;
  std::vector<uint8_t> args_offset_;
  bool is_flowtable_;
};

// Task descriptor for a TBE kernel launch: stub name, block dimension, raw
// argument buffers, the compiled kernel binary and the device I/O / workspace
// addresses. SetBinary is the only mutator (binary can be attached later).
class TbeTaskInfo : public TaskInfo {
 public:
  TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector<uint8_t> &args,
              uint32_t args_size, const std::vector<uint8_t> &sm_desc, void *binary, uint32_t binary_size,
              const std::vector<uint8_t> &meta_data, const std::vector<void *> &input_data_addrs,
              const std::vector<void *> &output_data_addrs, const std::vector<void *> &workspace_addrs)
      : TaskInfo(stream_id, TaskInfoType::TBE),
        stub_func_(stub_func),
        block_dim_(block_dim),
        args_(args),
        args_size_(args_size),
        sm_desc_(sm_desc),
        binary_(binary),
        binary_size_(binary_size),
        meta_data_(meta_data),
        input_data_addrs_(input_data_addrs),
        output_data_addrs_(output_data_addrs),
        workspace_addrs_(workspace_addrs) {}
  ~TbeTaskInfo() override {}

  const std::string &stub_func() const { return stub_func_; }
  uint32_t block_dim() const { return block_dim_; }
  const std::vector<uint8_t> &args() const { return args_; }
  uint32_t args_size() const { return args_size_; }
  const std::vector<uint8_t> &sm_desc() const { return sm_desc_; }
  void *binary() const { return binary_; }
  uint32_t binary_size() const { return binary_size_; }
  const std::vector<uint8_t> &meta_data() const { return meta_data_; }
  const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; }
  const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; }
  const std::vector<void *> &workspace_addrs() const { return workspace_addrs_; }

  // Replaces the kernel binary pointer and its size together.
  void SetBinary(void *binary, uint32_t binary_size) {
    binary_ = binary;
    binary_size_ = binary_size;
  }

 private:
  std::string stub_func_;
  uint32_t block_dim_;
  std::vector<uint8_t> args_;
  uint32_t args_size_;
  std::vector<uint8_t> sm_desc_;
  // NOTE(review): raw pointer — ownership of the binary is not visible from
  // this header; presumably non-owning (never freed here). Confirm in runtime.
  void *binary_;
  uint32_t binary_size_;
  std::vector<uint8_t> meta_data_;
  std::vector<void *> input_data_addrs_;
  std::vector<void *> output_data_addrs_;
  std::vector<void *> workspace_addrs_;
};

// Task descriptor for an AI-CPU kernel: identifies the kernel by shared-object
// and kernel name, carries the serialized node definition and the device I/O
// buffer addresses. Pure value holder; all accessors are read-only.
class AicpuTaskInfo : public TaskInfo {
 public:
  // FIX: `so_name` was declared with unqualified `string`; spelled out as
  // std::string for consistency with the rest of this header (same type, so
  // the interface is unchanged for all callers).
  AicpuTaskInfo(uint32_t stream_id, const std::string &so_name, const std::string &kernel_name,
                const std::string &node_def, const std::vector<void *> &input_data_addrs,
                const std::vector<void *> &output_data_addrs)
      : TaskInfo(stream_id, TaskInfoType::AICPU),
        so_name_(so_name),
        kernel_name_(kernel_name),
        node_def_(node_def),
        input_data_addrs_(input_data_addrs),
        output_data_addrs_(output_data_addrs) {}
  ~AicpuTaskInfo() override {}

  const std::string &so_name() const { return so_name_; }
  const std::string &kernel_name() const { return kernel_name_; }
  const std::string &node_def() const { return node_def_; }
  const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; }
  const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; }

 private:
  std::string so_name_;
  std::string kernel_name_;
  std::string node_def_;  // serialized node definition; format not visible here
  std::vector<void *> input_data_addrs_;
  std::vector<void *> output_data_addrs_;
};

// Common base for the label-manipulation tasks: adds a label id on top of
// TaskInfo. Concrete kind (set/switch/goto) is chosen by the subclass ctor.
class LabelTaskInfo : public TaskInfo {
 public:
  uint32_t label_id() const { return label_id_; }

 protected:
  LabelTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t label_id)
      : TaskInfo(stream_id, type), label_id_(label_id) {}
  virtual ~LabelTaskInfo() override {}

  uint32_t label_id_;
};

// Defines (sets) a label on the given stream.
class LabelSetTaskInfo : public LabelTaskInfo {
 public:
  LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {}
  ~LabelSetTaskInfo() override {}
};

// Conditional branch to a label on the given stream.
class LabelSwitchTaskInfo : public LabelTaskInfo {
 public:
  LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {}
  ~LabelSwitchTaskInfo() override {}
};

// Unconditional jump to a label on the given stream.
class LabelGotoTaskInfo : public LabelTaskInfo {
 public:
  LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {}
  ~LabelGotoTaskInfo() override {}
};

// Common base for event tasks: adds an event id on top of TaskInfo. Concrete
// kind (record/wait) is chosen by the subclass constructor.
class EventTaskInfo : public TaskInfo {
 public:
  uint32_t event_id() const { return event_id_; }

 protected:
  EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id)
      : TaskInfo(stream_id, type), event_id_(event_id) {}
  virtual ~EventTaskInfo() override {}

  uint32_t event_id_;
};

// Records (signals) the event on the given stream.
class EventRecordTaskInfo : public EventTaskInfo {
 public:
  EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {}
  ~EventRecordTaskInfo() override {}
};

// Makes the given stream wait until the event is recorded.
class EventWaitTaskInfo : public EventTaskInfo {
 public:
  EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {}
  ~EventWaitTaskInfo() override {}
};

// Marks the start of a fusion region on the given stream; carries no payload
// beyond the stream id and its type tag.
class FusionStartTaskInfo : public TaskInfo {
 public:
  explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {}
  ~FusionStartTaskInfo() override {}
};

// Marks the end of a fusion region on the given stream.
class FusionEndTaskInfo : public TaskInfo {
 public:
  explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {}
  ~FusionEndTaskInfo() override {}
};

// Task describing an HCCL (collective communication) operation.
// Pure data holder: buffers/workspace addresses, op parameters, and the
// hcom callbacks used to bind/unbind the model and distribute the task.
// NOTE(review): the void* members are raw device/host addresses owned by
// the caller — this class does not manage their lifetime.
class HcclTaskInfo : public TaskInfo {
 public:
  // Fix: hccl_type was previously taken as 'const std::string' (by value),
  // forcing a needless copy at the call site; pass by const reference.
  // Source-compatible with all existing callers.
  HcclTaskInfo(uint32_t stream_id, const std::string &hccl_type, void *input_data_addr, void *output_data_addr,
               void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num,
               const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id,
               int64_t op_type, int64_t data_type, std::function<bool(void *, void *)> hcom_bind_model,
               std::function<bool(void *)> hcom_unbind_model,
               std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task)
      : TaskInfo(stream_id, TaskInfoType::HCCL),
        hccl_type_(hccl_type),
        input_data_addr_(input_data_addr),
        output_data_addr_(output_data_addr),
        workspace_addr_(workspace_addr),
        workspace_size_(workspace_size),
        hccl_stream_num_(hccl_stream_num),
        private_def_(private_def),
        ops_kernel_store_(ops_kernel_store),
        count_(count),
        root_id_(root_id),
        op_type_(op_type),
        data_type_(data_type),
        hcom_bind_model_(hcom_bind_model),
        hcom_unbind_model_(hcom_unbind_model),
        hcom_distribute_task_(hcom_distribute_task) {}
  ~HcclTaskInfo() override {}

  const std::string &hccl_type() const { return hccl_type_; }
  void *input_data_addr() const { return input_data_addr_; }
  void *output_data_addr() const { return output_data_addr_; }
  void *workspace_addr() const { return workspace_addr_; }
  int64_t workspace_size() const { return workspace_size_; }
  int64_t hccl_stream_num() const { return hccl_stream_num_; }
  const std::vector<uint8_t> &private_def() const { return private_def_; }
  void *ops_kernel_store() const { return ops_kernel_store_; }
  int32_t count() const { return count_; }
  int64_t root_id() const { return root_id_; }
  int64_t op_type() const { return op_type_; }
  int64_t data_type() const { return data_type_; }
  // The three accessors below return the std::function by value (a copy) to
  // keep the existing public interface unchanged.
  std::function<bool(void *, void *)> hcom_bind_model() const { return hcom_bind_model_; }
  std::function<bool(void *)> hcom_unbind_model() const { return hcom_unbind_model_; }
  std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task() const {
    return hcom_distribute_task_;
  }

 private:
  std::string hccl_type_;
  void *input_data_addr_;
  void *output_data_addr_;
  void *workspace_addr_;
  int64_t workspace_size_;
  int64_t hccl_stream_num_;
  std::vector<uint8_t> private_def_;
  void *ops_kernel_store_;
  int32_t count_;
  int64_t root_id_;
  int64_t op_type_;
  int64_t data_type_;
  std::function<bool(void *, void *)> hcom_bind_model_;
  std::function<bool(void *)> hcom_unbind_model_;
  std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_;
};

// Task that emits a profiler trace point identified by log_id on a stream.
// NOTE(review): "flat" appears to be a flags/format word — confirm with the
// runtime API before relying on its meaning.
class ProfilerTraceTaskInfo : public TaskInfo {
 public:
  ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat)
      : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {}
  ~ProfilerTraceTaskInfo() override = default;

  uint64_t log_id() const { return log_id_; }
  bool notify() const { return notify_; }
  uint32_t flat() const { return flat_; }

 private:
  uint64_t log_id_;
  bool notify_;
  uint32_t flat_;
};

// Task describing an asynchronous memory copy on the given stream.
// Mirrors the rtMemcpyAsync-style argument list: destination buffer and its
// capacity, source buffer, byte count, and copy kind.
class MemcpyAsyncTaskInfo : public TaskInfo {
 public:
  MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind)
      : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC),
        dst_(dst),
        dst_max_(dst_max),
        src_(src),
        count_(count),
        kind_(kind) {}
  ~MemcpyAsyncTaskInfo() override {}

  void *dst() const { return dst_; }
  uint64_t dst_max() const { return dst_max_; }
  void *src() const { return src_; }
  uint64_t count() const { return count_; }
  uint32_t kind() const { return kind_; }

 private:
  void *dst_;
  uint64_t dst_max_;
  void *src_;
  uint64_t count_;
  // Fix: was declared int32_t while the constructor parameter and the
  // kind() accessor both use uint32_t — an implicit sign conversion in
  // both directions. Member type now matches the interface.
  uint32_t kind_;
};

// Task that conditionally activates another stream: compares the value at
// input_addr against the value at value_addr using condition 'cond', and on
// success switches execution to true_stream_id.
class StreamSwitchTaskInfo : public TaskInfo {
 public:
  StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond,
                       int64_t data_type)
      : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH),
        true_stream_id_(true_stream_id),
        input_addr_(input_addr),
        value_addr_(value_addr),
        cond_(cond),
        data_type_(data_type) {}
  ~StreamSwitchTaskInfo() override = default;

  int64_t true_stream_id() const { return true_stream_id_; }
  void *input_addr() const { return input_addr_; }
  void *value_addr() const { return value_addr_; }
  int64_t cond() const { return cond_; }
  int64_t data_type() const { return data_type_; }

 private:
  int64_t true_stream_id_;
  void *input_addr_;
  void *value_addr_;
  int64_t cond_;
  int64_t data_type_;
};

// Task that activates another stream (identified by active_stream_id) from
// the given stream.
class StreamActiveTaskInfo : public TaskInfo {
 public:
  StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id)
      : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {}
  ~StreamActiveTaskInfo() override = default;

  uint32_t active_stream_id() const { return active_stream_id_; }

 private:
  uint32_t active_stream_id_;
};
} // namespace model_runner
} // namespace ge

#endif // INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_

+ 6
- 0
inc/framework/omg/omg.h View File

@@ -23,6 +23,7 @@
#include <vector>
#include "framework/common/types.h"
#include "framework/omg/omg_inner_types.h"
#include "framework/omg/parser/parser_inner_ctx.h"
#include "proto/ge_ir.pb.h"
#include "proto/om.pb.h"

@@ -99,6 +100,11 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const

Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
std::vector<std::string> &output_nodes_name);

void UpdateOmgCtxWithParserCtx();

void UpdateParserCtxWithOmgCtx();

} // namespace ge

namespace domi {


+ 2
- 2
inc/framework/omg/omg_inner_types.h View File

@@ -31,7 +31,7 @@
using domi::DOMI_TENSOR_ND;
using domi::DOMI_TENSOR_RESERVED;
using domi::domiTensorFormat_t;
using domi::FMK_TYPE_RESERVED;
using domi::FRAMEWORK_RESERVED;
using domi::FrameworkType;
using std::map;
using std::string;
@@ -100,7 +100,7 @@ struct OmgContext {
std::string ddk_version;
// preferential format used by the entire network
domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED;
domi::FrameworkType type = domi::FMK_TYPE_RESERVED;
domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
RunMode run_mode = ONLY_PRE_CHECK;
bool train_flag = false;
// whether to use FP16 high precision


+ 1
- 2
inc/graph/compute_graph.h View File

@@ -80,6 +80,7 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
Vistor<NodePtr> GetOutputNodes() const;

NodePtr FindNode(const std::string &name) const;
NodePtr FindFirstNodeMatchType(const std::string &name) const;
// AddNode with NodePtr
NodePtr AddNode(NodePtr node);
NodePtr AddNode(OpDescPtr op);
@@ -235,8 +236,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
std::vector<NodePtr> &stack);
graphStatus BFSTopologicalSorting(std::vector<NodePtr> &node_vec, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::deque<NodePtr> &stack);
graphStatus BFSTopologicalSortingWithGroup(std::vector<NodePtr> &node_vec,
std::map<NodePtr, uint32_t> &map_in_edge_num, std::deque<NodePtr> &stack);
graphStatus CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::map<string, NodePtr> &breadth_node_map);
graphStatus TopologicalSortingGraph();


+ 4
- 0
inc/graph/debug/ge_attr_define.h View File

@@ -136,6 +136,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP_CONV_OP;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME;

@@ -176,6 +179,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS;

// to be deleted
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED;


+ 3
- 0
inc/graph/ge_tensor.h View File

@@ -102,6 +102,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrH
Format GetOriginFormat() const;
void SetOriginFormat(Format originFormat);

void SetName(const std::string &name);
const std::string GetName() const;

DataType GetDataType() const;
void SetDataType(DataType dt);



+ 24
- 19
inc/graph/utils/graph_utils.h View File

@@ -22,6 +22,7 @@
#include <map>
#include <string>
#include <vector>
#include <list>
#include "graph/anchor.h"
#include "graph/node.h"
#include "graph/compute_graph.h"
@@ -111,21 +112,25 @@ enum IOType { kIn, kOut };

struct NodeIndexIO {
NodeIndexIO(ge::NodePtr node, uint32_t index, IOType io_type)
: node(std::move(node)), index(index), io_type(io_type) {}
: node_(std::move(node)), index_(index), io_type_(io_type) {
if (node_ != nullptr) {
value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_);
}
}
NodeIndexIO(ge::NodePtr node, int index, IOType io_type)
: node(std::move(node)), index(static_cast<uint32_t>(index)), io_type(io_type) {}
: node_(std::move(node)), index_(static_cast<uint32_t>(index)), io_type_(io_type) {
if (node_ != nullptr) {
value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_);
}
}
~NodeIndexIO() {}

NodePtr node = nullptr;
uint32_t index = 0;
IOType io_type = kOut;
NodePtr node_ = nullptr;
uint32_t index_ = 0;
IOType io_type_ = kOut;
std::string value_;

std::string ToString() const {
if ((node == nullptr) || (node->GetOwnerComputeGraph() == nullptr)) {
return "";
}
return node->GetName() + (io_type == kOut ? "_out_" : "_in_") + std::to_string(index);
}
std::string ToString() const { return value_; }
};

class GraphUtils {
@@ -310,7 +315,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus GetRefMapping(const ComputeGraphPtr &graph,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -340,7 +345,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleInAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -351,7 +356,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleOutAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -362,7 +367,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleSubgraphInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -373,7 +378,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleMergeInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -384,7 +389,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleSubgraphOutput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -397,7 +402,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol);

///
@@ -409,7 +414,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///


+ 2
- 0
inc/graph/utils/type_utils.h View File

@@ -25,6 +25,7 @@
#include "graph/types.h"
#include "graph/usr_types.h"
#include "register/register_types.h"
#include "external/register/register_fmk_types.h"

namespace ge {
class TypeUtils {
@@ -39,6 +40,7 @@ class TypeUtils {
static Format SerialStringToFormat(const std::string &str);
static Format DataFormatToFormat(const std::string &str);
static Format DomiFormatToFormat(domi::domiTensorFormat_t domi_format);
static std::string FmkTypeToSerialString(domi::FrameworkType fmk_type);

static graphStatus Usr2DefQuantizeFactorParams(const UsrQuantizeFactorParams &usr, QuantizeFactorParams &def);
static graphStatus Def2UsrQuantizeFactorParams(const QuantizeFactorParams &def, UsrQuantizeFactorParams &usr);


+ 22
- 128
src/common/graph/compute_graph.cc View File

@@ -48,63 +48,6 @@ bool IsUseBFS() {
}
return false;
}
bool IsTailingOptimization() {
string is_tailing_optimization_option;
auto ret = GetContext().GetOption(ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, is_tailing_optimization_option);
if (ret == GRAPH_SUCCESS) {
GELOGI("Option ge.exec.isTailingOptimization is %s", is_tailing_optimization_option.c_str());
// "1" means it's True from frontend option
return is_tailing_optimization_option == "1";
}
GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default.");
return false;
}
bool IsFusedNode(const NodePtr &node) {
bool is_fused_node = false;
AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_FLAG, is_fused_node);
return is_fused_node;
}
string GetGroupId(const NodePtr &node) {
string group_id;
AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id);
return group_id;
}
bool IsGroupEnd(const NodePtr &node) {
if (GetGroupId(node).empty()) {
return false;
}
if (node->GetOutDataNodesSize() == 0) {
return true;
}
for (const auto &out_data_node : node->GetOutDataNodes()) {
if (IsFusedNode(out_data_node)) {
return true;
}
}
return false;
}
void SplitNodeToStack(const std::map<string, NodePtr> &breadth_node_map, string current_group_id,
std::vector<NodePtr> &stack_input, std::deque<NodePtr> &group_stack, std::deque<NodePtr> &stack) {
for (const auto &name_node : breadth_node_map) {
// group first
string group_id;
if (AttrUtils::GetStr(name_node.second->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id)) {
GELOGI("current node %s, group id: %s , current group id %s", name_node.second->GetName().c_str(),
group_id.c_str(), current_group_id.c_str());
if (!current_group_id.empty() && group_id != current_group_id) {
GELOGI("node go to input_stack back: %s", name_node.second->GetName().c_str());
(void)stack_input.insert(stack_input.begin(), name_node.second);
} else {
current_group_id = group_id;
GELOGI("node go to group_stack: %s", name_node.second->GetName().c_str());
(void)group_stack.push_front(name_node.second);
}
continue;
}
GELOGI("node go to stack: %s ", name_node.second->GetName().c_str());
(void)stack.push_front(name_node.second);
}
}
} // namespace

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::ComputeGraph(const std::string &name)
@@ -193,6 +136,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::FindNode(co
return nullptr;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr
ComputeGraph::FindFirstNodeMatchType(const std::string &name) const {
for (const auto &node : nodes_) {
if (node == nullptr) {
continue;
}
if (node->GetType() == name) {
return node;
}
}
return nullptr;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreEqual(
const ComputeGraph &r_graph) const {
// ProtoMsgOwner <::google::protobuf::Message> is temporarily ignored
@@ -642,9 +598,9 @@ ComputeGraph::UpdateInputMapping(const std::map<uint32_t, uint32_t> &input_mappi
///
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
ComputeGraph::UpdateOutputMapping(const std::map<uint32_t, uint32_t> &output_mapping) {
NodePtr net_output = FindNode(NODE_NAME_NET_OUTPUT);
NodePtr net_output = FindFirstNodeMatchType(NETOUTPUT);
if (net_output == nullptr) {
GE_LOGE("UpdateOutputMapping failed: node %s not exist in graph.", NODE_NAME_NET_OUTPUT);
GE_LOGE("UpdateOutputMapping failed: node type %s not exist in graph.", NETOUTPUT);
return GRAPH_FAILED;
}
OpDescPtr op_desc = net_output->GetOpDesc();
@@ -799,65 +755,6 @@ graphStatus ComputeGraph::BFSTopologicalSorting(std::vector<NodePtr> &node_vec,
return GRAPH_SUCCESS;
}

graphStatus ComputeGraph::BFSTopologicalSortingWithGroup(std::vector<NodePtr> &node_vec,
std::map<NodePtr, uint32_t> &map_in_edge_num,
std::deque<NodePtr> &stack) {
GELOGI("Runing_Bfs_Sort_With_Group");
std::string current_group_id;
std::vector<NodePtr> stack_input;
std::deque<NodePtr> group_stack;
std::deque<NodePtr> fused_node_stack;
std::map<string, NodePtr> breadth_node_map;
// Record the number of non data nodes but no input nodes
GE_CHK_BOOL_EXEC(SortNodes(stack_input, map_in_edge_num) == GRAPH_SUCCESS, return GRAPH_FAILED, "sort nodes failed");

// Only data nodes here
while (!stack_input.empty() || !stack.empty() || !group_stack.empty()) {
NodePtr node = nullptr;
if (!group_stack.empty()) {
// Traversal node in group has priority
node = group_stack.back();
group_stack.pop_back();
} else if (!stack.empty()) {
node = stack.back();
stack.pop_back();
} else {
node = stack_input.back();
stack_input.pop_back();
}

if (IsFusedNode(node) && current_group_id.empty()) {
current_group_id = node->GetName();
}
if (GetGroupId(node).empty() || GetGroupId(node) == current_group_id) {
node_vec.push_back(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str());
} else {
if (current_group_id.empty()) {
current_group_id = GetGroupId(node);
node_vec.push_back(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str());
} else {
GELOGI("current group id is %s ,node go to input_stack back: %s", current_group_id.c_str(),
node->GetName().c_str());
(void)stack_input.insert(stack_input.begin(), node);
continue;
}
}
CollectBreadthOutNode(node, map_in_edge_num, breadth_node_map);
SplitNodeToStack(breadth_node_map, current_group_id, stack_input, group_stack, stack);
breadth_node_map.clear();
// check the end of group
if (IsGroupEnd(node)) {
GELOGI("Current node %s is end of group %s.", node->GetName().c_str(), current_group_id.c_str());
current_group_id = "";
}
}
return GRAPH_SUCCESS;
}

graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::map<string, NodePtr> &breadth_node_map) {
for (const auto &anchor : node->GetAllOutDataAnchors()) {
@@ -907,7 +804,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Topolog
}

std::vector<std::shared_ptr<ComputeGraph>> subgraphs;
(void)AllGraphNodes(subgraphs);
auto nodes = AllGraphNodes(subgraphs);
for (size_t i = 0; i < nodes.size(); i++) {
NodePtr node = nodes.at(i); // [node: should not be null]
node->GetOpDesc()->SetId(i); // [node->GetOpDesc(): should not be null]
}
if (sub_graph_.size() != subgraphs.size()) { // Graph Partition use subgraph, Keep original
GELOGW("Keep original subgraph for graph size %zu not equal %zu.", sub_graph_.size(), subgraphs.size());
return SUCCESS;
@@ -920,17 +821,10 @@ graphStatus ComputeGraph::TopologicalSortingGraph() {
std::vector<NodePtr> node_vec;
std::map<NodePtr, uint32_t> map_in_edge_num;
bool use_BFS = IsUseBFS();
bool is_tailing_optimization = IsTailingOptimization();
if (use_BFS) {
std::deque<NodePtr> stack;
if (is_tailing_optimization) {
if (BFSTopologicalSortingWithGroup(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
} else {
if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
} else {
std::vector<NodePtr> stack;


+ 1
- 1
src/common/graph/format_refiner.cc View File

@@ -41,7 +41,7 @@ using namespace ge;
using namespace std;
namespace ge {
namespace {
static const std::unordered_set<string> kChangeDimNodes = {RESHAPE, PERMUTE, EXPANDDIMS, SQUEEZE};
static const std::unordered_set<string> kChangeDimNodes = {PERMUTE, EXPANDDIMS, SQUEEZE};
static bool net_format_is_nd = true;
static Format g_user_set_format = FORMAT_ND;
static bool is_first_infer = true;


+ 5
- 1
src/common/graph/ge_attr_define.cc View File

@@ -118,6 +118,9 @@ const std::string ATTR_NAME_NAN_OPT = "nan_opt";
const std::string ATTR_NAME_AIPP = "aipp";
const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp";

const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs";
const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs";

const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id";
const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name";

@@ -150,6 +153,7 @@ const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG = "need_stream_cycle_event";
const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id";
const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start";
const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size";
const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims";

// To be deleted
const std::string ATTR_TO_BE_DELETED = "to_be_deleted";
@@ -1000,7 +1004,7 @@ const std::string ATTR_NAME_FUSION_TYPE_LIST = "_fusion_type_list";
const std::string ATTR_NAME_VALID_INPUT_SHAPE_LIST_LIST = "_valid_input_shape_list_list";
const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_list_list";
const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list";
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_input_offset_list_list";
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list";

// used for Horovod
const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id";


+ 5
- 0
src/common/graph/ge_attr_value.cc View File

@@ -1233,6 +1233,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc(
GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed.");
}
}

if (!op_desc->output_name_idx_.empty()) {
op_desc->output_name_idx_.clear();
}

return op_desc;
}



+ 18
- 0
src/common/graph/ge_tensor.cc View File

@@ -464,6 +464,24 @@ void GeTensorDesc::SetFormat(Format format) {
}
}

void GeTensorDesc::SetName(const std::string &name) {
auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg();
if (tensor_descriptor_msg != nullptr) {
tensor_descriptor_msg->set_name(name);
return;
}
GELOGW("[SetName]tensor_descriptor_msg is null.");
}

const std::string GeTensorDesc::GetName() const {
auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg();
if (tensor_descriptor_msg != nullptr) {
return tensor_descriptor_msg->name();
}
GELOGW("[GetName]tensor_descriptor_msg is null.");
return "";
}

Format GeTensorDesc::GetOriginFormat() const {
std::string origin_format_str;
if (!AttrUtils::GetStr(this, TENSOR_UTILS_ORIGIN_FORMAT, origin_format_str)) {


+ 182
- 0
src/common/graph/graph.mk View File

@@ -0,0 +1,182 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
./proto/om.proto \
./proto/ge_ir.proto \
./proto/ge_onnx.proto \
./proto/insert_op.proto \
./proto/task.proto \
./proto/fwk_adapter.proto \
./proto/op_mapping_info.proto \
./anchor.cc \
./ge_attr_value.cc \
./attr_value.cc \
./buffer.cc \
./compute_graph.cc \
./graph.cc \
./inference_context.cc \
./shape_refiner.cc \
./format_refiner.cc \
./ref_relation.cc \
./model.cc \
./model_serialize.cc \
./node.cc \
./op_desc.cc \
./operator.cc \
./operator_factory.cc \
./operator_factory_impl.cc \
./ge_attr_define.cc \
./ge_tensor.cc \
./detail/attributes_holder.cc \
./utils/anchor_utils.cc \
./utils/graph_utils.cc \
./utils/ge_ir_utils.cc \
./utils/node_utils.cc \
./utils/op_desc_utils.cc \
./utils/type_utils.cc \
./utils/tensor_utils.cc \
./tensor.cc \
./debug/graph_debug.cc \
./opsproto/opsproto_manager.cc \
../ops/op_imp.cpp \
option/ge_context.cc \
option/ge_local_context.cc \
./runtime_inference_context.cc \

COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/ge_ir.proto \
proto_inner/ge_onnx.proto \
proto/insert_op.proto \
proto/task.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
inc \
inc/external \
inc/external/graph \
inc/graph \
inc/common \
common \
common/graph \
third_party/protobuf/include \
libc_sec/include \
ops/built-in/op_proto/inc \


#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
LOCAL_CPPFLAGS += -fexceptions

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_HOST_SHARED_LIBRARY)


#compiler for device
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -O2

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

ifeq ($(device_os),android)
LOCAL_LDFLAGS := -ldl
endif

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_SHARED_LIBRARY)


# compile for ut/st
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS +=

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_LLT_SHARED_LIBRARY)


#compiler for host static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
LOCAL_CPPFLAGS += -fexceptions

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_HOST_STATIC_LIBRARY)

#compiler for device static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -O2

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_STATIC_LIBRARY)

+ 39
- 0
src/common/graph/model_serialize.cc View File

@@ -130,6 +130,16 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op
for (const std::string &name : op_desc->GetSubgraphInstanceNames()) {
op_def_proto->add_subgraph_name(name);
}

proto::AttrDef key;
proto::AttrDef value;
for (auto &item : op_desc->output_name_idx_) {
key.mutable_list()->add_s(item.first);
value.mutable_list()->add_i(item.second);
}
auto op_desc_attr = op_def_proto->mutable_attr();
op_desc_attr->insert({"_output_name_key", key});
op_desc_attr->insert({"_output_name_value", value});
}
return true;
}
@@ -228,6 +238,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali
}

bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) {
std::vector<string> key;
std::vector<uint32_t> value;
if (op_def_proto.attr().count("_output_name_key") > 0) {
auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list();
for (const auto &item_s : output_name_key_list.s()) {
key.push_back(item_s);
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_output_name_key");
}
if (op_def_proto.attr().count("_output_name_value") > 0) {
auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list();
for (const auto &item_i : output_name_value_list.i()) {
value.push_back(static_cast<uint32_t>(item_i));
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_output_name_value");
}

op_desc = std::shared_ptr<OpDesc>(new (std::nothrow) OpDesc(protobuf_owner_, &op_def_proto));
GE_CHK_BOOL_EXEC(op_desc != nullptr, return false, "op_desc is nullptr.");

@@ -253,6 +282,16 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d
op_desc->SetSubgraphInstanceName(graph_index++, name);
}

if (key.size() != 0) {
if (key.size() != value.size()) {
GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size());
} else {
for (uint32_t i = 0; i < key.size(); ++i) {
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key.at(i), value.at(i)));
}
}
}

return true;
}



+ 3
- 0
src/common/graph/module.mk View File

@@ -0,0 +1,3 @@
LOCAL_PATH := $(call my-dir)

include $(LOCAL_PATH)/graph.mk

+ 2
- 0
src/common/graph/tensor.cc View File

@@ -589,6 +589,7 @@ GeTensorDesc TensorAdapter::TensorDesc2GeTensorDesc(const TensorDesc &tensor_des
tensor_desc.GetDataType());
ge_tensor_desc.SetOriginShape(GeShape(tensor_desc.GetOriginShape().GetDims()));
ge_tensor_desc.SetOriginFormat(tensor_desc.GetOriginFormat());
ge_tensor_desc.SetName(tensor_desc.GetName());
std::vector<std::pair<int64_t, int64_t>> shape_range;
auto status = tensor_desc.GetShapeRange(shape_range);
if (status != GRAPH_SUCCESS) {
@@ -613,6 +614,7 @@ TensorDesc TensorAdapter::GeTensorDesc2TensorDesc(const GeTensorDesc &ge_tensor_
ge_tensor_desc.GetDataType());
tensor_desc.SetOriginShape(Shape(ge_tensor_desc.GetOriginShape().GetDims()));
tensor_desc.SetOriginFormat(ge_tensor_desc.GetOriginFormat());
tensor_desc.SetName(ge_tensor_desc.GetName());
std::vector<std::pair<int64_t, int64_t>> shape_range;
auto status = ge_tensor_desc.GetShapeRange(shape_range);
if (status != GRAPH_SUCCESS) {


+ 18
- 18
src/common/graph/utils/graph_utils.cc View File

@@ -1336,7 +1336,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraphPtr GraphUtils::FindR
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::GetRefMapping(const ComputeGraphPtr &graph,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(graph);
for (auto &node : graph->GetAllNodes()) {
@@ -1384,7 +1384,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr GraphUtils::FindNodeFromA
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);

@@ -1402,7 +1402,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
}

for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn);
NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn);
OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) {
std::string symbol = cur_node_info.ToString();
@@ -1410,7 +1410,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
symbol_to_anchors[symbol] = {cur_node_info};
anchor_to_symbol[symbol] = symbol;
} else {
NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
GE_LOGE("Update symbol mapping failed.");
return GRAPH_FAILED;
@@ -1429,18 +1429,18 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
NodeIndexIO cur_node_info = NodeIndexIO(node, out_data_anchor->GetIdx(), kOut);
NodeIndexIO cur_node_info(node, out_data_anchor->GetIdx(), kOut);
if (anchor_to_symbol.find(cur_node_info.ToString()) != anchor_to_symbol.end()) {
continue;
}

int32_t reuse_in_index = -1;
if (IsRefFromInput(out_data_anchor, reuse_in_index)) {
NodeIndexIO exist_node_info = NodeIndexIO(node, reuse_in_index, kIn);
NodeIndexIO exist_node_info(node, reuse_in_index, kIn);
if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
GE_LOGE("Update symbol mapping failed.");
return GRAPH_FAILED;
@@ -1448,7 +1448,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node,
} else {
std::string symbol = cur_node_info.ToString();
GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str());
symbol_to_anchors.emplace(std::make_pair(symbol, std::vector<NodeIndexIO>{cur_node_info}));
symbol_to_anchors.emplace(std::make_pair(symbol, std::list<NodeIndexIO>{cur_node_info}));
anchor_to_symbol.emplace(std::make_pair(symbol, symbol));
}
}
@@ -1464,7 +1464,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
@@ -1482,8 +1482,8 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node,
OutDataAnchorPtr peer_out_anchor = parent_in_anchor->GetPeerOutAnchor();
if (peer_out_anchor != nullptr) {
// Data has and only has one input
NodeIndexIO cur_node_info = NodeIndexIO(node, 0, kIn);
NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO cur_node_info(node, 0, kIn);
NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
GE_LOGE("Update symbol mapping failed.");
return GRAPH_FAILED;
@@ -1501,7 +1501,7 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleMergeInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
std::vector<NodeIndexIO> exist_node_infos;
@@ -1574,7 +1574,7 @@ graphStatus GraphUtils::HandleMergeInput(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph();
@@ -1595,8 +1595,8 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
}
GE_CHECK_NOTNULL(parent_node->GetOutDataAnchor(index));
// Union symbol of peer_out_anchor & parent_out_anchor
NodeIndexIO peer_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO parent_node_info = NodeIndexIO(parent_node, index, kOut);
NodeIndexIO peer_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO parent_node_info(parent_node, index, kOut);
std::string symbol;
if ((UnionSymbolMapping(peer_node_info, parent_node_info, symbol_to_anchors, anchor_to_symbol, symbol) !=
GRAPH_SUCCESS) ||
@@ -1606,7 +1606,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
return GRAPH_FAILED;
}

NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn);
NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn);
GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str());
symbol_to_anchors[symbol].emplace_back(cur_node_info);
anchor_to_symbol.emplace(std::make_pair(cur_node_info.ToString(), symbol));
@@ -1625,7 +1625,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol) {
std::string symbol1 = anchor_to_symbol[exist_node_info1.ToString()];
std::string symbol2 = anchor_to_symbol[exist_node_info2.ToString()];
@@ -1675,7 +1675,7 @@ graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
auto iter1 = anchor_to_symbol.find(exist_node_info.ToString());
if (iter1 == anchor_to_symbol.end()) {


+ 0
- 1
src/common/graph/utils/op_desc_utils.cc View File

@@ -524,7 +524,6 @@ OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) {
return nullptr;
}

GE_CHK_BOOL_EXEC(const_opdesc != nullptr, return nullptr, "const_opdesc is nullptr!");
CHECK_FALSE_EXEC(SetWeights(const_opdesc, tensor_ptr) == ge::GRAPH_SUCCESS, return nullptr);

const_opdesc->SetType(CONSTANT);


+ 2
- 0
src/common/graph/utils/tensor_utils.cc View File

@@ -273,6 +273,7 @@ static graphStatus CalcTensorElementCnt(const std::vector<int64_t> &dims, Format
case FORMAT_FRACTAL_Z:
graph_status = CalcElementCntOfFractalZ(dims, data_type, element_cnt);
break;
case FORMAT_NC1HWC0_C04:
case FORMAT_FRACTAL_NZ:
case FORMAT_FRACTAL_ZZ:
case FORMAT_NDHWC:
@@ -283,6 +284,7 @@ static graphStatus CalcTensorElementCnt(const std::vector<int64_t> &dims, Format
case FORMAT_FRACTAL_Z_3D_TRANSPOSE:
case FORMAT_NDC1HWC0:
case FORMAT_FRACTAL_Z_C04:
case FORMAT_FRACTAL_ZN_LSTM:
graph_status = CalcElementCntByDims(dims, element_cnt);
break;
default:


+ 20
- 2
src/common/graph/utils/type_utils.cc View File

@@ -59,6 +59,7 @@ static const std::map<Format, std::string> kFormatToStringMap = {
{FORMAT_CN, "CN"},
{FORMAT_NC, "NC"},
{FORMAT_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"},
{FORMAT_FRACTAL_Z_G, "FRACTAL_Z_G"},
{FORMAT_RESERVED, "FORMAT_RESERVED"},
{FORMAT_ALL, "ALL"}};

@@ -98,8 +99,9 @@ static const std::unordered_set<std::string> kInternalFormat = {"NC1HWC0",
"FRACTAL_NZ",
"NDC1HWC0",
"FORMAT_FRACTAL_Z_3D",
"FORMAT_FRACTAL_Z_3D_TRANSPOSE"
"FORMAT_FRACTAL_ZN_LSTM"};
"FORMAT_FRACTAL_Z_3D_TRANSPOSE",
"FORMAT_FRACTAL_ZN_LSTM",
"FORMAT_FRACTAL_Z_G"};

static const std::map<std::string, Format> kDataFormatMap = {
{"NCHW", FORMAT_NCHW}, {"NHWC", FORMAT_NHWC}, {"NDHWC", FORMAT_NDHWC}, {"NCDHW", FORMAT_NCDHW}, {"ND", FORMAT_ND}};
@@ -143,6 +145,7 @@ static const std::map<std::string, Format> kStringToFormatMap = {
{"CN", FORMAT_CN},
{"NC", FORMAT_NC},
{"FRACTAL_ZN_LSTM", FORMAT_FRACTAL_ZN_LSTM},
{"FRACTAL_Z_G", FORMAT_FRACTAL_Z_G},
{"FORMAT_RESERVED", FORMAT_RESERVED},
{"ALL", FORMAT_ALL}};

@@ -235,6 +238,11 @@ static const std::map<ge::DataType, uint32_t> kDataTypeToLength = {
{DT_RESOURCE, sizeof(uint64_t)},
};

static const std::map<domi::FrameworkType, std::string> kFmkTypeToString = {
{domi::CAFFE, "caffe"}, {domi::MINDSPORE, "mindspore"}, {domi::TENSORFLOW, "tensorflow"},
{domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"},
};

bool TypeUtils::IsDataTypeValid(DataType dt) {
uint32_t num = static_cast<uint32_t>(dt);
GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid");
@@ -312,6 +320,16 @@ Format TypeUtils::DomiFormatToFormat(domi::domiTensorFormat_t domi_format) {
return FORMAT_RESERVED;
}

std::string TypeUtils::FmkTypeToSerialString(domi::FrameworkType fmk_type) {
auto it = kFmkTypeToString.find(fmk_type);
if (it != kFmkTypeToString.end()) {
return it->second;
} else {
GELOGW("Framework type not support %d.", fmk_type);
return "";
}
}

static inline void CopyDataFromBuffer(vector<uint8_t> &data, const Buffer &buffer) {
data.clear();
if (buffer.GetData() != nullptr && buffer.GetSize() != 0) {


+ 12
- 9
src/ge/CMakeLists.txt View File

@@ -45,7 +45,7 @@ include_directories(${GE_SOURCE_DIR}/inc/external)
include_directories(${GE_SOURCE_DIR}/inc/external/graph)
include_directories(${GE_SOURCE_DIR}/inc/framework)
include_directories(${GE_SOURCE_DIR}/inc/framework/common)
include_directories(${GE_SOURCE_DIR}/inc/runtime)
include_directories(${GE_SOURCE_DIR}/inc/graph)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce)
@@ -108,6 +108,10 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/partition/engine_place.cc"
"graph/partition/graph_partition.cc"
"graph/passes/*.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"host_kernels/add_kernel.cc"
"host_kernels/broadcast_args_kernel.cc"
"host_kernels/broadcast_gradient_args_kernel.cc"
@@ -144,10 +148,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"host_kernels/transdata_kernel.cc"
"host_kernels/transpose_kernel.cc"
"host_kernels/unpack_kernel.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"host_kernels/unsqueeze_kernel.cc"
"hybrid/common/npu_memory_allocator.cc"
"hybrid/common/tensor_value.cc"
"hybrid/executor/*.cc"
@@ -155,6 +156,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"hybrid/hybrid_davinci_model.cc"
"hybrid/model/*.cc"
"hybrid/node_executor/aicore/*.cc"
"hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"hybrid/node_executor/aicpu/aicpu_node_executor.cc"
"hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"hybrid/node_executor/hostcpu/ge_local_node_executor.cc"
@@ -246,6 +248,10 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/partition/engine_place.cc"
"graph/partition/graph_partition.cc"
"graph/passes/*.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"host_kernels/add_kernel.cc"
"host_kernels/broadcast_args_kernel.cc"
"host_kernels/broadcast_gradient_args_kernel.cc"
@@ -282,11 +288,8 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"host_kernels/transdata_kernel.cc"
"host_kernels/transpose_kernel.cc"
"host_kernels/unpack_kernel.cc"
"host_kernels/unsqueeze_kernel.cc"
"hybrid/hybrid_davinci_model_stub.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"init/gelib.cc"
"ir_build/atc_ir_common.cc"
"ir_build/ge_ir_build.cc"


+ 26
- 3
src/ge/client/ge_api.cc View File

@@ -29,6 +29,7 @@
#include "graph/utils/type_utils.h"
#include "graph/manager/util/rt_context_util.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"

using domi::GetContext;
using domi::OpRegistry;
@@ -132,6 +133,9 @@ Status GEInitialize(const std::map<string, string> &options) {
}
GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid");

GE_TIMESTAMP_START(InitPreparation);
TBEPluginManager::Instance().InitPreparation(options);
GE_TIMESTAMP_END(InitPreparation, "GEInitialize::InitPreparation");
// call Initialize
GELOGT(TRACE_RUNNING, "Initializing environment");
GE_TIMESTAMP_START(GELibInitialize);
@@ -178,6 +182,10 @@ Status GEFinalize() {
ret = middle_ret;
}
}
middle_ret = TBEPluginManager::Instance().Finalize();
if (middle_ret != SUCCESS) {
ret = middle_ret;
}

if (kGeInitialized && ret == SUCCESS) {
// Unified destruct rt_context
@@ -262,10 +270,10 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
}

Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, sessinon_id: %lu.", graph_id, sessionId_);
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Sesson.");
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
return FAILED;
}
GELOGD("Adding graph to session");
@@ -340,7 +348,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) {
GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i));
break;
default:
GELOGI("Output datatype %s is not support print.", TypeUtils::DataTypeToSerialString(data_type).c_str());
GELOGI("Output datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str());
return;
}
}
@@ -378,6 +386,21 @@ Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc
return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
}

Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
return FAILED;
}
GELOGT(TRACE_RUNNING, "Building Graph");
Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs);
if (ret != SUCCESS) {
GELOGE(ret, "Session BuildGraph failed");
return FAILED;
}
return SUCCESS;
}

Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
RunAsyncCallback callback) {
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();


+ 111
- 0
src/ge/client/module.mk View File

@@ -0,0 +1,111 @@

# Android.mk build script for libge_client. The module is declared twice with
# the same name: once for the host toolchain (BUILD_HOST_SHARED_LIBRARY) and
# once for the device toolchain (BUILD_SHARED_LIBRARY).
LOCAL_PATH := $(call my-dir)

# Sources shared by the host and device variants.
COMMON_LOCAL_SRC_FILES := \
proto/ge_api.proto \
ge_api.cc \


# Include/proto paths for the host variant.
COMMON_LOCAL_C_INCLUDES := \
proto/ge_ir.proto \
proto/task.proto \
proto/om.proto \
proto/insert_op.proto \
$(LOCAL_PATH) ./ \
$(LOCAL_PATH)/../ \
$(LOCAL_PATH)/../../ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/common \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/graph \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

# Include/proto paths for the device variant.
# NOTE(review): identical to COMMON_LOCAL_C_INCLUDES except for the relative
# order of inc/common and inc/framework — confirm whether this is intentional.
DEVICE_LOCAL_C_INCLUDES := \
proto/ge_ir.proto \
proto/task.proto \
proto/om.proto \
proto/insert_op.proto \
$(LOCAL_PATH) ./ \
$(LOCAL_PATH)/../ \
$(LOCAL_PATH)/../../ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/common \
$(TOPDIR)inc/graph \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

#compiler for host infer
include $(CLEAR_VARS)

LOCAL_MODULE := libge_client
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1
# Debug build: no optimization, keep symbols.
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_compiler \
libge_common \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \

include $(BUILD_HOST_SHARED_LIBRARY)

#compiler for device
include $(CLEAR_VARS)

LOCAL_MODULE := libge_client
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1
LOCAL_MODULE_CLASS := SHARED_LIBRARIES

LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libruntime \
libge_compiler \
libge_common \


LOCAL_LDFLAGS := -lrt -ldl
LOCAL_CFLAGS += \
-Wall

include $(BUILD_SHARED_LIBRARY)

+ 1
- 2
src/ge/common/auth/file_saver.cc View File

@@ -40,9 +40,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) {
}

char real_path[PATH_MAX] = {0};
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_path.length() >= PATH_MAX, return FAILED, "File path is longer than PATH_MAX!");
GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr,
GELOGI("File %s is not exit, it will be created.", file_path.c_str()));
GELOGI("File %s is not exist, it will be created.", file_path.c_str()));
// Open file
mode_t mode = S_IRUSR | S_IWUSR;
fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode);


+ 25
- 19
src/ge/common/ge/plugin_manager.cc View File

@@ -50,13 +50,13 @@ PluginManager::~PluginManager() { ClearHandles_(); }
string PluginManager::GetPath() {
Dl_info dl_info;
if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) {
GELOGW("Failed to read so_path!");
GELOGW("Failed to read the shared library file path!");
return string();
} else {
std::string so_path = dl_info.dli_fname;
char path[PATH_MAX] = {0};
if (so_path.length() >= PATH_MAX) {
GELOGW("File path is too long!");
GELOGW("The shared library file path is too long!");
return string();
}
if (realpath(so_path.c_str(), path) == nullptr) {
@@ -93,11 +93,15 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
std::vector<std::string> path_vec;
SplitPath(path, path_vec);
for (const auto &single_path : path_vec) {
GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, "File path is too long!");
GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX,
GELOGE(GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!");
continue);
// load break when number of loaded so reach maximum
if (num_of_loaded_so >= kMaxNumOfSo) {
GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo);
GELOGW(
"The number of dynamic libraries loaded exceeds the kMaxNumOfSo,"
" and only the first %d shared libraries will be loaded.",
kMaxNumOfSo);
break;
}

@@ -110,11 +114,11 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec

int64_t file_size = 0;
if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) {
GELOGW("Failed to validate so %s", file_path_dlopen.c_str());
GELOGW("Failed to validate the shared library: %s", file_path_dlopen.c_str());
continue;
}

GELOGI("dlopen so path name: %s. ", file_path_dlopen.c_str());
GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str());

// load continue when dlopen is failed
auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL);
@@ -128,14 +132,14 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
for (const auto &func_name : func_check_list) {
auto real_fn = (void (*)())dlsym(handle, func_name.c_str());
if (real_fn == nullptr) {
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not exist!", func_name.c_str(),
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(),
func_name.c_str());
is_valid = false;
break;
}
}
if (!is_valid) {
GE_LOGE_IF(dlclose(handle), "Failed to dlclose ret");
GE_LOGE_IF(dlclose(handle), "Failed to dlclose.");
continue;
}

@@ -146,13 +150,13 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
num_of_loaded_so++;
}

GELOGI("load so total num %u", num_of_loaded_so);
GELOGI("The total number of shared libraries loaded: %u", num_of_loaded_so);
for (auto name : so_list_) {
GELOGI("load %s successfully", name.c_str());
GELOGI("load shared library %s successfully", name.c_str());
}

if (num_of_loaded_so == 0) {
GELOGW("Failed to find any valid so in path %s!", path.c_str());
GELOGW("No loadable shared library found in the path: %s", path.c_str());
return SUCCESS;
}

@@ -163,7 +167,7 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded
// read file size
struct stat stat_buf;
if (stat(file_path.c_str(), &stat_buf) != 0) {
GELOGW("%s check fail.", file_path.c_str());
GELOGW("The shared library file check failed: %s", file_path.c_str());
return FAILED;
}

@@ -178,8 +182,8 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded
// load continue if the total size of so reaches maximum when it is loaded
if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) {
GELOGW(
"%s is skipped because the size of loaded so reaches maximum if it is load! "
"(size: %ldB, size of loaded so: %ldB, maximum: %dB)",
"%s is skipped because the size of loaded share library reaches maximum if it is loaded! "
"(size: %ldB, size of loaded share library: %ldB, maximum: %dB)",
file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo);
return FAILED;
}
@@ -227,7 +231,10 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_

// load break when number of loaded so reach maximum
if (num_of_loaded_so >= kMaxNumOfSo) {
GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo);
GELOGW(
"The number of dynamic libraries loaded exceeds the kMaxNumOfSo,"
" and only the first %d shared libraries will be loaded.",
kMaxNumOfSo);
break;
}

@@ -240,7 +247,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_

int64_t file_size = 0;
if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) {
GELOGW("Failed to validate so %s", canonical_path_str.c_str());
GELOGW("Failed to validate the shared library: %s", canonical_path_str.c_str());
continue;
}

@@ -266,8 +273,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
}
}
if (!is_valid) {
GE_LOGE_IF(dlclose(handle), "Dlclose ret fail");
GELOGW("Dlclose ret fail!");
GE_LOGE_IF(dlclose(handle), "Failed to dlclose.");
continue;
}

@@ -279,7 +285,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
}
closedir(dir);
if (num_of_loaded_so == 0) {
GELOGW("Failed to find any valid so under %s!", path.c_str());
GELOGW("No loadable shared library found in the path: %s", path.c_str());
return SUCCESS;
}



+ 293
- 0
src/ge/common/ge/tbe_plugin_manager.cc View File

@@ -0,0 +1,293 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/ge/tbe_plugin_manager.h"

#include <dirent.h>
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>

#include "common/ge/ge_util.h"
#include "framework/common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/engine/dnnengine.h"
#include "framework/omg/omg_inner_types.h"
#include "external/ge/ge_api_types.h"
#include "register/op_registry.h"
#include "graph/opsproto_manager.h"
#include "graph/utils/type_utils.h"

namespace ge {
// Options captured at InitPreparation() time; read by GetCustomOpPath()
// (e.g. the FRAMEWORK_TYPE entry selects the built-in plugin subdirectory).
std::map<string, string> TBEPluginManager::options_ = {};

// Returns the process-wide TBEPluginManager singleton.
// Meyers-singleton local static: initialization is thread-safe since C++11.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginManager::Instance() {
  static TBEPluginManager singleton;
  return singleton;
}

// Closes every dlopen() handle recorded in handles_vec_ and empties the list.
// Returns FAILED if any dlclose() call reports an error, SUCCESS otherwise;
// all handles are attempted regardless of individual failures.
Status TBEPluginManager::ClearHandles_() {
  Status result = SUCCESS;
  for (auto &plugin_handle : handles_vec_) {
    if (dlclose(plugin_handle) == 0) {
      continue;
    }
    result = FAILED;
    GELOGW("Failed to close handle: %s", dlerror());
  }
  handles_vec_.clear();
  return result;
}

// Releases all plugin shared-library handles held by the manager.
// Thin public wrapper over ClearHandles_(); propagates its status.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finalize() { return ClearHandles_(); }

// Resolves the directory containing the shared library this code is linked
// into, via dladdr() on one of our own symbols.
// Returns the canonical directory path including the trailing '/', or an
// empty string if the lookup, length check, or realpath() resolution fails.
string TBEPluginManager::GetPath() {
  Dl_info dl_info;
  if (dladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) == 0) {
    GELOGW("Failed to read so path!");
    return string();
  }
  string so_path = dl_info.dli_fname;
  if (so_path.length() >= PATH_MAX) {
    GELOGW("File path is too long!");
    return string();
  }
  char resolved[PATH_MAX] = {0};
  if (realpath(so_path.c_str(), resolved) == nullptr) {
    GELOGW("Failed to get realpath of %s", so_path.c_str());
    return string();
  }
  so_path = resolved;
  // Strip the file name, keeping everything up to and including the last '/'.
  return so_path.substr(0, so_path.rfind('/') + 1);
}

// Buckets one ".so" path by suffix:
//  - "*lib_caffe_parser.so"      -> caffe_parser_path (out)
//  - "*_aicpu.so" / "*_online.so" -> aicpu run-path list in the omg context
//  - anything else               -> file_list (generic plugin libraries)
void TBEPluginManager::ProcessSoFullName(vector<string> &file_list, string &caffe_parser_path, string &full_name,
                                         const string &caffe_parser_so_suff, const string &aicpu_so_suff,
                                         const string &aicpu_host_so_suff) {
  // True when full_name ends with the given suffix.
  auto has_suffix = [&full_name](const string &suffix) {
    return full_name.size() >= suffix.size() &&
           full_name.compare(full_name.size() - suffix.size(), suffix.size(), suffix) == 0;
  };
  if (has_suffix(caffe_parser_so_suff)) {
    caffe_parser_path = full_name;
  } else if (has_suffix(aicpu_so_suff) || has_suffix(aicpu_host_so_suff)) {
    // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage.
    domi::GetContext().aicpu_op_run_paths.push_back(full_name);
  } else {
    // Save parser so path into file_list vector
    file_list.push_back(full_name);
  }
}

// Recursively scans `path` for shared libraries.
// Each directory entry ending in ".so" is dispatched to ProcessSoFullName()
// (which buckets it as caffe parser / aicpu / generic plugin); any other
// entry is treated as a subdirectory and recursed into (non-directories are
// then rejected by the S_ISDIR check below).
// @param path               directory to scan; resolved via RealPath() first
// @param file_list          out: generic plugin .so paths found
// @param caffe_parser_path  out: path of lib_caffe_parser.so, if found
void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path) {
  // Path, change to absolute path
  string real_path = RealPath(path.c_str());
  // Plugin path does not exist
  if (real_path.empty()) {
    GELOGW("RealPath is empty.");
    return;
  }
  // Only directories are scanned further.
  struct stat stat_buf;
  if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) {
    GELOGW("%s is not a dir.", real_path.c_str());
    return;
  }
  struct dirent *dent(0);
  DIR *dir = opendir(real_path.c_str());
  // Plugin path does not exist
  if (dir == nullptr) {
    GELOGW("Open directory %s failed.", real_path.c_str());
    return;
  }

  while ((dent = readdir(dir)) != nullptr) {
    // Skip the self/parent pseudo-entries to avoid infinite recursion.
    if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue;
    string name = dent->d_name;
    string full_name = real_path + "/" + name;
    const string so_suff = ".so";
    const string caffe_parser_so_suff = "lib_caffe_parser.so";
    const string aicpu_so_suff = "_aicpu.so";
    const string aicpu_host_so_suff = "_online.so";
    // Entries whose *name* ends in ".so" are classified; everything else is
    // assumed to be a subdirectory and descended into.
    if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) {
      ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff,
                        aicpu_host_so_suff);
    } else {
      FindParserSo(full_name, file_list, caffe_parser_path);
    }
  }
  closedir(dir);
}

void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) {
// Support to split multiple so directories by ":"
vector<string> v_path = StringUtils::Split(path, ':');
for (size_t i = 0; i < v_path.size(); ++i) {
FindParserSo(v_path[i], file_list, caffe_parser_path);
GELOGI("CustomOpLib full name = %s", v_path[i].c_str());
}
}

// Builds the search path for custom op plugin libraries.
// The framework flavor (default "tensorflow") comes from the FRAMEWORK_TYPE
// entry of options_ and selects the built-in subdirectory. The base path is
// $ASCEND_OPP_PATH when set; otherwise it is derived from the location of
// the shared library this code lives in (two directory levels up + "ops/").
// @param customop_path  out: "<custom dir>/:<built-in dir>/<fmk_type>"
void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
  GELOGI("Enter get custom op path schedule");
  std::string fmk_type;
  domi::FrameworkType type = domi::TENSORFLOW;  // default when FRAMEWORK_TYPE is absent
  auto it = options_.find(FRAMEWORK_TYPE);
  if (it != options_.end()) {
    // The option value is the framework enum serialized as a decimal string.
    type = static_cast<domi::FrameworkType>(std::strtol(it->second.c_str(), nullptr, 10));
  }
  fmk_type = ge::TypeUtils::FmkTypeToSerialString(type);
  GELOGI("Framework type is %s.", fmk_type.c_str());

  const char *path_env = std::getenv("ASCEND_OPP_PATH");
  if (path_env != nullptr) {
    std::string path = path_env;
    customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type);
    GELOGI("Get custom so path from env : %s", path_env);
    return;
  }
  // Env var not set: derive the opp root from this library's own directory,
  // stripping the last two path components before appending "ops/...".
  std::string path_base = GetPath();
  GELOGI("path_base is %s", path_base.c_str());
  path_base = path_base.substr(0, path_base.rfind('/'));
  path_base = path_base.substr(0, path_base.rfind('/') + 1);
  customop_path = (path_base + "ops/framework/custom" + "/:") + (path_base + "ops/framework/built-in/" + fmk_type);
  return;
}

void TBEPluginManager::LoadCustomOpLib() {
LoadPluginSo();

std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
for (OpRegistrationData reg_data : registration_datas) {
bool ret = CheckRegisterStatus(reg_data);
if (ret) {
GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(),
static_cast<uint32_t>(reg_data.GetImplyType()));
domi::OpRegistry::Instance()->Register(reg_data);
}
}
}

// Discovers and dlopen()s every plugin shared library on the custom op path.
// Successfully opened, not-yet-seen handles are recorded in handles_vec_ so
// Finalize()/ClearHandles_() can dlclose() them later. All failures are
// logged as warnings only; this function itself never fails.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() {
  vector<string> file_list;
  string caffe_parser_path;
  std::string plugin_path;
  GetCustomOpPath(plugin_path);

  // Whether there are files in the plugin so path
  GetPluginSoFileList(plugin_path, file_list, caffe_parser_path);

  // No file
  if (file_list.empty()) {
    // Print log
    GELOGW("Can not find any plugin file in plugin_path: %s", plugin_path.c_str());
  }

  GELOGW("The shared library will not be checked. Please ensure that the source of the shared library is trusted.");

  // Load other so files except lib_caffe_parser.so in the plugin so path
  for (auto elem : file_list) {
    // `elem` is a mutable copy on purpose: Trim() modifies it in place.
    StringUtils::Trim(elem);

    // RTLD_NODELETE keeps the library mapped even if the handle is closed.
    void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
    if (handle == nullptr) {
      GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror());
    } else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) {
      // Close dl when the program exist, not close here
      GELOGI("Plugin load %s success.", elem.c_str());
      handles_vec_.push_back(handle);
    } else {
      // dlopen returned an already-recorded handle: library was loaded before.
      GELOGI("Plugin so has already been loaded, no need to load again.");
    }
  }
}

// Decides whether `reg_data` should (re-)register its op type.
// Returns false if ANY of the record's original op types is already
// registered (imply type != BUILDIN) with an implementation that should be
// kept: CCE when PARSER_PRIORITY=cce, TVM otherwise. Once set, the false
// result is sticky for the remaining op types. Returns true otherwise.
bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData &reg_data) {
  bool ret = true;
  // `static`: the environment is read once per process and cached.
  static char *parser_priority = std::getenv("PARSER_PRIORITY");
  static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce";
  auto ori_optype_set = reg_data.GetOriginOpTypeSet();
  for (const auto &op_type : ori_optype_set) {
    domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type);
    GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str());
    if (imply_type != domi::ImplyType::BUILDIN) {
      // Already registered: keep the existing entry unless the new record
      // carries the preferred implementation (CCE or TVM, per keep_cce).
      if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) ||
          (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) {
        GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(),
               reg_data.GetOmOptype().c_str());
        ret = false;
      } else {
        GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str());
      }
    } else {
      GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(),
             reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType()));
    }
  }
  return ret;
}

/// @brief Verify that every op registered with ImplyType::CUSTOM has a
///        matching aicpu shared object (lib<op_type>_aicpu.so) among the
///        configured aicpu run paths.
/// @return SUCCESS when all custom ops are covered, FAILED on the first miss.
Status TBEPluginManager::CheckCustomAiCpuOpLib() {
  std::vector<std::string> vec_op_type;
  domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM);

  // True when |path| ends with |suffix| (plain byte-wise suffix match).
  const auto ends_with = [](const std::string &path, const std::string &suffix) {
    return path.size() >= suffix.size() &&
           path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0;
  };

  for (const std::string &op_type : vec_op_type) {
    const std::string ai_cpu_so_name = "lib" + op_type + "_aicpu.so";
    bool aicpu_so_exist = false;
    for (const std::string &bin_file_path : domi::GetContext().aicpu_op_run_paths) {
      if (ends_with(bin_file_path, ai_cpu_so_name)) {
        aicpu_so_exist = true;
        break;
      }
    }
    if (!aicpu_so_exist) {
      GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}

/// @brief Merge framework options into the shared option table, load the TBE
///        plugin libraries and verify the custom aicpu shared objects.
/// @param options key/value options merged into options_ (existing keys win).
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation(
    const std::map<string, string> &options) {
  options_.insert(options.begin(), options.end());
  // Load TBE plugin
  TBEPluginManager::Instance().LoadCustomOpLib();
  const Status check_status = CheckCustomAiCpuOpLib();
  if (check_status != SUCCESS) {
    GELOGE(check_status, "Check custom aicpu run so failed!");
  }
}
} // namespace ge

+ 73
- 0
src/ge/common/ge/tbe_plugin_manager.h View File

@@ -0,0 +1,73 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_
#define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_

#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <typeinfo>
#include <vector>

#include "external/ge/ge_api_error_codes.h"
#include "external/register/register.h"

namespace ge {
using SoHandlesVec = std::vector<void *>;
using std::function;
using std::map;
using std::string;
using std::vector;

// Singleton that discovers and dlopen()s TBE/aicpu plugin shared objects,
// drives custom-op registration, and keeps the option table the loaders read.
// (Declaration only; behavior claims below are taken from the .cc side.)
class TBEPluginManager {
public:
// Tears the manager down; presumably releases the dlopen handles recorded in
// handles_vec_ via ClearHandles_ — TODO confirm against the implementation.
Status Finalize();

// Get TBEPluginManager singleton instance
static TBEPluginManager &Instance();

// NOTE(review): returns some plugin-related path — exact semantics not
// visible from this declaration; confirm against the .cc implementation.
static string GetPath();

// Merges |options| into options_, loads the plugin libraries and validates
// the custom aicpu shared objects.
static void InitPreparation(const std::map<string, string> &options);

// dlopen()s every plugin .so found under the custom-op path and records each
// new handle in handles_vec_.
void LoadPluginSo();

private:
TBEPluginManager() = default;
~TBEPluginManager() = default;
// Closes/clears the recorded dlopen handles — TODO confirm.
Status ClearHandles_();

// Classifies a discovered .so full name into the caffe-parser path, the
// plugin file list, or the aicpu suffixed categories.
static void ProcessSoFullName(vector<string> &file_list, string &caffe_parser_path, string &full_name,
const string &caffe_parser_so_suff, const string &aicpu_so_suff,
const string &aicpu_host_so_suff);
// Scans |path| for parser/plugin shared objects.
static void FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path);
// Fills |file_list| (and |caffe_parser_path|) with the .so files under |path|.
static void GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path);
// Builds the custom-op search path (framework custom + built-in dirs).
static void GetCustomOpPath(std::string &customop_path);
// Loads plugin .so files then registers op types that pass CheckRegisterStatus.
void LoadCustomOpLib();
// Verifies every ImplyType::CUSTOM op has a matching lib<op>_aicpu.so.
static Status CheckCustomAiCpuOpLib();
// Decides whether |reg_data| should be (re-)registered.
static bool CheckRegisterStatus(const OpRegistrationData &reg_data);

// dlopen handles kept open for the process lifetime.
SoHandlesVec handles_vec_;
// Framework options merged in by InitPreparation.
static std::map<string, string> options_;
};
} // namespace ge

#endif // GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_

+ 241
- 0
src/ge/common/ge_common.mk View File

@@ -0,0 +1,241 @@
LOCAL_PATH := $(call my-dir)

GE_COMMON_LOCAL_SRC_FILES := \
context/ctx.cc \
model_saver.cc \
ge/datatype_util.cc \
helper/om_file_helper.cc \
helper/model_helper.cc \
../model/ge_model.cc \
auth/file_saver.cc \
fp16_t.cc \
math/fp16_math.cc \
debug/memory_dumper.cc \
formats/utils/formats_trans_utils.cc \
formats/format_transfers/datatype_transfer.cc \
formats/format_transfers/format_transfer_transpose.cc \
formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
formats/format_transfers/format_transfer_fractal_z.cc \
formats/format_transfers/format_transfer_fractal_nz.cc \
formats/format_transfers/format_transfer_fractal_zz.cc \
formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
formats/format_transfers/format_transfer_fracz_nchw.cc \
formats/format_transfers/format_transfer_fracz_nhwc.cc \
formats/format_transfers/format_transfer_fracz_hwcn.cc \
formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
formats/format_transfers/format_transfer_nchw_fz_c04.cc \
formats/formats.cc \
ge_format_util.cc \
fmk_error_codes.cc \
util.cc \
properties_manager.cc \
types.cc\
model_parser/base.cc \
tbe_kernel_store.cc \
op/attr_value_util.cc \
op/ge_op_utils.cc \
thread_pool.cc \
ge/tbe_plugin_manager.cc \

GE_COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/ge_ir.proto \
proto/task.proto \
proto/insert_op.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/function.proto \
proto/tensorflow/versions.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/types.proto \
proto/tensorflow/resource_handle.proto \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/common/util \
$(TOPDIR)libc_sec/include \
$(TOPDIR)third_party/json/include \
$(TOPDIR)third_party/protobuf/include \
$(TOPDIR)third_party/openssl/include/x86/include \
$(TOPDIR)framework/domi \
$(TOPDIR)framework/domi/common \
$(TOPDIR)framework/domi/common/op

#compile host libge_common
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -fvisibility=hidden -DHOST_VISIBILITY
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libc_sec \
libslog \
libmmpa \
libgraph \
libregister \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_SHARED_LIBRARY)

#compile device libge_common
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -fvisibility=hidden -DDEV_VISIBILITY
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libc_sec \
libslog \
libmmpa \
libgraph \
libregister \
liberror_manager \

ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS := -lrt -ldl
endif

include $(BUILD_SHARED_LIBRARY)

#compile host libge_common static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libregister \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_STATIC_LIBRARY)

#compile device libge_common static_lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libregister \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_STATIC_LIBRARY)

+ 3
- 3
src/ge/common/helper/model_cache_helper.cc View File

@@ -178,7 +178,7 @@ bool ModelCacheHelper::IsModelCacheHit() const {
return false;
}
if (!IsVarManagerSameAsCache(var_manager_json)) {
GELOGI("Graph id[%u] cache miss: the VarManager dos not match the cache info.", graph_id_);
GELOGI("Graph id[%u] cache miss: the VarManager does not match the cache info.", graph_id_);
return false;
}
GELOGI("Graph id[%u] cache hit.", graph_id_);
@@ -563,7 +563,7 @@ Status ModelCacheHelper::GetCacheInfo(CacheInfo &cache_info) const {
cache_info.graph_hash = cache_json[kGraphHash];
Json nodes_hash_json = cache_json[kNodeHash];
if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) {
GELOGW("Nodes hash in cache be null or array.");
GELOGW("Nodes hash in cache should be null or array.");
return FAILED;
}
for (const auto &iter : nodes_hash_json) {
@@ -1670,7 +1670,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
ModelData model_data;
ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
if (ret != SUCCESS) {
GELOGW("LoadOmModelFromCache: Load model from file fialed. ret = %u", ret);
GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
return ret;
}



+ 1
- 1
src/ge/common/helper/model_helper.cc View File

@@ -144,7 +144,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(),
platform_version.size() + 1);
if (err != EOK) {
GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while while allocating memory for platform_version");
GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while allocating memory for platform_version.");
return MEMALLOC_FAILED;
}
string version = reinterpret_cast<char *>(model_header.platform_version);


+ 6
- 6
src/ge/common/helper/om_file_helper.cc View File

@@ -52,7 +52,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type,
ModelPartition &partition) {
if (!is_inited_) {
GELOGE(PARAM_INVALID, "OmFileLoadHelper not Inited!");
GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!");
return PARAM_INVALID;
}

@@ -67,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod

if (!found) {
if (type != ModelPartitionType::TBE_KERNELS) {
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas", static_cast<int>(type));
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type));
return FAILED;
}
}
@@ -77,7 +77,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod
Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {
// Parameter validity check
if (model.model_data == nullptr) {
GELOGE(PARAM_INVALID, "Model_data must not be null");
GELOGE(PARAM_INVALID, "Model_data must not be null!");
return PARAM_INVALID;
}

@@ -103,7 +103,7 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {

Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size) {
if (model_data == nullptr) {
GELOGE(PARAM_INVALID, "Param model_data must not be null");
GELOGE(PARAM_INVALID, "Param model_data must not be null!");
return PARAM_INVALID;
}
// Init partition table
@@ -131,7 +131,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
context_.partition_datas_.push_back(partition);

if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) {
GELOGE(PARAM_INVALID, "the current need partition sizes %zu greater than the model data size %u ",
GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.",
partition.size + mem_offset, model_data_size);
return PARAM_INVALID;
}
@@ -199,7 +199,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat

ModelPartitionTable *partition_table = GetPartitionTable();
if (partition_table == nullptr) {
GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile exe failed: partition_table is NULL");
GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL.");
return ge::GE_GRAPH_SAVE_FAILED;
}
uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);


+ 8
- 2
src/ge/common/model_saver.cc View File

@@ -26,6 +26,7 @@
#include "framework/common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "common/util/error_manager/error_manager.h"

namespace ge {
const uint32_t kInteval = 2;
@@ -41,10 +42,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
try {
model_str = model.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Transfer json to string failed, reason: %s.", e.what());
ErrorManager::GetInstance().ATCReportErrMessage("E19007", {"exception"}, {e.what()});
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
return FAILED;
} catch (...) {
GELOGE(FAILED, "Transfer json to string failed.");
ErrorManager::GetInstance().ATCReportErrMessage("E19008");
GELOGE(FAILED, "Failed to convert JSON to string.");
return FAILED;
}

@@ -57,6 +60,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
mode_t mode = S_IRUSR | S_IWUSR;
int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode);
if (fd == EN_ERROR || fd == EN_INVALID_PARAM) {
ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"filepath", "errMsg"}, {file_path, strerror(errno)});
GELOGE(FAILED, "Open file failed. file path : %s, %s", file_path, strerror(errno));
return FAILED;
}
@@ -65,6 +69,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
// Write data to file
mmSsize_t mmpa_ret = mmWrite(fd, const_cast<void *>((const void *)model_char), len);
if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) {
ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"mmpa_ret", "errMsg"},
{std::to_string(mmpa_ret), strerror(errno)});
// Need to both print the error info of mmWrite and mmClose, so return ret after mmClose
GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno));
ret = FAILED;


+ 3
- 0
src/ge/common/module.mk View File

@@ -0,0 +1,3 @@
LOCAL_PATH := $(call my-dir)

include $(LOCAL_PATH)/ge_common.mk

+ 2
- 0
src/ge/common/op/ge_op_utils.cc View File

@@ -114,6 +114,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::AippOpParams *aipp_params) {
GE_CHECK_NOTNULL(aipp_params);
AIPP_CONVERT_FORMAT_EX(aipp_mode, domi::AippOpParams::AippMode, int32_t, GeAttrValue::INT);
AIPP_CONVERT_INT(related_input_rank);

if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) {
AIPP_CONVERT_INT(max_src_image_size);
@@ -149,6 +150,7 @@ OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::Aipp
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_0, true);
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_1, true);
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_2, true);
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_3, true);

const bool csc_switch = aipp_params->csc_switch();
AIPP_CONVERT_LIST_INT(matrix_r0c0, csc_switch);


+ 16
- 8
src/ge/common/profiling/profiling_manager.cc View File

@@ -478,24 +478,32 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get device_id failed, current device_id:%d", device_id);
GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id);
return;
}
GELOGI("current device_id:%d", device_id);
GELOGI("current logic_device_id:%d", logic_device_id);

auto ret = std::find(device_id_.begin(), device_id_.end(), device_id);
uint32_t phy_device_id = 0;
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return;
}
GELOGI("current phy_device_id:%d", phy_device_id);

auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid device_id failed, profiling report failed.");
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
return;
}

GELOGI("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(task_desc_info, device_id);
ProfilingTaskDescInfo(task_desc_info, phy_device_id);
GELOGI("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(compute_graph_desc_info, device_id);
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id);
GELOGI("Report profiling data for GE end.");
#endif
}


+ 1
- 0
src/ge/common/types.cc View File

@@ -116,6 +116,7 @@ REGISTER_OPTYPE_DEFINE(SLICE, "Slice");
REGISTER_OPTYPE_DEFINE(SLICED, "SliceD");
REGISTER_OPTYPE_DEFINE(FLOORDIV, "FloorDiv");
REGISTER_OPTYPE_DEFINE(SQUEEZE, "Squeeze");
REGISTER_OPTYPE_DEFINE(UNSQUEEZE, "Unsqueeze");
REGISTER_OPTYPE_DEFINE(STRIDEDSLICE, "StridedSlice");
REGISTER_OPTYPE_DEFINE(RANGE, "Range");
REGISTER_OPTYPE_DEFINE(RPNPROPOSALS, "RpnProposals");


+ 11
- 13
src/ge/common/util.cc View File

@@ -67,9 +67,8 @@ static bool ReadProtoFromCodedInputStream(CodedInputStream &coded_stream, Messag
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr),
ErrorManager::GetInstance().ATCReportErrMessage("E19001");
return false, "Input parameter file or proto is nullptr!");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr), return false,
"Input parameter file or proto is nullptr!");

std::string real_path = RealPath(file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file);
@@ -119,8 +118,9 @@ long GetFileLength(const std::string &input_file) {
ErrorManager::GetInstance().ATCReportErrMessage("E10037", {"filepath"}, {input_file});
return -1, "Open file[%s] failed", input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E10038");
return -1, "File[%s] length is 0, not valid.", input_file.c_str());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0),
ErrorManager::GetInstance().ATCReportErrMessage("E10038", {"filepath"}, {input_file});
return -1, "File[%s] size is 0, not valid.", input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage(
@@ -207,7 +207,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::
if (dir_path_len >= PATH_MAX) {
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"},
{directory_path, std::to_string(PATH_MAX)});
GELOGW("Path[%s] len is too long, it must smaller than %d", directory_path.c_str(), PATH_MAX);
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX);
return -1;
}
char tmp_dir_path[PATH_MAX] = {0};
@@ -338,14 +338,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
strlen(path) >= PATH_MAX,
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)});
return "", "Path[%s] len is too long, it must smaller than %d", path, PATH_MAX);
return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX);
// PATH_MAX is the system's own macro, indicating the maximum file path length supported
std::shared_ptr<char> resolved_path(new (std::nothrow) char[PATH_MAX](), std::default_delete<char[]>());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
resolved_path == nullptr,
ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"filepath", "size"}, {path, std::to_string(PATH_MAX)});
return "", "Path[%s] new string object len[%d] failed.", path, PATH_MAX);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(resolved_path == nullptr, return "", "Path[%s] new string object len[%d] failed.",
path, PATH_MAX);

// Nullptr is returned when the path does not exist or there is no permission
// Return absolute path when path is accessible
@@ -384,7 +382,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const
!ValidateStr(real_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path});
return false,
"Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"and chinese character.",
atc_param.c_str(), real_path.c_str());

@@ -420,7 +418,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const
!ValidateStr(real_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path});
return false,
"Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"and chinese character.",
atc_param.c_str(), real_path.c_str());



+ 2
- 2
src/ge/engine_manager/dnnengine_manager.cc View File

@@ -75,7 +75,7 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op
return status;
}

GELOGI("The number of DNNEngineObjs are %zu.", engines_map_.size());
GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size());

// Engines initialize
for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
@@ -373,7 +373,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h
GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno));
return FAILED;
} else {
GELOGW("The json file %s is not need", file_path.c_str());
GELOGW("The json file %s is not needed.", file_path.c_str());
return SUCCESS;
}
}


+ 1
- 0
src/ge/executor/CMakeLists.txt View File

@@ -30,6 +30,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"../common/profiling/profiling_manager.cc"
"../graph/execute/graph_execute.cc"
"../graph/load/graph_loader.cc"
"../graph/load/new_model_manager/aipp_utils.cc"
"../graph/load/new_model_manager/cpu_queue_schedule.cc"
"../graph/load/new_model_manager/data_dumper.cc"
"../graph/load/new_model_manager/data_inputer.cc"


+ 142
- 27
src/ge/executor/ge_executor.cc View File

@@ -38,6 +38,7 @@

namespace {
const size_t kDynamicBatchSizeVecSize = 1;
const size_t kStaticBatchInfoSize = 1;
const size_t kDynamicImageSizeVecSize = 2;
const size_t kDynamicImageSizeInputSize = 2;
const char *const kBatchLabel = "Batch_";
@@ -180,16 +181,16 @@ class ModelListenerAdapter : public ModelListener {
GeExecutor::GeExecutor() {}

Status GeExecutor::Initialize() {
GELOGI("Init ge_executor begin.");
GELOGI("Init GeExecutor begin.");
if (isInit_) {
GELOGW("Already inited, don't need to init again.");
GELOGW("Already initialized, no need to be initialized again.");
return ge::SUCCESS;
}

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
auto ret = MemManager::Instance().Initialize(mem_type);
if (ret != SUCCESS) {
GELOGE(ret, "Memory Manager init fail.");
GELOGE(ret, "Memory Manager init failed.");
return ret;
}

@@ -200,14 +201,14 @@ Status GeExecutor::Initialize() {
ProfilingManager::Instance().Init(profiling_options);

isInit_ = true;
GELOGI("Init ge_executor over.");
GELOGI("Init GeExecutor over.");
return ge::SUCCESS;
}

Status GeExecutor::Finalize() {
GELOGI("Uninit ge_executor begin.");
GELOGI("Uninit GeExecutor begin.");
if (isInit_ == false) {
GELOGW("ge_executor needs to init begin.");
GELOGW("GeExecutor has not been initialized.");
return ge::SUCCESS;
}

@@ -217,7 +218,7 @@ Status GeExecutor::Finalize() {
ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE);
}

GELOGI("Uninit ge_executor over.");
GELOGI("Uninit GeExecutor over.");
return ge::SUCCESS;
}

@@ -236,6 +237,7 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad

// Verify whether the input dynamic batch matches the model gear
std::vector<std::vector<int64_t>> batch_info;
std::vector<uint64_t> batch_num{batch_size};
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
@@ -247,6 +249,11 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad
return FAILED;
}

ret = GraphExecutor::SetDynamicSize(model_id, batch_num);
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
}
// memcpy dynamic_batch_size from host to device
if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy dynamic batch input data failed!");
@@ -270,6 +277,7 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad

// Verify whether the input dynamic resolution matches the model gear
std::vector<std::vector<int64_t>> batch_info;
std::vector<uint64_t> batch_num{image_height, image_width};
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
@@ -281,6 +289,11 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
return FAILED;
}

ret = GraphExecutor::SetDynamicSize(model_id, batch_num);
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
}
// Memcpy dynamic resolution height from host to device
if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) !=
RT_ERROR_NONE) {
@@ -298,6 +311,20 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
return SUCCESS;
}

Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
GELOGI("Begin to get current shape");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
Status ret = GraphExecutor::GetCurShape(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get current shape failed");
return FAILED;
}
return SUCCESS;
}

Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
const std::vector<kAippDynamicBatchPara> &aippBatchPara,
const kAippDynamicPara &aippParms) {
@@ -346,13 +373,13 @@ Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ge::FAILED, "fileath is invalid. please check your text file '%s'.", path.c_str());
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str());
return ge::FAILED;
}

@@ -375,7 +402,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -397,7 +424,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGI("unload model %u begin.", model_id);
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
@@ -411,7 +438,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -428,7 +455,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
std::vector<ge::TensorDesc> &output_desc) {
GELOGI("get model desc info begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -436,12 +463,11 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
std::vector<InputOutputDescInfo> output_desc_infos;
std::vector<uint32_t> input_formats;
std::vector<uint32_t> output_formats;
GELOGI("GetInputOutputDescInfo via new ome.");

Status ret =
GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats);
if (ret != domi::SUCCESS) {
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret);
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret);
return TransferDomiErrorCode(ret);
}

@@ -473,7 +499,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
GELOGI("Begin to get dynamic batch info.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -487,11 +513,49 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get AIPP configuration info for one model input
/// @param [in] model_id: id of the loaded model
/// @param [in] index: index of the model input the AIPP is attached to
/// @param [out] aipp_info: AIPP configuration of that input
/// @return execute result
///
Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  GELOGI("Begin to GetAIPPInfo.");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }
  // Delegate the lookup to the graph executor, which owns the loaded models.
  Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetAIPPInfo failed.");
    return ret;
  }
  GELOGI("GetAIPPInfo succ.");
  return SUCCESS;
}
///
/// @ingroup ge
/// @brief Get dynamic batch output shape info of a loaded model
/// @param [in] model_id: id of the loaded model
/// @param [out] dynamic_output_shape_info: output shape description strings
/// @return execute result
///
Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) {
  GELOGI("Begin to get dynamic batch output shape info");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }
  Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "Get dynamic batch output shape info failed.");
    return ret;
  }

  GELOGI("Get dynamic batch output shape info succ.");
  return SUCCESS;
}

Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<TensorDesc> &output_desc) {
GELOGI("get model desc info for zero copy begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -499,12 +563,11 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge
std::vector<InputOutputDescInfo> output_desc_infos;
std::vector<uint32_t> input_formats;
std::vector<uint32_t> output_formats;
GELOGI("GetInputOutputDescInfoForZeroCopy via new ome.");

Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos,
input_formats, output_formats);
if (ret != domi::SUCCESS) {
GELOGE(ret, "Get DescInfo For ZeroCopy failed. ret = %u", ret);
GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret);
return TransferDomiErrorCode(ret);
}

@@ -521,7 +584,7 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge
GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats);
GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats);

GELOGI("get model desc info for zero copy end.");
GELOGI("get model desc info from zero copy end.");
return ge::SUCCESS;
}

@@ -539,7 +602,7 @@ Status GeExecutor::CommandHandle(const Command &command) {
Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) {
GELOGI("Get max used memory begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -559,13 +622,13 @@ Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) {
Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) {
GELOGI("Load data from file begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ge::FAILED, "filePath is invalid. please check your text file '%s'.", path.c_str());
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str());
return ge::FAILED;
}
GELOGI("load modelData from file: %s.", path.c_str());
@@ -618,7 +681,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
const std::vector<uint32_t> &output_queue_ids) {
GELOGI("Load model with queue begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
@@ -638,7 +701,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
ge::RunModelData &run_output_data, bool async_mode) {
GELOGI("Execute model begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -674,7 +737,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) {
GELOGI("Get memory and weight size from file begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -707,7 +770,7 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size
size_t &weight_size) {
GELOGI("Get memory and weight size from data begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -741,4 +804,56 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer
// Release all single-op resources bound to the given rt stream by forwarding
// to the process-wide SingleOpManager; returns its status directly.
Status GeExecutor::ReleaseSingleOpResource(void *stream) {
  return SingleOpManager::GetInstance().ReleaseResource(stream);
}

///
/// @ingroup ge
/// @brief Get the number of dynamic batch shapes of a loaded model
/// @param [in] model_id: id of the loaded model
/// @param [out] shape_count: number of shapes; kStaticBatchInfoSize for a static-shape model
/// @return execute result
///
Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) {
  std::vector<std::vector<int64_t>> batch_info;
  Status ret = GetDynamicBatchInfo(model_id, batch_info);
  if (ret != SUCCESS) {
    // Status is unsigned: log with %u, consistent with the other executor APIs.
    GELOGE(ret, "Calc batch info size failed. ret = %u", ret);
    return ret;
  }
  // An empty batch list means the model has a single static shape.
  shape_count = batch_info.empty() ? kStaticBatchInfoSize : batch_info.size();
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get original (pre-AIPP) input info for one model input
/// @param [in] model_id: id of the loaded model
/// @param [in] index: index of the model input
/// @param [out] orig_input_info: original input description
/// @return execute result
///
Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  GELOGI("Begin to GetOrigInputInfo.");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }

  Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetOrigInputInfo failed.");
    return ret;
  }

  GELOGI("GetOrigInputInfo succ.");
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get all AIPP input/output dims for one model input
/// @param [in] model_id: id of the loaded model
/// @param [in] index: index of the model input
/// @param [out] input_dims: dims of all AIPP inputs
/// @param [out] output_dims: dims of all AIPP outputs
/// @return execute result
///
Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                             std::vector<InputOutputDims> &input_dims,
                                             std::vector<InputOutputDims> &output_dims) {
  GELOGI("Begin to GetAllAippInputOutputDims.");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }

  Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetAllAippInputOutputDims failed.");
    return ret;
  }

  GELOGI("GetAllAippInputOutputDims succ.");
  return SUCCESS;
}

} // namespace ge

+ 202
- 0
src/ge/executor/module.mk View File

@@ -0,0 +1,202 @@
LOCAL_PATH := $(call my-dir)

local_ge_executor_src_files := \
ge_executor.cc \
../common/profiling/profiling_manager.cc \
../common/ge/plugin_manager.cc \
../graph/load/graph_loader.cc \
../graph/execute/graph_execute.cc \
../omm/csa_interact.cc \
../graph/manager/graph_manager_utils.cc \
../graph/manager/graph_var_manager.cc \
../graph/manager/graph_mem_allocator.cc \
../graph/manager/graph_caching_allocator.cc \
../graph/manager/trans_var_data_utils.cc \
../graph/manager/util/debug.cc \
../model/ge_model.cc \
../model/ge_root_model.cc \
../graph/load/new_model_manager/davinci_model.cc \
../graph/load/new_model_manager/davinci_model_parser.cc \
../graph/load/new_model_manager/model_manager.cc \
../graph/load/new_model_manager/tbe_handle_store.cc \
../graph/load/new_model_manager/cpu_queue_schedule.cc \
../graph/load/new_model_manager/model_utils.cc \
../graph/load/new_model_manager/aipp_utils.cc \
../graph/load/new_model_manager/data_inputer.cc \
../graph/load/new_model_manager/data_dumper.cc \
../graph/load/new_model_manager/zero_copy_task.cc \
../graph/load/new_model_manager/task_info/task_info.cc \
../graph/load/new_model_manager/task_info/event_record_task_info.cc \
../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
../graph/load/new_model_manager/task_info/kernel_task_info.cc \
../graph/load/new_model_manager/task_info/label_set_task_info.cc \
../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
../graph/load/new_model_manager/task_info/stream_active_task_info.cc \
../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
../graph/load/output/output.cc \
../single_op/single_op_manager.cc \
../single_op/single_op_model.cc \
../single_op/single_op.cc \
../single_op/stream_resource.cc \
../single_op/task/op_task.cc \
../single_op/task/build_task_utils.cc \
../single_op/task/tbe_task_builder.cc \
../single_op/task/aicpu_task_builder.cc \
../hybrid/hybrid_davinci_model_stub.cc\

local_ge_executor_c_include := \
proto/insert_op.proto \
proto/op_mapping_info.proto \
proto/ge_ir.proto \
proto/task.proto \
proto/om.proto \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc \
$(LOCAL_PATH)/../ \
$(TOPDIR)libc_sec/include \
third_party/protobuf/include \
third_party/json/include \

local_ge_executor_shared_library := \
libprotobuf \
libc_sec \
libge_common \
libruntime \
libslog \
libmmpa \
libgraph \
libmsprof \

local_ge_executor_ldflags := -lrt -ldl \


#compile arm device dynamic lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING

LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library)
ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
endif

include $(BUILD_SHARED_LIBRARY)

#compile x86 host dynamic lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif

LOCAL_SRC_FILES := $(local_ge_executor_src_files)

LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libc_sec \
libge_common \
libruntime \
libslog \
libmmpa \
libgraph \
libmsprof \

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

include $(BUILD_HOST_SHARED_LIBRARY)

#compile for host static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif

LOCAL_SRC_FILES := $(local_ge_executor_src_files)

LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libruntime \
libslog \
libmmpa \
libmsprof \

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

include $(BUILD_HOST_STATIC_LIBRARY)

#compile for device static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif

LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libruntime \
libslog \
libmmpa \
libmsprof \

ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
endif

include $(BUILD_STATIC_LIBRARY)

+ 407
- 0
src/ge/ge_inference.mk View File

@@ -0,0 +1,407 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
graph/manager/trans_var_data_utils.cc \
omm/csa_interact.cc \
common/fp16_t.cc \
common/formats/utils/formats_trans_utils.cc \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \
common/formats/formats.cc \
common/profiling/profiling_manager.cc \
common/helper/model_cache_helper.cc \
ge_local_engine/engine/host_cpu_engine.cc \


GRAPH_MANAGER_LOCAL_SRC_FILES := \
common/ge/plugin_manager.cc\
init/gelib.cc \
session/inner_session.cc \
session/session_manager.cc \
engine_manager/dnnengine_manager.cc \
opskernel_manager/ops_kernel_manager.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_context.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/multi_batch_copy_graph.cc \
graph/execute/graph_execute.cc \
graph/load/graph_loader.cc \
graph/optimize/graph_optimize.cc \
graph/optimize/summary_optimize.cc \
graph/build/graph_builder.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/partition/dynamic_shape_partition.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
graph/manager/graph_var_manager.cc \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \

BUILER_SRC_FILES := \
ir_build/ge_ir_build.cc \
ir_build/atc_ir_common.cc \

OMG_HOST_SRC_FILES := \
model/ge_model.cc \
model/ge_root_model.cc \
graph/common/transop_util.cc \
graph/passes/pass_manager.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/pass_utils.cc \
graph/passes/base_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/permute_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/maximum_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/unsqueeze_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/cast_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/transpose_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/folding_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/save_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/merge_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/hccl_group_pass.cc \
graph/passes/switch_fusion_pass.cc \
graph/passes/switch_split_pass.cc \

OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES)


OME_HOST_SRC_FILES := \
graph/manager/model_manager/event_manager.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/manager/util/debug.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/aipp_utils.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
single_op/task/op_task.cc \
single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
single_op/single_op.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/single_op_manager.cc \
hybrid/hybrid_davinci_model_stub.cc \
# graph/load/new_model_manager/task_info/hccl_task_info.cc

OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES)

COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

NEW_OMG_HOST_SRC_FILES := \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/build/model_builder.cc \
graph/build/task_generator.cc \
graph/build/stream_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/run_context.cc \
graph/build/label_allocator.cc \
graph/label/label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/case_label_maker.cc \
graph/label/while_label_maker.cc \
graph/label/partitioned_call_label_maker.cc \

OME_HOST_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)
OMG_DEVICE_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)

DEVICE_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)libc_sec/include \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/common/util \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)ops/built-in/op_proto/inc \
$(TOPDIR)framework/domi \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

#compiler for host infer
include $(CLEAR_VARS)

LOCAL_MODULE := libge_compiler

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
# from ome_inference.mk
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(OME_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(NEW_OME_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(BUILER_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libruntime_compile \
libresource \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl


include $(BUILD_HOST_SHARED_LIBRARY)

#compiler for device
include $(CLEAR_VARS)

LOCAL_MODULE := libge_compiler
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION
LOCAL_CFLAGS += -O2
LOCAL_MODULE_CLASS := SHARED_LIBRARIES


LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(OME_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(BUILER_SRC_FILES)


LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libresource \
libruntime_compile \
libge_common \




ifeq ($(device_os),android)
LOCAL_LDFLAGS := -ldl
else
LOCAL_LDFLAGS := -lrt -ldl
endif

LOCAL_CFLAGS += \
-Wall

ifeq ($(device_os),android)
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
endif
include $(BUILD_SHARED_LIBRARY)

+ 3
- 3
src/ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -124,7 +124,7 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector<G
Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_kernel,
map<std::string, const Tensor> &named_inputs,
map<std::string, Tensor> &named_outputs) {
GELOGD("To run host cpu op: %s", op_desc->GetName().c_str());
GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str());
Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc);
auto ret = op_kernel.Compute(op, named_inputs, named_outputs);
if (ret != GRAPH_SUCCESS) {
@@ -139,7 +139,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs,
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());

GELOGD("To run node by host cpu engine. node name = %s", node->GetName().c_str());
GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str());
std::unique_ptr<HostCpuOp> op_kernel;
GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel));

@@ -151,7 +151,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs,
GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs));
GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs));

GELOGD("Ran node by host cpu engine successfully. name node = %s", node->GetName().c_str());
GELOGD("Run node by host cpu engine successfully. name node = %s", node->GetName().c_str());
outputs.swap(tmp_outputs);
return SUCCESS;
}


+ 59
- 0
src/ge/ge_local_engine/module.mk View File

@@ -0,0 +1,59 @@
LOCAL_PATH := $(call my-dir)


local_lib_src_files := engine/ge_local_engine.cc \
ops_kernel_store/ge_local_ops_kernel_info.cc \
ops_kernel_store/op/op_factory.cc \
ops_kernel_store/op/op.cc \
ops_kernel_store/op/ge_deleted_op.cc \
ops_kernel_store/op/no_op.cc \

local_lib_inc_path := proto/task.proto \
${LOCAL_PATH} \
${TOPDIR}inc \
${TOPDIR}inc/external \
${TOPDIR}inc/external/graph \
$(TOPDIR)libc_sec/include \
${TOPDIR}third_party/protobuf/include \
${TOPDIR}inc/framework \
$(TOPDIR)framework/domi \

#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
libc_sec \
libslog \
libgraph \
libregister \
libruntime

LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for atc
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
libc_sec \
libslog \
libgraph \
libregister \
libruntime_compile

LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

+ 1
- 1
src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc View File

@@ -81,7 +81,7 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) {
const string node_name = ge_node.GetName();
const string node_type = ge_node.GetType();
size_t output_size = op_desc->GetOutputsSize();
GELOGD("Calc op[%s:%s] op running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size);
GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size);

for (size_t i = 0; i < output_size; ++i) {
GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast<uint32_t>(i));


+ 1
- 1
src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc View File

@@ -24,7 +24,7 @@ namespace ge_local {
NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}

Status NoOp::Run() {
GELOGI("Node:%s type is %s, no need gen task.", name_.c_str(), type_.c_str());
GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str());
// Do nothing
return SUCCESS;
}


+ 429
- 0
src/ge/ge_runner.mk View File

@@ -0,0 +1,429 @@
LOCAL_PATH := $(call my-dir)

LIBGE_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/formats.cc \
common/formats/utils/formats_trans_utils.cc \
common/fp16_t.cc \
common/ge/plugin_manager.cc\
common/helper/model_cache_helper.cc \
common/profiling/profiling_manager.cc \
engine_manager/dnnengine_manager.cc \
ge_local_engine/engine/host_cpu_engine.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
graph/build/graph_builder.cc \
graph/build/label_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/model_builder.cc \
graph/build/run_context.cc \
graph/build/stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/task_generator.cc \
graph/common/bcast.cc \
graph/common/omg_util.cc \
graph/common/transop_util.cc \
graph/execute/graph_execute.cc \
graph/label/case_label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/label_maker.cc \
graph/label/partitioned_call_label_maker.cc \
graph/label/while_label_maker.cc \
graph/load/graph_loader.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/aipp_utils.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/hccl_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/manager/graph_context.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \
graph/manager/graph_var_manager.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/trans_var_data_utils.cc \
graph/manager/util/debug.cc \
graph/manager/util/hcom_util.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/optimize/graph_optimize.cc \
graph/optimize/optimizer/allreduce_fusion_pass.cc \
graph/optimize/summary_optimize.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/passes/addn_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/partition/dynamic_shape_partition.cc \
graph/passes/base_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/hccl_group_pass.cc \
graph/passes/switch_fusion_pass.cc \
graph/passes/switch_split_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/flow_ctrl_pass.cc \
host_kernels/transpose_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/cast_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/maximum_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/unsqueeze_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/unpack_kernel.cc \
graph/passes/folding_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/merge_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/pass_manager.cc \
graph/passes/pass_utils.cc \
graph/passes/permute_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/save_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/multi_batch_copy_graph.cc \
init/gelib.cc \
model/ge_model.cc \
model/ge_root_model.cc \
omm/csa_interact.cc \
opskernel_manager/ops_kernel_manager.cc \
session/inner_session.cc \
session/session_manager.cc \
single_op/single_op.cc \
single_op/single_op_manager.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/task/build_task_utils.cc \
single_op/task/op_task.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
hybrid/common/tensor_value.cc \
hybrid/common/npu_memory_allocator.cc \
hybrid/executor/rt_callback_manager.cc \
hybrid/executor/node_state.cc \
hybrid/executor/node_done_manager.cc \
hybrid/executor/hybrid_profiler.cc \
hybrid/executor/hybrid_model_executor.cc \
hybrid/executor/hybrid_model_async_executor.cc \
hybrid/executor/hybrid_execution_context.cc \
hybrid/executor/worker/task_compile_engine.cc \
hybrid/executor/worker/shape_inference_engine.cc \
hybrid/executor/worker/execution_engine.cc \
hybrid/model/hybrid_model.cc \
hybrid/model/hybrid_model_builder.cc \
hybrid/model/node_item.cc \
hybrid/node_executor/aicore/aicore_node_executor.cc \
hybrid/node_executor/aicore/aicore_op_task.cc \
hybrid/node_executor/aicore/aicore_task_builder.cc \
hybrid/node_executor/aicore/aicore_task_compiler.cc \
hybrid/node_executor/aicpu/aicpu_ext_info.cc \
hybrid/node_executor/aicpu/aicpu_node_executor.cc \
hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
hybrid/node_executor/hostcpu/ge_local_node_executor.cc \
hybrid/node_executor/node_executor.cc \
hybrid/node_executor/task_context.cc \
hybrid/hybrid_davinci_model.cc \
executor/ge_executor.cc \

LIBCLIENT_LOCAL_SRC_FILES := \
proto/ge_api.proto \
client/ge_api.cc \

RUNNER_LOCAL_C_INCLUDES := \
$(LOCAL_PATH) ./ \
$(LOCAL_PATH)/../ \
$(LOCAL_PATH)/../../ \
$(TOPDIR)inc \
$(TOPDIR)inc/common \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/graph \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
proto/fwk_adapter.proto \
proto/ge_ir.proto \
proto/insert_op.proto \
proto/om.proto \
proto/op_mapping_info.proto \
proto/task.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
third_party/json/include \
third_party/opencv/include \
third_party/protobuf/include \



#compiler for GeRunner
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif


LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libhccl \
libmsprof \
liberror_manager \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_SHARED_LIBRARY)


# add engine_conf.json to host
include $(CLEAR_VARS)

LOCAL_MODULE := engine_conf.json

LOCAL_SRC_FILES := engine_manager/engine_conf.json

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json
include $(BUILD_HOST_PREBUILT)

# add optimizer_priority.pbtxt to host
include $(CLEAR_VARS)

LOCAL_MODULE := optimizer_priority.pbtxt

LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt
include $(BUILD_HOST_PREBUILT)

#compiler for GeRunner static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD

LOCAL_CFLAGS += -g -O0


LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libhccl \
libmsprof \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_STATIC_LIBRARY)

#compiler for GeRunner static lib device
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD

LOCAL_CFLAGS += -g -O0

LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libhccl \
libmsprof \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_STATIC_LIBRARY)

+ 333
- 0
src/ge/ge_train.mk View File

@@ -0,0 +1,333 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
session/inner_session.cc \
session/session_manager.cc \
common/ge/plugin_manager.cc\
common/fp16_t.cc \
common/formats/utils/formats_trans_utils.cc \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/formats.cc \
init/gelib.cc \
engine_manager/dnnengine_manager.cc \
opskernel_manager/ops_kernel_manager.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_context.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/multi_batch_copy_graph.cc \
graph/execute/graph_execute.cc \
graph/load/graph_loader.cc \
graph/optimize/graph_optimize.cc \
graph/passes/folding_pass.cc \
graph/optimize/summary_optimize.cc \
graph/build/graph_builder.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/partition/dynamic_shape_partition.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
common/profiling/profiling_manager.cc \
ge_local_engine/engine/host_cpu_engine.cc \
common/helper/model_cache_helper.cc \

OMG_HOST_SRC_FILES := \
model/ge_model.cc \
model/ge_root_model.cc \
graph/common/transop_util.cc \
graph/manager/graph_var_manager.cc \
graph/manager/trans_var_data_utils.cc \
omm/csa_interact.cc \
graph/passes/pass_manager.cc \
graph/passes/pass_utils.cc \
graph/passes/base_pass.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/optimize/optimizer/allreduce_fusion_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/permute_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/maximum_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/cast_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/transpose_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/save_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/merge_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/hccl_group_pass.cc \

OME_SRC_FILES := \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/util/debug.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/manager/util/hcom_util.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/hccl_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
single_op/task/op_task.cc \
single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
single_op/single_op.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/single_op_manager.cc \
hybrid/hybrid_davinci_model_stub.cc \


COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

NEW_OMG_HOST_SRC_FILES := \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/build/model_builder.cc \
graph/build/task_generator.cc \
graph/build/stream_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/run_context.cc \
graph/build/label_allocator.cc \
graph/label/label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/case_label_maker.cc \
graph/label/while_label_maker.cc \
graph/label/partitioned_call_label_maker.cc \



#compiler for host train
include $(CLEAR_VARS)

LOCAL_MODULE := libge_train

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DDAVINCI_CLOUD -DDAVINCI_TRAIN -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DFMK_SUPPORT_DEBUG
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(OME_SRC_FILES)
LOCAL_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libhccl \
libmsprof \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_SHARED_LIBRARY)

# add engine_conf.json to host
include $(CLEAR_VARS)

LOCAL_MODULE := engine_conf.json

LOCAL_SRC_FILES := engine_manager/engine_conf.json

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json
include $(BUILD_HOST_PREBUILT)

# add optimizer_priority.pbtxt to host
include $(CLEAR_VARS)

LOCAL_MODULE := optimizer_priority.pbtxt

LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt
include $(BUILD_HOST_PREBUILT)

+ 75
- 22
src/ge/generator/ge_generator.cc View File

@@ -22,10 +22,13 @@
#include "common/util.h"
#include "framework/common/debug/ge_log.h"
#include "ge/ge_api.h"
#include "graph/ge_context.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/manager/graph_manager.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/opsproto_manager.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/type_utils.h"
#include "model/ge_model.h"
#include "init/gelib.h"

@@ -108,7 +111,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi
return FAILED;
}

static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index,
bool attr) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
@@ -122,6 +125,17 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
if (data_op == nullptr) {
return FAILED;
}
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
auto input_desc = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID);
ge::Format old_format = input_desc->GetFormat();
if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
input_desc->SetFormat(FORMAT_ND);
input_desc->SetOriginFormat(FORMAT_ND);
(void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format));
(void)AttrUtils::SetBool(data_op, "_is_single_op", true);
}

GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
@@ -139,10 +153,21 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
}

static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, const vector<GeTensor> &outputs) {
OpDescPtr op_desc = MakeShared<ge::OpDesc>(NODE_NAME_NET_OUTPUT, NETOUTPUT);
OpDescPtr op_desc = MakeShared<ge::OpDesc>(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT, NETOUTPUT);
if (op_desc == nullptr) {
return FAILED;
}
auto single_op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID);
auto output_desc = single_op_desc->MutableOutputDesc(0);
GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID);
ge::Format old_format = output_desc->GetFormat();
if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
output_desc->SetFormat(FORMAT_ND);
output_desc->SetOriginFormat(FORMAT_ND);
(void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format));
(void)AttrUtils::SetBool(op_desc, "_is_single_op", true);
}
int32_t count = 0;
for (const auto &out_desc : outputs) {
GeTensorDesc tensor = out_desc.GetTensorDesc();
@@ -187,6 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) {
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

static string GetModelNameFromFileName(const string &file_name_prefix) {
int start_position = 0;
// using output as model_name (ignore ".om")
int filename_suffixes = 3;
if (file_name_prefix.find_last_of('/') != string::npos) {
start_position += 1;
}
int end_position = file_name_prefix.length() - filename_suffixes;
string model_name = file_name_prefix.substr(start_position, end_position - start_position);
GELOGI("Get model_name from file, model_name:%s", model_name.c_str());
return model_name;
}

class GeGenerator::Impl {
public:
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models);
@@ -278,24 +316,28 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) {
}
return ret;
}
GELOGI("GenerateInfershapeGraph success.");
GELOGI("Generate infer shape graph success");
return SUCCESS;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
auto rt = rtCtxGetCurrent(&ctx);
if (rt != RT_ERROR_NONE) {
GELOGW("Current ctx is null.");
} else {
ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx);
}
GraphId graph_id;
GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
// using output as model_name (ignore ".om")
int start_position = file_name_prefix.find_last_of('/') + 1;
int end_position = file_name_prefix.length() - 3;
const string model_name = file_name_prefix.substr(start_position, end_position - start_position);
const string model_name = GetModelNameFromFileName(file_name_prefix);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!");
impl_->is_offline_ = is_offline;
Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model);
if (ret != SUCCESS) {
GELOGE(ret, "Build model failed");
GELOGE(ret, "Build model failed.");
if (impl_->graph_manager_.Finalize() != SUCCESS) {
GELOGE(FAILED, "graph_manager finalize fail.");
}
@@ -316,6 +358,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
}
return ret;
}

if (RtContextUtil::GetInstance().GetNormalModeContext() != nullptr) {
(void)rtCtxSetCurrent(RtContextUtil::GetInstance().GetNormalModeContext());
}

GELOGI("GenerateOfflineModel success.");
return SUCCESS;
}
@@ -325,11 +372,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
bool is_offline) {
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) {
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size:%zu", inputs.size(), op_desc->GetInputsSize());
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize());
return PARAM_INVALID;
}
if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) {
GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size:%zu", outputs.size(), op_desc->GetOutputsSize());
GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size: %zu", outputs.size(), op_desc->GetOutputsSize());
return PARAM_INVALID;
}

@@ -368,7 +415,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
}
} else {
for (const auto &in_desc : inputs) {
const GeTensorDesc input_desc = in_desc.GetTensorDesc();
GeTensorDesc input_desc = in_desc.GetTensorDesc();
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
arg_index++;
}
@@ -382,7 +429,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
// dump ComputeGraph.
compute_graph->Dump();
Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph);
GELOGI("ATC parser success in single op schedule.");
GELOGI("ATC parser success in single op build.");

GraphId graph_id;
GeRootModelPtr ge_root_model = nullptr;
@@ -394,7 +441,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GELOGD("The opType in op_desc_tmp is: %s", op_desc_tmp->GetType().c_str());
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
return SUCCESS;
@@ -411,7 +458,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
*/
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, const string &model_file_name) {
GELOGI("Start to Build Single Op Offline Model.");
GELOGI("Start to build single op offline model.");
ModelBufferData model_buff;
OpEngineType engine_type = ENGINE_SYS;
return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
@@ -430,7 +477,7 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type,
ModelBufferData &model_buff) {
GELOGI("Start to Build Single Op Online");
GELOGI("Start to build single op online");
return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
}

@@ -449,7 +496,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &
model_helper.SetSaveMode(is_offline_);
Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff);
if (ret != SUCCESS) {
GELOGE(ret, "Save to Om model failed");
GELOGE(ret, "Save to om model failed");
return ret;
}
return SUCCESS;
@@ -461,16 +508,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
const std::map<std::string, std::string> options;
Status ret = graph_manager_.AddGraph(id, graph, options);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", id);
(void)graph_manager_.Finalize();
return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED;
}

GELOGI("models inputs.size()=%zu", inputs.size());
GELOGI("Model inputs size is %zu", inputs.size());
graph_manager_.SetOptionsRunGraphFlag(false);
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model);
struct timeval tv;
if (gettimeofday(&tv, nullptr) != 0) {
GELOGE(INTERNAL_ERROR, "get the time of day failed.");
return INTERNAL_ERROR;
}
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id);
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
}

@@ -485,14 +538,14 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g
const std::map<std::string, std::string> options;
Status ret = graph_manager_.AddGraph(id, graph, options);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "graphManager add graph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, graph id: %u", id);
(void)graph_manager_.Finalize();
return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED;
}

ret = graph_manager_.GenerateInfershapeGraph(id);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager BuildGraph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed");
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
}



+ 41
- 60
src/ge/graph/build/memory/block_mem_assigner.cc View File

@@ -160,10 +160,10 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block) {
parent->child_offset_ += child->block_size_;
child->deleted_block_ = true;
GELOGI(
"Add block stream id:%ld [size:%zu, life time[begin:%zu, end:%zu]] to"
" block[size:%zu, life time[begin:%zu, end:%zu]]",
stream_id_, child->block_size_, child->GetLifeBegin(), child->GetLifeEnd(), parent->block_size_,
parent->GetLifeBegin(), parent->GetLifeEnd());
"Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to"
" block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]",
child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent,
parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd());
}
}

@@ -499,17 +499,17 @@ void BlockMemAssigner::InitReuseFlag() {
bool pre_reuse_flag = true;
bool post_reuse_flag = true;
for (auto &node_index_io : pair.second) {
if (node_index_io.io_type == kIn) {
if (node_index_io.io_type_ == kIn) {
continue;
}

OutDataAnchorPtr out_anchor = node_index_io.node->GetOutDataAnchor(node_index_io.index);
OutDataAnchorPtr out_anchor = node_index_io.node_->GetOutDataAnchor(node_index_io.index_);
if (out_anchor == nullptr) {
continue;
}

bool out_flg = false;
if (node_index_io.node->GetOutDataNodes().empty()) {
if (node_index_io.node_->GetOutDataNodes().empty()) {
out_flg = true;
}
for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
@@ -643,7 +643,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
CanReuseByStream(map_iter->second, *reusable_block)) {
GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_,
stream_id);
reusable_block->AddNodeTypeIndex({n, mem_type, out_index}, real_size, no_align_size);
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size);
if (mem_type == kOutput) {
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString());
if (iter != anchor_to_symbol_.end()) {
@@ -660,7 +660,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
}
}

auto block = new (std::nothrow) MemoryBlock(block_size, is_reuse_memory);
auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed.");

// Data and netoutput need zero copy block
@@ -688,7 +688,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
MemoryBlock *block = nullptr;
NodeIndexIO node_index_io = NodeIndexIO(n, index, kOut);
NodeIndexIO node_index_io(n, index, kOut);
int64_t size = 0;
auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
if (output_op_desc != nullptr) {
@@ -701,7 +701,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
if (IsSymbolExist(node_index_io)) {
std::string symbol = anchor_to_symbol_[node_index_io.ToString()];
block = symbol_blocks_[symbol];
block->AddNodeTypeIndex({n, kOutput, index}, size, no_align_size);
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size);
block->ref_count_++;
} else {
int64_t max_size = size;
@@ -749,7 +749,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
GELOGI("Get dst_reuse_input_index failed"));
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
block->AddNodeTypeIndex({owner_node, kOutput, i}, block->Size(), block->Size());
block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size());
out_count_reuse_input += 1;
reuse_input = true;
}
@@ -775,31 +775,6 @@ bool IsOutputBlock(const ge::InDataAnchorPtr &in_data_anchor) {
return false;
}

// current node's output uses previous node's output memory
bool IsReferencePreviousNodeOutputMemory(const ge::NodePtr &node, uint32_t output_index) {
  auto desc = node->GetOpDesc();
  if (desc == nullptr) {
    return false;
  }
  // A node can only reference input memory when it carries the reference
  // attribute; GetBool failure leaves the flag false (treated as "not a ref").
  bool ref_attr = false;
  (void)ge::AttrUtils::GetBool(desc, ATTR_NAME_REFERENCE, ref_attr);
  if (!ref_attr) {
    return false;
  }
  // The output reuses an input's memory when the output name matches one of
  // the (non-empty) input names on the same op.
  const string &out_name = desc->GetOutputNameByIndex(output_index);
  for (const auto &in_name : desc->GetAllInputNames()) {
    if (in_name.empty() || out_name != in_name) {
      continue;
    }
    int in_index = desc->GetInputIndexByName(in_name);
    GELOGI("Reference memory:name[%s] output[%s][%u] ref to input[%s][%d] ", desc->GetName().c_str(),
           out_name.c_str(), output_index, in_name.c_str(), in_index);
    return true;
  }
  return false;
}

// atomic out memory will be reassigned
bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool is_atomic,
bool out_node_set_continuous_input) {
@@ -920,58 +895,57 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_
}

Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) {
auto node_op_desc = node->GetOpDesc();
int64_t stream_id = node_op_desc->GetStreamId();
auto op_desc = node->GetOpDesc();
int64_t stream_id = op_desc->GetStreamId();
vector<int64_t> memorys_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", node_op_desc->GetName().c_str(),
node_op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != node_op_desc->GetOutputsSize())) {
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(),
op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]",
node_op_desc->GetName().c_str(), node_op_desc->GetOutputsSize(), memorys_type.size());
op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size());
return INTERNAL_ERROR;
}

is_op_reuse_mem_ = true;
if (op_reuse_env_valid_ == true) {
vector<string>::iterator it_name =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetName());
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName());
vector<string>::iterator it_type =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetType());
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetType());
GE_IF_BOOL_EXEC(it_name != op_no_reuse_mem_vec_.end() || it_type != op_no_reuse_mem_vec_.end(),
is_op_reuse_mem_ = false;);
}

bool is_atomic = false;
// If GetBool fail, is_atomic is false.
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
(void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
// Allocate memory for the current node and release node memory of the same size in the workspace
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);)
for (uint32_t i = 0; i < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); i++) {
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
int64_t size = 0;
auto output_op_desc = node_op_desc->GetOutputDescPtr(i);
auto output_op_desc = op_desc->GetOutputDescPtr(i);
if (output_op_desc != nullptr) {
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
}
// fusion: other type's size not means malloc HBM memory
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1;
if (l1_flag) {
GELOGI("fusion: node[%s], output[%s], output memory type [%d]", node_op_desc->GetName().c_str(),
node_op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
GELOGI("fusion: node[%s], output[%s], output memory type [%d]", op_desc->GetName().c_str(),
op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
size = 0;
}
std::string peer_name;
uint32_t peer_input_index = 0;
bool out_node_set_continuous_input = false;
bool no_need_assign_memory =
((size == 0) || CheckIsZeroMemNodeType(node->GetType()) || IsReferencePreviousNodeOutputMemory(node, i));
bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType()));
if (!no_need_assign_memory) {
out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index);
no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);
}
if (no_need_assign_memory) {
zero_memory_list_.emplace_back(node, kOutput, i);
zero_memory_list_.emplace_back(node, kOutput, i, false);
continue;
}
// atomic can't be reused
@@ -1049,7 +1023,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
workspace_skip_flag = true;
}
if (temp[i] == 0 || workspace_skip_flag) {
zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i));
zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i), false);
continue;
}
MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast<size_t>(temp[i]), ranges),
@@ -1067,7 +1041,9 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
(void)mem_block; // Fix warning
}

GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), MergeDynamicBatchBlocks();)
bool merge_dynamic_batch = false;
GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();)
GE_IF_BOOL_EXEC(!merge_dynamic_batch, ReuseBlocksByLifeTime();)
AssignContinuousBlocks();
ResizeMemoryBlocks();

@@ -1131,7 +1107,8 @@ void MergeBlocks(std::vector<MemoryBlock *> &dest, std::vector<MemoryBlock *> &s
}
}

void BlockMemAssigner::MergeDynamicBatchBlocks() {
bool BlockMemAssigner::MergeDynamicBatchBlocks() {
bool merged = false;
std::map<std::string, std::vector<MemoryBlock *>> dynamic_batch_blocks;
for (auto block : memory_blocks_) {
if (block == nullptr) {
@@ -1160,8 +1137,10 @@ void BlockMemAssigner::MergeDynamicBatchBlocks() {
if (it != it_max) {
GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str());
MergeBlocks(it_max->second, it->second);
merged = true;
}
}
return merged;
}

// asending order
@@ -1331,9 +1310,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz
}
GELOGI(
"[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]"
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d].",
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d] isref[%d].",
graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset,
op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block);
op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block,
node_type.ref_input);
}

void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
@@ -1528,6 +1508,7 @@ void BlockMemAssigner::FindDependentStreamBetweenGraphs(const NodePtr &pre_node,
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT);
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE);
}
} // namespace ge

+ 11
- 8
src/ge/graph/build/memory/block_mem_assigner.h View File

@@ -23,6 +23,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include <list>
#include "common/ge_inner_error_codes.h"
#include "common/types.h"
#include "common/util.h"
@@ -36,13 +37,14 @@ const size_t kMaxLifeTime = 0xffffffff;
enum MemoryType { kOutput, kWorkspace };

struct NodeTypeIndex {
NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index)
: node(std::move(node)), mem_type(mem_type), index(index) {}
NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false)
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {}

ge::NodePtr node = nullptr;
MemoryType mem_type = kOutput;
uint32_t index = 0;
size_t life_time_end = kMaxLifeTime;
bool ref_input = false;
const string GetMemType() const {
if (mem_type == kOutput) {
return "output";
@@ -55,9 +57,9 @@ struct NodeTypeIndex {

class MemoryBlock {
public:
explicit MemoryBlock(size_t block_size, bool reuse_mem = true)
explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true)
: ref_count_(0),
stream_id_(0),
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
input_index_(0),
@@ -81,7 +83,7 @@ class MemoryBlock {
void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) {
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
node_type_index_list_.emplace_back(node, type, out_index);
node_type_index_list_.emplace_back(node, type, out_index, false);
}
size_t Size() const { return block_size_; }

@@ -129,6 +131,7 @@ class MemoryBlock {
bool continuous_block_;
bool last_continuous_block_;
bool is_zero_copy_;
std::map<int64_t, size_t> depend_stream_life_;

private:
size_t block_size_;
@@ -287,7 +290,7 @@ class BlockMemAssigner : public MemAssigner {
std::vector<NodeTypeIndex> zero_memory_list_;

// ref mapping
std::map<std::string, std::vector<NodeIndexIO>> symbol_to_anchors_;
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors_;
std::map<std::string, std::string> anchor_to_symbol_;
std::map<std::string, bool> pre_reuse_flag_;
std::map<std::string, bool> post_reuse_flag_;
@@ -371,10 +374,10 @@ class BlockMemAssigner : public MemAssigner {
///
/// @ingroup GE
/// @brief Merge memory blocks between different batchs
/// @return void
/// @return merge or not
/// @author
///
void MergeDynamicBatchBlocks();
bool MergeDynamicBatchBlocks();

void AssignContinuousBlocks();



+ 98
- 0
src/ge/graph/build/memory/module.mk View File

@@ -0,0 +1,98 @@
LOCAL_PATH := $(call my-dir)

# Sources shared by the host, device and LLT variants of libge_memory.
local_lib_src_files := memory_assigner.cc \
                       graph_mem_assigner.cc \
                       binary_block_mem_assigner.cc \
                       block_mem_assigner.cc \
                       hybrid_mem_assigner.cc \
                       max_block_mem_assigner.cc \
                       var_mem_assign_util.cc \

local_lib_inc_path := ${LOCAL_PATH} \
                      ${TOPDIR}inc \
                      ${TOPDIR}inc/external \
                      ${TOPDIR}inc/external/graph \
                      $(TOPDIR)libc_sec/include \
                      ${TOPDIR}third_party/protobuf/include \
                      ${TOPDIR}inc/framework \
                      $(TOPDIR)framework/domi \

#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory

LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libge_common \

LOCAL_SRC_FILES := $(local_lib_src_files)

generated_sources_dir := $(call local-generated-sources-dir)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
# Expand the variable: the bare name LOCAL_EXPORT_C_INCLUDE_DIRS would be
# passed to the compiler as a literal (nonexistent) include directory.
LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)

include ${BUILD_HOST_STATIC_LIBRARY}


#compiler for device
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory

LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY
LOCAL_CFLAGS += -O2
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libge_common \

LOCAL_SRC_FILES := $(local_lib_src_files)

generated_sources_dir := $(call local-generated-sources-dir)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
# Expand the variable (see host section above).
LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)

include ${BUILD_STATIC_LIBRARY}

#compiler for LLT (this section builds the test/LLT static library,
# not a second device library — the previous comment was misleading)
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory

LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libge_common \

LOCAL_SRC_FILES := $(local_lib_src_files)

generated_sources_dir := $(call local-generated-sources-dir)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
# Expand the variable (see host section above).
LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)

include ${BUILD_LLT_STATIC_LIBRARY}

+ 32
- 5
src/ge/graph/build/model_builder.cc View File

@@ -18,6 +18,7 @@
#include <iostream>
#include <set>
#include <unordered_map>
#include <securectype.h>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "graph/anchor.h"
@@ -250,7 +251,7 @@ Status ModelBuilder::SetInputOutputDesc() {
}
// if user set input node format ND, the expected node for data and netoutput format is ND in
// final graph.
if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) &&
if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) &&
((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) {
GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str());
auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr();
@@ -521,11 +522,37 @@ Status ModelBuilder::MergeWeights() {
}
if (weight_data.data() != nullptr) {
GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED);
GE_CHK_BOOL_EXEC(
memcpy_s(base_addr + offset, weight_offset_ - offset, weight_data.data(), weight_data.size()) == EOK,
return FAILED, "call memcpy_s failed.");
if (weight_offset_ - offset < weight_data.size()) {
GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", weight_offset_ - offset,
weight_data.size());
return FAILED;
}
uintptr_t dst_ptr = (uintptr_t)base_addr + offset;
uintptr_t src_ptr = (uintptr_t)weight_data.data();
size_t left_size = weight_data.size();
while (left_size > SECUREC_MEM_MAX_LEN) {
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr),
SECUREC_MEM_MAX_LEN);
if (err != EOK) {
GELOGE(FAILED,
"mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN);
return FAILED;
}
left_size -= SECUREC_MEM_MAX_LEN;
dst_ptr = dst_ptr + SECUREC_MEM_MAX_LEN;
src_ptr = src_ptr + SECUREC_MEM_MAX_LEN;
}
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), left_size, reinterpret_cast<void *>(src_ptr), left_size);
if (err != EOK) {
GELOGE(FAILED,
"mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN);
return FAILED;
}
}

weight_data.clear();
}



+ 16
- 6
src/ge/graph/build/stream_allocator.cc View File

@@ -683,7 +683,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id);
return FAILED;
}
stream_node_num_vec[stream_id]++;
AddNodeNum(cur_node, stream_node_num_vec[stream_id]);
stream_2_nodes_map[stream_id].push_back(cur_node);
// The maximum number of tasks per stream.
int64_t max_node_num_one_stream = GetMaxNodeNumPerStream(cur_node, max_task_count);
@@ -706,7 +706,8 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
"It's time to split the stream, split newly-added stream id is %ld",
stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id);
NodePtr pre_node = pre_node_vec[stream_id];
stream_node_num_vec[stream_id] = 1;
stream_node_num_vec[stream_id] = 0;
AddNodeNum(cur_node, stream_node_num_vec[stream_id]);
// try spilt a new stream and move same continuous stream label nodes from this stream
bool not_use_cur = false;
NodePtr not_cur = nullptr;
@@ -720,7 +721,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
auto stored_op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(stored_op_desc);
stored_op_desc->SetStreamId(last_stream_id);
stream_node_num_vec[stream_id]++;
AddNodeNum(node, stream_node_num_vec[stream_id]);
}
not_use_cur = true;
not_cur = nodes.front();
@@ -1055,7 +1056,7 @@ Status StreamAllocator::CollectDeactiveStream(const OpDescPtr &op_desc, std::set

// Insert StreamActive Op for Entry Stream.
Status StreamAllocator::InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id) {
string node_name = "ActiveEntryStream_" + string(STREAMACTIVE);
string node_name = whole_graph_->GetName() + "_ActiveEntryStream_" + string(STREAMACTIVE);
OpDescPtr op_desc = ge::MakeShared<OpDesc>(node_name, STREAMACTIVE);
if (op_desc == nullptr) {
GELOGE(FAILED, "Failed to new opdesc.");
@@ -1143,7 +1144,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
GE_CHECK_NOTNULL(node->GetInControlAnchor());
GE_CHECK_NOTNULL(node->GetOutControlAnchor());
for (auto &event_id : recv_event_id_list) {
string recv_node_name = "_Recv_" + to_string(event_id);
string recv_node_name = whole_graph_->GetName() + "_Recv_" + to_string(event_id);
OpDescPtr op_desc_ptr = MakeShared<OpDesc>(recv_node_name, RECV);
GE_CHECK_NOTNULL(op_desc_ptr);

@@ -1171,7 +1172,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
GetSendEventIdList(node, send_event_id_list);

for (auto &event_id : send_event_id_list) {
string send_node_name = "_Send_" + to_string(event_id);
string send_node_name = whole_graph_->GetName() + "_Send_" + to_string(event_id);
OpDescPtr op_desc_ptr = MakeShared<OpDesc>(send_node_name, SEND);
GE_CHECK_NOTNULL(op_desc_ptr);

@@ -1291,6 +1292,15 @@ int64_t StreamAllocator::GetMaxNodeNumPerStream(const NodePtr &node, uint32_t ma
return max_node_num_one_stream;
}

// Accumulates into node_num the number of stream tasks contributed by `node`:
// one for the node itself plus one per synchronization event (send and recv)
// attached to it, so stream splitting accounts for event tasks as well.
// NOTE(review): `events` is reused for both lookups — assumes
// GetSendEventIdList/GetRecvEventIdList overwrite (not append to) the output
// vector; confirm against their implementations.
void StreamAllocator::AddNodeNum(const NodePtr &node, int64_t &node_num) {
  node_num++;
  vector<uint32_t> events;
  GetSendEventIdList(node, events);
  node_num += static_cast<int64_t>(events.size());
  GetRecvEventIdList(node, events);
  node_num += static_cast<int64_t>(events.size());
}

// Insert send event id on a node
void StreamAllocator::AddSendEventId(const NodePtr &node, uint32_t event_id) {
node_to_send_events_[node].emplace_back(event_id);


+ 1
- 0
src/ge/graph/build/stream_allocator.h View File

@@ -80,6 +80,7 @@ class StreamAllocator {

Status GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stream_count, uint32_t &max_task_count);
int64_t GetMaxNodeNumPerStream(const NodePtr &node, uint32_t max_node_num_one_stream);
void AddNodeNum(const NodePtr &node, int64_t &node_num);

void AddSendEventId(const NodePtr &node, uint32_t event_id);
void AddRecvEventId(const NodePtr &node, uint32_t event_id);


+ 21
- 15
src/ge/graph/build/task_generator.cc View File

@@ -47,6 +47,7 @@ const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kProfilingFpPoint = "FP_POINT";
const char *const kProfilingBpPoint = "BP_POINT";
const char *const kOffOptimize = "off_optimize";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
@@ -83,10 +84,10 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
}
Status ret = SUCCESS;
if (is_unknown_shape) {
GELOGI("Beign to generate unknown shape task.");
GELOGI("Beign to generate unknown shape task. Graph name is %s.", graph->GetName().c_str());
ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map);
} else {
GELOGI("Beign to generate known shape task.");
GELOGI("Beign to generate known shape task. Graph name is %s.", graph->GetName().c_str());
ret = GenerateTask(run_context, graph, task_def_list, op_name_map);
}
GE_DUMP(graph, "GenerateTaskAfter");
@@ -108,7 +109,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
GELOGE(FAILED, "SetListStr failed.");
return FAILED);

GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(),
GELOGI("Generate task success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(),
op_name_map.size());

// Init and serialize model_task_def
@@ -130,7 +131,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
return ret;
}

GELOGI("Get TaskInfo success. session_id=%lu", session_id);
GELOGI("Get TaskInfo success. session id is %lu", session_id);
return SUCCESS;
}

@@ -253,7 +254,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed.");
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed.");
ProfilingPoint profiling_point;
vector<uint32_t> all_reduce_nodes;
GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes));
@@ -263,9 +264,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
GE_TIMESTAMP_CALLNUM_START(GenerateTask);
// map store fusion nodes
map<int64_t, std::vector<NodePtr>> fusion_nodes;
string buffer_optimize = "off_optimize";
string buffer_optimize = kOffOptimize;
(void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
if (buffer_optimize != "off_optimize") {
if (buffer_optimize != kOffOptimize) {
GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph));
}
std::unordered_set<Node *> fusion_nodes_seen;
@@ -371,7 +372,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed.");
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed.");
ProfilingPoint profiling_point;
vector<uint32_t> all_reduce_nodes;
GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes));
@@ -381,9 +382,9 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
GE_TIMESTAMP_CALLNUM_START(GenerateTask);
// map store fusion nodes
map<int64_t, std::vector<NodePtr>> fusion_nodes;
string buffer_optimize = "off_optimize";
string buffer_optimize = kOffOptimize;
(void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
if (buffer_optimize != "off_optimize") {
if (buffer_optimize != kOffOptimize) {
GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph));
}
std::unordered_set<Node *> fusion_nodes_seen;
@@ -392,7 +393,11 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
rtStream_t stream = nullptr;
GE_CHK_RT_RET(rtStreamCreate(&stream, 0));
run_context.stream = stream;
GE_CHK_RT_RET(rtModelBindStream(run_context.model, stream, 0));
if (rtModelBindStream(run_context.model, stream, 0) != RT_ERROR_NONE) {
GELOGE(FAILED, "Call rt api failed.");
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
}
for (auto &node : graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
@@ -437,7 +442,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
size_t task_list_size_before = task_def_list.size();
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));

GELOGI("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(),
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(),
name.c_str(), type.c_str(), op_id, stream_id);
GE_TIMESTAMP_RESTART(GenerateTask);
auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list);
@@ -659,14 +664,15 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {

Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_single_stream) const {
vector<vector<OpDescPtr>> continuous_op_lists(1);
const set<string> label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX});
const set<string> separator_types(
{LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN});
for (auto &op_desc : ops) {
bool attr_notask = false;
if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) {
continue;
}
string op_type = op_desc->GetType();
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || label_op_types.count(op_type) != 0)) {
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);
@@ -727,7 +733,6 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
fp_op_desc = in_node_desc;
}
}
GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId());
break;
}
}
@@ -736,6 +741,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
GELOGW("not find fp_op_desc.");
return SUCCESS;
}
GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId());
for (auto &node : graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);


+ 71
- 0
src/ge/graph/execute/graph_execute.cc View File

@@ -86,6 +86,17 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) {
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Forward the user-specified dynamic batch sizes of a loaded model to ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] batch_num: dynamic batch sizes chosen by the user.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->SetDynamicSize(model_id, batch_num);
  if (ret != SUCCESS) {
    // Log the real error code instead of generic FAILED so the log agrees with the
    // returned status (matches the sibling wrappers GetAIPPInfo/GetOrigInputInfo).
    GELOGE(ret, "SetDynamicSize failed");
    return ret;
  }
  return SUCCESS;
}

void GraphExecutor::SetTrainFlag(bool is_train_graph) { train_graph_flag_ = is_train_graph; }

Status GraphExecutor::FreeInOutBuffer() {
@@ -476,7 +487,28 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::ve
GELOGE(ret, "GetDynamicBatchInfo failed.");
return ret;
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Query the currently selected dynamic shape of a loaded model via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [out] batch_info: receives the current batch sizes.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetCurShape(model_id, batch_info);
  if (ret != SUCCESS) {
    // Log the real error code instead of generic FAILED so log and return value agree.
    GELOGE(ret, "GetCurShape failed");
    return ret;
  }
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Fetch the dynamic output shape attribute strings of a loaded model via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [out] dynamic_output_shape_info: receives the attribute strings.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetModelAttr(model_id, dynamic_output_shape_info);
  if (ret != SUCCESS) {
    // Log the real error code instead of generic FAILED so log and return value agree.
    GELOGE(ret, "GetModelAttr failed");
    return ret;
  }
  return SUCCESS;
}

@@ -503,4 +535,43 @@ Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vecto

return SUCCESS;
}

///
/// @ingroup ge
/// @brief Query the static AIPP configuration of one model input via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] index: input (Data node) index.
/// @param [out] aipp_info: receives the AIPP configuration.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  auto manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);
  const Status status = manager->GetAIPPInfo(model_id, index, aipp_info);
  if (status == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(status, "GetAIPPInfo failed.");
  return status;
}

///
/// @ingroup ge
/// @brief Query the origin (pre-AIPP) input description of one model input via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] index: input (Data node) index.
/// @param [out] orig_input_info: receives the origin input description.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  auto manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);
  const Status status = manager->GetOrigInputInfo(model_id, index, orig_input_info);
  if (status == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(status, "GetOrigInputInfo failed.");
  return status;
}

///
/// @ingroup ge
/// @brief Query all AIPP input and output dims of one model input via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] index: input (Data node) index.
/// @param [out] input_dims: receives the AIPP input dims.
/// @param [out] output_dims: receives the AIPP output dims.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                                std::vector<InputOutputDims> &input_dims,
                                                std::vector<InputOutputDims> &output_dims) {
  auto manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);
  const Status status = manager->GetAllAippInputOutputDims(model_id, index, input_dims, output_dims);
  if (status == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(status, "GetAllAippInputOutputDims failed.");
  return status;
}

} // namespace ge

+ 11
- 0
src/ge/graph/execute/graph_execute.h View File

@@ -56,6 +56,8 @@ class GraphExecutor {

Status SetGraphContext(GraphContextPtr graph_context_ptr);

static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num);

void SetTrainFlag(bool is_train_graph);

const std::vector<InputOutputDescInfo> &GetOutputsDesc() const { return outputs_desc_; }
@@ -71,6 +73,8 @@ class GraphExecutor {
vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);

static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

///
/// @ingroup ge
/// @brief Get dynamic batch_info
@@ -80,10 +84,17 @@ class GraphExecutor {
///
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);

static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);
static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,


+ 1
- 1
src/ge/graph/label/while_label_maker.cc View File

@@ -98,7 +98,7 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) {
return FAILED;
}

NodePtr cond_out_node = cond_graph->FindNode(NODE_NAME_NET_OUTPUT);
NodePtr cond_out_node = cond_graph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHECK_NOTNULL(cond_out_node);
OpDescPtr cond_out_desc = cond_out_node->GetOpDesc();
GE_CHECK_NOTNULL(cond_out_desc);


+ 90
- 0
src/ge/graph/load/new_model_manager/aipp_utils.cc View File

@@ -0,0 +1,90 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/load/new_model_manager/aipp_utils.h"

#include <string>

#include "common/debug/log.h"
#include "common/op/ge_op_utils.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/attr_utils.h"

#include "framework/common/debug/ge_log.h"

namespace ge {
// Copies a scalar field KEY from the protobuf AippOpParams accessor into the flat AippConfigInfo struct.
#define AIPP_CONVERT_TO_AIPP_INFO(KEY) aipp_info.KEY = aipp_params->KEY()

// Copies element INDEX of a repeated protobuf field KEY, but only when the field is non-empty,
// so an absent optional field leaves aipp_info.KEY at its default value.
#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \
do { \
if (aipp_params->KEY##_size() > 0) { \
aipp_info.KEY = aipp_params->KEY(INDEX); \
} \
} while (0)

// Flattens a parsed domi::AippOpParams protobuf into the framework-facing AippConfigInfo.
// Scalar fields are copied directly; for repeated fields only element 0 is taken.
// Fails fast (GE_CHECK_NOTNULL) when aipp_params is null; otherwise always returns SUCCESS.
Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) {
GE_CHECK_NOTNULL(aipp_params);
// Source image geometry and crop parameters.
AIPP_CONVERT_TO_AIPP_INFO(input_format);
AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w);
AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h);
AIPP_CONVERT_TO_AIPP_INFO(crop);
AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_w);
AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_h);
AIPP_CONVERT_TO_AIPP_INFO(crop_size_w);
AIPP_CONVERT_TO_AIPP_INFO(crop_size_h);
// Resize and padding parameters.
AIPP_CONVERT_TO_AIPP_INFO(resize);
AIPP_CONVERT_TO_AIPP_INFO(resize_output_w);
AIPP_CONVERT_TO_AIPP_INFO(resize_output_h);
AIPP_CONVERT_TO_AIPP_INFO(padding);
AIPP_CONVERT_TO_AIPP_INFO(left_padding_size);
AIPP_CONVERT_TO_AIPP_INFO(right_padding_size);
AIPP_CONVERT_TO_AIPP_INFO(top_padding_size);
AIPP_CONVERT_TO_AIPP_INFO(bottom_padding_size);
// Channel-handling switches.
AIPP_CONVERT_TO_AIPP_INFO(csc_switch);
AIPP_CONVERT_TO_AIPP_INFO(rbuv_swap_switch);
AIPP_CONVERT_TO_AIPP_INFO(ax_swap_switch);
AIPP_CONVERT_TO_AIPP_INFO(single_line_mode);
// Color-space conversion matrix and bias vectors (repeated fields: take element 0).
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_2, 0);
// Per-channel mean / min values used for normalization.
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_0);
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_1);
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_2);
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_3);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_0);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_1);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_2);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_3);
// Per-channel reciprocal-of-variance values (repeated fields: take element 0).
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0);
return SUCCESS;
}
} // namespace ge

+ 48
- 0
src/ge/graph/load/new_model_manager/aipp_utils.h View File

@@ -0,0 +1,48 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_

#include <vector>

#include "common/ge_inner_error_codes.h"
#include "common/ge_types.h"
#include "graph/op_desc.h"
#include "proto/insert_op.pb.h"

using std::vector;

namespace ge {
const uint32_t kAippOriginInputIndex = 0;
const uint32_t kAippInfoNum = 6;
const uint32_t kAippInfoFormat = 0;
const uint32_t kAippInfoDataType = 1;
const uint32_t kAippInfoTensorName = 2;
const uint32_t kAippInfoTensorSize = 3;
const uint32_t kAippInfoDimNum = 4;
const uint32_t kAippInfoShape = 5;

// Stateless helper for converting AIPP (AI Pre-Processing) protobuf parameters
// into the framework-facing AippConfigInfo structure.
class AippUtils {
public:
AippUtils() = default;
~AippUtils() = default;

// Copies every static AIPP field (and element 0 of each repeated field) from
// aipp_params into aipp_info; fails fast if aipp_params is null.
static Status ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info);
};
} // namespace ge

#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_

+ 3
- 17
src/ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -35,7 +35,6 @@
namespace {
const uint32_t kAicpuLoadFlag = 1;
const uint32_t kAicpuUnloadFlag = 0;
const uint32_t kTimeBufferLen = 80;
const char *const kDumpOutput = "output";
const char *const kDumpInput = "input";
const char *const kDumpAll = "all";
@@ -190,18 +189,6 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin
}
}

static std::string GetCurrentTime() {
std::time_t now = std::time(nullptr);
std::tm *ptm = std::localtime(&now);
if (ptm == nullptr) {
return "";
}
char buffer[kTimeBufferLen] = {0};
// format: 20171122042550
std::strftime(buffer, kTimeBufferLen, "%Y%m%d%H%M%S", ptm);
return std::string(buffer);
}

Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump output");
if (inner_dump_info.is_task) {
@@ -384,10 +371,9 @@ Status DataDumper::LoadDumpInfo() {
}

aicpu::dump::OpMappingInfo op_mapping_info;
std::string time_now = GetCurrentTime();
GELOGI("Time is %s now", time_now.c_str());
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + time_now + "/" +
std::to_string(device_id_) + "/");

auto dump_path = PropertiesManager::Instance().GetDumpOutputPath();
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/");
op_mapping_info.set_model_name(model_name_);
op_mapping_info.set_model_id(model_id_);
op_mapping_info.set_flag(kAicpuLoadFlag);


+ 265
- 10
src/ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -80,6 +80,7 @@ const uint32_t kOutputNum = 1;
const uint32_t kTrueBranchStreamNum = 1;
const uint32_t kThreadNum = 16;
const uint32_t kAddrLen = sizeof(void *);
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel";
const int kDecimal = 10;
const int kBytes = 8;
const uint32_t kDataMemAlignSizeCompare = 64;
@@ -579,6 +580,14 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
auto ret = DoTaskSink();
GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink");

/// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution,
/// the aicpu opertor needs to destroy history record, and update operator memory address.
/// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel().
if (MarkSpecifiedAicpuKernel() != SUCCESS) {
GELOGE(FAILED, "Mark model with specified aicpu operators failed.");
return FAILED;
}

// collect profiling for ge
if (ProfilingManager::Instance().ProfilingOn()) {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
@@ -595,6 +604,82 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size

///
/// @ingroup ge
/// @brief Travel all nodes and determine if destruction is required.
/// @return bool
///
// Scans the whole compute graph for operators implemented on AI_CPU that are directly
// connected to a Data (model input) node or to the NetOutput node. Such models must be
// marked so their aicpu kernels' history records are destroyed before execution
// (see MarkSpecifiedAicpuKernel()). Returns true as soon as one qualifying operator is found.
bool DavinciModel::IsAicpuKernelConnectSpecifiedLayer() {
Graph graph = ge_model_->GetGraph();
ComputeGraphPtr compute_graph = GraphUtils::GetComputeGraph(graph);
auto all_nodes = compute_graph->GetAllNodes();
for (auto &node : all_nodes) {
GE_IF_BOOL_EXEC(node == nullptr, continue);
OpDescPtr op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);

// Only operators whose imply type is AI_CPU are of interest; skip everything else.
int64_t imply_type = -1;
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, imply_type);
if (imply_type != static_cast<int64_t>(domi::ImplyType::AI_CPU)) {
continue;
}
GELOGD("Current operator imply type is %ld, name is %s.", imply_type, op_desc->GetName().c_str());

// Upstream check: is any producer feeding this node a Data (model input) op?
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
auto peer_node = peer_out_data_anchor->GetOwnerNode();
GE_IF_BOOL_EXEC(peer_node == nullptr, continue);
auto peer_op_desc = peer_node->GetOpDesc();
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
if (IsDataOp(peer_op_desc->GetType())) {
GELOGI("Mark specified aicpu operator connected to data.");
return true;
}
}
// Downstream check: does any consumer of this node's outputs belong to NetOutput?
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue);
auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors();
for (auto &peer_in_data_anchor : peer_in_data_anchors) {
GE_IF_BOOL_EXEC(peer_in_data_anchor == nullptr, continue);
auto peer_node = peer_in_data_anchor->GetOwnerNode();
GE_IF_BOOL_EXEC(peer_node == nullptr, continue);
auto peer_op_desc = peer_node->GetOpDesc();
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
if (peer_op_desc->GetType() == NETOUTPUT) {
GELOGI("Mark specified aicpu operator connected to netoutput.");
return true;
}
}
}
}

// No aicpu operator touches the first or last layer.
return false;
}
///
/// @ingroup ge
/// @brief mark ge model with specified aicpu operators .
/// @return Status
///
///
/// @ingroup ge
/// @brief Tag the GeModel when it contains aicpu operators adjacent to Data/NetOutput,
/// so that later execution knows those kernels' history records must be destroyed.
/// @return Status (SUCCESS even if setting the attribute fails; only a warning is logged)
///
Status DavinciModel::MarkSpecifiedAicpuKernel() {
  const bool needs_destroy = IsAicpuKernelConnectSpecifiedLayer();
  if (!needs_destroy) {
    // Nothing to mark: no aicpu operator connects to a first or last layer.
    GELOGD("No specified aicpu operator that connects to data or netoutput.");
    return SUCCESS;
  }

  if (!ge::AttrUtils::SetBool(ge_model_, kNeedDestroySpecifiedAicpuKernel, needs_destroy)) {
    GELOGW("Add attr[%s] in ge model failed, and may lead to specified aicpu operators destruction failure.",
           kNeedDestroySpecifiedAicpuKernel);
  }
  GELOGI("Mark ge model success, the model has specified aicpu operators, ge model name: %s.",
         ge_model_->GetName().c_str());
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Travel all nodes and do some init.
/// @param [in] compute_graph: ComputeGraph to load.
/// @return Status
@@ -1002,8 +1087,6 @@ Status DavinciModel::BindInputQueue() {
/// @ingroup ge
/// @brief definiteness queue schedule, bind input queue to task.
/// @param [in] queue_id: input queue id from user.
/// @param [in] addr: Data Op output tensor address.
/// @param [in] size: Data Op output tensor size.
/// @return: 0 for success / others for failed
Status DavinciModel::CpuModelDequeue(uint32_t queue_id) {
GELOGI("Set CpuKernel model dequeue task enter.");
@@ -1266,10 +1349,76 @@ Status DavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batc
}
break;
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get AIPP input info
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
// Retrieves the static AIPP configuration bound to the Data node at `index`.
// Returns PARAM_INVALID for an out-of-range index, GE_AIPP_NOT_EXIST when the Data node
// carries no AIPP attribute, and GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY for dynamic-mode AIPP.
Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
OpDescPtr data_op = data_op_list_[index];
// A Data node without ATTR_NAME_AIPP has no AIPP configured for this input.
if (!data_op->HasAttr(ATTR_NAME_AIPP)) {
GELOGE(GE_AIPP_NOT_EXIST, "GetAIPPInfo: there is not AIPP related with index %u.", index);
return GE_AIPP_NOT_EXIST;
}

std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams());
GE_CHECK_NOTNULL(aipp_params);

// Decode the named-attrs blob stored on the Data node back into an AippOpParams protobuf.
ge::GeAttrValue::NAMED_ATTRS aipp_attr;
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
"Data node do not contain param aipp!");
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank());
// Dynamic AIPP has no fixed configuration to report; querying it is not supported here.
if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) {
GELOGI("GetAIPPInfo, dynamic Aipp is not support to query temporarily.");
return GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY;
}

// Flatten the protobuf into the caller-facing AippConfigInfo struct.
GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info),
"convert aipp params to aipp config info failed");

return SUCCESS;
}

///
/// @ingroup ge
/// @brief Record the user-specified dynamic batch sizes for this model.
/// @param [in] batch_num: dynamic batch sizes chosen by the user (may be empty).
///
void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num) {
  if (batch_num.empty()) {
    GELOGD("User has not set dynamic data");  // fixed typo: "dynammic" -> "dynamic"
  }
  // Replace any previously recorded sizes in one step instead of clear() + per-element copy.
  batch_size_.assign(batch_num.begin(), batch_num.end());
}

///
/// @ingroup ge
/// @brief Append the currently recorded dynamic batch sizes to batch_info.
/// @param [out] batch_info: receives one entry per recorded batch size.
///
void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info) {
  if (batch_size_.empty()) {
    GELOGD("User does not set dynamic size");
    return;
  }
  // Log once instead of once per element (the original GELOGI sat inside the copy loop),
  // and copy all recorded sizes in a single bulk insert.
  GELOGI("Start to get current shape");
  batch_info.insert(batch_info.end(), batch_size_.begin(), batch_size_.end());
}

///
/// @ingroup ge
/// @brief Collect the dynamic output dims attribute from every NetOutput op of this model.
/// @param [out] dynamic_output_shape_info: receives the attribute strings when present.
///
void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
  for (auto &output_op : output_op_list_) {
    if (output_op->GetType() == NETOUTPUT) {
      GELOGI("Start to get dynamic output dims attr");
      const bool got_attr = AttrUtils::GetListStr(output_op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info);
      if (!got_attr) {
        GELOGD("Can not get dynamic output dims attr");
      }
    }
  }
}

Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
@@ -1299,7 +1448,7 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf
}

Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
for (std::size_t index = 0; index < data_op_list_.size(); ++index) {
for (size_t index = 0; index < data_op_list_.size(); ++index) {
InputOutputDescInfo input;
GE_CHECK_NOTNULL(data_op_list_[index]);
GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0));
@@ -1495,7 +1644,14 @@ Status DavinciModel::SinkModelProfile() {
// Model Header
string name = this->Name();
int32_t name_len = name.size();
reporter_data.deviceId = device_id_;
// phy device id
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return FAILED;
}
reporter_data.deviceId = phy_device_id;
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.",
@@ -1671,7 +1827,13 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");
// device id
reporter_data.deviceId = device_id_;
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return FAILED;
}
reporter_data.deviceId = phy_device_id;

// Model Header
string name = this->Name();
@@ -2637,8 +2799,10 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
}

if (input_size > op_size) {
GELOGE(FAILED, "Input size [%u] can not be bigger than op size [%u]", input_size, op_size);
return false;
GELOGW(
"Input size [%u] is bigger than om size need [%u],"
"MAY cause inference result ERROR, please check model input",
input_size, op_size);
}
bool is_dynamic_aipp = false;
for (const auto &op_desc : data_op_list_) {
@@ -2707,14 +2871,18 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp
///
Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) {
string input_or_output = "input";
is_input ? input_or_output = "input" : input_or_output = "output";
if (blobs.size() != data_info.size()) {
GELOGE(FAILED, "Blobs not match: blobs=%zu datas=%zu", blobs.size(), data_info.size());
GELOGE(FAILED, "Verify %s data num failed: model requires %zu, but user actually feeds %zu",
input_or_output.c_str(), data_info.size(), blobs.size());
return FAILED;
}

for (const auto &data : data_info) {
if (data.first >= blobs.size()) { // check data index.
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u", blobs.size(), data_info.size(), data.first);
GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu",
input_or_output.c_str(), data.first, blobs.size());
return FAILED;
}
int64_t size = data.second.first; // size of tensor.
@@ -3262,7 +3430,7 @@ void DavinciModel::PushHcclStream(rtStream_t value) {
void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) {
std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_);
capacity_of_stream_.emplace_back(make_pair(stream, remain_cap));
};
}

void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) {
std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_);
@@ -3320,4 +3488,91 @@ Status DavinciModel::GetComputeGraphInfo(std::vector<ComputeGraphDescInfo> &comp
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get the origin (pre-AIPP) input description of the Data node at `index`.
/// @param [in] index: index into data_op_list_.
/// @param [out] orig_input_info: receives format / data_type / dim_num of the origin input.
/// @return SUCCESS / PARAM_INVALID / GE_AIPP_NOT_EXIST
///
Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) {
  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
  OpDescPtr data_op = data_op_list_[index];
  if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
    GELOGE(GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index);
    return GE_AIPP_NOT_EXIST;
  }

  vector<std::string> inputs;
  if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
    std::string input = inputs[kAippOriginInputIndex];
    GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str());
    std::vector<std::string> infos = ge::StringUtils::Split(input, ':');
    if (infos.size() != kAippInfoNum) {
      // Bail out before indexing: the original code warned but still read
      // infos[kAippInfoFormat] etc., which is out of bounds for a malformed string.
      GELOGW("origin input str is invalid.");
      return PARAM_INVALID;
    }
    orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]);
    orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]);
    orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal);
  }

  return SUCCESS;
}

// Parses one serialized AIPP tensor description (colon-separated fields:
// format : data_type : tensor_name : tensor_size : dim_num : d0,d1,...) into dims_info.
// Malformed strings (wrong field count) are skipped with a warning.
void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info) {
  GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str());
  std::vector<std::string> infos = ge::StringUtils::Split(in_out_info, ':');
  if (infos.size() != kAippInfoNum) {
    // Bail out before indexing: the original code warned but still read
    // infos[kAippInfoTensorName] etc., which is out of bounds for a malformed string.
    GELOGW("origin input str is invalid.");
    return;
  }
  dims_info.name = infos[kAippInfoTensorName];
  dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal);
  dims_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal);

  // The shape field is itself a comma-separated dim list; empty segments are ignored.
  std::vector<std::string> dims = ge::StringUtils::Split(infos[kAippInfoShape], ',');
  for (const auto &dim : dims) {
    if (dim.empty()) {
      continue;
    }
    dims_info.dims.emplace_back(std::strtol(dim.c_str(), nullptr, kDecimal));
  }
}

///
/// @ingroup ge
/// @brief Get all AIPP input and output dims recorded on the Data node at `index`.
/// @param [in] index: index into data_op_list_.
/// @param [out] input_dims: parsed dims of each recorded AIPP input.
/// @param [out] output_dims: parsed dims of each recorded AIPP output.
/// @return SUCCESS / PARAM_INVALID / GE_AIPP_NOT_EXIST
///
Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
                                               std::vector<InputOutputDims> &output_dims) {
  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
  OpDescPtr data_op = data_op_list_[index];
  if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
    GELOGE(GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index);
    return GE_AIPP_NOT_EXIST;
  }

  vector<std::string> inputs;
  if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
    // %zu matches inputs.size() (size_t); the original %u was a printf-format mismatch.
    GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(),
           inputs.size());
    for (const auto &it : inputs) {  // const ref: avoid copying each string per iteration
      InputOutputDims input_info;
      ParseAIPPInfo(it, input_info);
      input_dims.emplace_back(input_info);
      GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str());

      ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex);
      GE_CHECK_NOTNULL(data_input_desc);  // guard: the original dereferenced without a null check
      int64_t data_input_size = 0;        // initialize in case GetSize fails
      (void)TensorUtils::GetSize(*data_input_desc, data_input_size);
      // Format specifiers fixed: %u for uint32_t index, %zu for size_t dim_num, %ld for int64_t size.
      GELOGD(
        "GetAllAippInputOutputDims related Data[%u]: tensor_name is %s, dim_num is %zu, tensor_size: %ld, format: %s, "
        "data_type: %s, shape: %s .",
        index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
        TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
        TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
        formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
    }
  }

  vector<std::string> outputs;
  if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) {
    for (const auto &it : outputs) {
      InputOutputDims output_info;
      ParseAIPPInfo(it, output_info);
      output_dims.emplace_back(output_info);
      GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str());
    }
  }

  return SUCCESS;
}

} // namespace ge

+ 28
- 0
src/ge/graph/load/new_model_manager/davinci_model.h View File

@@ -34,6 +34,7 @@
#include "graph/load/new_model_manager/data_dumper.h"
#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/model.h"
#include "graph/node.h"
@@ -294,6 +295,19 @@ class DavinciModel {
///
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info);

void GetCurShape(std::vector<int64_t> &batch_info);

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);

///
/// @ingroup ge
/// @brief Get AIPP input info
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);

///
/// @ingroup ge
/// @brief Get model_id.
@@ -407,6 +421,8 @@ class DavinciModel {
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args,
size_t size, size_t offset);

void SetDynamicSize(const std::vector<uint64_t> &batch_num);

bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }

void SetProfileTime(ModelProcStage stage, int64_t endTime = 0);
@@ -452,6 +468,10 @@ class DavinciModel {
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset);

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
// memory address of weights
uint8_t *weights_mem_base_;
@@ -560,6 +580,10 @@ class DavinciModel {

void UnbindTaskSinkStream();

bool IsAicpuKernelConnectSpecifiedLayer();

Status MarkSpecifiedAicpuKernel();

///
/// @ingroup ge
/// @brief Travel all nodes and do some init.
@@ -741,6 +765,8 @@ class DavinciModel {
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);

void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);

bool is_model_has_inited_;
uint32_t model_id_;
uint32_t runtime_model_id_;
@@ -856,6 +882,8 @@ class DavinciModel {
void *args_host_ = nullptr;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;

vector<uint64_t> batch_size_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 130
- 4
src/ge/graph/load/new_model_manager/model_manager.cc View File

@@ -22,6 +22,8 @@
#include "common/profiling/profiling_manager.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/debug/ge_attr_define.h"
#include "framework/common/util.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
@@ -31,6 +33,7 @@ thread_local uint32_t device_count = 0;
namespace {
const int kCmdParSize = 2;
const int kDumpCmdPairSize = 2;
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel";
} // namespace

std::shared_ptr<ModelManager> ModelManager::GetInstance() {
@@ -39,7 +42,10 @@ std::shared_ptr<ModelManager> ModelManager::GetInstance() {
return instance_ptr;
}

ModelManager::ModelManager() { max_model_id_ = 0; }
// Construct the manager with its id counter and session-id bias both reset.
ModelManager::ModelManager() : max_model_id_(0), session_id_bias_(0) {}

Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) {
STR_FWK_OP_KERNEL param_base = {};
@@ -69,6 +75,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;)
uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr));
param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr;
// In the scene of loading once and running many times, the kernel needs to be destroyed many times,
// and cannot be removed from the kernel map.
}
}

@@ -213,6 +221,13 @@ Status ModelManager::SetDevice(int32_t deviceId) const {
return SUCCESS;
}

// Forward the configured dynamic batch sizes to the loaded model instance.
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) {
  auto model = GetModel(model_id);
  GE_CHECK_NOTNULL(model);
  model->SetDynamicSize(batch_num);
  return SUCCESS;
}

ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const shared_ptr<ge::GeRootModel> &ge_root_model,
const shared_ptr<ModelListener> &listener) {
auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -616,7 +631,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
return FAILED;
}
if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') {
dump_path += "/";
dump_path = dump_path + "/" + CurrentTimeInStr() + "/";
}
GELOGI("dump status = %s.", dump_path.c_str());

@@ -647,7 +662,6 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) {
auto hybrid_model = GetHybridModel(model_id);
if (hybrid_model != nullptr) {
// TODO hybrid use dynamic memory allocation
max_size = 0;
return SUCCESS;
}
@@ -694,6 +708,20 @@ Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<st
return davinci_model->GetDynamicBatchInfo(batch_info);
}

// Query the model's current shape; delegates to the DavinciModel instance.
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
  auto model = GetModel(model_id);
  GE_CHECK_NOTNULL(model);
  model->GetCurShape(batch_info);
  return SUCCESS;
}

// Fetch the dynamic-output-shape attribute strings from the loaded model.
Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
  auto model = GetModel(model_id);
  GE_CHECK_NOTNULL(model);
  model->GetModelAttr(dynamic_output_shape_info);
  return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats,
@@ -705,6 +733,52 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

///
/// @ingroup ge
/// @brief Query the AIPP configuration attached to one model input.
/// @param [in] model_id  id of the loaded model
/// @param [in] index  input index whose AIPP config is requested
/// @param [out] aipp_info  filled with the AIPP configuration
/// @return execute result
///
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  auto model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", model_id);
  return model->GetAIPPInfo(index, aipp_info);
}

///
/// @ingroup ge
/// @brief Generate a process-unique session id: microsecond wall-clock timestamp * 100 + rotating bias.
/// @param [out] session_id  generated session id
/// @return SUCCESS, or INTERNAL_ERROR when the wall clock cannot be read
///
Status ModelManager::GenSessionId(uint64_t &session_id) {
  // Serialize generation so the bias counter is updated atomically.
  std::lock_guard<std::mutex> lock(session_id_create_mutex_);

  struct timeval tv;
  if (gettimeofday(&tv, nullptr) != 0) {
    GELOGE(INTERNAL_ERROR, "Failed to get current time.");
    return INTERNAL_ERROR;
  }
  // Widen before multiplying: tv_sec * 1000000 evaluated in time_t arithmetic
  // can overflow (UB) on platforms where time_t is 32 bits.
  session_id = static_cast<uint64_t>(tv.tv_sec) * 1000000UL + static_cast<uint64_t>(tv.tv_usec);  // 1000000us

  // Rotate a small bias so ids generated within the same microsecond still differ; max bias is 100.
  session_id_bias_++;
  session_id_bias_ = session_id_bias_ % 100;
  session_id = session_id * 100 + session_id_bias_;

  GELOGD("Generate new session id: %lu.", session_id);
  return SUCCESS;
}

// Stamp a freshly generated session id onto the model's attribute store so each
// load gets a distinct id (see callers in LoadModelOffline / LoadModelWithQ).
// @param [in,out] davinci_model  loaded model whose GeModel attribute is updated
// @param [in] session_id  id to record
// @return SUCCESS (attribute-set failure is tolerated and only logged)
Status ModelManager::UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id) {
GeModelPtr ge_model_current = davinci_model->GetGeModel();
GE_CHECK_NOTNULL(ge_model_current);
// Best-effort: a failed attribute write must not abort the model load, so warn only.
if (!ge::AttrUtils::SetInt(ge_model_current, ge::MODEL_ATTR_SESSION_ID, static_cast<int64_t>(session_id))) {
GELOGW("Set attr[%s] failed in updating session_id.", MODEL_ATTR_SESSION_ID.c_str());
}

GELOGD("Update session id: %lu.", session_id);
return SUCCESS;
}

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID,
@@ -747,6 +821,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
}
davinci_model->SetDeviceId(device_id);

/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so the values of session_id are the same.
/// Update session_id for infer in load model to avoid the same session_id.
uint64_t new_session_id;
ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed.");
ret = UpdateSessionId(davinci_model, new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed.");

ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed.");

@@ -805,9 +888,17 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
return ret;
}

/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so the values of session_id are the same.
/// Update session_id for infer in load model to avoid the same session_id.
uint64_t new_session_id;
ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
ret = UpdateSessionId(davinci_model, new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");

GenModelId(&model_id);
davinci_model->SetId(model_id);
davinci_model->SetSessionId(model_id);
ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ret, "set model queue ids failed.");
@@ -840,6 +931,22 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id);

GeModelPtr ge_model_current = davinci_model->GetGeModel();
bool need_destroy_aicpu_kernel = false;
bool result = ge::AttrUtils::GetBool(ge_model_current, kNeedDestroySpecifiedAicpuKernel, need_destroy_aicpu_kernel);
if (result && need_destroy_aicpu_kernel) {
GELOGI("Get attr %s successfully, start to destroy specified aicpu kernel.", kNeedDestroySpecifiedAicpuKernel);

// Zero copy is enabled by default, no need to judge.
uint64_t session_id_davinci = davinci_model->GetSessionId();
uint32_t model_id_davinci = davinci_model->GetModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci);
if (status != SUCCESS) {
GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci,
model_id_davinci);
}
}

Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data);
if (status == SUCCESS) {
GELOGI("Execute model %u success.", model_id);
@@ -920,4 +1027,23 @@ void ModelManager::GenModelId(uint32_t *id) {
std::lock_guard<std::mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

// Look up the model and delegate the original-input-info query to it.
Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  auto model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "GetOrigInputInfo failed, invalid model_id is %u.",
                         model_id);
  return model->GetOrigInputInfo(index, orig_input_info);
}

// Look up the model and delegate the AIPP input/output dims query to it.
Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                               std::vector<InputOutputDims> &input_dims,
                                               std::vector<InputOutputDims> &output_dims) {
  auto model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID,
                         "GetAllAippInputOutputDims failed, invalid model_id is %u.", model_id);
  return model->GetAllAippInputOutputDims(index, input_dims, output_dims);
}

} // namespace ge

+ 27
- 1
src/ge/graph/load/new_model_manager/model_manager.h View File

@@ -17,6 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_

#include <model/ge_root_model.h>
#include <pthread.h>
#include <stdint.h>
#include <algorithm>
@@ -25,7 +26,6 @@
#include <set>
#include <string>
#include <vector>
#include <model/ge_root_model.h>
#include "cce/aicpu_engine_struct.h"
#include "common/ge_inner_error_codes.h"
#include "common/ge_types.h"
@@ -189,6 +189,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

///
/// @ingroup ge
/// @brief Get AIPP info
/// @param [in] model_id
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

///
/// @ingroup domi_ome
/// @brief set model input and output size zero copy
/// @param [in] model_id model id
@@ -202,8 +212,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);

ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

ge::Status SetDevice(int32_t deviceId) const;

ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num);

///
/// @ingroup domi_ome
/// @brief Get model according to given id
@@ -226,6 +242,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);

ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);

ge::Status GenSessionId(uint64_t &session_id);

ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
///
/// @ingroup domi_ome
@@ -253,6 +276,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status DeleteModel(uint32_t id);

void GenModelId(uint32_t *id);
ge::Status UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id);

std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_;
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
@@ -260,6 +284,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
uint32_t max_model_id_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::mutex session_id_create_mutex_;
uint64_t session_id_bias_;
std::set<uint64_t> sess_ids_;
};
} // namespace ge


+ 1
- 0
src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc View File

@@ -177,6 +177,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode
rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
(void)rtStreamDestroy(stream);
return RT_FAILED;
}
GELOGD("hccl_stream addr is=%p", stream);


+ 28
- 5
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -67,6 +67,18 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
return FAILED;
}

const auto &ext_info = kernel_ex_def.kernel_ext_info();
if (!ext_info.empty()) {
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;)
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;)
}

// 2.1 get loop cond variable for tensor array write
uint64_t step_id_addr = 0;
OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP);
@@ -77,7 +89,9 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
}
}

auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID;
auto session_id = davinci_model_->GetSessionId();
fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id;

// 2.2 Collect aicpu kernel
uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID;
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS,
@@ -97,8 +111,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_base_addr));
fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size();
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_);

rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;)
@@ -149,8 +163,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = workspace_base_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = input_output_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size();
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_);

// 4. Create session
GE_CHECK_NOTNULL(ModelManager::GetInstance());
@@ -291,6 +305,15 @@ Status KernelExTaskInfo::Release() {
input_output_addr_ = nullptr;
}
}
if (ext_info_addr_ != nullptr) {
rtError_t rt_ret = rtFree(ext_info_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret);
ret = FAILED;
} else {
ext_info_addr_ = nullptr;
}
}
return ret;
}



+ 2
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h View File

@@ -31,6 +31,7 @@ class KernelExTaskInfo : public TaskInfo {
davinci_model_(nullptr),
kernel_buf_(nullptr),
input_output_addr_(nullptr),
ext_info_addr_(nullptr),
dump_args_(nullptr) {}

~KernelExTaskInfo() override {}
@@ -64,6 +65,7 @@ class KernelExTaskInfo : public TaskInfo {
DavinciModel *davinci_model_;
void *kernel_buf_;
void *input_output_addr_;
void *ext_info_addr_;
void *dump_args_;
OpDescPtr op_desc_ = nullptr;
uint32_t args_offset_ = 0;


+ 29
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -414,6 +414,7 @@ Status KernelTaskInfo::Release() {
FreeRtMem(&custom_info_.output_descs);
FreeRtMem(&custom_info_.output_addrs);
FreeRtMem(&custom_info_.attr_handle);
FreeRtMem(&aicpu_ext_info_addr_);

if (ctx_.argsOffset != nullptr) {
delete[] ctx_.argsOffset;
@@ -792,6 +793,16 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
@@ -823,6 +834,24 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return SUCCESS;
}

// Copy the serialized aicpu ext-info blob to device memory (HBM).
// The device buffer is kept in the member aicpu_ext_info_addr_ and released
// later by Release() via FreeRtMem; on the rtMemcpy failure path the buffer
// therefore stays owned by the member rather than being freed here.
// @param [in] ext_info  serialized ext info from the kernel def; empty means nothing to do
// @return SUCCESS, or FAILED on runtime alloc/copy error
Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
if (ext_info.empty()) {
return SUCCESS;
}
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;
}
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;
}

return SUCCESS;
}

Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_data_addrs,
const std::vector<void *> &output_data_addrs,
const std::vector<::tagCcAICPUTensor> &input_descs,


+ 5
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -100,6 +100,8 @@ class KernelTaskInfo : public TaskInfo {

Status InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def);

Status InitAicpuTaskExtInfo(const std::string &ext_info);

Status StoreInputOutputTensor(const std::vector<void *> &input_data_addrs,
const std::vector<void *> &output_data_addrs,
const std::vector<::tagCcAICPUTensor> &input_descs,
@@ -152,6 +154,9 @@ class KernelTaskInfo : public TaskInfo {
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;

// aicpu ext_info device mem
void *aicpu_ext_info_addr_ = nullptr;

// For super kernel
uint32_t skt_id_;
std::string stub_func_name_;


+ 5
- 3
src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc View File

@@ -133,10 +133,11 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret);
return FAILED;)
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)

GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys);
// Create the necessary metadata for the super kernel
@@ -159,7 +160,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
// Create the necessary metadata for the super kernel
h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim);
}


+ 36
- 31
src/ge/graph/manager/graph_manager.cc View File

@@ -41,20 +41,24 @@
#include "graph/ge_local_context.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/passes/addn_pass.h"
#include "graph/passes/atomic_addr_clean_pass.h"
#include "graph/passes/cast_remove_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/compile_nodes_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
#include "graph/passes/constant_fuse_same_pass.h"
#include "graph/passes/control_trigger_pass.h"
#include "graph/passes/ctrl_edge_transfer_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/identify_reference_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/merge_pass.h"
@@ -63,10 +67,11 @@
#include "graph/passes/permute_pass.h"
#include "graph/passes/prune_pass.h"
#include "graph/passes/replace_with_empty_const_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/reshape_recovery_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
#include "graph/passes/subgraph_pass.h"
#include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/switch_op_pass.h"
@@ -76,14 +81,10 @@
#include "graph/passes/transop_symmetry_elimination_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/ctrl_edge_transfer_pass.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/utils/tensor_adapter.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
@@ -369,14 +370,15 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
GM_RUN_AND_DUMP("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph,
session_id);
GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph);

GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner);
GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph);
GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph);
if (IsTailingOptimization()) {
GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph);
}
GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph);
GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed);
// TODO: to be delete
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip != nullptr) {
PassManager graph_pass;
@@ -423,7 +425,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
return ret;
}
}
ret = LoadGraph(ge_root_model, graph_node);
if (!graph_node->IsAsync()) {
ret = LoadGraph(ge_root_model, graph_node);
} else {
ret = LoadGraphAsync(ge_root_model, graph_node);
}
if (ret != SUCCESS) {
GELOGE(ret, "LoadGraph Failed.");
return ret;
@@ -432,7 +438,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId());
} else if (!graph_node->GetLoadFlag()) {
GeRootModelPtr ge_root_model_ptr = graph_node->GetGeRootModel();
ret = LoadGraph(ge_root_model_ptr, graph_node);
if (!graph_node->IsAsync()) {
ret = LoadGraph(ge_root_model_ptr, graph_node);
} else {
ret = LoadGraphAsync(ge_root_model_ptr, graph_node);
}
if (ret != SUCCESS) {
GELOGE(ret, "LoadGraph Failed.");
return ret;
@@ -587,7 +597,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso
GELOGI("[RunGraph] start to run graph, graph_id = %u, is_train_graph: %d", graph_id, GetTrainFlag());

if (inputs.empty()) {
GELOGI("[RunGraph] initilize sub graph has no inputs.");
GELOGI("[RunGraph] initialize sub graph has no inputs");
}

// find graph
@@ -689,7 +699,7 @@ Status GraphManager::GenerateInfershapeGraph(GraphId &graph_id) {
}

Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model) {
GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) {
GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id);
if (inputs.empty()) {
GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs");
@@ -712,15 +722,10 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen
GELOGE(GE_GRAPH_ALREADY_RUNNING, "[BuildGraph] graph already running, graph id = %u", graph_node->GetGraphId());
return GE_GRAPH_ALREADY_RUNNING;
}
graph_node->SetAsync(async);
// set graph's run flag
graph_node->SetRunFlag(true);

struct timeval tv;
if (gettimeofday(&tv, nullptr) != 0) {
GELOGE(INTERNAL_ERROR, "get the time of day failed.");
return INTERNAL_ERROR;
}
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id);
graph_node->SetRunFlag(false);
if (ret != SUCCESS) {
@@ -954,6 +959,9 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
}
options_.enable_print_op_pass = true;
ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass);

options_.is_single_op = false;
ret = ParseOption(options, SINGLE_OP_FLAG, options_.is_single_op);
GE_IF_BOOL_EXEC(ret != SUCCESS,
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1.");
return GE_GRAPH_OPTIONS_INVALID);
@@ -1555,6 +1563,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
}
PassManager after_merge_passes;
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass));
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass));
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass",
new (std::nothrow) CommonSubexpressionEliminationPass));
@@ -1579,8 +1589,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator");
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::VariableOpPass",
new (std::nothrow) VariableOpPass(&var_acc_ctrl_))))
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::TransOpDepthFusionPass", new (std::nothrow) TransOpDepthFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpWithoutReshapeFusionPass",
new (std::nothrow) TransOpWithoutReshapeFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
@@ -1660,7 +1668,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret);
return ret;
}

return SUCCESS;
}

@@ -1688,10 +1695,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass);
HcclGroupPass hccl_group_pass;
if (IsTailingOptimization()) {
names_to_passes.emplace_back("HcclGroupPass", &hccl_group_pass);
}
GE_TIMESTAMP_START(names_to_passes);
ret = GEPass(compute_graph).Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses");
@@ -1708,19 +1711,12 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {

PassManager pass_for_control_attr_optimize;
if (options_.train_graph_flag) {
// TODO: to be delete
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass",
new (std::nothrow) FlowCtrlPass))
}
}
// TODO: to be delete
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass",
new (std::nothrow) SubgraphPass));
}

GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass",
new (std::nothrow) MultiBatchPass))
@@ -1739,6 +1735,14 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass",
new (std::nothrow) AtomicAddrCleanPass))

const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
// SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and
// graph-structure. So try not to add new pass after SubgraphPass.
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass",
new (std::nothrow) SubgraphPass));
}

GE_TIMESTAMP_START(pass_for_control_attr_optimize);
ret = pass_for_control_attr_optimize.Run(compute_graph);
GE_TIMESTAMP_END(pass_for_control_attr_optimize, "OptimizeStage2::ControlAttrOptimize");
@@ -1908,6 +1912,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
graph_node->SetRunFlag(false);
return ret;
}
graph_node->SetLoadFlag(true);
ge_root_model->SetModelId(model_id_info.model_id);
graph_node->SetGeRootModel(ge_root_model);
}


+ 2
- 1
src/ge/graph/manager/graph_manager.h View File

@@ -99,7 +99,8 @@ class GraphManager {
/// @param [out] models build result
/// @return Status result of function
///
ge::Status BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, GeRootModelPtr &models);
ge::Status BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, GeRootModelPtr &models,
uint64_t session_id = 0, bool async = false);

///
/// @ingroup ge_graph


+ 1
- 0
src/ge/graph/manager/graph_manager_utils.cc View File

@@ -40,6 +40,7 @@ GraphNode::GraphNode(GraphId graph_id)
compute_graph_(nullptr),
build_flag_(false),
load_flag_(false),
async_(false),
ge_model_(nullptr),
sem_(1) {
graph_run_async_listener_ = MakeShared<RunAsyncListener>();


+ 7
- 1
src/ge/graph/manager/graph_manager_utils.h View File

@@ -152,6 +152,9 @@ class GraphNode {
bool GetRunFlag() const { return run_flag_; }
void SetRunFlag(bool flag) { run_flag_ = flag; }

bool IsAsync() const { return async_; }
void SetAsync(bool flag) { async_ = flag; }

void SetSubGraph(std::vector<SubGraphInfoPtr> &subgraph_ptr_list) { subgraph_ptr_list_ = subgraph_ptr_list; }
const std::vector<SubGraphInfoPtr> &GetAllSubGraph() const { return subgraph_ptr_list_; }

@@ -181,6 +184,7 @@ class GraphNode {
ComputeGraphPtr compute_graph_;
bool build_flag_;
bool load_flag_;
bool async_;
GeModelPtr ge_model_;
GeRootModelPtr ge_root_model_;
BlockingQueue<uint8_t> sem_;
@@ -239,6 +243,7 @@ struct GraphManagerOptions {
bool local_fmk_op_flag;
bool hcom_parallel;
bool enable_print_op_pass;
bool is_single_op;
std::map<std::string, int> stream_max_parallel_num;
std::string output_datatype;
std::string original_model_file;
@@ -247,7 +252,7 @@ struct GraphManagerOptions {
: stream_num(1),
perf_level(domi::GEN_TASK_WITHOUT_FUSION),
encrypt_mode(-1),
framework_type(domi::FMK_TYPE_T),
framework_type(domi::TENSORFLOW),
ek_file(""),
cert_file(""),
hw_key_file(""),
@@ -263,6 +268,7 @@ struct GraphManagerOptions {
local_fmk_op_flag(false),
hcom_parallel(false),
enable_print_op_pass(true),
is_single_op(false),
save_original_model("false") {}
};
} // namespace ge


+ 1
- 1
src/ge/graph/manager/graph_var_manager.cc View File

@@ -301,7 +301,7 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin
return SUCCESS;
}

int64_t MemResource::GetVarMemSize() const { return var_mem_size_; }
uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; }

void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; };



+ 1
- 1
src/ge/graph/manager/graph_var_manager.h View File

@@ -177,7 +177,7 @@ class MemResource {

Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset);

int64_t GetVarMemSize() const;
uint64_t GetVarMemSize() const;

void UpdateVarMemSize(int64_t mem_size);



+ 5
- 1
src/ge/graph/manager/util/rt_context_util.h View File

@@ -31,6 +31,10 @@ class RtContextUtil {

void AddrtContext(rtContext_t context);

const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; }

void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; }

void DestroyrtContexts();

RtContextUtil &operator=(const RtContextUtil &) = delete;
@@ -41,8 +45,8 @@ class RtContextUtil {
~RtContextUtil() {}

std::vector<rtContext_t> rtContexts_;
rtContext_t before_prerun_ctx_ = nullptr;
};
} // namespace ge

#endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_


+ 4
- 4
src/ge/graph/optimize/graph_optimize.cc View File

@@ -34,7 +34,7 @@ const char *const kAicoreEngine = "AIcoreEngine";

namespace ge {
GraphOptimize::GraphOptimize()
: optimize_type_(domi::FrameworkType::FMK_TYPE_T),
: optimize_type_(domi::FrameworkType::TENSORFLOW),
cal_config_(""),
insert_op_config_(""),
parse_out_node_(""),
@@ -73,7 +73,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {
src_index_list.emplace_back(peer_out_anchor->GetIdx());
node_op_desc->SetSrcName(src_name_list);
node_op_desc->SetSrcIndex(src_index_list);
GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::FMK_TYPE_T),
GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::TENSORFLOW),
ge::NodePtr peer_owner_node = peer_out_anchor->GetOwnerNode();
input_name_list.emplace_back(
peer_owner_node->GetName() +
@@ -260,7 +260,7 @@ Status GraphOptimize::OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_
}

Status GraphOptimize::SetOptions(const ge::GraphManagerOptions &options) {
if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FMK_TYPE_RESERVED)) {
if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FRAMEWORK_RESERVED)) {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Optimize Type %d invalid.", options.framework_type);
return GE_GRAPH_OPTIONS_INVALID;
}
@@ -293,7 +293,7 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) {
// set - framework_type
// [No need to verify return value]
op->SetType("FrameworkOp");
if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::FMK_TYPE_T)) {
if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::TENSORFLOW)) {
GELOGW("TranFrameOp SetInt ATTR_NAME_FRAMEWORK_FWK_TYPE failed");
}
}


+ 397
- 0
src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc View File

@@ -0,0 +1,397 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/optimize/optimizer/allreduce_fusion_pass.h"
#include <string>
#include "common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "common/types.h"
#include "common/util.h"
#include "graph/anchor.h"
#include "graph/node.h"
#include "graph/op_desc.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "hccl/base.h"
#include "hccl/hcom.h"

namespace ge {
///
/// @brief Fuse groups of HCOM allreduce nodes in the graph into fewer, larger allreduce nodes.
///        Collects every HCOMALLREDUCE node whose HCOM_ATTR_FUSION attribute is > 0, asks HCCL
///        (hcom_get_split_strategy) how to split them into segments, then replaces each
///        multi-node segment with a single cloned allreduce node and re-wires all of the
///        removed nodes' data/control edges onto it. Consecutive fused nodes are chained with a
///        control edge to preserve execution order between segments.
/// @param graph compute graph to be modified in place
/// @return SUCCESS on fusion; NOT_CHANGED when there are fewer than two candidates or the
///         strategy yields one segment per gradient; FAILED on HCCL or graph-edit errors
///
Status AllReducePass::Run(ge::ComputeGraphPtr graph) {
  GELOGI("FusionAllReducePass: start");
  std::vector<NodePtr> fusionOps;
  // Per-gradient size/time features fed to HCCL; currently placeholders (all zero).
  std::vector<float> inputGradientSize;
  std::vector<float> inputGradientTime;

  static const float inputGradientSizeTemp = 0.0;
  static const float inputGradientTimeTemp = 0.0;

  // Get all nodes: collect the allreduce candidates in graph traversal order.
  for (auto nodePtr : graph->GetDirectNode()) {
    GE_IF_BOOL_EXEC(nullptr == nodePtr, GELOGW("FusionAllReducePass: null node exists"); continue;);

    ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc();
    GE_IF_BOOL_EXEC(nullptr == opDescPtr,
                    GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str());
                    continue;)
    GE_IF_BOOL_EXEC(HCOMALLREDUCE == opDescPtr->GetType(),
                    // the op is allreduce and fusion > 0, then run fusion
                    // (missing HCOM_ATTR_FUSION is treated as the default 1, i.e. fusable)
                    std::int64_t hcom_fusion = 1;
                    GE_IF_BOOL_EXEC(!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion),
                                    GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr "
                                           "by HCOM_ATTR_FUSION"));
                    GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion); GE_IF_BOOL_EXEC(
                      hcom_fusion > 0, fusionOps.push_back(nodePtr); inputGradientSize.push_back(inputGradientSizeTemp);
                      inputGradientTime.push_back(inputGradientTimeTemp);))
  }
  // The number of allreduce operators must be more than 1 for fusion to make sense.
  GE_IF_BOOL_EXEC(1 >= fusionOps.size(), GELOGW("FusionAllReducePass NOT_CHANGED: the graph has "
                                                "%lu allreduce operator",
                                                fusionOps.size());
                  return NOT_CHANGED;);

  string group = "group";
  u32 gradientNum = fusionOps.size();
  string model_name_str = graph->GetName();
  const char *model_name = model_name_str.c_str();
  model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()};

  u32 segmentNum = 0;
  u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {};

  // Call HCCL function: hcom_gradient_segment — segmentIndex[i] is the (inclusive) index of the
  // last gradient belonging to segment i.
  GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy");
  GE_IF_BOOL_EXEC(HCCL_SUCCESS != hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM,
                                                          &segmentNum, segmentIndex),
                  GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %lu allreduce operator", fusionOps.size());
                  return FAILED;)
  GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully");

  // check whether segmentNum is legal or not
  GE_IF_BOOL_EXEC((HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum),
                  GELOGE(FAILED,
                         "FusionAllReducePass FAILED: illegal segmentNum=%u, "
                         "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u",
                         segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum);
                  return FAILED;);

  // check whether segmentIndex is legal or not: the last segment must end at the last gradient.
  GE_IF_BOOL_EXEC((segmentIndex[segmentNum - 1] != gradientNum - 1),
                  GELOGE(FAILED,
                         "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, "
                         "segmentIndex[segmentNum-1]=%u, gradientNum=%u",
                         segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum);
                  return FAILED;);

  // Segment boundaries must be strictly increasing.
  for (uint32_t i = 0; i < segmentNum - 1; i++) {
    GE_IF_BOOL_EXEC(segmentIndex[i] >= segmentIndex[i + 1], GELOGE(FAILED,
                                                                   "FusionAllReducePass FAILED: illegal "
                                                                   "segmentIndex[%u]=%u, segmentIndex[%u]=%u",
                                                                   i, segmentIndex[i], i + 1, segmentIndex[i + 1]);
                    return FAILED;);
  }

  // check whether fusion is needed or not: one segment per gradient means nothing to fuse.
  GE_IF_BOOL_EXEC(
    segmentNum == gradientNum,
    GELOGE(NOT_CHANGED, "FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum);
    return NOT_CHANGED;)

  // Reused per-segment scratch: anchors already processed, and the edges (peers) detached from
  // the old nodes that must be re-attached to the fused node.
  std::unordered_set<void *> anchorPtrSet;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataAnchor;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataToInControl;
  std::vector<ge::OutControlAnchorPtr> fusionOpPeerOutControlAnchor;
  std::vector<std::pair<int, ge::InDataAnchorPtr>> fusionOpPeerInDataAnchor;
  std::vector<std::pair<int, ge::InControlAnchorPtr>> fusionOpPeerInControlFromOutData;
  std::vector<ge::InControlAnchorPtr> fusionOpPeerInControlAnchor;
  ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr;

  // Traversing the segmentNum: [start, end] indexes fusionOps for the current segment.
  uint32_t start = 0;
  uint32_t end = 0;
  for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) {
    end = segmentIndex[segmentIdx];
    // Single-node segment: leave the node untouched.
    GE_IF_BOOL_EXEC(end - start < 1,
                    GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]);
                    start = end + 1; continue;);

    // The fused node's desc is cloned from the segment's first allreduce.
    ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc();
    GE_CHECK_NOTNULL(originDescPtr);
    ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr);
    GE_CHECK_NOTNULL(newAllreduceDesc);

    // Clear the per-segment buffers before reuse.
    anchorPtrSet.clear();
    fusionOpPeerOutDataAnchor.clear();
    fusionOpPeerOutDataToInControl.clear();
    fusionOpPeerOutControlAnchor.clear();
    fusionOpPeerInDataAnchor.clear();
    fusionOpPeerInControlFromOutData.clear();
    fusionOpPeerInControlAnchor.clear();

    // Traversing the Allreduce operators of each group: detach the first node's edges (its
    // outputs map to slot 0 of the fused node), then remove it.
    int outDataAnchorIndex = 0;
    GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]),
                      "Get peer outDataAnchor to inDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                               fusionOps[start]),
                      "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]),
                      "Get peer outDataAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor from inControlAnchor failed");
    GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.",
                      fusionOps[start]->GetName().c_str());

    // Remaining nodes of the segment: their input/output descs are appended to the cloned desc
    // (4-arg overload / GetPeerAnchorFromOutData) before each node is removed.
    for (uint32_t idx = start + 1; idx <= end; idx++) {
      GE_CHK_STATUS_RET(
        GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc),
        "Get peer outDataAnchor to inDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]),
                        "Get peer outDataAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(
        GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                 fusionOps[idx], newAllreduceDesc, outDataAnchorIndex),
        "Get peerAnchor from outDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor from inControlAnchor failed");

      // Delete the node
      GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.",
                        fusionOps[idx]->GetName().c_str());
    }

    NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc);
    GE_CHECK_NOTNULL(newAllReducePtr);
    // Link the inputDataAnchor: recorded upstream outputs connect to inputs 0..n-1 in order.
    for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) {
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast<int>(i))),
        "FusionAllReducePass FAILED: add input data edge failed");
    }

    // Link the inputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add input control edge failed");
    }

    for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add edge from out data to incontrol "
                        "failed");
    }

    // Link the outputDataAnchor: pair.first is the fused node's output slot recorded at detach time.
    for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) {
      auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second;
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor),
        "FusionAllReducePass FAILED: add output data edge failed");
    }
    for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) {
      auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second;
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first),
                            peerInControlAnchor),
        "FusionAllReducePass FAILED: add edge from out data to in control "
        "failed");
    }

    // Link the outputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]),
                        "FusionAllReducePass FAILED: add output control edge failed");
    }

    // Link the newAllreduce: chain fused nodes with a control edge to keep segment order.
    if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) {
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()),
        "FusionAllReducePass FAILED: add input previous control edge failed");
    }

    previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor();
    start = end + 1;
  }

  return SUCCESS;
}

///
/// @brief Detach every data edge feeding srcNodePtr and remember the upstream out-anchors.
///        Anchors already present in anchorSet are skipped (deduplication across calls).
/// @param anchorSet            set of raw anchor pointers already processed; updated here
/// @param peerOutDataAnchorVec receives the detached upstream out-data anchors, in input order
/// @param srcNodePtr           node whose incoming data edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr) {
  for (auto in_anchor : srcNodePtr->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_anchor == nullptr, continue;);
    OutDataAnchorPtr upstream_anchor = in_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(upstream_anchor == nullptr, continue;);
    if (anchorSet.find(upstream_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(upstream_anchor.get());
    peerOutDataAnchorVec.push_back(upstream_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(upstream_anchor, in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach every data and control edge leaving srcNodePtr's out-data anchors, recording
///        each downstream anchor paired with output slot 0 of the (future) fused node.
///        Anchors already present in anchorSet are skipped (deduplication across calls).
/// @param anchorSet                       set of raw anchor pointers already processed; updated here
/// @param fusionOpPeerInDataAnchor        receives (slot 0, downstream in-data anchor) pairs
/// @param fusionOpPeerInControlFromOutData receives (slot 0, downstream in-control anchor) pairs
/// @param srcNodePtr                      node whose outgoing edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerInAnchorToOutData(
  std::unordered_set<void *> &anchorSet, std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
  std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) {
  for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) {
    GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;);
    for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) {
      GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;);
      if (anchorSet.count(peerInDataAnchor.get()) == 0) {
        // Construct the pair in place with the vector's own element type (was a named
        // temporary; the control-flow branch below even used pair<uint32_t, ...>, forcing a
        // silent converting copy on push_back — fixed to pair<int, ...> via emplace_back).
        fusionOpPeerInDataAnchor.emplace_back(0, peerInDataAnchor);
        anchorSet.insert(peerInDataAnchor.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor));
      }
    }

    for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) {
      GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;);
      if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) {
        fusionOpPeerInControlFromOutData.emplace_back(0, peerInControlAnchorFromData);
        anchorSet.insert(peerInControlAnchorFromData.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData));
      }
    }
  }
  return SUCCESS;
}

///
/// @brief Detach every data edge feeding srcNodePtr, remember the upstream out-anchors, and
///        mirror each consumed input's tensor desc onto dstOpDescPtr (the fused op under
///        construction). Anchors already present in anchorSet are skipped.
/// @param anchorSet            set of raw anchor pointers already processed; updated here
/// @param peerOutDataAnchorVec receives the detached upstream out-data anchors, in input order
/// @param srcNodePtr           node whose incoming data edges are removed
/// @param dstOpDescPtr         op desc that accumulates the input descs
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) {
  for (auto in_anchor : srcNodePtr->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_anchor == nullptr, continue;);
    OutDataAnchorPtr upstream_anchor = in_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(upstream_anchor == nullptr, continue;);
    if (anchorSet.find(upstream_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(upstream_anchor.get());
    peerOutDataAnchorVec.push_back(upstream_anchor);
    // Carry the original input tensor description over to the fused op.
    if (dstOpDescPtr->AddInputDesc(in_anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(in_anchor->GetIdx())) !=
        ge::GRAPH_SUCCESS) {
      GELOGW("GetPeerOutDataToInData: AddInputDesc failed");
    }
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(upstream_anchor, in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach the data->control edges ending at srcNodePtr's in-control anchor and remember
///        the upstream out-data anchors. Anchors already present in anchorSet are skipped.
/// @param anchorSet                 set of raw anchor pointers already processed; updated here
/// @param peerOutDataToInControlVec receives the detached upstream out-data anchors
/// @param srcNodePtr                node whose incoming data->control edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                                vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec,
                                                ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr ctrl_in_anchor = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(ctrl_in_anchor);
  for (auto data_src_anchor : ctrl_in_anchor->GetPeerOutDataAnchors()) {
    GE_IF_BOOL_EXEC(data_src_anchor == nullptr, continue;);
    if (anchorSet.find(data_src_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(data_src_anchor.get());
    peerOutDataToInControlVec.push_back(data_src_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(data_src_anchor, ctrl_in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach the control edges ending at srcNodePtr's in-control anchor and remember the
///        upstream out-control anchors. Anchors already present in anchorSet are skipped.
/// @param anchorSet                    set of raw anchor pointers already processed; updated here
/// @param peerOutControlToInControlVec receives the detached upstream out-control anchors
/// @param srcNodePtr                   node whose incoming control edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                                   vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                                   ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr ctrl_in_anchor = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(ctrl_in_anchor);
  for (auto ctrl_src_anchor : ctrl_in_anchor->GetPeerOutControlAnchors()) {
    GE_IF_BOOL_EXEC(ctrl_src_anchor == nullptr, continue;);
    if (anchorSet.find(ctrl_src_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(ctrl_src_anchor.get());
    peerOutControlToInControlVec.push_back(ctrl_src_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(ctrl_src_anchor, ctrl_in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach every data and control edge leaving srcNodePtr's out-data anchors. For each
///        out-data anchor that has at least one peer, the corresponding output desc is copied
///        onto dstOpDescPtr and `index` is advanced; the detached downstream anchors are
///        recorded paired with that slot index for later re-linking to the fused node.
/// @param anchorSet                  set of raw anchor pointers already processed; updated here
/// @param peerInDataFromOutDataVec   receives (slot, downstream in-data anchor) pairs
/// @param peerInControlFromOutDataVec receives (slot, downstream in-control anchor) pairs
/// @param srcNodePtr                 node whose outgoing edges are removed
/// @param dstOpDescPtr               op desc that accumulates the output descs
/// @param index                      running output-slot counter shared across calls for one
///                                   fused node (outDataAnchorIndex in Run). NOTE(review):
///                                   pairs record the value AFTER the increment, while
///                                   GetPeerInAnchorToOutData records slot 0 for the first
///                                   node — confirm the slot mapping is intentional.
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerAnchorFromOutData(
  std::unordered_set<void *> &anchorSet, vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
  vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr,
  ge::OpDescPtr &dstOpDescPtr, int &index) {
  for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) {
    GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;)
    // Only anchors with at least one consumer contribute an output slot to the fused op.
    if (outDataAnchor->GetPeerInDataAnchors().size() > 0 || outDataAnchor->GetPeerInControlAnchors().size() > 0) {
      if (dstOpDescPtr->AddOutputDesc(
            outDataAnchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(outDataAnchor->GetIdx())) != ge::GRAPH_SUCCESS) {
        GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed");
      }
      index++;
    }

    // Record and detach the data consumers of this anchor.
    for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) {
      GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;)
      if (anchorSet.count(peerInDataAnchor.get()) == 0) {
        std::pair<int, ge::InDataAnchorPtr> pairPeerInDataAnchor;
        pairPeerInDataAnchor.first = index;
        pairPeerInDataAnchor.second = peerInDataAnchor;
        peerInDataFromOutDataVec.push_back(pairPeerInDataAnchor);
        anchorSet.insert(peerInDataAnchor.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor))
      }
    }

    // Record and detach the control consumers hanging off this data anchor.
    for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) {
      GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;)
      if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) {
        std::pair<int, ge::InControlAnchorPtr> pairPeerInControlAnchorFromData;
        pairPeerInControlAnchorFromData.first = index;
        pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData;
        peerInControlFromOutDataVec.push_back(pairPeerInControlAnchorFromData);
        anchorSet.insert(peerInControlAnchorFromData.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData))
      }
    }
  }
  return SUCCESS;
}

///
/// @brief Detach the control edges leaving srcNodePtr's out-control anchor and remember the
///        downstream in-control anchors. Anchors already present in anchorSet are skipped.
/// @param anchorSet                     set of raw anchor pointers already processed; updated here
/// @param peerInControlFromOutControlVec receives the detached downstream in-control anchors
/// @param srcNodePtr                    node whose outgoing control edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                                     vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                                     ge::NodePtr &srcNodePtr) {
  OutControlAnchorPtr ctrl_out_anchor = srcNodePtr->GetOutControlAnchor();
  GE_CHECK_NOTNULL(ctrl_out_anchor);
  for (auto ctrl_dst_anchor : ctrl_out_anchor->GetPeerInControlAnchors()) {
    GE_IF_BOOL_EXEC(ctrl_dst_anchor == nullptr, continue;)
    if (anchorSet.find(ctrl_dst_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(ctrl_dst_anchor.get());
    peerInControlFromOutControlVec.push_back(ctrl_dst_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(ctrl_out_anchor, ctrl_dst_anchor))
  }
  return SUCCESS;
}
} // namespace ge

+ 55
- 0
src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h View File

@@ -0,0 +1,55 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_
#define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_

#include <unordered_set>
#include <utility>
#include <vector>
#include "inc/graph_pass.h"

namespace ge {
///
/// @brief Graph pass that fuses groups of HCOM allreduce nodes into single allreduce nodes,
///        splitting the candidates into segments according to the HCCL split strategy and
///        re-wiring all detached data/control edges onto each fused node.
///
class AllReducePass : public GraphPass {
 public:
  /// Run the fusion on the whole graph. Returns SUCCESS, NOT_CHANGED (nothing to fuse),
  /// or FAILED.
  Status Run(ge::ComputeGraphPtr graph) override;

 private:
  /// Detach srcNodePtr's incoming data edges, recording upstream anchors and copying the
  /// consumed input descs onto dstOpDescPtr (the fused op under construction).
  Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr,
                                ge::OpDescPtr &dstOpDescPtr);
  /// Detach data->control edges ending at srcNodePtr, recording the upstream out-data anchors.
  Status GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                   vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, ge::NodePtr &srcNodePtr);
  /// Detach control edges ending at srcNodePtr, recording the upstream out-control anchors.
  Status GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                      vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                      ge::NodePtr &srcNodePtr);
  /// Detach srcNodePtr's outgoing edges, recording downstream anchors paired with the fused
  /// node's output slot (advanced via `index`) and copying output descs onto dstOpDescPtr.
  Status GetPeerAnchorFromOutData(std::unordered_set<void *> &anchorSet,
                                  vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
                                  vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec,
                                  ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index);
  /// Detach srcNodePtr's outgoing control edges, recording the downstream in-control anchors.
  Status GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                        vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                        ge::NodePtr &srcNodePtr);
  /// Detach srcNodePtr's incoming data edges (first node of a segment; no desc copying).
  Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr);
  /// Detach srcNodePtr's outgoing edges (first node of a segment), recording downstream
  /// anchors paired with output slot 0.
  Status GetPeerInAnchorToOutData(std::unordered_set<void *> &anchorSet,
                                  std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
                                  std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData,
                                  ge::NodePtr &srcNodePtr);
};
} // namespace ge
#endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_

+ 3
- 2
src/ge/graph/partition/dynamic_shape_partition.cc View File

@@ -745,7 +745,8 @@ Status Cluster::BuildPartitionSubgraph() {
}
int64_t parent_node_index = 0;
for (auto anchor : inputs_) {
auto data_op = MakeShared<OpDesc>(std::string("Data_") + std::to_string(parent_node_index), ge::DATA);
auto data_op =
MakeShared<OpDesc>(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA);
REQUIRE_NOT_NULL(data_op, "Failed new memory for data op.");
auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx());
REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc.");
@@ -763,7 +764,7 @@ Status Cluster::BuildPartitionSubgraph() {
if (outputs_.empty() && control_outputs_.empty()) {
return SUCCESS;
}
auto net_output_op = MakeShared<OpDesc>(NODE_NAME_NET_OUTPUT, ge::NETOUTPUT);
auto net_output_op = MakeShared<OpDesc>(subgraph_->GetName() + "_" + NODE_NAME_NET_OUTPUT, ge::NETOUTPUT);
REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op.");
for (size_t i = 0; i < outputs_.size(); ++i) {
GeTensorDesc input_desc;


+ 12
- 17
src/ge/graph/partition/graph_partition.cc View File

@@ -300,11 +300,9 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
GE_CHECK_NOTNULL(end_graph);
const auto &src_node = out_anchor->GetOwnerNode();
const auto &dst_node = peer_in_anchor->GetOwnerNode();
string engine_end_name;
string engine_pld_name;
// link input -> end
string end_name = kEndType + std::to_string(graph_info_.num_of_pld_end_);
auto end_op_desc = MakeShared<OpDesc>(end_name, END);
auto end_op_desc = MakeShared<OpDesc>(end_graph->GetName() + "_" + end_name, END);
if (end_op_desc == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr.");
return FAILED;
@@ -318,15 +316,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning);
if (is_need_update_desc) {
if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d, engine name is %s", output_index,
engine_end_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d", output_index);
return FAILED;
}
} else {
GeTensorDesc input_desc;
if (end_op_desc->AddInputDesc(input_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d, engine name is %s", output_index,
engine_end_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d", output_index);
return FAILED;
}
}
@@ -346,11 +342,11 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
}
/// For fe, op id has been set in AddNode,
/// we can take op id of srcNode as the mark of parentId now
auto const &src_node_opdesc = src_node->GetOpDesc();
const auto &src_node_opdesc = src_node->GetOpDesc();
GE_CHECK_NOTNULL(src_node_opdesc);
int64_t node_id = src_node_opdesc->GetId();
const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_);
auto pld_op_desc = MakeShared<OpDesc>(pld_name, PLACEHOLDER);
auto pld_op_desc = MakeShared<OpDesc>(pld_graph->GetName() + "_" + pld_name, PLACEHOLDER);
if (pld_op_desc == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr.");
return FAILED;
@@ -370,15 +366,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning);
if (is_need_update_desc) {
if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d, engine name is %s", input_index,
engine_pld_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d", input_index);
return FAILED;
}
} else {
GeTensorDesc output_desc;
if (pld_op_desc->AddOutputDesc(output_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d, engine name is %s", input_index,
engine_pld_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d", input_index);
return FAILED;
}
}
@@ -399,8 +393,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
return FAILED;
}
graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node;
graph_info_.pld_2_end_[new_pld_node] = new_end_node;
graph_info_.end_2_pld_[new_end_node] = new_pld_node;
graph_info_.pld_2_end_[new_pld_node] = new_end_node;
return SUCCESS;
}

@@ -591,7 +585,8 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vector<ge::SubGraphInfoPtr
sgi->SetOutputContext(graph_info_.output_name_);
AddEndPldInformationToSubGraphInfo(sgi);
GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s",
engine_name.c_str(), sub_graph->GetName().c_str(), sgi->GetStreamLabel().c_str());
engine_name.c_str(), sub_graph->GetName().c_str(),
sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str());
output_subgraphs.push_back(sgi);
}
}
@@ -896,8 +891,8 @@ Status ge::GraphPartitioner::AddPlaceHolderEnd(const AnchorPtr &out_anchor, cons
return FAILED;
}
// nodes in original graph
auto src_node = out_anchor->GetOwnerNode();
auto dst_node = in_anchor->GetOwnerNode();
const auto &src_node = out_anchor->GetOwnerNode();
const auto &dst_node = in_anchor->GetOwnerNode();
if ((src_node == nullptr) || (dst_node == nullptr)) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "src_node or dst_node is null.");
return FAILED;


+ 2
- 2
src/ge/graph/passes/aicpu_constant_folding_pass.cc View File

@@ -323,7 +323,7 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons
aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
std::string task_info;
Status ret = kernel_info->GenSingleOpRunTask(node, aicpu_task, task_info);
if (ret != SUCCESS) {
@@ -378,7 +378,7 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector<uint64_t> &data_
aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
std::string task_info;
Status ret = kernel_info->GenMemCopyTask(data_infos.size(), aicpu_task, task_info);
if (ret != SUCCESS) {


+ 5
- 2
src/ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -172,9 +172,12 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) {
if (!session_graph_id.empty()) {
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
}
// Only flush subgraph name
string node_name = (graph->GetParentGraph() != nullptr)
? (graph->GetName() + "_" + op_desc->GetName() + session_graph_id)
: (op_desc->GetName() + session_graph_id);

string name = op_desc->GetName() + session_graph_id;
op_desc->SetName(name);
op_desc->SetName(node_name);
GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str());
// To avoid same name between graphs, set session graph id to this node
NodePtr clean_addr_node = graph->AddNodeFront(op_desc);


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save