@@ -133,6 +133,7 @@ set(EXECUTOR_SRC_LIST | |||
"graph/load/model_manager/task_info/event_record_task_info.cc" | |||
"graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
"graph/load/model_manager/task_info/ffts_task_info.cc" | |||
"graph/load/model_manager/task_info/ffts_plus_task_info.cc" | |||
"graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
"graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
#"graph/load/model_manager/task_info/hccl_task_info.cc" # Just for runner. | |||
@@ -120,6 +120,39 @@ class PluginManager { | |||
} | |||
return SUCCESS; | |||
} | |||
template <typename T1, typename T2> | |||
Status OptionalInvokeAll(const string &func_name, T1 arg1, T2 arg2) { | |||
for (const auto &handle : handles_) { | |||
// If the funcName is existed, signature of realFn can be casted to any type | |||
auto real_fn = (void (*)(T1, T2))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
continue; | |||
} else { | |||
typename std::remove_reference<T1>::type arg1_temp; | |||
typename std::remove_reference<T2>::type arg2_temp; | |||
real_fn(arg1_temp, arg2_temp); | |||
CheckAndInsert(handle.first, func_name, arg1, arg1_temp); | |||
CheckAndInsert(handle.first, func_name, arg2, arg2_temp); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
template <typename T> | |||
void CheckAndInsert(const std::string &so_name, const std::string &func_name, T arg, T arg_tmp) { | |||
if (std::is_same<typename std::remove_reference<T>::type, map<std::string, std::shared_ptr<DNNEngine>>>::value) { | |||
for (const auto &val : arg_tmp) { | |||
if (arg.find(val.first) != arg.end()) { | |||
GELOGW("FuncName %s in so %s find the same key: %s, will replace it", func_name.c_str(), | |||
so_name.c_str(), val.first.c_str()); | |||
arg[val.first] = val.second; | |||
} | |||
} | |||
} | |||
arg.insert(arg_tmp.begin(), arg_tmp.end()); | |||
} | |||
template <typename T1, typename T2> | |||
Status InvokeAll(const string &func_name, T1 arg) { | |||
for (const auto &handle : handles_) { | |||
@@ -17,20 +17,15 @@ | |||
#include "engine_manager/dnnengine_manager.h" | |||
#include <cstdio> | |||
#include <fstream> | |||
#include <map> | |||
#include <utility> | |||
#include "framework/common/debug/log.h" | |||
#include "common/ge/ge_util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "analyzer/analyzer.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/types.h" | |||
namespace { | |||
const char *const kSchedulerUnits = "schedule_units"; | |||
@@ -40,7 +35,7 @@ const char *const kExAttrs = "ex_attrs"; | |||
const char *const kIndependent = "independent"; | |||
const char *const kSkipAssignStream = "skip_assign_stream"; | |||
const char *const kCalEngines = "cal_engines"; | |||
const char *const kAttch = "attach"; | |||
const char *const kAttach = "attach"; | |||
const char *const kVectorCore = "VectorCore"; | |||
const char *const kVectorEngine = "VectorEngine"; | |||
const char *const kAIcoreEngine = "AIcoreEngine"; | |||
@@ -51,6 +46,10 @@ const char *const kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE"; | |||
namespace ge { | |||
namespace { | |||
const std::set<std::string> kNotCpuOp = {DATA, CONSTANT, CONSTANTOP, VARIABLE, NETOUTPUT}; | |||
const char *const kGetDNNEngineObjs = "GetDNNEngineObjs"; | |||
const char *const kGetCompoundEngineContains = "GetCompoundEngineContains"; | |||
const char *const kInvalidCompoundEngineName = "InvalidCompoundEngineName"; | |||
constexpr uint32_t kMaxRecursiveDepth = 10; | |||
bool ExecOnHostCpu(const OpDescPtr &op_desc) { | |||
bool is_host_cpu_op = (kNotCpuOp.find(op_desc->GetType()) == kNotCpuOp.end()); | |||
@@ -72,22 +71,21 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op | |||
} | |||
// Load engine so | |||
std::string so_path = "plugin/nnengine/"; | |||
std::string plugin_so_path = "plugin/nnengine/"; | |||
std::string path = PluginManager::GetPath(); | |||
path.append(so_path); | |||
std::string so_api_func = "GetDNNEngineObjs"; | |||
std::vector<std::string> so_func{so_api_func}; | |||
Status status = plugin_mgr_.Load(path, so_func); | |||
std::string engine_plugin_path = path + plugin_so_path; | |||
std::vector<std::string> so_func{kGetDNNEngineObjs}; | |||
Status status = plugin_mgr_.Load(engine_plugin_path, so_func); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "[Load][EngineSo]Failed, lib path %s", path.c_str()); | |||
REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", path.c_str()); | |||
REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", engine_plugin_path.c_str()); | |||
return status; | |||
} | |||
status = plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(so_api_func, engines_map_); | |||
status = plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(kGetDNNEngineObjs, engines_map_); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", so_api_func.c_str()); | |||
REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", so_api_func.c_str()); | |||
GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", kGetDNNEngineObjs); | |||
REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", kGetDNNEngineObjs); | |||
return status; | |||
} | |||
@@ -117,8 +115,8 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op | |||
if ((attrs.mem_type.size()) != 1 || (attrs.mem_type[0] != GE_ENGINE_ATTR_MEM_TYPE_HBM)) { | |||
GELOGE(GE_ENG_MEMTYPE_ERROR, "[Check][Param]Engine %s in aicore, but the memory type is " | |||
"not HBM, mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size()); | |||
REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, " | |||
"mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size()); | |||
REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, mem_type_size %lu", | |||
(iter->first).c_str(), attrs.mem_type.size()); | |||
return GE_ENG_MEMTYPE_ERROR; | |||
} | |||
} | |||
@@ -161,6 +159,7 @@ Status DNNEngineManager::Finalize() { | |||
} | |||
init_flag_ = false; | |||
engines_map_.clear(); | |||
atomic_2_compound_.clear(); | |||
return SUCCESS; | |||
} | |||
@@ -183,7 +182,7 @@ bool DNNEngineManager::IsEngineRegistered(const std::string &name) { | |||
return false; | |||
} | |||
void DNNEngineManager::InitPerformanceStaistic() { | |||
void DNNEngineManager::InitPerformanceStatistic() { | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
checksupport_cost_.clear(); | |||
} | |||
@@ -221,43 +220,42 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; | |||
GELOGD("engine type will exclude: %s", exclude_core_Type.c_str()); | |||
auto root_graph = ge::GraphUtils::FindRootGraph(node_ptr->GetOwnerComputeGraph()); | |||
std::map<std::string, std::string> unsupported_reasons; | |||
for (const auto &it : op_infos) { | |||
if (it.engine == exclude_core_Type) { | |||
continue; | |||
} | |||
auto &kernel_map = ops_kernel_manager.GetAllOpsKernelInfoStores(); | |||
auto &kernel_name = it.opKernelLib; | |||
auto kernel_info_store = kernel_map.find(kernel_name); | |||
if (kernel_info_store != kernel_map.end()) { | |||
std::string unsupported_reason; | |||
// It will be replaced by engine' checksupport | |||
uint64_t start_time = GetCurrentTimestamp(); | |||
if (kernel_info_store->second->CheckSupported(node_ptr, unsupported_reason)) { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
op_desc->SetOpEngineName(it.engine); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
// set attrs for taking information when load txt to graph object | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); | |||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), | |||
it.engine.c_str(), op_desc->GetName().c_str()); | |||
return it.engine; | |||
} else { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
unsupported_reasons.emplace(kernel_name, unsupported_reason); | |||
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (!op_desc->HasAttr("_is_ge_op")) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); | |||
} | |||
const auto &kernel_name = it.opKernelLib; | |||
auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(kernel_name); | |||
if (kernel_info_store == nullptr) { | |||
GELOGW("DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s, op type is %s, " | |||
"op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
} | |||
std::string unsupported_reason; | |||
// It will be replaced by engine's check support | |||
uint64_t start_time = GetCurrentTimestamp(); | |||
if (kernel_info_store->CheckSupported(node_ptr, unsupported_reason)) { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
op_desc->SetOpEngineName(it.engine); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
// set attrs for taking information when load txt to graph object | |||
if (it.flagAsync) { | |||
GELOGD("Set aicpu blocking op:%s attribute(is_blocking_op):true", op_desc->GetName().c_str()); | |||
(void)AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
} | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); | |||
GELOGD("DNNEngineManager:Set kernel_lib %s, atomic engine %s, to node %s", kernel_name.c_str(), it.engine.c_str(), | |||
op_desc->GetName().c_str()); | |||
return it.engine; | |||
} else { | |||
GELOGW( | |||
"DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s," | |||
"op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
unsupported_reasons.emplace(kernel_name, unsupported_reason); | |||
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (!op_desc->HasAttr("_is_ge_op")) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); | |||
} | |||
} | |||
} | |||
@@ -272,6 +270,7 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); | |||
} | |||
auto root_graph = ge::GraphUtils::FindRootGraph(node_ptr->GetOwnerComputeGraph()); | |||
analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), | |||
analyzer::CHECKSUPPORT, node_ptr, reason}; | |||
// do not change original process | |||
@@ -285,6 +284,157 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
return ""; | |||
} | |||
std::string DNNEngineManager::GetCompoundEngineName(const ge::NodePtr &node_ptr, uint32_t recursive_depth) { | |||
if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) { | |||
return ""; | |||
} | |||
const auto &op_desc = node_ptr->GetOpDesc(); | |||
if (recursive_depth > kMaxRecursiveDepth) { | |||
REPORT_INNER_ERROR("E19999", "Get CompoundEngineName will be terminated because too many nesting levels(%d) of " | |||
"subgraphs, last node is %s", recursive_depth, op_desc->GetName().c_str()); | |||
GELOGE(PARAM_INVALID, | |||
"[Check][Param] Get CompoundEngineName will be terminated because too many nesting levels(%d) of subgraphs, " | |||
"last node is %s", recursive_depth, op_desc->GetName().c_str()); | |||
return ""; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][CompoundEngineName]Failed, gelib not init before"); | |||
REPORT_INNER_ERROR("E19999", "Get CompoundEngineName failed, gelib not init before"); | |||
return ""; | |||
} | |||
if (instance_ptr->OpsKernelManagerObj().GetCompoundEngineContains().empty() || | |||
instance_ptr->OpsKernelManagerObj().GetCompoundEngineKernelLibName().empty()) { | |||
return ""; | |||
} | |||
// compound engine name exist | |||
std::string compound_engine_name; | |||
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name); | |||
std::string compound_engine_kernel_lib_name; | |||
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name); | |||
if (!(compound_engine_name.empty() || compound_engine_kernel_lib_name.empty())) { | |||
return compound_engine_name; | |||
} | |||
// normal node without subgraph | |||
if (op_desc->GetSubgraphInstanceNames().empty()) { | |||
auto atomic_engine_name = op_desc->GetOpEngineName(); | |||
if (atomic_engine_name.empty()) { | |||
atomic_engine_name = GetDNNEngineName(node_ptr); | |||
} | |||
compound_engine_name = GetOwningCompoundEngine(atomic_engine_name); | |||
compound_engine_kernel_lib_name = GetCompoundEngineKernelLibName(compound_engine_name); | |||
if (compound_engine_name.empty() || compound_engine_kernel_lib_name.empty()) { | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME); | |||
} else { | |||
GELOGI("Assign compound engine %s, kernel lib name %s for node %s.", | |||
compound_engine_name.c_str(), compound_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str()); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name); | |||
} | |||
return compound_engine_name; | |||
} | |||
bool graph_diff_compound_engine_flag = false; | |||
std::string graph_compound_engine_name = kInvalidCompoundEngineName; | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (NodeUtils::GetSubgraphs(node_ptr, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
return ""; | |||
} | |||
for (const auto &subgraph : subgraphs) { | |||
std::string cur_graph_compound_engine_name; | |||
// if subgraph has been assigned | |||
if (subgraph->HasAttr(ATTR_NAME_COMPOUND_ENGINE_NAME)) { | |||
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_COMPOUND_ENGINE_NAME, cur_graph_compound_engine_name); | |||
} else { | |||
bool node_diff_compound_engine_flag = false; | |||
std::string node_compound_engine_name = kInvalidCompoundEngineName; | |||
uint32_t assign_node_num = 0; | |||
for (const auto &cur_node : subgraph->GetDirectNode()) { | |||
if (IsStreamAssignSkip(cur_node) && cur_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { | |||
continue; | |||
} | |||
assign_node_num++; | |||
std::string cur_node_compound_engine_name = GetCompoundEngineName(cur_node, recursive_depth + 1); | |||
if (node_compound_engine_name == kInvalidCompoundEngineName) { | |||
node_compound_engine_name = cur_node_compound_engine_name; | |||
} else if (node_compound_engine_name != cur_node_compound_engine_name) { | |||
node_diff_compound_engine_flag = true; | |||
break; | |||
} | |||
} | |||
if (assign_node_num == 0) { | |||
GELOGD("all nodes in subgraph %s belongs to ge_local engine", subgraph->GetName().c_str()); | |||
continue; | |||
} | |||
if (!(node_diff_compound_engine_flag || | |||
(node_compound_engine_name == kInvalidCompoundEngineName) || | |||
node_compound_engine_name.empty())) { | |||
GELOGI("Assign compound engine %s for subgraph %s.", node_compound_engine_name.c_str(), subgraph->GetName().c_str()); | |||
(void)AttrUtils::SetStr(subgraph, ATTR_NAME_COMPOUND_ENGINE_NAME, node_compound_engine_name); | |||
cur_graph_compound_engine_name = node_compound_engine_name; | |||
} else { | |||
(void)subgraph->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
cur_graph_compound_engine_name.clear(); | |||
} | |||
} | |||
if (graph_compound_engine_name == kInvalidCompoundEngineName) { | |||
graph_compound_engine_name = cur_graph_compound_engine_name; | |||
} else if (graph_compound_engine_name != cur_graph_compound_engine_name) { | |||
graph_diff_compound_engine_flag = true; | |||
break; | |||
} | |||
} | |||
compound_engine_kernel_lib_name = GetCompoundEngineKernelLibName(graph_compound_engine_name); | |||
if (!(graph_diff_compound_engine_flag || (graph_compound_engine_name == kInvalidCompoundEngineName) || | |||
graph_compound_engine_name.empty() || compound_engine_kernel_lib_name.empty())) { | |||
compound_engine_name = graph_compound_engine_name; | |||
GELOGI("Assign compound engine %s, kernel lib name %s for node %s.", | |||
compound_engine_name.c_str(), compound_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str()); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name); | |||
} else { | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME); | |||
} | |||
return compound_engine_name; | |||
} | |||
std::string DNNEngineManager::GetOwningCompoundEngine(const string &atomic_engine_name) { | |||
if (atomic_2_compound_.empty()) { | |||
InitAtomicCompoundMapping(); | |||
} | |||
const auto &iter = atomic_2_compound_.find(atomic_engine_name); | |||
if (iter == atomic_2_compound_.end()) { | |||
GELOGW("Compound engine which contains atomic engine %s is not registered", atomic_engine_name.c_str()); | |||
return ""; | |||
} | |||
return iter->second; | |||
} | |||
std::string DNNEngineManager::GetCompoundEngineKernelLibName(const string &compound_engine_name) const { | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGW("[Get][CompoundEngineKernelLibName]Failed, gelib not init before"); | |||
return ""; | |||
} | |||
const auto &compound_engine_2_kernel_lib_name = instance_ptr->OpsKernelManagerObj().GetCompoundEngineKernelLibName(); | |||
const auto &iter = compound_engine_2_kernel_lib_name.find(compound_engine_name); | |||
if (iter == compound_engine_2_kernel_lib_name.end()) { | |||
GELOGW("Kernel lib name of compound engine %s is not registered", compound_engine_name.c_str()); | |||
return ""; | |||
} | |||
return iter->second; | |||
} | |||
std::string DNNEngineManager::GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, | |||
const OpDescPtr &op_desc) const { | |||
for (const auto &it : op_infos) { | |||
@@ -418,8 +568,8 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: | |||
engine_conf_ptr->independent = engines_elems[kIndependent]; | |||
} | |||
if (engines_elems.find(kAttch) != engines_elems.end()) { | |||
engine_conf_ptr->attach = engines_elems[kAttch]; | |||
if (engines_elems.find(kAttach) != engines_elems.end()) { | |||
engine_conf_ptr->attach = engines_elems[kAttach]; | |||
} | |||
if (engines_elems.find(kSkipAssignStream) != engines_elems.end()) { | |||
@@ -500,6 +650,9 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||
Status DNNEngineManager::CheckJsonFile() { | |||
GELOGD("Begin to check json file"); | |||
for (auto &it : engines_map_) { | |||
if (!it.second->IsAtomic()) { | |||
continue; | |||
} | |||
std::string engine_name = it.first; | |||
int count = 0; | |||
for (auto &iter : schedulers_) { | |||
@@ -527,4 +680,69 @@ Status DNNEngineManager::CheckJsonFile() { | |||
GELOGD("Check json file success"); | |||
return SUCCESS; | |||
} | |||
void DNNEngineManager::InitAtomicCompoundMapping() { | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][CompoundEngineName]Failed, gelib not init before"); | |||
REPORT_INNER_ERROR("E19999", "Get CompoundEngineName failed, gelib not init before"); | |||
return; | |||
} | |||
const auto &compound_engine_2_kernel_lib_name = instance_ptr->OpsKernelManagerObj().GetCompoundEngineKernelLibName(); | |||
for (const auto &item : instance_ptr->OpsKernelManagerObj().GetCompoundEngineContains()) { | |||
const auto &compound_engine = GetEngine(item.first); | |||
if ((compound_engine == nullptr) || compound_engine->IsAtomic()) { | |||
GELOGW("Compound engine %s is not registered", item.first.c_str()); | |||
} | |||
const auto &iter = compound_engine_2_kernel_lib_name.find(item.first); | |||
if ((iter == compound_engine_2_kernel_lib_name.end()) || iter->second.empty()) { | |||
GELOGW("Kernel lib name of compound engine %s is empty", item.first.c_str()); | |||
} | |||
for (const auto &atomic_engine_name : item.second) { | |||
const auto &atomic_engine = GetEngine(atomic_engine_name); | |||
if ((atomic_engine == nullptr) || !atomic_engine->IsAtomic()) { | |||
GELOGW("Atomic engine %s is not registered", atomic_engine_name.c_str()); | |||
continue; | |||
} | |||
auto iter = atomic_2_compound_.find(atomic_engine_name); | |||
if (iter != atomic_2_compound_.end()) { | |||
GELOGW("Atomic engine %s has been contained in compound engine %s, and will be overwritten by engine %s", | |||
atomic_engine_name.c_str(), iter->second.c_str(), item.first.c_str()); | |||
} | |||
atomic_2_compound_[atomic_engine_name] = item.first; | |||
} | |||
} | |||
} | |||
bool DNNEngineManager::IsStreamAssignSkip(const NodePtr &node) { | |||
const auto &op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
return false; | |||
} | |||
std::string engine_name = op_desc->GetOpEngineName(); | |||
if (engine_name.empty()) { | |||
engine_name = GetDNNEngineName(node); | |||
} | |||
return IsStreamAssignSkip(engine_name); | |||
} | |||
bool DNNEngineManager::IsStreamAssignSkip(const string &engine_name) { | |||
// Only one scheduler has been supported by now | |||
for (const auto &scheduler : schedulers_) { | |||
const map<string, EngineConfPtr> cal_engines = scheduler.second.cal_engines; | |||
auto cal_engines_iter = cal_engines.find(engine_name); | |||
if (cal_engines_iter == cal_engines.end()) { | |||
GELOGW("No cal_engines found within engine %s", engine_name.c_str()); | |||
continue; | |||
} | |||
EngineConfPtr engine_conf_ptr = cal_engines_iter->second; | |||
if (engine_conf_ptr == nullptr) { | |||
GELOGW("engine_conf_ptr within engine %s is null", engine_name.c_str()); | |||
continue; | |||
} | |||
return engine_conf_ptr->skip_assign_stream; | |||
} | |||
return false; | |||
} | |||
} // namespace ge |
@@ -61,12 +61,18 @@ class DNNEngineManager { | |||
public: | |||
friend class GELib; | |||
std::shared_ptr<ge::DNNEngine> GetEngine(const std::string &name) const; | |||
const std::map<std::string, DNNEnginePtr> &GetAllEngines() const { return engines_map_; } | |||
bool IsEngineRegistered(const std::string &name); | |||
// If can't find appropriate engine name, return "", report error | |||
string GetDNNEngineName(const ge::NodePtr &node_ptr); | |||
string GetCompoundEngineName(const ge::NodePtr &node_ptr, uint32_t recursive_depth = 1); | |||
string GetOwningCompoundEngine(const string &atomic_engine_name); | |||
string GetCompoundEngineKernelLibName(const string &compound_engine_name) const; | |||
const map<string, SchedulerConf> &GetSchedulers() const; | |||
const map<string, uint64_t> &GetCheckSupportCost() const; | |||
void InitPerformanceStaistic(); | |||
void InitPerformanceStatistic(); | |||
bool IsStreamAssignSkip(const NodePtr &node); | |||
bool IsStreamAssignSkip(const string &engine_name); | |||
private: | |||
DNNEngineManager(); | |||
@@ -79,11 +85,15 @@ class DNNEngineManager { | |||
map<string, EngineConfPtr> &engines); | |||
Status CheckJsonFile(); | |||
std::string GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, const OpDescPtr &op_desc) const; | |||
void InitAtomicCompoundMapping(); | |||
PluginManager plugin_mgr_; | |||
std::map<std::string, DNNEnginePtr> engines_map_; | |||
std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; | |||
std::map<string, SchedulerConf> schedulers_; | |||
std::map<string, uint64_t> checksupport_cost_; | |||
// {atomic_engine, compound_engine} | |||
std::map<std::string, std::string> atomic_2_compound_{}; | |||
bool init_flag_; | |||
mutable std::mutex mutex_; | |||
}; | |||
@@ -72,7 +72,7 @@ bool LabelGotoTask::Distribute() { | |||
return false; | |||
} | |||
rt_ret = rtLabelListCpy(reinterpret_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||
rt_ret = rtLabelListCpy(const_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
return false; | |||
@@ -1158,7 +1158,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "build graph failed, graph id:%u, ret:%d", graph_id, ret); | |||
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "[Build][Graph] fail, graph id: %u", graph_id); | |||
ret = GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | |||
} | |||
RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||
@@ -18,7 +18,6 @@ | |||
#include "framework/common/types.h" | |||
#include "framework/common/util.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/label/label_maker.h" | |||
@@ -85,8 +84,9 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node | |||
return false; | |||
} | |||
if (func_node->GetOpDesc() != nullptr && func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { | |||
GELOGD("Graph[%s] is ffts subgraph, skip label allocator.", graph->GetName().c_str()); | |||
if (func_node->GetOpDesc() != nullptr && (func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH))) { | |||
GELOGD("Graph[%s] is ffts/ffts+ subgraph, skip label allocator.", graph->GetName().c_str()); | |||
return true; | |||
} | |||
@@ -275,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, siz | |||
"E19022", std::vector<std::string>({"size", "item", "maxsize"}), | |||
std::vector<std::string>({std::to_string(total_mem_offset), "featuremap", | |||
std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())})); | |||
return ge::FAILED; | |||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -29,9 +29,10 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &m | |||
} | |||
// Reassign memory for special nodes | |||
if (graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset) != ge::SUCCESS) { | |||
Status ret = graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset); | |||
if (ret != ge::SUCCESS) { | |||
GELOGE(ge::FAILED, "[ReAssign][Memory] failed, graph:%s", compute_graph_->GetName().c_str()); | |||
return ge::FAILED; | |||
return ret; | |||
} | |||
// Assign memory (block and offset) for zero copy nodes | |||
@@ -17,18 +17,12 @@ | |||
#include "graph/build/stream_allocator.h" | |||
#include <algorithm> | |||
#include <memory> | |||
#include "common/ge/ge_util.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/fmk_error_codes.h" | |||
#include "framework/common/types.h" | |||
#include "graph/build/logical_stream_allocator.h" | |||
#include "common/omg_util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/string_util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
using std::map; | |||
using std::set; | |||
@@ -433,7 +427,8 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() { | |||
// Insert the send/recv event id to the graph | |||
Status StreamAllocator::InsertSyncEvents() { | |||
auto ffts_filter = [](const Node &node, const char *, const ComputeGraphPtr &) { | |||
return !node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH); | |||
return !(node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)); | |||
}; | |||
for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag(), nullptr, ffts_filter)) { | |||
@@ -536,7 +531,9 @@ Status StreamAllocator::InsertEventsForSubgraph() { | |||
for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) { | |||
GE_CHECK_NOTNULL(subgraph); | |||
const auto parent_node = subgraph->GetParentNode(); | |||
if (parent_node != nullptr && parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { | |||
if (parent_node != nullptr && (parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH) || | |||
parent_node->GetOpDesc()->HasAttr(ATTR_NAME_THREAD_SCOPE_ID))) { | |||
GELOGD("Skip ffts subgraph, parent node is %s.", parent_node->GetName().c_str()); | |||
continue; | |||
} | |||
@@ -356,7 +356,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
GE_MAKE_GUARD(release, callback); | |||
auto ffts_filter = [](const Node &node, const char *, const ComputeGraphPtr &) { | |||
return !node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH); | |||
return !(node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)); | |||
}; | |||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag(), nullptr, ffts_filter)) { | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
@@ -371,7 +372,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
continue); | |||
GE_CHK_STATUS_RET(UpdateOpIsVarAttr(op_desc, graph->GetSessionID())); | |||
string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | |||
// For fusion ddb pass, task def must be continuous. | |||
// Part2: Call | |||
auto fusion_task_info = | |||
@@ -384,13 +384,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
GELOGI("Fusion node[name:%s, type:%s] do not need generate task again.", name.c_str(), type.c_str()); | |||
continue; | |||
} | |||
string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | |||
GE_CHK_BOOL_EXEC_INFO(!op_kernel_lib_name.empty(), continue, | |||
"Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | |||
auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | |||
GE_CHECK_NOTNULL(kernel_info_store); | |||
GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "[Call][UpdateAnchorStatus] node:%s(%s) failed", name.c_str(), | |||
type.c_str()); | |||
if (node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { | |||
if (node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)) { | |||
GE_CHK_STATUS_RET(UpdateAnchorStatusForFfts(node), "[Call][UpdateAnchorStatusForFfts] node:%s(%s) failed", | |||
name.c_str(), type.c_str()); | |||
} | |||
@@ -409,7 +411,30 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | |||
name.c_str(), type.c_str(), op_id, stream_id); | |||
GE_TIMESTAMP_RESTART(GenerateTask); | |||
auto ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list); | |||
auto ret = SUCCESS; | |||
if (op_desc->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)) { | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (NodeUtils::GetSubgraphs(node, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (const auto &subgraph : subgraphs) { | |||
for (const auto &tmp_node : subgraph->GetAllNodes()) { | |||
ret = OpsKernelBuilderManager::Instance().GenerateTask(*tmp_node, run_context, task_def_list); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Call OpsKernelBuilderManager GenerateTask fail for op:%s(%s)", | |||
tmp_node->GetName().c_str(), tmp_node->GetType().c_str()); | |||
GELOGE(ret, "[Generate][Task] fail for op:%s(%s)", tmp_node->GetName().c_str(), | |||
tmp_node->GetType().c_str()); | |||
return ret; | |||
} | |||
} | |||
} | |||
ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list, false); | |||
} else { | |||
ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list); | |||
} | |||
GE_TIMESTAMP_ADD(GenerateTask); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Call OpsKernelBuilderManager GenerateTask fail for op:%s(%s)", | |||
@@ -100,9 +100,6 @@ const uint32_t kEndOfSequenceNew = 507005; | |||
const int32_t kModelAbortNormal = 0x0704000e; | |||
const int32_t kModelAbortNormalNew = 507024; | |||
const uint32_t kInteval = 2; | |||
const uint32_t kFftsTbeHandleElementSize = 2; | |||
const uint32_t kNonTailBlock = 0; | |||
const uint32_t kTailBlock = 1; | |||
const char *const kModelName = "model_name"; | |||
const char *const kModeleId = "model_id"; | |||
const char *const kLoadStartTime = "load_start_time"; | |||
@@ -238,6 +235,12 @@ DavinciModel::~DavinciModel() { | |||
GE_LOGW_IF(rtEventDestroy(event_list_[i]) != RT_ERROR_NONE, "Destroy event failed, index: %zu", i); | |||
} | |||
for (const auto &it : stream_2_event_) { | |||
if (rtEventDestroy(it.second) != RT_ERROR_NONE) { | |||
GELOGW("Destroy event failed"); | |||
} | |||
} | |||
FreeWeightsMem(); | |||
FreeFeatureMapMem(); | |||
@@ -3736,33 +3739,32 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||
Status DavinciModel::InitTbeHandleWithFfts(const OpDescPtr &op_desc) { | |||
std::vector<OpKernelBinPtr> tbe_kernel; | |||
tbe_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_NAME_THREAD_TBE_KERNEL, tbe_kernel); | |||
GELOGD("Kernel bin ptr vec size is %zu.", tbe_kernel.size()); | |||
if (tbe_kernel.size() != kFftsTbeHandleElementSize) { | |||
REPORT_INNER_ERROR("E19999", "Get tbe_kernel for op:%s(%s) fail, model_id:%u", | |||
op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find tvm bin file, size is %zu when ffts", | |||
op_desc->GetName().c_str(), tbe_kernel.size()); | |||
std::vector<string> bin_file_keys; | |||
(void)AttrUtils::GetListStr(op_desc, kStubFuncName, bin_file_keys); | |||
if (tbe_kernel.size() != bin_file_keys.size()) { | |||
REPORT_INNER_ERROR("E19999", "[%s] number of bin_file != number of file_name, bin_file_num=%zu, file_name_num=%zu", | |||
op_desc->GetName().c_str(), tbe_kernel.size(), bin_file_keys.size()); | |||
GELOGE(INTERNAL_ERROR, | |||
"[Check][Param] [%s] number of bin_file != number of file_name, bin_file_num=%zu, file_name_num=%zu", | |||
op_desc->GetName().c_str(), tbe_kernel.size(), bin_file_keys.size()); | |||
return INTERNAL_ERROR; | |||
} | |||
if (tbe_kernel[0] == nullptr || tbe_kernel[1] == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Tbe kernel for op:%s is nullptr.", op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: tvm bin file of %s is nullptr when ffts.", op_desc->GetName().c_str()); | |||
if (tbe_kernel.empty()) { | |||
REPORT_INNER_ERROR("E19999", "[%s] tbe kernel is empty", op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] [%s] tbe kernel is empty", op_desc->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
vector<string> bin_file_keys; | |||
(void)AttrUtils::GetListStr(op_desc, kStubFuncName, bin_file_keys); | |||
if (bin_file_keys.size() != kFftsTbeHandleElementSize) { | |||
REPORT_INNER_ERROR("E19999", "Get bin_file for op:%s(%s) fail.", op_desc->GetName().c_str(), | |||
op_desc->GetType().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find bin file keys, size is %zu when ffts", | |||
op_desc->GetName().c_str(), bin_file_keys.size()); | |||
return INTERNAL_ERROR; | |||
size_t num = tbe_kernel.size(); | |||
GELOGD("Kernel bin num is %zu", num); | |||
for (size_t i = 0; i < num; i++) { | |||
if (tbe_kernel[i] == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Tbe kernel for op:%s is nullptr.", op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: tvm bin file of %s is nullptr when ffts.", op_desc->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[i], tbe_kernel[i], true, i), | |||
"Function register of No. %zu bin file %s failed.", i, bin_file_keys[i].c_str()); | |||
} | |||
GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[kNonTailBlock], tbe_kernel[kNonTailBlock], true, | |||
kNonTailBlock), | |||
"Function register of first bin file %s failed.", bin_file_keys[kNonTailBlock].c_str()); | |||
GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[kTailBlock], tbe_kernel[kTailBlock], true, kTailBlock), | |||
"Function register of second bin file %s failed.", bin_file_keys[kTailBlock].c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -3809,6 +3811,10 @@ Status DavinciModel::FunctionRegister(const OpDescPtr &op_desc, string &bin_file | |||
GE_CHK_STATUS_RET(InitKernelName(op_desc, is_ffts, thread_index, kernel_name), "Init kernel name of %s failed.", | |||
op_desc->GetName().c_str()); | |||
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0)); | |||
void *addr; | |||
uint32_t prefetch_cnt; | |||
GE_CHK_RT_RET(rtGetAddrAndPrefCntWithHandle(bin_handle, kernel_name.c_str(), &addr, &prefetch_cnt)); | |||
addr_and_pref_cnt_[kernel_name] = { addr, prefetch_cnt }; | |||
used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1. | |||
return SUCCESS; | |||
} | |||
@@ -3817,6 +3823,18 @@ Status DavinciModel::FunctionRegister(const OpDescPtr &op_desc, string &bin_file | |||
return SUCCESS; | |||
} | |||
// Looks up the (kernel address, prefetch count) pair that FunctionRegister
// cached in addr_and_pref_cnt_ for the given kernel name.
// Returns INTERNAL_ERROR when the kernel was never registered.
Status DavinciModel::GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt) {
  const auto found = addr_and_pref_cnt_.find(kernel_name);
  if (found != addr_and_pref_cnt_.end()) {
    addr = found->second.first;
    pref_cnt = found->second.second;
    return SUCCESS;
  }
  REPORT_INNER_ERROR("E19999", "Get addr and pref cnt failed, kernel_name:%s", kernel_name.c_str());
  GELOGE(INTERNAL_ERROR, "[Check][Param] Get addr and pref cnt failed, kernel_name:%s", kernel_name.c_str());
  return INTERNAL_ERROR;
}
Status DavinciModel::InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, | |||
rtDevBinary_t &binary) { | |||
string json_string; | |||
@@ -3830,7 +3848,7 @@ Status DavinciModel::InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, siz | |||
if (is_ffts) { | |||
vector<string> json_list; | |||
(void)AttrUtils::GetListStr(op_desc, tvm_magic, json_list); | |||
if (json_list.size() != kFftsTbeHandleElementSize) { | |||
if (json_list.size() <= thread_index) { | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] failed. Attr is %s, thread index is %zu, json list size is %zu.", | |||
tvm_magic.c_str(), thread_index, json_list.size()); | |||
return INTERNAL_ERROR; | |||
@@ -3859,7 +3877,7 @@ Status DavinciModel::InitMetaData(const OpDescPtr &op_desc, bool is_ffts, size_t | |||
if (is_ffts) { | |||
vector<string> meta_data_list; | |||
(void)AttrUtils::GetListStr(op_desc, tvm_metadata, meta_data_list); | |||
if (meta_data_list.size() != kFftsTbeHandleElementSize) { | |||
if (meta_data_list.size() <= thread_index) { | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] failed, attr is %s, thread index is %zu, meta data list size is %zu.", | |||
tvm_metadata.c_str(), thread_index, meta_data_list.size()); | |||
return INTERNAL_ERROR; | |||
@@ -3886,7 +3904,7 @@ Status DavinciModel::InitKernelName(const OpDescPtr &op_desc, bool is_ffts, size | |||
} | |||
string attr_kernel_name = op_desc->GetName().substr(pos + 1) + "_thread_kernelname"; | |||
(void)AttrUtils::GetListStr(op_desc, attr_kernel_name, kernel_name_list); | |||
if (kernel_name_list.size() != kFftsTbeHandleElementSize) { | |||
if (kernel_name_list.size() <= thread_index) { | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] failed, attr is %s, thread index is %zu, kernel name list size is %zu.", | |||
attr_kernel_name.c_str(), thread_index, kernel_name_list.size()); | |||
return INTERNAL_ERROR; | |||
@@ -4648,4 +4666,50 @@ Status DavinciModel::GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size) | |||
total_useful_size = runtime_param_.mem_size - runtime_param_.zero_copy_size; | |||
return SUCCESS; | |||
} | |||
// Returns the runtime event id used by a blocking aicpu op on |stream|.
// The first call for a stream creates an event and caches it in
// stream_2_event_ (cached events are destroyed in ~DavinciModel);
// later calls reuse the cached event.
// Fix: if rtGetEventID fails right after rtEventCreateWithFlag succeeded,
// the freshly created event is now destroyed instead of being leaked
// (it was neither cached nor released on that error path).
Status DavinciModel::GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id) {
  GELOGI("Get event id for aicpu blocking op:%s", op_desc->GetName().c_str());
  auto it = stream_2_event_.find(stream);
  if (it != stream_2_event_.end()) {
    auto rt_ret = rtGetEventID(it->second, &event_id);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X",
                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X",
             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  } else {
    rtEvent_t rt_event = nullptr;
    auto rt_ret = rtEventCreateWithFlag(&rt_event, RT_EVENT_WITH_FLAG);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for op:%s(%s), ret:0x%X",
                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for op:%s(%s), ret:0x%X",
             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    rt_ret = rtGetEventID(rt_event, &event_id);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X",
                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X",
             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      // Do not leak the event we just created: it is not cached yet, so nobody
      // else can release it.
      (void)rtEventDestroy(rt_event);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    stream_2_event_.emplace(stream, rt_event);
  }
  return SUCCESS;
}
// Fetches the event cached for |stream| in stream_2_event_ (populated by
// GetEventIdForBlockingAicpuOp). Fails when no event exists for the stream.
Status DavinciModel::GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event) {
  const auto iter = stream_2_event_.find(stream);
  if (iter != stream_2_event_.end()) {
    rt_event = iter->second;
    return SUCCESS;
  }
  REPORT_INNER_ERROR("E19999", "Get event failed");
  GELOGE(FAILED, "[Get][Event] Get event failed");
  return FAILED;
}
} // namespace ge |
@@ -582,6 +582,12 @@ class DavinciModel { | |||
void SetRunningFlag(bool flag) { running_flg_ = flag; } | |||
Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback); | |||
// for blocking aicpu op | |||
Status GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event); | |||
Status GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id); | |||
Status GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt); | |||
private: | |||
// memory address of weights | |||
uint8_t *weights_mem_base_; | |||
@@ -1021,6 +1027,8 @@ class DavinciModel { | |||
map<string, uint32_t> used_tbe_handle_map_; | |||
std::map<std::string, std::pair<void *, uint32_t>> addr_and_pref_cnt_; | |||
// for profiling task and graph info | |||
vector<TaskDescInfo> task_desc_info_; | |||
@@ -1107,6 +1115,8 @@ class DavinciModel { | |||
// op name to attrs mapping | |||
std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_; | |||
std::map<rtStream_t, rtEvent_t> stream_2_event_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -0,0 +1,977 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph/load/model_manager/task_info/ffts_plus_task_info.h" | |||
#include "graph/load/model_manager/davinci_model.h" | |||
// File-local layout constants for FFTS+ task/ctx initialization.
namespace {
constexpr uint32_t kAddrLen = sizeof(void *);  // bytes per device address entry in args_
constexpr uint32_t kSrcSlotNum = 4;            // expected src_slot entries per AIC/AIV ctx
constexpr uint32_t kWriteValueNum = 4;         // presumably write-value entries per ctx — used later in this file; confirm
constexpr uint32_t kUserDataNum = 9;           // presumably user-data words per aicpu ctx — used later in this file; confirm
constexpr uint32_t kNonTailIndex = 0;          // kernel_name index of the non-tail-block kernel
constexpr uint32_t kTailIndex = 1;             // kernel_name index of the tail-block kernel
constexpr uint32_t kAicAivCtxPcNum = 2;        // expected kernel_name entries for an AIC/AIV ctx
constexpr uint32_t kNonTailAicCtxIndex = 0;    // mix-AIC/AIV kernel_name slot: non-tail AIC
constexpr uint32_t kTailAicCtxIndex = 1;       // mix-AIC/AIV kernel_name slot: tail AIC
constexpr uint32_t kNonTailAivCtxIndex = 2;    // mix-AIC/AIV kernel_name slot: non-tail AIV
constexpr uint32_t kTailAivCtxIndex = 3;       // mix-AIC/AIV kernel_name slot: tail AIV
constexpr uint32_t kMixAicAivCtxPcNum = 4;     // expected kernel_name entries for a MIX AIC/AIV ctx
}  // namespace
namespace ge { | |||
// Releases the device-side args buffer allocated with rtMalloc in Init().
FftsPlusTaskInfo::~FftsPlusTaskInfo() {
  GE_FREE_RT_LOG(args_);
}
// Builds the runtime FFTS+ task descriptor from the protobuf task def:
// allocates the device args buffer, fills the SQE and all ctx entries, and
// copies the collected io addresses to the device.
// NOTE(review): sqe_buffer and ctx_buffer are function-local vectors, yet
// ffts_plus_task_info_.fftsPlusSqe and .descBuf are pointers into them — those
// pointers dangle as soon as Init() returns. Confirm ffts_plus_task_info_ is
// fully consumed (e.g. deep-copied by the runtime) before return; otherwise
// the buffers must be lifted into members.
Status FftsPlusTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  GELOGI("Init Start");
  GE_CHECK_NOTNULL(davinci_model);
  davinci_model_ = davinci_model;
  GE_CHK_STATUS_RET_NOLOG(SetStream(task_def.stream_id(), davinci_model_->GetStreamList()));
  const domi::FftsPlusTaskDef &ffts_plus_task_def = task_def.ffts_plus_task();
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(ffts_plus_task_def.op_index());
  GE_CHECK_NOTNULL(op_desc);
  // One device pointer slot per address entry declared by the task def.
  args_size_ = kAddrLen * ffts_plus_task_def.addr_size();
  if (args_size_ != 0) {
    GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
  }
  std::vector<uint8_t> sqe_buffer(sizeof(rtFftsPlusSqe_t));
  auto ffts_plus_sqe = reinterpret_cast<rtFftsPlusSqe_t *>(sqe_buffer.data());
  InitFftsPlusSqe(ffts_plus_task_def.ffts_plus_sqe(), ffts_plus_sqe);
  ffts_plus_task_info_.fftsPlusSqe = ffts_plus_sqe;
  // Each ctx entry occupies one rtFftsPlusComCtx_t slot; specific ctx types alias it.
  size_t ctx_num = ffts_plus_task_def.ffts_plus_ctx_size();
  ffts_plus_task_info_.descBufLen = sizeof(rtFftsPlusComCtx_t) * ctx_num;
  std::vector<uint8_t> ctx_buffer(ffts_plus_task_info_.descBufLen);
  auto ctx = reinterpret_cast<void *>(ctx_buffer.data());
  GE_CHK_STATUS_RET_NOLOG(InitFftsPlusCtx(ffts_plus_task_def, ctx_num, ctx));
  ffts_plus_task_info_.descBuf = reinterpret_cast<void *>(ctx_buffer.data());
  if (args_ != nullptr) {
    // io_addrs_ was populated by the ctx initializers above; push it to device.
    size_t data_size = kAddrLen * io_addrs_.size();
    GE_CHK_RT_RET(rtMemcpy(args_, args_size_, io_addrs_.data(), data_size, RT_MEMCPY_HOST_TO_DEVICE));
  }
  GELOGI("Init Success. Node: %s, input/output size: %zu", op_desc->GetName().c_str(), io_addrs_.size());
  return SUCCESS;
}
// Fills the FFTS+ SQE from its protobuf definition. Each masked field is
// truncated to the bit width the hardware SQE layout reserves for it.
void FftsPlusTaskInfo::InitFftsPlusSqe(const domi::FftsPlusSqeDef &sqe_def, rtFftsPlusSqe_t *&sqe) {
  InitFftsPlusSqeHeader(sqe_def.sqe_header(), sqe->sqeHeader);
  // Keep only the bits selected by |mask| and narrow to the SQE field width.
  const auto masked = [](auto value, uint32_t mask) { return static_cast<uint16_t>(value & mask); };
  sqe->pmg = masked(sqe_def.pmg(), 0X00000003);                        // 2 bits
  sqe->ns = masked(sqe_def.ns(), 0X00000001);                          // 1 bit
  sqe->partId = masked(sqe_def.part_id(), 0X000000FF);                 // 8 bits
  sqe->qos = masked(sqe_def.qos(), 0X0000000F);                        // 4 bits
  sqe->totalContextNum = static_cast<uint16_t>(sqe_def.total_context_num());
  sqe->readyContextNum = static_cast<uint16_t>(sqe_def.ready_context_num());
  sqe->preloadContextNum = static_cast<uint16_t>(sqe_def.preload_context_num());
  sqe->dsplitUnit = masked(sqe_def.dsplit_unit(), 0X00000007);         // 3 bits
  sqe->prefetchOstNum = masked(sqe_def.prefetch_ost_num(), 0X0000001F);    // 5 bits
  sqe->cmaintOstNum = masked(sqe_def.cmaint_ost_num(), 0X0000001F);        // 5 bits
  sqe->aicPrefetchLower = masked(sqe_def.aic_prefetch_lower(), 0X0000001F);  // 5 bits
  sqe->aicPrefetchUpper = masked(sqe_def.aic_prefetch_upper(), 0X0000001F);  // 5 bits
  sqe->aivPrefetchLower = masked(sqe_def.aiv_prefetch_lower(), 0X0000001F);  // 5 bits
  sqe->aivPrefetchUpper = masked(sqe_def.aiv_prefetch_upper(), 0X0000001F);  // 5 bits
}
// Copies the STARS SQE header fields, narrowing each to the width of the
// runtime header struct.
void FftsPlusTaskInfo::InitFftsPlusSqeHeader(const domi::StarsSqeHeaderDef &sqe_header_def,
                                             rtStarsSqeHeader_t &sqe_header) {
  sqe_header.blockDim = static_cast<uint16_t>(sqe_header_def.block_dim());
  sqe_header.l1Lock = static_cast<uint8_t>(sqe_header_def.l1_lock());
  sqe_header.l1Unlock = static_cast<uint8_t>(sqe_header_def.l1_unlock());
}
// Walks every ctx entry of the task def and dispatches it to the matching
// software-type initializer; any other type is handled as a hardware ctx.
Status FftsPlusTaskInfo::InitFftsPlusCtx(const domi::FftsPlusTaskDef &task_def, size_t ctx_num, void *&ctx) {
  const uintptr_t ctx_base = reinterpret_cast<uintptr_t>(ctx);
  for (size_t idx = 0; idx < ctx_num; idx++) {
    const domi::FftsPlusCtxDef &ctx_def = task_def.ffts_plus_ctx(idx);
    GELOGI("Init ctx %zu in FftsPlusTask, software_ctx_type=%u, hardware_ctx_type=%u", idx, ctx_def.software_ctx_type(),
           ctx_def.hardware_ctx_type());
    // Every slot has the size of the common ctx struct; typed ctxs alias it.
    uintptr_t cur_ctx = ctx_base + sizeof(rtFftsPlusComCtx_t) * idx;
    const auto soft_type = ctx_def.software_ctx_type();
    if (soft_type == RT_SOFT_CTX_TYPE_AT_START) {
      auto typed_ctx = reinterpret_cast<rtFftsPlusAtStartCtx_t *>(cur_ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAtStartCtx(ctx_def.at_start_ctx(), typed_ctx));
    } else if (soft_type == RT_SOFT_CTX_TYPE_AT_END) {
      auto typed_ctx = reinterpret_cast<rtFftsPlusAtEndCtx_t *>(cur_ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAtEndCtx(ctx_def.at_end_ctx(), typed_ctx));
    } else if (soft_type == RT_SOFT_CTX_TYPE_LABEL) {
      auto typed_ctx = reinterpret_cast<rtFftsPlusLabelCtx_t *>(cur_ctx);
      GE_CHK_STATUS_RET_NOLOG(InitLabelCtx(ctx_def.label_ctx(), typed_ctx));
    } else {
      GE_CHK_STATUS_RET_NOLOG(InitHardWareCtx(ctx_def, cur_ctx));
    }
  }
  return SUCCESS;
}
// Initializes an AT_START software ctx from its protobuf definition. Fails
// unless the successor list carries exactly RT_CTX_SUCCESSOR_NUM entries.
Status FftsPlusTaskInfo::InitAtStartCtx(const domi::FftsPlusAtStartCtxDef &ctx_def, rtFftsPlusAtStartCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // lowest bit only
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  const int succ_size = ctx_def.successor_list_size();
  if (succ_size != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusAtStartCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, succ_size);
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusAtStartCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, succ_size);
    return FAILED;
  }
  for (size_t idx = 0; idx < RT_CTX_SUCCESSOR_NUM; idx++) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->threadIdInit = static_cast<uint16_t>(ctx_def.thread_id_init());
  ctx->threadWindowSize = static_cast<uint16_t>(ctx_def.thread_window_size());
  return SUCCESS;
}
// Initializes an AT_END software ctx from its protobuf definition. Both
// fixed-size successor slot arrays are length-checked before copying.
// Fix: the two GELOGE messages named "FftsPlusAtStartCtxDef" although this
// function validates FftsPlusAtEndCtxDef (the paired REPORT_INNER_ERROR
// messages already said AtEnd) — corrected the copy-pasted log text.
Status FftsPlusTaskInfo::InitAtEndCtx(const domi::FftsPlusAtEndCtxDef &ctx_def, rtFftsPlusAtEndCtx_t *&ctx) {
  ctx->atStartSlotNumber = static_cast<uint8_t>(ctx_def.at_start_slot_num());
  ctx->outLabelSlotNumber = static_cast<uint8_t>(ctx_def.out_label_slot_num() & 0X0000007F);  // 7 bits, 0111,1111
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.succ_at_start_slot_size() != RT_CTX_SUCC_AT_START_SLOT_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of succ_at_start_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCC_AT_START_SLOT_NUM, ctx_def.succ_at_start_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of succ_at_start_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
           RT_CTX_SUCC_AT_START_SLOT_NUM, ctx_def.succ_at_start_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCC_AT_START_SLOT_NUM; i++) {
    ctx->succAtStartSlot[i] = static_cast<uint16_t>(ctx_def.succ_at_start_slot(i));
  }
  if (ctx_def.succ_out_label_slot_size() != RT_CTX_SUCC_OUT_LABEL_SLOT_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of succ_out_label_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCC_OUT_LABEL_SLOT_NUM, ctx_def.succ_out_label_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of succ_out_label_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
           RT_CTX_SUCC_OUT_LABEL_SLOT_NUM, ctx_def.succ_out_label_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCC_OUT_LABEL_SLOT_NUM; i++) {
    ctx->succOutLabelSlot[i] = static_cast<uint16_t>(ctx_def.succ_out_label_slot(i));
  }
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  return SUCCESS;
}
// Initializes a LABEL software ctx; the fixed-size successor list is
// length-checked before it is copied into the runtime struct.
Status FftsPlusTaskInfo::InitLabelCtx(const domi::FftsPlusLabelCtxDef &ctx_def, rtFftsPlusLabelCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  const int succ_size = ctx_def.successor_list_size();
  if (succ_size != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusLabelCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, succ_size);
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusLabelCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, succ_size);
    return FAILED;
  }
  for (size_t idx = 0; idx < RT_CTX_SUCCESSOR_NUM; idx++) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  return SUCCESS;
}
// Dispatches a hardware ctx entry to its type-specific initializer. |ctx| is
// the address of this entry's slot inside the common ctx buffer.
Status FftsPlusTaskInfo::InitHardWareCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx) {
  const auto hw_type = ctx_def.hardware_ctx_type();
  switch (hw_type) {
    case RT_HW_CTX_TYPE_AIC:
    case RT_HW_CTX_TYPE_AIV: {  // plain AI core / AI vector kernel ctx
      auto typed_ctx = reinterpret_cast<rtFftsPlusAicAivCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAicAivCtx(ctx_def.aic_aiv_ctx(), ctx_def.op_index(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_NOTIFY_WAIT:
    case RT_HW_CTX_TYPE_NOTIFY_RECORD: {  // notify wait/record share one layout
      auto typed_ctx = reinterpret_cast<rtFftsPlusNotifyCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitNotifyCtx(ctx_def.notify_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_WRITE_VALUE: {
      auto typed_ctx = reinterpret_cast<rtFftsPlusWriteValueCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitWriteValueCtx(ctx_def.write_value_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_MIX_AIC:
    case RT_HW_CTX_TYPE_MIX_AIV: {  // mixed AIC/AIV kernel ctx
      auto typed_ctx = reinterpret_cast<rtFftsPlusMixAicAivCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitMixAicAivCtx(ctx_def.mix_aic_aiv_ctx(), ctx_def.op_index(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_SDMA: {
      auto typed_ctx = reinterpret_cast<rtFftsPlusSdmaCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitSdmaCtx(ctx_def.sdma_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_FLUSH_DATA:
    case RT_HW_CTX_TYPE_INVALIDATE_DATA:
    case RT_HW_CTX_TYPE_WRITEBACK_DATA: {  // all cache-maintenance types share the data ctx layout
      auto typed_ctx = reinterpret_cast<rtFftsPlusDataCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitDataCtx(ctx_def.data_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_AICPU: {
      auto typed_ctx = reinterpret_cast<rtFftsPlusAiCpuCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAicpuCtx(ctx_def.aicpu_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_LOAD: {
      GE_CHK_STATUS_RET_NOLOG(InitLoadCtx(ctx_def, ctx));
      break;
    }
    default:
      REPORT_INNER_ERROR("E19999", "Unsupported hardware ctx type %u", ctx_def.hardware_ctx_type());
      GELOGE(FAILED, "[Check][CtxType] Unsupported hardware ctx type %u", ctx_def.hardware_ctx_type());
      return FAILED;
  }
  return SUCCESS;
}
// Initializes an AIC/AIV hardware ctx from its protobuf definition. In
// auto-threading mode (atm == 1) it also appends the per-thread I/O addresses
// to io_addrs_, which Init() later copies into the device args_ buffer.
// |op_index| is accepted for signature uniformity but not referenced here.
Status FftsPlusTaskInfo::InitAicAivCtx(const domi::FftsPlusAicAivCtxDef &ctx_def, uint32_t op_index,
                                       rtFftsPlusAicAivCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusAicAivCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusAicAivCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->stat = static_cast<uint16_t>(ctx_def.stat() & 0X00000001);    // 1 bit , 0000,0001
  ctx->schem = static_cast<uint16_t>(ctx_def.schem() & 0X00000003);  // 2 bits, 0000,0011
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);      // 1 bit , 0000,0001
  // NOTE(review): both prefetch bitmaps below are derived from ctx_def.atm();
  // this looks like a copy-paste slip — expected ctx_def.prefetch_enable_bitmap()
  // and ctx_def.prefetch_once_bitmap(). Confirm against the proto definition.
  ctx->prefetchEnableBitmap = static_cast<uint16_t>(ctx_def.atm() & 0X0000000F);  // 4 bits, 0000,1111
  ctx->prefetchOnceBitmap = static_cast<uint16_t>(ctx_def.atm() & 0X0000000F);    // 4 bits, 0000,1111
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->nonTailBlockdim = static_cast<uint16_t>(ctx_def.non_tail_block_dim());
  ctx->tailBlockdim = static_cast<uint16_t>(ctx_def.tail_block_dim());
  uint64_t task_param_ptr_base;
  if (ctx->atm == 0) {
    // TODO, manual: manual-threading path; task params start at the args_ base.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_);
  } else {
    // auto-threading: per-thread task params live after the io address table in args_.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_) + kAddrLen * io_addrs_.size();
    GELOGD("FftsPlusAicAivCtxDef: task param addr is %lu.", task_param_ptr_base);
    const auto &rts_param = davinci_model_->GetRuntimeParam();
    // NOTE(review): assumes threadDim >= 1 — threadDim == 0 underflows the loop
    // bound below (uint16 minus one, cast to uint32). Confirm upstream guarantees.
    for (uint32_t i = 0; i < static_cast<uint32_t>(ctx->threadDim - 1); i++) {
      GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, i,
                                          static_cast<uint32_t>(ctx_def.task_addr_offset_size())));
    }
    // The last (tail) thread uses input_output_count() addresses instead.
    GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, static_cast<uint32_t>(ctx->threadDim - 1),
                                        ctx_def.input_output_count()));
    // task_addr entries beyond the offset table are the tail thread's extra
    // (workspace) logical addresses; translate each to a device address.
    int last_thread_workspace_size = ctx_def.task_addr_size() - ctx_def.task_addr_offset_size();
    for (int k = 0; k < last_thread_workspace_size; ++k) {
      uintptr_t logic_addr = ctx_def.task_addr(ctx_def.task_addr_offset_size() + k);
      uint8_t *io_addr = nullptr;
      GE_CHK_STATUS_RET_NOLOG(ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr));
      io_addrs_.emplace_back(io_addr);
    }
  }
  // Split the 48-bit device pointer into the low/high register fields.
  ctx->taskParamPtrBaseL = static_cast<uint32_t>(task_param_ptr_base & 0XFFFFFFFF);          // low 32 bits
  ctx->taskParamPtrBaseH = static_cast<uint16_t>((task_param_ptr_base >> 32) & 0X0000FFFF);  // high 16 bits
  ctx->taskParamPtrOffset = static_cast<uint16_t>(ctx_def.task_param_ptr_offset());
  // PcL for low 32 bits of pc, PcH for high 16 bits of pc
  if (ctx_def.kernel_name_size() != kAicAivCtxPcNum) {
    REPORT_INNER_ERROR("E19999", "Size of kernel_name in FftsPlusAicAivCtxDef should be %d, but %d exactly",
                       kAicAivCtxPcNum, ctx_def.kernel_name_size());
    GELOGE(FAILED, "[Check][Param] Size of kernel_name in FftsPlusAicAivCtxDef should be %d, but %d exactly",
           kAicAivCtxPcNum, ctx_def.kernel_name_size());
    return FAILED;
  }
  // Resolve the registered kernel addresses (cached by FunctionRegister).
  uint32_t i_cache_prefetch_cnt_1;
  void *non_tail_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kNonTailIndex), non_tail_task_start_pc,
                                                            i_cache_prefetch_cnt_1));
  ctx->nonTailTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(non_tail_task_start_pc) & 0XFFFFFFFF);
  ctx->nonTailTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(non_tail_task_start_pc) >> 32) &
                                                   0X0000FFFF);
  uint32_t i_cache_prefetch_cnt_2;
  void *tail_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kTailIndex), tail_task_start_pc,
                                                            i_cache_prefetch_cnt_2));
  ctx->tailTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tail_task_start_pc) & 0XFFFFFFFF);
  ctx->tailTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(tail_task_start_pc) >> 32) & 0X0000FFFF);
  // Take the smaller of the two kernels' prefetch counts (conservative choice).
  uint32_t i_cache_prefetch_cnt = std::min(i_cache_prefetch_cnt_1, i_cache_prefetch_cnt_2);
  ctx->icachePrefetchCnt = static_cast<uint16_t>(i_cache_prefetch_cnt & 0X0000001F);  // 5 bits, 0001,1111
  if (ctx_def.src_slot_size() != kSrcSlotNum) {
    REPORT_INNER_ERROR("E19999", "Size of src_slot in FftsPlusAicAivCtxDef should be %d, but %d exactly",
                       kSrcSlotNum, ctx_def.src_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of src_slot in FftsPlusAicAivCtxDef should be %d, but %d exactly",
           kSrcSlotNum, ctx_def.src_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < kSrcSlotNum; i++) {
    ctx->srcSlot[i] = static_cast<uint16_t>(ctx_def.src_slot(i));
  }
  return SUCCESS;
}
// Fill a runtime notify context from its protobuf definition.
// Masks keep only the bit-width the hardware context reserves for each field.
Status FftsPlusTaskInfo::InitNotifyCtx(const domi::FftsPlusNotifyCtxDef &ctx_def, rtFftsPlusNotifyCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // single-bit field
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusNotifyCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusNotifyCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (int idx = 0; idx < RT_CTX_SUCCESSOR_NUM; ++idx) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);  // single-bit field
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->notifyIdBase = static_cast<uint16_t>(ctx_def.notify_id_base());
  return SUCCESS;
}
// Fill a runtime write-value context from its protobuf definition.
// The write base address is resolved from a logic address and split into
// low/high parts as the hardware context expects.
Status FftsPlusTaskInfo::InitWriteValueCtx(const domi::FftsPlusWriteValueCtxDef &ctx_def,
                                           rtFftsPlusWriteValueCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // single-bit field
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (int idx = 0; idx < RT_CTX_SUCCESSOR_NUM; ++idx) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);        // 1 bit
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->awSize = static_cast<uint8_t>(ctx_def.aw_size() & 0X00000007);  // 3 bits
  ctx->snoop = static_cast<uint8_t>(ctx_def.snoop() & 0X00000001);     // 1 bit
  ctx->awCache = static_cast<uint8_t>(ctx_def.aw_cache() & 0X0000000F);  // 4 bits
  ctx->awProt = static_cast<uint8_t>(ctx_def.aw_prot() & 0X00000007);    // 3 bits
  ctx->va = static_cast<uint8_t>(ctx_def.va() & 0X00000001);             // 1 bit
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *write_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.write_addr_base(), write_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  const uintptr_t write_base_val = reinterpret_cast<uintptr_t>(write_base);
  ctx->writeAddressBaseL = static_cast<uint32_t>(write_base_val & 0XFFFFFFFF);          // low 32 bits
  ctx->writeAddressBaseH = static_cast<uint32_t>((write_base_val >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->writeAddressOffset = ctx_def.write_addr_offset();
  // The write-value array is fixed-size in the hardware context; reject any other length.
  if (ctx_def.write_value_size() != kWriteValueNum) {
    REPORT_INNER_ERROR("E19999", "Size of write_value in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
                       kWriteValueNum, ctx_def.write_value_size());
    GELOGE(FAILED, "[Check][Param] Size of write_value in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
           kWriteValueNum, ctx_def.write_value_size());
    return FAILED;
  }
  for (size_t idx = 0; idx < kWriteValueNum; ++idx) {
    ctx->writeValue[idx] = static_cast<uint16_t>(ctx_def.write_value(idx));
  }
  return SUCCESS;
}
// Fill a runtime mixed AIC/AIV context from its protobuf definition.
// Resolves per-thread IO addresses (auto-threading mode only), packs the AIC/AIV
// task-param pointers, and looks up the four kernel start PCs by name.
// NOTE: op_index is currently unused; kept for interface consistency.
Status FftsPlusTaskInfo::InitMixAicAivCtx(const domi::FftsPlusMixAicAivCtxDef &ctx_def, uint32_t op_index,
                                          rtFftsPlusMixAicAivCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->stat = static_cast<uint16_t>(ctx_def.stat() & 0X00000001);    // 1 bit , 0000,0001
  ctx->schem = static_cast<uint16_t>(ctx_def.schem() & 0X00000003);  // 2 bits, 0000,0011
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);      // 1 bit , 0000,0001
  ctx->prefetchEnableBitmap = static_cast<uint16_t>(ctx_def.prefetch_enable_bitmap() & 0X0000000F);  // 4 bits
  ctx->prefetchOnceBitmap = static_cast<uint16_t>(ctx_def.prefetch_once_bitmap() & 0X0000000F);      // 4 bits
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->nonTailBlockRatioN = static_cast<uint8_t>(ctx_def.non_tail_block_ratio_n());
  ctx->tailBlockRatioN = static_cast<uint8_t>(ctx_def.tail_block_ratio_n());
  ctx->nonTailBlockdim = static_cast<uint16_t>(ctx_def.non_tail_block_dim());
  ctx->tailBlockdim = static_cast<uint16_t>(ctx_def.tail_block_dim());
  uint64_t task_param_ptr_base;
  if (ctx->atm == 0) {
    // TODO: manual-threading mode — task params start directly at args_.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_);
  } else {
    // Auto-threading mode: task params follow the IO-address table in args_.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_) + kAddrLen * io_addrs_.size();
    GELOGD("FftsPlusMixAicAivCtxDef: task param addr is %lu.", task_param_ptr_base);
    const auto &rts_param = davinci_model_->GetRuntimeParam();
    // All threads except the last use the full per-thread offset table.
    for (uint32_t i = 0; i < static_cast<uint32_t>(ctx->threadDim - 1); i++) {
      GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, i,
                                          static_cast<uint32_t>(ctx_def.task_addr_offset_size())));
    }
    // The last (tail) thread only covers the input/output addresses.
    GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, static_cast<uint32_t>(ctx->threadDim - 1),
                                        ctx_def.input_output_count()));
    // Remaining task addrs beyond the offset table are the tail thread's workspaces.
    int last_thread_workspace_size = ctx_def.task_addr_size() - ctx_def.task_addr_offset_size();
    for (int k = 0; k < last_thread_workspace_size; ++k) {
      uintptr_t logic_addr = ctx_def.task_addr(ctx_def.task_addr_offset_size() + k);
      uint8_t *io_addr = nullptr;
      GE_CHK_STATUS_RET_NOLOG(ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr));
      io_addrs_.emplace_back(io_addr);
    }
  }
  ctx->aicTaskParamPtrL = static_cast<uint32_t>(ctx_def.aic_task_param_ptr() & 0XFFFFFFFF);         // low 32 bits
  ctx->aicTaskParamPtrH = static_cast<uint16_t>((ctx_def.aic_task_param_ptr() >> 32) & 0X0000FFFF);  // high 16 bits
  ctx->aicTaskParamPtrOffset = static_cast<uint16_t>(ctx_def.aic_task_param_ptr_offset());
  ctx->aivTaskParamPtrL = static_cast<uint32_t>(ctx_def.aiv_task_param_ptr() & 0XFFFFFFFF);         // low 32 bits
  ctx->aivTaskParamPtrH = static_cast<uint16_t>((ctx_def.aiv_task_param_ptr() >> 32) & 0X0000FFFF);  // high 16 bits
  ctx->aivTaskParamPtrOffset = static_cast<uint16_t>(ctx_def.aiv_task_param_ptr_offset());
  // PcL for low 32 bits of pc, PcH for high 16 bits of pc.
  // Fix: the error-report arguments previously passed kAicAivCtxPcNum while the
  // comparison uses kMixAicAivCtxPcNum, so the log showed the wrong expected size.
  if (ctx_def.kernel_name_size() != kMixAicAivCtxPcNum) {
    REPORT_INNER_ERROR("E19999", "Size of kernel_name in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
                       kMixAicAivCtxPcNum, ctx_def.kernel_name_size());
    GELOGE(FAILED, "[Check][Param] Size of kernel_name in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
           kMixAicAivCtxPcNum, ctx_def.kernel_name_size());
    return FAILED;
  }
  uint32_t i_cache_prefetch_cnt_1 = 0U;  // initialized defensively; set by GetAddrAndPrefCnt on success
  void *non_tail_aic_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kNonTailAicCtxIndex),
                                                            non_tail_aic_task_start_pc, i_cache_prefetch_cnt_1));
  ctx->nonTailAicTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(non_tail_aic_task_start_pc) &
                                                      0XFFFFFFFF);
  ctx->nonTailAicTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(non_tail_aic_task_start_pc) >> 32) &
                                                      0X0000FFFF);
  uint32_t i_cache_prefetch_cnt_2 = 0U;
  void *tail_aic_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kTailAicCtxIndex),
                                                            tail_aic_task_start_pc, i_cache_prefetch_cnt_2));
  ctx->tailAicTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tail_aic_task_start_pc) & 0XFFFFFFFF);
  ctx->tailAicTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(tail_aic_task_start_pc) >> 32) &
                                                   0X0000FFFF);
  uint32_t aic_i_cache_prefetch_cnt = std::min(i_cache_prefetch_cnt_1, i_cache_prefetch_cnt_2);
  ctx->icachePrefetchCnt = static_cast<uint16_t>(aic_i_cache_prefetch_cnt & 0X0000001F);  // 5 bits, 0001,1111
  uint32_t i_cache_prefetch_cnt_3 = 0U;
  void *non_tail_aiv_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kNonTailAivCtxIndex),
                                                            non_tail_aiv_task_start_pc, i_cache_prefetch_cnt_3));
  ctx->nonTailAivTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(non_tail_aiv_task_start_pc) &
                                                      0XFFFFFFFF);
  ctx->nontailAivTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(non_tail_aiv_task_start_pc) >> 32) &
                                                      0X0000FFFF);
  uint32_t i_cache_prefetch_cnt_4 = 0U;
  void *tail_aiv_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kTailAivCtxIndex),
                                                            tail_aiv_task_start_pc, i_cache_prefetch_cnt_4));
  ctx->tailAivTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tail_aiv_task_start_pc) & 0XFFFFFFFF);
  ctx->tailAivTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(tail_aiv_task_start_pc) >> 32) &
                                                   0X0000FFFF);
  uint32_t aiv_i_cache_prefetch_cnt = std::min(i_cache_prefetch_cnt_3, i_cache_prefetch_cnt_4);
  // TODO: icachePrefetchCnt is written twice; the final value is the min over all
  // four kernels (AIC and AIV) — confirm this overwrite is the intended semantics.
  ctx->icachePrefetchCnt = static_cast<uint16_t>(
      std::min(aic_i_cache_prefetch_cnt, aiv_i_cache_prefetch_cnt) & 0X0000001F);  // 5 bits, 0001,1111
  if (ctx_def.src_slot_size() != kSrcSlotNum) {
    REPORT_INNER_ERROR("E19999", "Size of src_slot in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
                       kSrcSlotNum, ctx_def.src_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of src_slot in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
           kSrcSlotNum, ctx_def.src_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < kSrcSlotNum; i++) {
    ctx->srcSlot[i] = static_cast<uint16_t>(ctx_def.src_slot(i));
  }
  return SUCCESS;
}
// Fill a runtime SDMA context from its protobuf definition.
// Source/destination base addresses are resolved from logic addresses and split
// into low/high 32-bit halves as the hardware context expects.
Status FftsPlusTaskInfo::InitSdmaCtx(const domi::FftsPlusSdmaCtxDef &ctx_def, rtFftsPlusSdmaCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // single-bit field
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusSdmaCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusSdmaCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (int idx = 0; idx < RT_CTX_SUCCESSOR_NUM; ++idx) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->sat = static_cast<uint8_t>(ctx_def.sat() & 0X00000001);  // single-bit field
  ctx->atm = static_cast<uint8_t>(ctx_def.atm() & 0X00000001);  // single-bit field
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->sdmaSqeHeader = ctx_def.sdma_sqe_header();
  ctx->sourceStreamId = static_cast<uint16_t>(ctx_def.src_stream_id());
  ctx->sourceSubstreamId = static_cast<uint16_t>(ctx_def.src_sub_stream_id());
  ctx->destinationStreamId = static_cast<uint16_t>(ctx_def.dst_stream_id());
  ctx->destinationSubstreamId = static_cast<uint16_t>(ctx_def.dst_sub_stream_id());
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *src_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.src_addr_base(), src_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  const uintptr_t src_val = reinterpret_cast<uintptr_t>(src_base);
  ctx->sourceAddressBaseL = static_cast<uint32_t>(src_val & 0XFFFFFFFF);  // low 32 bits
  ctx->sourceAddressBaseH = static_cast<uint32_t>(src_val >> 32);         // high 32 bits
  ctx->sourceAddressOffset = ctx_def.src_addr_offset();
  uint8_t *dst_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.dst_addr_base(), dst_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  const uintptr_t dst_val = reinterpret_cast<uintptr_t>(dst_base);
  ctx->destinationAddressBaseL = static_cast<uint32_t>(dst_val & 0XFFFFFFFF);  // low 32 bits
  ctx->destinationAddressBaseH = static_cast<uint32_t>(dst_val >> 32);         // high 32 bits
  ctx->destinationAddressOffset = ctx_def.dst_addr_offset();
  ctx->nonTailDataLength = ctx_def.non_tail_data_len();
  ctx->tailDataLength = ctx_def.tail_data_len();
  return SUCCESS;
}
// Fill a runtime data context from its protobuf definition.
// The assignment order mirrors the hardware context layout; masks keep only the
// bit-width each field occupies.
Status FftsPlusTaskInfo::InitDataCtx(const domi::FftsPlusDataCtxDef &ctx_def, rtFftsPlusDataCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->cntInit = static_cast<uint8_t>(ctx_def.cnt_init());
  ctx->cnt = static_cast<uint8_t>(ctx_def.cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusDataCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusDataCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->atm = static_cast<uint8_t>(ctx_def.atm() & 0X00000001);  // 1 bit, 0000,0001
  ctx->origConsumerCounter = static_cast<uint16_t>(ctx_def.orig_consumer_counter());
  ctx->runConsumerCounter = static_cast<uint16_t>(ctx_def.run_consumer_counter());
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  // Resolve the logic base address to a runtime address, then split into halves.
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *addr_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.addr_base(), addr_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->addressBaseL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base) & 0XFFFFFFFF);  // low 32 bits
  ctx->addressBaseH = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base) >> 32);  // high 32 bits
  ctx->addressOffset = ctx_def.addr_offset();
  // Non-tail / tail transfer shape: outer count, inner count, inner length, strides.
  ctx->nonTailNumOutter = static_cast<uint16_t>(ctx_def.non_tail_num_outter());
  ctx->nonTailNumInner = static_cast<uint16_t>(ctx_def.non_tail_num_inner());
  ctx->nonTailLengthInner = ctx_def.non_tail_len_inner();
  ctx->nonTailStrideOutter = ctx_def.non_tail_stride_outter();
  ctx->nonTailStrideInner = ctx_def.non_tail_stride_inner();
  ctx->tailNumOutter = static_cast<uint16_t>(ctx_def.tail_num_outter());
  ctx->tailNumInner = static_cast<uint16_t>(ctx_def.tail_num_inner());
  ctx->tailLengthInner = ctx_def.tail_len_inner();
  ctx->tailStrideOutter = ctx_def.tail_stride_outter();
  ctx->tailStrideInner = ctx_def.tail_stride_inner();
  return SUCCESS;
}
// Fill a runtime AICPU context from its protobuf definition.
// Masks keep only the bit-width each hardware field occupies.
Status FftsPlusTaskInfo::InitAicpuCtx(const domi::FftsPlusAicpuCtxDef &ctx_def, rtFftsPlusAiCpuCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor context-id list is fixed-size; reject any other length.
  if (ctx_def.successor_context_id_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_context_id in FftsPlusAicpuCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_context_id_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_context_id in FftsPlusAicpuCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_context_id_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorContextID[i] = static_cast<uint16_t>(ctx_def.successor_context_id(i));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);  // 1 bit, 0000,0001
  ctx->sqeIndex = static_cast<uint16_t>(ctx_def.sqe_index());
  ctx->kernelType = static_cast<uint8_t>(ctx_def.kernel_type() & 0X0000007F);  // 7 bits, 0111,1111
  ctx->bm = static_cast<uint8_t>(ctx_def.bm() & 0X00000001);                   // 1 bit , 0000,0001
  ctx->topicType = static_cast<uint8_t>(ctx_def.topic_type() & 0X0000000F);    // 4 bits, 0000,1111
  ctx->qos = static_cast<uint8_t>(ctx_def.qos() & 0X00000007);                 // 3 bits, 0000,0111
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->nonTailBlockdim = static_cast<uint16_t>(ctx_def.non_tail_block_dim());
  ctx->tailBlockdim = static_cast<uint16_t>(ctx_def.tail_block_dim());
  // The user-data array is fixed-size; reject any other length.
  if (ctx_def.user_data_size() != kUserDataNum) {
    REPORT_INNER_ERROR("E19999", "Size of user_data in FftsPlusAicpuCtxDef should be %d, but %d exactly",
                       kUserDataNum, ctx_def.user_data_size());
    GELOGE(FAILED, "[Check][Param] Size of user_data in FftsPlusAicpuCtxDef should be %d, but %d exactly",
           kUserDataNum, ctx_def.user_data_size());
    return FAILED;
  }
  for (size_t i = 0; i < kUserDataNum; i++) {
    ctx->usrData[i] = static_cast<uint32_t>(ctx_def.user_data(i));
  }
  ctx->subtopicId = static_cast<uint32_t>(ctx_def.sub_topic_id() & 0X00000FFF);  // 12 bits, 1111,1111,1111
  ctx->topicId = static_cast<uint32_t>(ctx_def.topic_id() & 0X0000003F);         // 6 bits, 0011,1111
  ctx->groupId = static_cast<uint32_t>(ctx_def.group_id() & 0X0000003F);         // 6 bits, 0011,1111
  ctx->usrDataLength = static_cast<uint32_t>(ctx_def.user_data_len() & 0X000000FF);  // 8 bits, 1111,1111
  // Fix: taskParamOffset was assigned from qos() (copy-paste defect — qos is
  // already packed above); it must come from the task_param_offset field.
  ctx->taskParamOffset = ctx_def.task_param_offset();
  return SUCCESS;
}
// Dispatch a "load" hardware context to the initializer matching its software
// ctx type. ctx is the raw context slot, reinterpreted per software type.
Status FftsPlusTaskInfo::InitLoadCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx) {
  switch (ctx_def.software_ctx_type()) {
    case RT_SOFT_CTX_TYPE_COND_SWITCH: {
      auto cond_switch_ctx = reinterpret_cast<rtFftsPlusCondSwitchCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitCondSwitchCtx(ctx_def.cond_switch_ctx(), cond_switch_ctx));
      break;
    }
    case RT_SOFT_CTX_TYPE_CASE_SWITCH: {
      // Exactly one of case_switch_ctx / case_default_ctx must be present.
      if (ctx_def.has_case_switch_ctx() == ctx_def.has_case_default_ctx()) {
        REPORT_INNER_ERROR("E19999", "case_switch_ctx %s and case_default_ctx %s when software ctx type is case",
                           ctx_def.has_case_switch_ctx() ? "exist" : "not exist",
                           ctx_def.has_case_default_ctx() ? "exist" : "not exist");
        GELOGE(FAILED, "[Check][Ctx] case_switch_ctx %s and case_default_ctx %s when software ctx type is case",
               ctx_def.has_case_switch_ctx() ? "exist" : "not exist",
               ctx_def.has_case_default_ctx() ? "exist" : "not exist");
        return FAILED;
      }
      if (ctx_def.has_case_switch_ctx()) {
        auto case_switch_ctx = reinterpret_cast<rtFftsPlusCaseSwitchCtx_t *>(ctx);
        GE_CHK_STATUS_RET_NOLOG(InitCaseSwitchCtx(ctx_def.case_switch_ctx(), case_switch_ctx));
      }
      if (ctx_def.has_case_default_ctx()) {
        auto case_default_ctx = reinterpret_cast<rtFftsPlusCaseDefCtx_t *>(ctx);
        GE_CHK_STATUS_RET_NOLOG(InitCaseDefaultCtx(ctx_def.case_default_ctx(), case_default_ctx));
      }
      break;
    }
    default:
      // Fix: report the software ctx type that failed the switch; the original
      // logged hardware_ctx_type(), which contradicted the message text.
      REPORT_INNER_ERROR("E19999", "Unsupported software ctx type %u when hardware ctx type is load",
                         ctx_def.software_ctx_type());
      GELOGE(FAILED, "[Check][CtxType] Unsupported software ctx type %u when hardware ctx type is load",
             ctx_def.software_ctx_type());
      return FAILED;
  }
  return SUCCESS;
}
// Fill a runtime cond-switch context from its protobuf definition.
// Packs the condition, both successor lists, the AXI read attributes and the two
// compare operands (loaded from load_addr0/1). Masks keep only the bit-width
// each hardware field occupies.
Status FftsPlusTaskInfo::InitCondSwitchCtx(const domi::FftsPlusCondSwitchCtxDef &ctx_def,
                                           rtFftsPlusCondSwitchCtx_t *&ctx) {
  ctx->trueSuccessorNum = static_cast<uint8_t>(ctx_def.true_successor_num());
  ctx->falseSuccessorNum = static_cast<uint8_t>(ctx_def.false_successor_num() & 0X0000007F);  // 7 bits, 0111,1111
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit , 0000,0001
  // RT_COND_TYPE_MAX is the sentinel end of the condition enum, not a valid type.
  if (ctx_def.condition() == RT_COND_TYPE_MAX) {
    REPORT_INNER_ERROR("E19999", "Unsupported cond type %u", ctx_def.condition());
    GELOGE(FAILED, "[Check][CtxType] Unsupported cond type %u", ctx_def.condition());
    return FAILED;
  }
  ctx->condition = static_cast<rtFftsPlusCondType_t>(ctx_def.condition());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // Both successor lists are fixed-size in the hardware context.
  if (ctx_def.true_successor_list_size() != RT_CTX_TRUE_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of true_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
                       RT_CTX_TRUE_SUCCESSOR_NUM, ctx_def.true_successor_list_size());
    GELOGE(FAILED,
           "[Check][Param] Size of true_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
           RT_CTX_TRUE_SUCCESSOR_NUM, ctx_def.true_successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_TRUE_SUCCESSOR_NUM; i++) {
    ctx->trueSuccessorList[i] = static_cast<uint16_t>(ctx_def.true_successor_list(i));
  }
  if (ctx_def.false_successor_list_size() != RT_CTX_FALSE_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999",
                       "Size of false_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
                       RT_CTX_FALSE_SUCCESSOR_NUM, ctx_def.false_successor_list_size());
    GELOGE(FAILED,
           "[Check][Param] Size of false_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
           RT_CTX_FALSE_SUCCESSOR_NUM, ctx_def.false_successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_FALSE_SUCCESSOR_NUM; i++) {
    ctx->falseSuccessorList[i] = static_cast<uint16_t>(ctx_def.false_successor_list(i));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);  // 1 bit, 0000,0001
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  // AXI read channel attributes for the operand loads.
  ctx->arSize = static_cast<uint8_t>(ctx_def.ar_size() & 0X00000007);  // 3 bits, 0000,0111
  ctx->snoop = static_cast<uint8_t>(ctx_def.snoop() & 0X00000001);  // 1 bit , 0000,0001
  ctx->arCache = static_cast<uint8_t>(ctx_def.ar_cache() & 0X0000000F);  // 4 bits, 0000,1111
  ctx->arProt = static_cast<uint8_t>(ctx_def.ar_prot() & 0X00000007);  // 3 bits, 0000,0111
  ctx->va = static_cast<uint8_t>(ctx_def.va() & 0X00000001);  // 1 bit , 0000,0001
  // Resolve both load base addresses from logic addresses and split into halves.
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *addr_base_0 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr0_base(), addr_base_0) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress0BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_0) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress0BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_0) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld0En = static_cast<uint32_t>(ctx_def.ld0_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress0Offset = ctx_def.load_addr0_offset();
  uint8_t *addr_base_1 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr1_base(), addr_base_1) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress1BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_1) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress1BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_1) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld1En = static_cast<uint32_t>(ctx_def.ld1_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress1Offset = ctx_def.load_addr1_offset();
  // Immediate comparison values for the condition evaluation.
  ctx->cmpValue1 = ctx_def.cmp_value_1();
  ctx->cmpValue2 = ctx_def.cmp_value_2();
  return SUCCESS;
}
// Fill a runtime case-switch context from its protobuf definition.
// Packs the label range, successor list, AXI read attributes and the two load
// base addresses used to fetch the case selector.
Status FftsPlusTaskInfo::InitCaseSwitchCtx(const domi::FftsPlusCaseSwitchCtxDef &ctx_def,
                                           rtFftsPlusCaseSwitchCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit , 0000,0001
  // Fix: startLabelId was copy-pasted from successor_num(); it must come from
  // the start_label_id field.
  ctx->startLabelId = static_cast<uint8_t>(ctx_def.start_label_id());
  ctx->labelListLen = static_cast<uint8_t>(ctx_def.label_list_len());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    // Fix: messages previously named FftsPlusCaseDefaultCtxDef; this initializer
    // handles FftsPlusCaseSwitchCtxDef.
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusCaseSwitchCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusCaseSwitchCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->atm = static_cast<uint8_t>(ctx_def.atm() & 0X00000001);  // 1 bit , 0000,0001
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  // AXI read channel attributes for the selector loads.
  ctx->arSize = static_cast<uint8_t>(ctx_def.ar_size() & 0X00000007);  // 3 bits, 0000,0111
  ctx->snoop = static_cast<uint8_t>(ctx_def.snoop() & 0X00000001);  // 1 bit , 0000,0001
  ctx->arCache = static_cast<uint8_t>(ctx_def.ar_cache() & 0X0000000F);  // 4 bits, 0000,1111
  ctx->arProt = static_cast<uint8_t>(ctx_def.ar_prot() & 0X00000007);  // 3 bits, 0000,0111
  ctx->va = static_cast<uint8_t>(ctx_def.va() & 0X00000001);  // 1 bit , 0000,0001
  // Resolve both load base addresses from logic addresses and split into halves.
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *addr_base_0 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr0_base(), addr_base_0) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress0BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_0) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress0BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_0) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld0En = static_cast<uint32_t>(ctx_def.ld0_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress0Offset = ctx_def.load_addr0_offset();
  uint8_t *addr_base_1 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr1_base(), addr_base_1) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress1BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_1) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress1BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_1) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld1En = static_cast<uint32_t>(ctx_def.ld1_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress1Offset = ctx_def.load_addr1_offset();
  return SUCCESS;
}
// Fill a runtime case-default context from its protobuf definition.
Status FftsPlusTaskInfo::InitCaseDefaultCtx(const domi::FftsPlusCaseDefaultCtxDef &ctx_def,
                                            rtFftsPlusCaseDefCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit , 0000,0001
  // Fix: startLabelId was copy-pasted from successor_num(); it must come from
  // the start_label_id field.
  ctx->startLabelId = static_cast<uint8_t>(ctx_def.start_label_id());
  ctx->labelListLen = static_cast<uint8_t>(ctx_def.label_list_len());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusCaseDefaultCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusCaseDefaultCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  return SUCCESS;
}
// No-op for FFTS+ tasks: nothing is pre-calculated here (addresses are resolved
// during Init). Parameters are unused but required by the TaskInfo interface.
Status FftsPlusTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  return SUCCESS;
}
// Refresh the device-side args buffer: re-map the collected IO addresses through
// the model's zero-copy table and copy the whole table down to args_.
Status FftsPlusTaskInfo::UpdateArgs() {
  GE_CHECK_NOTNULL(davinci_model_);
  std::vector<void *> refreshed_addrs = io_addrs_;
  davinci_model_->UpdateKnownZeroCopyAddr(refreshed_addrs);
  const auto copy_len = kAddrLen * refreshed_addrs.size();
  GE_CHK_RT_RET(rtMemcpy(args_, args_size_, refreshed_addrs.data(), copy_len, RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}
/// Launch the prepared FFTS+ task on the task's stream.
/// @return SUCCESS, or the mapped runtime error when the launch fails
Status FftsPlusTaskInfo::Distribute() {
  GELOGI("FftsPlusTaskInfo Distribute Start.");
  const rtError_t launch_ret = rtFftsPlusTaskLaunch(&ffts_plus_task_info_, stream_);
  if (launch_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Check][RT_ret] Call rtFftsPlusTaskLaunch failed, ret: 0x%X", launch_ret);
    return RT_ERROR_TO_GE_STATUS(launch_ret);
  }
  GELOGI("FftsPlusTaskInfo Distribute Success.");
  return SUCCESS;
}
// task_addr = {0,200,700,1000,2000, 3500} | |||
// task_addr_offset = {20,40,2,100,200} | |||
template <typename T> | |||
Status FftsPlusTaskInfo::InitIoAddrs(const RuntimeParam &rts_param, const T &ctx_def, uint32_t thread_id, | |||
uint32_t addr_count) { | |||
for (uint32_t i = 0; i < addr_count; ++i) { | |||
uintptr_t logic_addr = ctx_def.task_addr(i) + thread_id * ctx_def.task_addr_offset(i); | |||
uint8_t *io_addr = nullptr; | |||
if (ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGD("task base addr is %ld, offset is %ld, thread id is %d, logic addr is 0x%lx, io addr is %p", | |||
ctx_def.task_addr(i), ctx_def.task_addr_offset(i), thread_id, logic_addr, io_addr); | |||
io_addrs_.emplace_back(io_addr); | |||
} | |||
return SUCCESS; | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_FFTS_PLUS_TASK, FftsPlusTaskInfo); | |||
} // namespace ge |
@@ -0,0 +1,70 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ | |||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ | |||
#include "graph/load/model_manager/task_info/task_info.h" | |||
#include "graph/op_desc.h" | |||
namespace ge { | |||
class FftsPlusTaskInfo : public TaskInfo { | |||
public: | |||
FftsPlusTaskInfo() = default; | |||
~FftsPlusTaskInfo() override; | |||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
Status Distribute() override; | |||
Status UpdateArgs() override; | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
private: | |||
void InitFftsPlusSqe(const domi::FftsPlusSqeDef &sqe_def, rtFftsPlusSqe_t *&sqe); | |||
void InitFftsPlusSqeHeader(const domi::StarsSqeHeaderDef &sqe_header_def, rtStarsSqeHeader_t &sqe_header); | |||
Status InitFftsPlusCtx(const domi::FftsPlusTaskDef &task_def, size_t ctx_num, void *&ctx); | |||
Status InitAtStartCtx(const domi::FftsPlusAtStartCtxDef &ctx_def, rtFftsPlusAtStartCtx_t *&ctx); | |||
Status InitAtEndCtx(const domi::FftsPlusAtEndCtxDef &ctx_def, rtFftsPlusAtEndCtx_t *&ctx); | |||
Status InitLabelCtx(const domi::FftsPlusLabelCtxDef &ctx_def, rtFftsPlusLabelCtx_t *&ctx); | |||
Status InitHardWareCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx); | |||
Status InitAicAivCtx(const domi::FftsPlusAicAivCtxDef &ctx_def, uint32_t op_index, rtFftsPlusAicAivCtx_t *&ctx); | |||
Status InitNotifyCtx(const domi::FftsPlusNotifyCtxDef &ctx_def, rtFftsPlusNotifyCtx_t *&ctx); | |||
Status InitWriteValueCtx(const domi::FftsPlusWriteValueCtxDef &ctx_def, rtFftsPlusWriteValueCtx_t *&ctx); | |||
Status InitMixAicAivCtx(const domi::FftsPlusMixAicAivCtxDef &ctx_def, uint32_t op_index, rtFftsPlusMixAicAivCtx_t *&ctx); | |||
Status InitSdmaCtx(const domi::FftsPlusSdmaCtxDef &ctx_def, rtFftsPlusSdmaCtx_t *&ctx); | |||
Status InitDataCtx(const domi::FftsPlusDataCtxDef &ctx_def, rtFftsPlusDataCtx_t *&ctx); | |||
Status InitAicpuCtx(const domi::FftsPlusAicpuCtxDef &ctx_def, rtFftsPlusAiCpuCtx_t *&ctx); | |||
Status InitLoadCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx); | |||
Status InitCondSwitchCtx(const domi::FftsPlusCondSwitchCtxDef &ctx_def, rtFftsPlusCondSwitchCtx_t *&ctx); | |||
Status InitCaseSwitchCtx(const domi::FftsPlusCaseSwitchCtxDef &ctx_def, rtFftsPlusCaseSwitchCtx_t *&ctx); | |||
Status InitCaseDefaultCtx(const domi::FftsPlusCaseDefaultCtxDef &ctx_def, rtFftsPlusCaseDefCtx_t *&ctx); | |||
template<typename T> | |||
Status InitIoAddrs(const RuntimeParam &rts_param, const T &aic_aiv_def, uint32_t thread_id, uint32_t addr_count); | |||
DavinciModel *davinci_model_{nullptr}; | |||
rtFftsPlusTaskInfo_t ffts_plus_task_info_; | |||
std::vector<void *> io_addrs_; | |||
void *args_{nullptr}; // runtime args memory | |||
uint32_t args_size_{0}; // runtime args memory length | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ |
@@ -26,8 +26,8 @@ | |||
#include "external/graph/attr_value.h" | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "graph/load/model_manager/model_manager.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
#include "framework/common/debug/log.h" | |||
#include "runtime/rt.h" | |||
namespace { | |||
const char *const kAicpuAllshape = "_AllShape"; | |||
@@ -43,7 +43,7 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe | |||
UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
uint32_t num_inputs = op_desc->GetInputsSize(); | |||
uint32_t num_outputs = op_desc->GetOutputsSize(); | |||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(), | |||
num_inputs, | |||
num_outputs, | |||
@@ -76,6 +76,16 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe | |||
} | |||
} | |||
} | |||
AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); | |||
if (UpdateEventIdForAicpuBlockingOp(op_desc, ext_handle) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)", | |||
op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||
return FAILED; | |||
} | |||
auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", ext_info.size(), rt_ret); | |||
@@ -448,6 +458,101 @@ Status KernelExTaskInfo::Distribute() { | |||
stream_id_ = stream_id; | |||
GELOGI("KernelExTaskInfo Distribute Success. task id: %u, stream id: %u", task_id_, stream_id_); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
/// Query whether the current device supports the blocking AICPU operator protocol.
/// @param is_support  set to true iff the device reports RT_AICPU_BLOCKING_OP_SUPPORT
/// @return SUCCESS, a mapped runtime error, or FAILED on an out-of-range capability value
Status KernelExTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
  int32_t device_id = 0;
  auto rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  int32_t value = 0;
  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // The capability must be one of the two defined values; anything else is a protocol error.
  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    return FAILED;
  }
  // Idiom fix: the comparison already yields bool; no ternary needed.
  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT);
  return SUCCESS;
}
/// For a blocking AICPU op, fetch the per-stream event id from the model and
/// write it into the op's ext-info blob; no-op for non-blocking ops or devices
/// without blocking-op support.
Status KernelExTaskInfo::UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc,
                                                         std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle) {
  if (!is_blocking_aicpu_op_) {
    return SUCCESS;
  }
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("Device not support blocking aicpu op process");
    return SUCCESS;
  }
  uint32_t event_id = 0;
  const auto get_ret = davinci_model_->GetEventIdForBlockingAicpuOp(op_desc, stream_, event_id);
  if (get_ret != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get event id failed for op:%s(%s).", op_desc->GetName().c_str(),
                      op_desc->GetType().c_str());
    GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc->GetName().c_str(),
           op_desc->GetType().c_str());
    return FAILED;
  }
  if (ext_handle->UpdateEventId(event_id) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Update event id failed for op:%s(%s).", op_desc->GetName().c_str(),
                      op_desc->GetType().c_str());
    GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc->GetName().c_str(),
           op_desc->GetType().c_str());
    return FAILED;
  }
  GELOGI("Update event_id=%u success", event_id);
  return SUCCESS;
}
/// Append a wait-on-event task to the stream for a blocking AICPU op, then
/// reset the event so it can be reused; no-op when the device lacks support.
Status KernelExTaskInfo::DistributeWaitTaskForAicpuBlockingOp() {
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("Device not support blocking aicpu op process.");
    return SUCCESS;
  }
  GELOGD("Distribute wait task begin");
  rtEvent_t block_event = nullptr;
  if (davinci_model_->GetEventByStream(stream_, block_event) != SUCCESS) {
    GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed");
    return FAILED;
  }
  rtError_t rt_ret = rtStreamWaitEvent(stream_, block_event);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtEventReset(block_event, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
@@ -19,6 +19,7 @@ | |||
#include "graph/load/model_manager/task_info/task_info.h" | |||
#include "graph/op_desc.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
namespace ge { | |||
class KernelExTaskInfo : public TaskInfo { | |||
@@ -65,6 +66,12 @@ class KernelExTaskInfo : public TaskInfo { | |||
void InitDumpArgs(void *addr, const OpDescPtr &op_desc); | |||
Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); | |||
// for blocking aicpu op | |||
Status DistributeWaitTaskForAicpuBlockingOp(); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
Status UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc, | |||
std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle); | |||
uint32_t task_id_; | |||
uint32_t stream_id_; | |||
uint32_t dump_flag_; | |||
@@ -79,6 +86,7 @@ class KernelExTaskInfo : public TaskInfo { | |||
uint32_t args_offset_ = 0; | |||
int64_t fixed_addr_offset_ = 0; | |||
int32_t topic_type_flag_ = -1; | |||
bool is_blocking_aicpu_op_ = false; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ |
@@ -28,11 +28,10 @@ | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "graph/load/model_manager/model_manager.h" | |||
#include "graph/load/model_manager/model_utils.h" | |||
#include "runtime/kernel.h" | |||
#include "runtime/rt.h" | |||
#include "graph/load/model_manager/task_info/super_kernel/super_kernel.h" | |||
#include "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
#include "framework/common/debug/log.h" | |||
namespace { | |||
@@ -474,6 +473,12 @@ Status KernelTaskInfo::Distribute() { | |||
} | |||
// set for task_id_ | |||
UpdateTaskId(); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
GELOGD( | |||
"KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " | |||
"blockdim:%d stream:%p", | |||
@@ -482,6 +487,91 @@ Status KernelTaskInfo::Distribute() { | |||
return SUCCESS; | |||
} | |||
/// Query whether the current device supports the blocking AICPU operator protocol.
/// @param is_support  set to true iff the device reports RT_AICPU_BLOCKING_OP_SUPPORT
/// @return SUCCESS, a mapped runtime error, or FAILED on an out-of-range capability value
Status KernelTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
  int32_t device_id = 0;
  auto rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  int32_t value = 0;
  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // The capability must be one of the two defined values; anything else is a protocol error.
  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    return FAILED;
  }
  // Idiom fix: the comparison already yields bool; no ternary needed.
  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT);
  return SUCCESS;
}
/// For a blocking AICPU op, fetch the per-stream event id from the model and
/// write it into the op's ext-info blob; no-op for non-blocking ops or devices
/// without blocking-op support.
Status KernelTaskInfo::UpdateEventIdForAicpuBlockingOp(std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle) {
  if (is_blocking_aicpu_op_) {
    bool is_support = false;
    if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
      GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
      return FAILED;
    }
    if (!is_support) {
      GELOGD("Device not support blocking aicpu op process");
      return SUCCESS;
    }
    uint32_t event_id = 0;
    if (davinci_model_->GetEventIdForBlockingAicpuOp(op_desc_, stream_, event_id) != SUCCESS) {
      // Consistency fix: report the error like KernelExTaskInfo::UpdateEventIdForAicpuBlockingOp does.
      REPORT_CALL_ERROR("E19999", "Get event id failed for op:%s(%s).", op_desc_->GetName().c_str(),
                        op_desc_->GetType().c_str());
      GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc_->GetName().c_str(),
             op_desc_->GetType().c_str());
      return FAILED;
    }
    if (ext_handle->UpdateEventId(event_id) != SUCCESS) {
      REPORT_CALL_ERROR("E19999", "Update event id failed for op:%s(%s).", op_desc_->GetName().c_str(),
                        op_desc_->GetType().c_str());
      GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc_->GetName().c_str(),
             op_desc_->GetType().c_str());
      return FAILED;
    }
    GELOGI("Update event_id=%u success", event_id);
  }
  return SUCCESS;
}
/// Append a wait-on-event task to the stream for a blocking AICPU op, then
/// reset the event so it can be reused; no-op when the device lacks support.
Status KernelTaskInfo::DistributeWaitTaskForAicpuBlockingOp() {
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("device not support blocking aicpu op process.");
    return SUCCESS;
  }
  GELOGD("Distribute wait task begin");
  rtEvent_t block_event = nullptr;
  if (davinci_model_->GetEventByStream(stream_, block_event) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Call GetEventByStream failed");
    GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed");
    return FAILED;
  }
  rtError_t rt_ret = rtStreamWaitEvent(stream_, block_event);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtEventReset(block_event, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) { | |||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | |||
@@ -1109,7 +1199,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
uint32_t num_inputs = op_desc_->GetInputsSize(); | |||
uint32_t num_outputs = op_desc_->GetOutputsSize(); | |||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), | |||
num_inputs, | |||
num_outputs, | |||
@@ -1145,6 +1235,16 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
j, op_desc_->GetName().c_str()); | |||
} | |||
} | |||
AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_); | |||
if (UpdateEventIdForAicpuBlockingOp(ext_handle) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)", | |||
op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); | |||
return FAILED; | |||
} | |||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X", | |||
@@ -24,6 +24,8 @@ | |||
#include "graph/load/model_manager/task_info/task_info.h" | |||
#include "graph/op_desc.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
namespace ge { | |||
class KernelTaskInfo : public TaskInfo { | |||
public: | |||
@@ -148,6 +150,11 @@ class KernelTaskInfo : public TaskInfo { | |||
bool DoubleCallSKTSaveCheck(); | |||
void SetArgs(); | |||
// for blocking aicpu op | |||
Status DistributeWaitTaskForAicpuBlockingOp(); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
Status UpdateEventIdForAicpuBlockingOp(std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle); | |||
void *stub_func_; | |||
void *args_; | |||
void *sm_desc_; | |||
@@ -187,6 +194,7 @@ class KernelTaskInfo : public TaskInfo { | |||
uint32_t skt_dump_flag_ = RT_KERNEL_DEFAULT; | |||
void *superkernel_device_args_addr_ = nullptr; | |||
void *superkernel_dev_nav_table_ = nullptr; | |||
bool is_blocking_aicpu_op_ = false; | |||
struct AICPUCustomInfo { | |||
void *input_descs = nullptr; | |||
@@ -808,6 +808,14 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr | |||
GELOGE(ret, "[Call][OptimizeSubGraphWithMultiThreads] failed, ret:%d, session_id:%lu", ret, session_id); | |||
return ret; | |||
} | |||
for (const auto &item : sub_graph_map) { | |||
for (const auto &subgraph_info : item.second) { | |||
const auto &subgraph = subgraph_info->GetSubGraph(); | |||
for (const auto &new_graph : subgraph->GetAllSubgraphs()) { | |||
compute_graph->AddSubGraph(new_graph); | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -881,8 +889,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||
CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | |||
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", | |||
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, | |||
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuild", | |||
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuild, | |||
compute_graph); | |||
Status ret = compute_graph->TopologicalSorting(); | |||
@@ -1381,8 +1389,8 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen | |||
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | |||
graph_node->SetRunFlag(false); | |||
if (ret != SUCCESS) { | |||
GELOGE(GE_GRAPH_PRERUN_FAILED, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id); | |||
return GE_GRAPH_PRERUN_FAILED; | |||
GELOGE(ret, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id); | |||
return ret; | |||
} | |||
GELOGI("[BuildGraph] build graph success, graph_id=%u.", graph_id); | |||
@@ -2837,20 +2845,59 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
GELOGE(ret, "[Call][Partition] for Graph:%s by dynamic shape Failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
bool dynamic_shape_partitioned = false; | |||
if (!AttrUtils::GetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%s(id:%u) fail", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetName().c_str(), | |||
compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Get][Attr] %s from graph:%u failed", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
if (!compute_graph->HasAttr(ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED)) { | |||
REPORT_INNER_ERROR("E19999", "Attr:%s not exist in graph:%s(id:%u)", ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), | |||
compute_graph->GetName().c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Get][Attr] Attr %s not exist in graph:%u", ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), | |||
compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); | |||
GE_DUMP(compute_graph, "AfterDynamicShapePartition"); | |||
GE_TIMESTAMP_START(SubgraphPartitionAndOptimization_CompoundEngine); | |||
ret = SubgraphPartitionAndOptimization(graph_node, compute_graph, session_id, | |||
GraphPartitioner::kCompoundEnginePartitioning); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[SubgraphPartitionAndOptimization][CompoundEngine] for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SubgraphPartitionAndOptimization_CompoundEngine, | |||
"OptimizeSubgraph::SubgraphPartitionAndOptimization::CompoundEngine"); | |||
GE_DUMP(compute_graph, "MergedComputeGraphAfterCompoundEnginePartition"); | |||
GE_TIMESTAMP_START(SubgraphPartitionAndOptimization_AtomicEngine); | |||
ret = SubgraphPartitionAndOptimization(graph_node, compute_graph, session_id, | |||
GraphPartitioner::kAtomicEnginePartitioning); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[SubgraphPartitionAndOptimization][AtomicEngine] for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SubgraphPartitionAndOptimization_AtomicEngine, | |||
"OptimizeSubgraph::SubgraphPartitionAndOptimization::AtomicEngine"); | |||
GE_DUMP(compute_graph, "MergedComputeGraphAfterAtomicEnginePartition"); | |||
return SUCCESS; | |||
} | |||
Status GraphManager::SubgraphPartitionAndOptimization(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, | |||
uint64_t session_id, GraphPartitioner::Mode mode) { | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
REPORT_INNER_ERROR("E19999", "GELib instance is nullptr or it is not InitFlag, check invalid."); | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] Run enginePlacer failed, because GELib is invalid."); | |||
return FAILED; | |||
} | |||
if ((mode == GraphPartitioner::kCompoundEnginePartitioning) && | |||
instance_ptr->OpsKernelManagerObj().GetCompoundEngineContains().empty()) { | |||
GELOGI("No compound engine registers, ignore subgraph partition and optimization for compound engine"); | |||
return SUCCESS; | |||
} | |||
GE_TIMESTAMP_START(GraphPartition); | |||
GraphPartitioner &partitioner = GetCompilerStages(graph_node->GetGraphId()).partitioner; | |||
ret = partitioner.Partition(compute_graph, GraphPartitioner::kPartitioning); | |||
Status ret = partitioner.Partition(compute_graph, mode); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Call][Partition] for Graph:%s Failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
@@ -2863,24 +2910,24 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph"); | |||
std::set<string> build_steps = {BUILD_STEP_BEFORE_UB_MATCH, BUILD_STEP_AFTER_BUILDER, BUILD_STEP_AFTER_BUILDER_SUB}; | |||
if ((options_.build_mode == BUILD_MODE_TUNING) && (build_steps.count(options_.build_step) > 0)) { | |||
GE_TIMESTAMP_START(ConvertGraphToFile); | |||
std::string tuning_path; | |||
(void) GetContext().GetOption(TUNING_PATH, tuning_path); | |||
Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, | |||
(options_.build_step == BUILD_STEP_AFTER_BUILDER)); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Convert][Graph] [%s] to file failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
if (mode == GraphPartitioner::kAtomicEnginePartitioning) { | |||
std::set<string> build_steps = {BUILD_STEP_BEFORE_UB_MATCH, BUILD_STEP_AFTER_BUILDER, BUILD_STEP_AFTER_BUILDER_SUB}; | |||
if ((options_.build_mode == BUILD_MODE_TUNING) && (build_steps.count(options_.build_step) > 0)) { | |||
GE_TIMESTAMP_START(ConvertGraphToFile); | |||
std::string tuning_path; | |||
(void) GetContext().GetOption(TUNING_PATH, tuning_path); | |||
Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, | |||
(options_.build_step == BUILD_STEP_AFTER_BUILDER)); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Convert][Graph] [%s] to file failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(ConvertGraphToFile, "OptimizeSubgraph::ConvertGraphToFile"); | |||
return SUCCESS; | |||
} | |||
GE_TIMESTAMP_EVENT_END(ConvertGraphToFile, "OptimizeSubgraph::ConvertGraphToFile"); | |||
return SUCCESS; | |||
} | |||
ComputeGraphPtr merged_compute_graph = nullptr; | |||
std::vector<ComputeGraphPtr> merged_sub_graph_list; | |||
GE_TIMESTAMP_START(MergeSubgraph); | |||
ret = MergeSubGraph(merged_compute_graph, compute_graph, graph_node->GetGraphId()); | |||
if (ret != SUCCESS) { | |||
@@ -2896,27 +2943,31 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
sub_graph->SetSessionID(session_id); | |||
sub_graph->SetGraphID(graph_node->GetGraphId()); | |||
} | |||
bool off_superkernel = false; | |||
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { | |||
GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel); | |||
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", | |||
bool off_super_kernel = false; | |||
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_super_kernel)) { | |||
GELOGI("Compute graph %s get super kernel flag %d.", compute_graph->GetName().c_str(), off_super_kernel); | |||
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_super_kernel)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u failed", | |||
ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u fail", | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u failed", | |||
ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
} | |||
bool dynamic_shape_partitioned = false; | |||
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
GELOGI("Compute graph %s get super kernel flag %d.", compute_graph->GetName().c_str(), dynamic_shape_partitioned); | |||
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u failed", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u failed", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
} | |||
GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph"); | |||
GE_DUMP(merged_compute_graph, "mergedComputeGraph"); | |||
compute_graph = merged_compute_graph; | |||
if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u fail", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -243,6 +243,9 @@ class GraphManager { | |||
Status OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, uint64_t session_id); | |||
Status SubgraphPartitionAndOptimization(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, | |||
uint64_t session_id, GraphPartitioner::Mode mode); | |||
Status Build(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, | |||
GeRootModelPtr &ge_root_model, uint64_t session_id); | |||
@@ -17,10 +17,10 @@ | |||
#include "graph/optimize/graph_optimize.h" | |||
#include "graph/ge_context.h" | |||
#include "common/local_context.h" | |||
#include "graph/passes/dimension_adjust_pass.h" | |||
#include "inc/pass_manager.h" | |||
#include "init/gelib.h" | |||
#include "graph/partition/engine_place.h" | |||
namespace { | |||
const char *const kVectorCore = "VectorCore"; | |||
@@ -85,9 +85,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | |||
} | |||
Status ret = SUCCESS; | |||
vector<GraphOptimizerPtr> graph_optimizer; | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
REPORT_INNER_ERROR("E19999", "Gelib not init before, check invalid, graph:%s", | |||
@@ -96,7 +93,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
compute_graph->GetName().c_str()); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
vector<GraphOptimizerPtr> graph_optimizer; | |||
if (instance_ptr->DNNEngineManagerObj().IsEngineRegistered(engine_name)) { | |||
instance_ptr->OpsKernelManagerObj().GetGraphOptimizerByEngine(engine_name, graph_optimizer); | |||
AddNodeInputProperty(compute_graph); | |||
@@ -123,7 +120,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
} | |||
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | |||
ret = (*iter)->OptimizeFusedGraph(*(compute_graph)); | |||
Status ret = (*iter)->OptimizeFusedGraph(*(compute_graph)); | |||
if (ret != SUCCESS) { | |||
REPORT_INNER_ERROR("E19999", "Call OptimizeFusedGraph failed, ret:%d, engine_name:%s, " | |||
"graph_name:%s", ret, engine_name.c_str(), | |||
@@ -137,7 +134,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
GELOGI("Engine: %s is not registered. do nothing in subGraph Optimize by ATC.", engine_name.c_str()); | |||
} | |||
return ret; | |||
return SUCCESS; | |||
} | |||
Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | |||
@@ -269,13 +266,27 @@ Status GraphOptimize::OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_ | |||
return ret; | |||
} | |||
Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph) { | |||
Status GraphOptimize::OptimizeGraphBeforeBuild(ComputeGraphPtr &compute_graph) { | |||
if (compute_graph == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); | |||
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Check][Param] compute_graph is nullptr."); | |||
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | |||
} | |||
EnginePlacer engine_place(compute_graph); | |||
Status ret = engine_place.Run(); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Assign atomic engine for graph %s failed", compute_graph->GetName().c_str()); | |||
GELOGE(ret, "[Assign][Engine] Assign atomic engine for graph %s failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
ret = engine_place.AssignCompoundEngine(); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Assign compound engine for graph %s failed", compute_graph->GetName().c_str()); | |||
GELOGE(ret, "[Assign][Engine] Assign compound engine for graph %s failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
REPORT_INNER_ERROR("E19999", "Gelib not init before, check invalid, graph:%s.", | |||
@@ -284,13 +295,11 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr | |||
compute_graph->GetName().c_str()); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | |||
GELOGD("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.", | |||
graph_optimizer.size()); | |||
Status ret = SUCCESS; | |||
string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | |||
GELOGD("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", | |||
GELOGD("[OptimizeGraphBeforeBuild]: engine type will exclude: %s, core_type_: %s", | |||
exclude_core_Type.c_str(), core_type_.c_str()); | |||
if (graph_optimizer.size() != 0) { | |||
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | |||
@@ -308,7 +317,7 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr | |||
} | |||
} | |||
} | |||
return ret; | |||
return SUCCESS; | |||
} | |||
Status GraphOptimize::OptimizeAfterStage1(ComputeGraphPtr &compute_graph) { | |||
@@ -55,8 +55,8 @@ class GraphOptimize { | |||
// for engine to optimize merged whole graph before ge Optimize2 | |||
Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); | |||
// for rts optimize before build to add attr and insert memcpy op | |||
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); | |||
// for optimize before build | |||
Status OptimizeGraphBeforeBuild(ComputeGraphPtr &compute_graph); | |||
// optimize whole graph, using after stage1 | |||
Status OptimizeAfterStage1(ComputeGraphPtr &graph); | |||
@@ -16,19 +16,12 @@ | |||
#include "graph/partition/engine_place.h" | |||
#include <climits> | |||
#include <memory> | |||
#include <string> | |||
#include <utility> | |||
#include <mutex> | |||
#include "framework/common/op/ge_op_utils.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "init/gelib.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
#include "analyzer/analyzer.h" | |||
namespace ge { | |||
namespace { | |||
@@ -40,7 +33,7 @@ Status EnginePlacer::Check() const { | |||
GELOGE(GE_GRAPH_NULL_INPUT, "[Check][Param] compute_graph_ is nullptr."); | |||
return FAILED; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
REPORT_INNER_ERROR("E19999", "GELib instance is nullptr or it is not InitFlag, check invalid."); | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] Run enginePlacer failed, because GELib is invalid."); | |||
@@ -49,7 +42,7 @@ Status EnginePlacer::Check() const { | |||
return SUCCESS; | |||
} | |||
Status EnginePlacer::Run() { | |||
Status EnginePlacer::Run(bool direct_node_flag) { | |||
std::lock_guard<std::mutex> lock(check_support_cost_mutex); | |||
GELOGD("Engine placer starts."); | |||
@@ -58,8 +51,8 @@ Status EnginePlacer::Run() { | |||
} | |||
bool is_check_support_success = true; | |||
// Assign engine for each node in the graph | |||
ge::GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStaistic(); | |||
for (const auto &node_ptr : compute_graph_->GetDirectNode()) { | |||
GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStatistic(); | |||
for (const auto &node_ptr : compute_graph_->GetNodes(direct_node_flag)) { | |||
GE_CHECK_NOTNULL(node_ptr); | |||
auto op_desc = node_ptr->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
@@ -73,9 +66,7 @@ Status EnginePlacer::Run() { | |||
bool use_exist_engine_name = !op_desc->GetOpKernelLibName().empty() || (has_kernel_attr && has_engine_attr); | |||
if (use_exist_engine_name) { | |||
if (op_desc->GetOpEngineName().empty()) { | |||
GELOGI("Op %s set engine_name %s engine_name %s from attrs", | |||
op_desc->GetName().c_str(), | |||
engine_name.c_str(), | |||
GELOGI("Op %s set engine_name %s engine_name %s from attrs", op_desc->GetName().c_str(), engine_name.c_str(), | |||
kernel_name.c_str()); | |||
op_desc->SetOpEngineName(engine_name); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
@@ -83,7 +74,7 @@ Status EnginePlacer::Run() { | |||
engine_name = op_desc->GetOpEngineName(); | |||
} else { | |||
// Call placer cost model to get the "best" engine for this node | |||
engine_name = ge::GELib::GetInstance()->DNNEngineManagerObj().GetDNNEngineName(node_ptr); | |||
engine_name = GELib::GetInstance()->DNNEngineManagerObj().GetDNNEngineName(node_ptr); | |||
// If can't get op's engine name, keep check support finish and return failed | |||
if (engine_name.empty()) { | |||
is_check_support_success = false; | |||
@@ -94,34 +85,48 @@ Status EnginePlacer::Run() { | |||
continue; | |||
} | |||
} | |||
if (AssignEngineAndLog(node_ptr, engine_name) != SUCCESS) { | |||
GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Call][AssignEngineAndLog] FAILED, node:%s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// Record the node assigned atomic_engine name | |||
GELOGD("Assigning DNNEngine %s to node %s, op type %s", engine_name.c_str(), node_ptr->GetName().c_str(), | |||
node_ptr->GetType().c_str()); | |||
node_atomic_engine_map_.insert(std::make_pair(node_ptr, engine_name)); | |||
} | |||
for (auto &it : ge::GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | |||
for (auto &it : GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | |||
GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second); | |||
} | |||
GELOGD("Engine placer ends."); | |||
return is_check_support_success ? SUCCESS : FAILED; | |||
} | |||
Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::string &engine_name) { | |||
if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) { | |||
REPORT_INNER_ERROR("E19999", "Param node_ptr is nullptr or it's opdesc is nullptr, check invalid."); | |||
GELOGE(FAILED, "[Check][Param] node_ptr is nullptr."); | |||
Status EnginePlacer::AssignCompoundEngine() { | |||
if (GELib::GetInstance()->OpsKernelManagerObj().GetCompoundEngineContains().empty()) { | |||
GELOGI("No compound engine registers, ignore assign compound engine"); | |||
return SUCCESS; | |||
} | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (GraphUtils::GetSubgraphs(compute_graph_, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs contained in graph %s failed", compute_graph_->GetName().c_str()); | |||
GELOGE(FAILED, "[Get][Subgraphs] Get subgraphs contained in graph %s failed", compute_graph_->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// private function, promise node_ptr->GetOpDesc() not null | |||
GELOGD("Assigning DNNEngine %s to node %s, op type %s", engine_name.c_str(), node_ptr->GetName().c_str(), | |||
node_ptr->GetOpDesc()->GetType().c_str()); | |||
// Record the node assigned engine name | |||
node_engine_map_.insert(std::make_pair(node_ptr, engine_name)); | |||
for (const auto &subgraph : subgraphs) { | |||
(void)subgraph->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
} | |||
std::reverse(subgraphs.begin(), subgraphs.end()); | |||
subgraphs.emplace_back(compute_graph_); | |||
for (const auto &subgraph : subgraphs) { | |||
for (const auto &node : subgraph->GetDirectNode()) { | |||
std::string compound_engine_name = GELib::GetInstance()->DNNEngineManagerObj().GetCompoundEngineName(node, 1); | |||
GELOGD("Assign compound engine %s to node %s, op type %s", compound_engine_name.c_str(), | |||
node->GetName().c_str(), node->GetType().c_str()); | |||
node_compound_engine_map_.insert(std::make_pair(node, compound_engine_name)); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
} // namespace ge | |||
const NodeEngineMap *EnginePlacer::GetNodeEngineMap(bool compound_engine_flag) const { | |||
return compound_engine_flag ? &node_compound_engine_map_ : &node_atomic_engine_map_; | |||
} | |||
} // namespace ge |
@@ -17,7 +17,6 @@ | |||
#ifndef GE_GRAPH_PARTITION_ENGINE_PLACE_H_ | |||
#define GE_GRAPH_PARTITION_ENGINE_PLACE_H_ | |||
#include <string> | |||
#include <unordered_map> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
@@ -37,19 +36,20 @@ class EnginePlacer { | |||
EnginePlacer() = default; | |||
~EnginePlacer() = default; | |||
Status Run(); | |||
Status Run(bool direct_node_flag = true); | |||
Status AssignCompoundEngine(); | |||
// Get the unique node-engine map | |||
const NodeEngineMap *GetNodeEngineMap() const { return &node_engine_map_; } | |||
const NodeEngineMap *GetNodeEngineMap(bool compound_engine_flag) const; | |||
void SetComputeGraph(const ComputeGraphPtr &compute_graph) { compute_graph_ = compute_graph; } | |||
private: | |||
Status AssignEngineAndLog(ConstNodePtr node_ptr, const std::string &engine_name); | |||
Status Check() const; | |||
ComputeGraphPtr compute_graph_; | |||
NodeEngineMap node_engine_map_; | |||
NodeEngineMap node_atomic_engine_map_; | |||
NodeEngineMap node_compound_engine_map_; | |||
}; | |||
} // namespace ge | |||
@@ -23,17 +23,12 @@ | |||
#include <vector> | |||
#include "analyzer/analyzer.h" | |||
#include "common/ge/ge_util.h" | |||
#include "framework/common/op/ge_op_utils.h" | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/manager/graph_manager_utils.h" | |||
#include "common/ge_call_wrapper.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "init/gelib.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
namespace { | |||
const char *const kEngineDefaultData = "ENGINE_DEFAULT_DATA"; | |||
@@ -386,7 +381,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
dst_node_op_desc->GetOpEngineName()), GELOGW("SetStr rearNodeEngineName failed");) | |||
// replace input_desc of end with owner node's desc | |||
int output_index = ge::AnchorUtils::GetIdx(out_anchor); | |||
bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); | |||
bool is_need_update_desc = (output_index >= 0) && ((graph_info_.mode_ == kAtomicEnginePartitioning) || | |||
(graph_info_.mode_ == kCompoundEnginePartitioning)); | |||
if (is_need_update_desc) { | |||
if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) { | |||
GELOGE(GRAPH_PARAM_INVALID, "[Update][EndOpDesc] failed, input index:%d, end_op_desc:%s", | |||
@@ -464,7 +460,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
graph_info_.num_of_pld_end_++; | |||
// replace output_desc of pld with input node's output desc | |||
int input_index = ge::AnchorUtils::GetIdx(peer_in_anchor); | |||
is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning); | |||
is_need_update_desc = (input_index >= 0) && ((graph_info_.mode_ == kAtomicEnginePartitioning) || | |||
(graph_info_.mode_ == kCompoundEnginePartitioning)); | |||
if (is_need_update_desc) { | |||
if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) { | |||
GELOGE(GRAPH_PARAM_INVALID, "[Update][PldOpDesc] failed, output index:%d, pld_op_desc:%s", | |||
@@ -629,18 +626,8 @@ bool ge::GraphPartitioner::HasNoInput(ge::NodePtr node) { | |||
Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) { | |||
GELOGI("Initialize starts."); | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || compute_graph == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "compute_graph or instance_ptr of GELib is nullptr, check invalid."); | |||
GELOGE(GE_GRAPH_NOT_INIT, "[Check][Param] compute_graph or instance_ptr of GELib is nullptr."); | |||
return FAILED; | |||
} | |||
graph_info_.engine_placer_.SetComputeGraph(compute_graph); | |||
if (graph_info_.engine_placer_.Run() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][Run] Engine placer run failed, graph:%s.", compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
const NodeEngineMap *node_engine_map = graph_info_.engine_placer_.GetNodeEngineMap(); | |||
GE_CHECK_NOTNULL(compute_graph); | |||
const NodeEngineMap *node_engine_map = GetNodeEngineMap(); | |||
size_t temp_index = 0; | |||
// travese nodes by topo order one by one | |||
for (const auto &node : compute_graph->GetDirectNode()) { | |||
@@ -999,6 +986,25 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo | |||
} | |||
Status ge::GraphPartitioner::Partition(ge::ComputeGraphPtr compute_graph, Mode mode) { | |||
if (compute_graph->TopologicalSorting() != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "TopologicalSorting for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[Call][TopologicalSorting] for subGraph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
graph_info_.engine_placer_.SetComputeGraph(compute_graph); | |||
if (graph_info_.engine_placer_.Run(false) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][Run] Engine placer run failed, graph:%s.", compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
if (mode == GraphPartitioner::kCompoundEnginePartitioning) { | |||
if (graph_info_.engine_placer_.AssignCompoundEngine() != SUCCESS) { | |||
GELOGE(FAILED, "[Partition][SubGraph] Assign compound engine for graph %s failed", | |||
compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
ClearAllPartitionData(); | |||
auto real_ret = SUCCESS; | |||
auto ret = PartitionSubGraph(compute_graph, mode); | |||
@@ -1043,14 +1049,6 @@ Status ge::GraphPartitioner::PartitionSubGraph(ge::ComputeGraphPtr compute_graph | |||
return FAILED; | |||
} | |||
GELOGI("Graph Partition starts, graph nodes size is %zu", compute_graph->GetDirectNodesSize()); | |||
Status ret = compute_graph->TopologicalSorting(); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "TopologicalSorting for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[Call][TopologicalSorting] for subGraph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_START(PartitionSubGraphInitialize); | |||
if (Initialize(compute_graph) != SUCCESS) { | |||
GELOGE(GE_GRAPH_INIT_FAILED, "[Call][Initialize] for graph:%s failed", compute_graph->GetName().c_str()); | |||
@@ -1234,4 +1232,8 @@ void ge::GraphPartitioner::ClearAllPartitionData() { | |||
GELOGD("Clear all partition data success."); | |||
return; | |||
} | |||
const NodeEngineMap *GraphPartitioner::GetNodeEngineMap() const { | |||
return graph_info_.engine_placer_.GetNodeEngineMap(graph_info_.mode_ == kCompoundEnginePartitioning); | |||
} | |||
} // namespace ge |
@@ -56,7 +56,12 @@ class GraphPartitioner { | |||
/// Partition() can only be called in Partition mode. | |||
/// MergeAfterSubGraphOptimization() can only be called in Merge mode. | |||
/// After Partition(), change to Merge mode. After MergeAfterSubGraphOptimization(), change to Partition mode | |||
enum Mode { kPartitioning, kSecondPartitioning, kMerging }; | |||
enum Mode { | |||
kAtomicEnginePartitioning, | |||
kCompoundEnginePartitioning, | |||
kSecondPartitioning, | |||
kMerging | |||
}; | |||
GraphPartitioner() : partition_times_(0){}; | |||
~GraphPartitioner() = default; | |||
@@ -136,6 +141,8 @@ class GraphPartitioner { | |||
void ClearAllPartitionData(); | |||
void SetMergedGraphId(ComputeGraphPtr &output_merged_compute_graph); | |||
const NodeEngineMap *GetNodeEngineMap() const; | |||
struct GraphPartitionInfo { | |||
EnginePlacer engine_placer_; | |||
PartitionMap partitions_; // sub-graphs after partition <sub-graph-id, ComputeGraphPtr> | |||
@@ -165,12 +172,12 @@ class GraphPartitioner { | |||
pld_2_end_.clear(); | |||
end_2_pld_.clear(); | |||
if (mode_ == kMerging) { | |||
mode_ = kPartitioning; | |||
mode_ = kAtomicEnginePartitioning; | |||
} else { | |||
mode_ = mode; | |||
} | |||
} | |||
GraphPartitionInfo() : num_of_pld_end_(0), input_size_(0), output_size_(0), mode_(kPartitioning) {} | |||
GraphPartitionInfo() : num_of_pld_end_(0), input_size_(0), output_size_(0), mode_(kAtomicEnginePartitioning) {} | |||
~GraphPartitionInfo() = default; | |||
}; | |||
std::unordered_map<ComputeGraphPtr, GraphPartitionInfo> graph_2_graph_partition_info_; | |||
@@ -178,8 +185,10 @@ class GraphPartitioner { | |||
Graph2InputNodesSubGraphInfo graph_2_input_subgraph_; | |||
GraphPartitionInfo graph_info_; | |||
uint32_t partition_times_; // times of call partition | |||
std::map<Mode, std::string> mode_2_str_ = {{kPartitioning, "Partitioning"}, | |||
{kSecondPartitioning, "SecondPartitioning"}, {kMerging, "Merging"}}; | |||
std::map<Mode, std::string> mode_2_str_ = {{ kAtomicEnginePartitioning, "AtomicEnginePartitioning" }, | |||
{ kCompoundEnginePartitioning, "CompoundEnginePartitioning" }, | |||
{ kSecondPartitioning, "SecondPartitioning" }, | |||
{ kMerging, "Merging" }}; | |||
friend class GraphManager; | |||
}; | |||
} // namespace ge | |||
@@ -93,15 +93,15 @@ Status StagePartitioner::SplitStageLevel() { | |||
auto node = nodes.top(); | |||
nodes.pop(); | |||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
uint32_t tmp_level = cur_stage_level; | |||
(void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); | |||
if (tmp_level != cur_stage_level) { | |||
continue; | |||
} | |||
for (const auto &in_node : node->GetInAllNodes()) { | |||
if (visited_stage_nodes.count(in_node) != 0) { | |||
continue; | |||
} | |||
uint32_t tmp_level = cur_stage_level; | |||
(void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); | |||
if (tmp_level != cur_stage_level) { | |||
continue; | |||
} | |||
if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { | |||
REPORT_CALL_ERROR("E19999", "Set Attr %s on node %s failed.", | |||
ATTR_STAGE_LEVEL.c_str(), in_node->GetName().c_str()); | |||
@@ -128,315 +128,27 @@ Status StagePartitioner::SplitStageLevel() { | |||
Status StagePartitioner::StagePartition() { | |||
for (const auto &stage : stage_nodes_) { | |||
StageInfo stage_info(stage.first); | |||
FindStageIO(stage.second, stage_info); | |||
std::string subgraph_name = "Subgraph_Level_" + std::to_string(stage.first); | |||
NodePtr graph_node = BuildSubgraphNode(subgraph_name, stage_info); | |||
if (graph_node == nullptr) { | |||
GELOGE(FAILED, "[Build][SubgraphNode] for stage %u failed, graph name:%s.", stage.first, subgraph_name.c_str()); | |||
const std::string &subgraph_name = "Subgraph_Level_" + std::to_string(stage.first); | |||
const auto &stage_subgraph = GraphUtils::BuildSubgraphWithNodes(root_graph_, stage.second, subgraph_name); | |||
if (stage_subgraph == nullptr) { | |||
REPORT_CALL_ERROR("E19999", "Build subgraph %s failed.", subgraph_name.c_str()); | |||
GELOGE(FAILED, "[Build][Subgraph] %s failed.", subgraph_name.c_str()); | |||
return FAILED; | |||
} | |||
ComputeGraphPtr subgraph = BuildStageGraph(graph_node, stage_info); | |||
if (subgraph == nullptr) { | |||
GELOGE(FAILED, "[Build][StageGraph] %s for stage %u failed.", graph_node->GetName().c_str(), stage.first); | |||
if (!AttrUtils::SetInt(stage_subgraph, ATTR_STAGE_LEVEL, stage.first)) { | |||
REPORT_CALL_ERROR("E19999", "Set attr %s on graph %s failed.", ATTR_STAGE_LEVEL.c_str(), | |||
stage_subgraph->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][Attr] %s on graph %s failed.", ATTR_STAGE_LEVEL.c_str(), stage_subgraph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
if (root_graph_->AddSubgraph(subgraph) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "add subgraph:%s in root graph:%s of stage %u failed.", | |||
subgraph->GetName().c_str(), root_graph_->GetName().c_str(), stage.first); | |||
GELOGE(FAILED, "[Add][SubGraph] %s in root graph:%s of stage %u failed.", | |||
subgraph->GetName().c_str(), root_graph_->GetName().c_str(), stage.first); | |||
const auto &parent_node = stage_subgraph->GetParentNode(); | |||
GE_CHECK_NOTNULL(parent_node); | |||
if (!AttrUtils::SetInt(parent_node->GetOpDesc(), ATTR_STAGE_LEVEL, stage.first)) { | |||
REPORT_CALL_ERROR("E19999", "Set attr %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), | |||
parent_node->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][Attr] %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), parent_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
if ((RelinkDataEdges(graph_node, stage_info) != SUCCESS) || | |||
(RelinkCtrlEdges(graph_node, stage_info) != SUCCESS)) { | |||
GELOGE(FAILED, "[ReLink][Edges] for stage %u failed, graph_node:%s.", stage.first, graph_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (const auto &stage_node : stage.second) { | |||
if (GraphUtils::RemoveNodeWithoutRelink(root_graph_, stage_node) != GRAPH_SUCCESS) { | |||
GELOGW("Remove node %s failed.", stage_node->GetName().c_str()); | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
void StagePartitioner::FindStageIO(const std::unordered_set<NodePtr> &stage_nodes, StageInfo &stage_info) { | |||
for (const auto &node : stage_nodes) { | |||
// stage nodes | |||
stage_info.stage_nodes.emplace(node); | |||
// in data nodes | |||
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
if (peer_out_anchor == nullptr) { | |||
continue; | |||
} | |||
if (stage_nodes.count(peer_out_anchor->GetOwnerNode()) == 0) { | |||
stage_info.data_inputs.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); | |||
} else { | |||
stage_info.inner_data_edges.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); | |||
} | |||
} | |||
// out data nodes | |||
std::list<InDataAnchorPtr> peer_data_anchors; | |||
for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||
peer_data_anchors.clear(); | |||
for (const auto &peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
if (stage_nodes.count(peer_in_anchor->GetOwnerNode()) == 0) { | |||
peer_data_anchors.emplace_back(peer_in_anchor); | |||
} | |||
} | |||
if (!peer_data_anchors.empty()) { | |||
stage_info.data_outputs.emplace_back(std::make_pair(out_data_anchor, peer_data_anchors)); | |||
} | |||
} | |||
// in ctrl nodes | |||
for (const auto &in_ctrl_node : node->GetInControlNodes()) { | |||
if (stage_nodes.count(in_ctrl_node) == 0) { | |||
stage_info.ctrl_inputs.emplace_back(in_ctrl_node->GetOutControlAnchor(), node->GetInControlAnchor()); | |||
} else { | |||
stage_info.inner_ctrl_edges.emplace_back(std::make_pair(in_ctrl_node->GetOutControlAnchor(), | |||
node->GetInControlAnchor())); | |||
} | |||
} | |||
// out ctrl nodes | |||
for (const auto &out_ctrl_node : node->GetOutControlNodes()) { | |||
if (stage_nodes.count(out_ctrl_node) == 0) { | |||
stage_info.ctrl_outputs.emplace_back(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()); | |||
} | |||
} | |||
} | |||
} | |||
NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info) { | |||
OpDescBuilder op_desc_builder(graph_name, PARTITIONEDCALL); | |||
size_t input_num = stage_info.data_inputs.size(); | |||
for (size_t i = 0; i < input_num; i++) { | |||
auto input_desc = stage_info.data_inputs[i].second->GetOwnerNode()->GetOpDesc(); | |||
if (input_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "[Check][Param] op_desc is null, node:%s", | |||
stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str()); | |||
return nullptr; | |||
} | |||
op_desc_builder.AddInput("args" + std::to_string(i), | |||
input_desc->GetInputDesc(stage_info.data_inputs[i].second->GetIdx())); | |||
} | |||
size_t output_num = stage_info.data_outputs.size(); | |||
for (size_t i = 0; i < output_num; i++) { | |||
auto output_desc = stage_info.data_outputs[i].first->GetOwnerNode()->GetOpDesc(); | |||
if (output_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "[Check][Param] op_desc is null, node:%s", | |||
stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str()); | |||
return nullptr; | |||
} | |||
op_desc_builder.AddOutput("output" + std::to_string(i), | |||
output_desc->GetOutputDesc(stage_info.data_outputs[i].first->GetIdx())); | |||
} | |||
OpDescPtr op_desc = op_desc_builder.Build(); | |||
if (op_desc == nullptr) { | |||
GELOGE(FAILED, "[Create][OpDesc] for subgraph node failed, name:%s.", graph_name.c_str()); | |||
return nullptr; | |||
} | |||
op_desc->AddSubgraphName("f"); | |||
op_desc->SetSubgraphInstanceName(0, graph_name); | |||
if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) { | |||
REPORT_CALL_ERROR("E19999", "set attr %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Set][Attr] %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), op_desc->GetName().c_str()); | |||
return nullptr; | |||
} | |||
NodePtr subgraph_node = root_graph_->AddNode(op_desc); | |||
if (subgraph_node == nullptr) { | |||
REPORT_CALL_ERROR("E19999", "add node:%s in graph:%s failed.", | |||
op_desc->GetName().c_str(), root_graph_->GetName().c_str()); | |||
GELOGE(FAILED, "[Add][Node] %s in graph:%s failed.", op_desc->GetName().c_str(), root_graph_->GetName().c_str()); | |||
return nullptr; | |||
} | |||
if (subgraph_node->SetOwnerComputeGraph(root_graph_) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "SetOwnerComputeGraph for node %s failed, grpah:%s.", | |||
subgraph_node->GetName().c_str(), root_graph_->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][OwnerGraph] for node %s failed, grpah:%s.", | |||
subgraph_node->GetName().c_str(), root_graph_->GetName().c_str()); | |||
return nullptr; | |||
} | |||
return subgraph_node; | |||
} | |||
ComputeGraphPtr StagePartitioner::BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info) { | |||
CompleteGraphBuilder graph_builder(subgraph_node->GetName(), false); | |||
// Add parent node | |||
graph_builder.SetParentNode(subgraph_node); | |||
// Add node | |||
for (const auto &node : stage_info.stage_nodes) { | |||
graph_builder.AddNode(AttrUtils::CopyOpDesc(node->GetOpDesc())); | |||
} | |||
// Set Input | |||
size_t data_input_num = stage_info.data_inputs.size(); | |||
for (size_t i = 0; i < data_input_num; i++) { | |||
graph_builder.SetInput(i, { stage_info.data_inputs[i].second->GetOwnerNode()->GetName() }, | |||
{ static_cast<uint32_t>(stage_info.data_inputs[i].second->GetIdx()) }); | |||
} | |||
// Add Outputs | |||
size_t data_output_num = stage_info.data_outputs.size(); | |||
for (uint32_t i = 0; i < data_output_num; i++) { | |||
graph_builder.AddOutput(stage_info.data_outputs[i].first->GetOwnerNode()->GetName(), | |||
stage_info.data_outputs[i].first->GetIdx()); | |||
} | |||
// Add Data Edges | |||
for (const auto &data_edge : stage_info.inner_data_edges) { | |||
graph_builder.AddDataLink(data_edge.first->GetOwnerNode()->GetName(), data_edge.first->GetIdx(), | |||
data_edge.second->GetOwnerNode()->GetName(), data_edge.second->GetIdx()); | |||
} | |||
// Add Ctrl Edges | |||
for (const auto &ctrl_edge : stage_info.inner_ctrl_edges) { | |||
graph_builder.AddControlLink(ctrl_edge.first->GetOwnerNode()->GetName(), | |||
ctrl_edge.second->GetOwnerNode()->GetName()); | |||
} | |||
// Add Input-Mapping | |||
std::map<uint32_t, uint32_t> input_mapping; | |||
for (size_t i = 0; i < data_input_num; i++) { | |||
input_mapping[i] = i; | |||
} | |||
graph_builder.SetInputMapping(input_mapping); | |||
// Add outputMapping | |||
std::map<uint32_t, uint32_t> output_mapping; | |||
for (size_t i = 0; i < data_output_num; i++) { | |||
output_mapping[i] = i; | |||
} | |||
graph_builder.SetOutputMapping(output_mapping); | |||
graphStatus error_code = GRAPH_SUCCESS; | |||
std::string error_msg; | |||
ComputeGraphPtr subgraph = graph_builder.Build(error_code, error_msg); | |||
if (subgraph == nullptr) { | |||
GELOGE(error_code, "[Build][Subgraph] %s failed:%s.", subgraph_node->GetName().c_str(), error_msg.c_str()); | |||
return nullptr; | |||
} | |||
if (!AttrUtils::SetInt(subgraph, ATTR_STAGE_LEVEL, stage_info.stage_level)) { | |||
REPORT_CALL_ERROR("E19999", "set attr %s on graph %s failed.", | |||
ATTR_STAGE_LEVEL.c_str(), subgraph->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][Attr] %s on graph %s failed.", ATTR_STAGE_LEVEL.c_str(), subgraph->GetName().c_str()); | |||
return nullptr; | |||
} | |||
return subgraph; | |||
} | |||
Status StagePartitioner::RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) {
  // Re-route data edges that cross the stage boundary so external peers connect
  // to the new subgraph node instead of the original inner nodes.
  // data_inputs[i] / data_outputs[i] correspond by position to subgraph_node's
  // i-th in/out data anchor.
  // in data nodes
  for (size_t i = 0; i < stage_info.data_inputs.size(); i++) {
    if (stage_info.data_inputs[i].first->Unlink(stage_info.data_inputs[i].second) != GRAPH_SUCCESS) {
      REPORT_CALL_ERROR("E19999", "remove data edge from %s:%d to %s:%d failed",
                        stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
                        stage_info.data_inputs[i].first->GetIdx(),
                        stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str(),
                        stage_info.data_inputs[i].second->GetIdx());
      GELOGE(INTERNAL_ERROR, "[Remove][DataEdge] %s:%d->%s:%d failed.",
             stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
             stage_info.data_inputs[i].first->GetIdx(),
             stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str(),
             stage_info.data_inputs[i].second->GetIdx());
      return INTERNAL_ERROR;
    }
    // Null-check the in-anchor before linking, consistent with the
    // GE_CHECK_NOTNULL on the out-anchor in the output loop below.
    const auto &in_data_anchor = subgraph_node->GetInDataAnchor(i);
    GE_CHECK_NOTNULL(in_data_anchor);
    if (stage_info.data_inputs[i].first->LinkTo(in_data_anchor) != GRAPH_SUCCESS) {
      REPORT_CALL_ERROR("E19999", "add data edge from %s:%d to %s:%zu failed.",
                        stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
                        stage_info.data_inputs[i].first->GetIdx(),
                        subgraph_node->GetName().c_str(), i);
      GELOGE(INTERNAL_ERROR, "[Add][DataEdge] %s:%d->%s:%zu failed.",
             stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
             stage_info.data_inputs[i].first->GetIdx(),
             subgraph_node->GetName().c_str(), i);
      return INTERNAL_ERROR;
    }
  }
  // out data nodes
  for (size_t i = 0; i < stage_info.data_outputs.size(); i++) {
    const auto &out_data_anchor = subgraph_node->GetOutDataAnchor(i);
    GE_CHECK_NOTNULL(out_data_anchor);
    // One stage output may feed several external consumers; relink each peer.
    for (const auto &peer_in_anchor : stage_info.data_outputs[i].second) {
      if (stage_info.data_outputs[i].first->Unlink(peer_in_anchor) != GRAPH_SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Remove data edge from %s:%d to %s:%d failed.",
                          stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str(),
                          stage_info.data_outputs[i].first->GetIdx(),
                          peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        GELOGE(INTERNAL_ERROR, "[Remove][DataEdge] %s:%d->%s:%d failed.",
               stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str(),
               stage_info.data_outputs[i].first->GetIdx(),
               peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        return INTERNAL_ERROR;
      }
      if (out_data_anchor->LinkTo(peer_in_anchor) != GRAPH_SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Add data edge from %s:%zu to %s:%d failed.", subgraph_node->GetName().c_str(), i,
                          peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        GELOGE(INTERNAL_ERROR, "[Add][DataEdge] %s:%zu->%s:%d failed.", subgraph_node->GetName().c_str(), i,
               peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        return INTERNAL_ERROR;
      }
    }
  }
  return SUCCESS;
}
Status StagePartitioner::RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) { | |||
// Re-route control edges crossing the stage boundary: incoming ctrl edges are
// moved onto subgraph_node's in-control anchor, outgoing ones onto its
// out-control anchor. Returns INTERNAL_ERROR on any unlink/link failure.
// in ctrl nodes | |||
for (const auto &ctrl_input : stage_info.ctrl_inputs) { | |||
if (ctrl_input.first->Unlink(ctrl_input.second) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Remove ctrl edge %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), | |||
ctrl_input.second->GetOwnerNode()->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Remove][CtrlEdge] %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), ctrl_input.second->GetOwnerNode()->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
// Several inner nodes may share one external ctrl source; the IsLinkedWith
// guard avoids adding duplicate edges to the subgraph node.
if (!ctrl_input.first->IsLinkedWith(subgraph_node->GetInControlAnchor())) { | |||
if (ctrl_input.first->LinkTo(subgraph_node->GetInControlAnchor()) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add ctrl edge %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), subgraph_node->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), subgraph_node->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
} | |||
} | |||
// out ctrl nodes | |||
for (const auto &ctrl_output : stage_info.ctrl_outputs) { | |||
if (ctrl_output.first->Unlink(ctrl_output.second) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Remove ctrl edge %s->%s failed.", | |||
ctrl_output.first->GetOwnerNode()->GetName().c_str(), | |||
ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Remove][CtrlEdge] %s->%s failed.", | |||
ctrl_output.first->GetOwnerNode()->GetName().c_str(), | |||
ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
// Same de-duplication as above, for edges leaving the stage.
if (!subgraph_node->GetOutControlAnchor()->IsLinkedWith(ctrl_output.second)) { | |||
if (subgraph_node->GetOutControlAnchor()->LinkTo(ctrl_output.second) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add ctrl edge %s->%s failed.", | |||
subgraph_node->GetName().c_str(), ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] %s->%s failed.", | |||
subgraph_node->GetName().c_str(), ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -17,26 +17,10 @@ | |||
#ifndef GE_GRAPH_PARTITION_STAGE_PARTITION_H_ | |||
#define GE_GRAPH_PARTITION_STAGE_PARTITION_H_ | |||
#include <map> | |||
#include <unordered_set> | |||
#include <list> | |||
#include <utility> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/compute_graph.h" | |||
namespace ge { | |||
struct StageInfo { | |||
// Everything collected about one pipeline stage while partitioning the graph.
explicit StageInfo(uint32_t level) : stage_level(level) {} | |||
// Stage level this group of nodes belongs to.
uint32_t stage_level; | |||
// Nodes assigned to this stage.
std::unordered_set<NodePtr> stage_nodes; | |||
// Data edges entering the stage: (external src out-anchor, inner dst in-anchor).
std::vector<std::pair<OutDataAnchorPtr, InDataAnchorPtr>> data_inputs; | |||
// Data edges leaving the stage: inner src out-anchor and all its external peers.
std::vector<std::pair<OutDataAnchorPtr, std::list<InDataAnchorPtr>>> data_outputs; | |||
// Control edges entering / leaving the stage.
std::list<std::pair<OutControlAnchorPtr, InControlAnchorPtr>> ctrl_inputs; | |||
std::list<std::pair<OutControlAnchorPtr, InControlAnchorPtr>> ctrl_outputs; | |||
// Edges fully inside the stage (both endpoints in stage_nodes).
std::list<std::pair<OutDataAnchorPtr, InDataAnchorPtr>> inner_data_edges; | |||
std::list<std::pair<OutControlAnchorPtr, InControlAnchorPtr>> inner_ctrl_edges; | |||
}; | |||
class StagePartitioner { | |||
public: | |||
explicit StagePartitioner(ComputeGraphPtr graph) : root_graph_(std::move(graph)) {} | |||
@@ -49,18 +33,8 @@ class StagePartitioner { | |||
Status StagePartition(); | |||
static void FindStageIO(const std::unordered_set<NodePtr> &stage_nodes, StageInfo &stage_info); | |||
NodePtr BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info); | |||
static ComputeGraphPtr BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info); | |||
static Status RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); | |||
static Status RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); | |||
ComputeGraphPtr root_graph_; | |||
std::map<uint32_t, std::unordered_set<NodePtr>> stage_nodes_; | |||
std::map<uint32_t, std::set<NodePtr>> stage_nodes_; | |||
}; | |||
} // namespace ge | |||
@@ -20,41 +20,36 @@ | |||
#include <vector> | |||
#include "init/gelib.h" | |||
#include "graph/node.h" | |||
namespace ge { | |||
Status EndOfSequenceAddControlPass::Run(ComputeGraphPtr graph) { | |||
if (graph == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Param graph is nullptr, check invalid"); | |||
GELOGE(PARAM_INVALID, "[Check][Param] param [graph] must not be null."); | |||
return PARAM_INVALID; | |||
} | |||
if (graph->GetParentGraph() != nullptr) { | |||
return SUCCESS; | |||
} | |||
NodePtr end_of_sequence = GetEndOfSequence(graph); | |||
const auto &end_of_sequence = graph->FindFirstNodeMatchType(ENDOFSEQUENCE); | |||
if (end_of_sequence == nullptr) { | |||
return SUCCESS; | |||
} | |||
GELOGI("EndOfSequenceAddControlPass begin."); | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGE(FAILED, "GELib not initialized"); | |||
return FAILED; | |||
} | |||
std::vector<NodePtr> target_nodes; | |||
for (NodePtr &node : graph->GetDirectNode()) { | |||
if (node == nullptr) { | |||
GELOGW("node is nullptr."); | |||
continue; | |||
} | |||
string stream_label; | |||
(void)AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label); | |||
if (!stream_label.empty() || IsDataLikeNode(node)) { | |||
// op_desc of node should not be null | |||
if (node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL) || | |||
instance_ptr->DNNEngineManagerObj().IsStreamAssignSkip(node)) { | |||
continue; | |||
} | |||
// Save the nodes whose pre-nodes are all data-like node | |||
auto in_data_nodes = node->GetInDataNodes(); | |||
bool flag = false; | |||
for (auto in_node : in_data_nodes) { | |||
if (!IsDataLikeNode(in_node)) { | |||
for (const auto &in_node : node->GetInDataNodes()) { | |||
if (!instance_ptr->DNNEngineManagerObj().IsStreamAssignSkip(in_node)) { | |||
flag = true; | |||
break; | |||
} | |||
@@ -64,83 +59,20 @@ Status EndOfSequenceAddControlPass::Run(ComputeGraphPtr graph) { | |||
} | |||
target_nodes.push_back(node); | |||
} | |||
// Insert control edge | |||
Status status = AddControlEdge(end_of_sequence, target_nodes); | |||
if (status != SUCCESS) { | |||
GELOGE(FAILED, "[Add][ControlEdge] Graph add EndOfSequence op:%s out ctrl edge failed.", | |||
end_of_sequence->GetName().c_str()); | |||
return FAILED; | |||
} | |||
GELOGI("EndOfSequenceAddControlPass end."); | |||
return SUCCESS; | |||
} | |||
Status EndOfSequenceAddControlPass::AddControlEdge(NodePtr &end_of_sequence, std::vector<NodePtr> &target_nodes) { | |||
auto out_ctrl_anchor = end_of_sequence->GetOutControlAnchor(); | |||
for (NodePtr &node : target_nodes) { | |||
auto in_ctrl_anchor = node->GetInControlAnchor(); | |||
if (in_ctrl_anchor == nullptr) { | |||
continue; | |||
} | |||
Status status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); | |||
if (status != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add control edge between op:%s(%s) and op:%s(%s) failed", | |||
end_of_sequence->GetName().c_str(), end_of_sequence->GetType().c_str(), | |||
node->GetName().c_str(), node->GetType().c_str()); | |||
GELOGE(FAILED, "[Add][ControlEdge] between op:%s(%s) and op:%s(%s) failed", | |||
end_of_sequence->GetName().c_str(), end_of_sequence->GetType().c_str(), | |||
node->GetName().c_str(), node->GetType().c_str()); | |||
// Insert control edge | |||
for (const auto &node : target_nodes) { | |||
GELOGI("Add ctrl edge between %s and %s", end_of_sequence->GetName().c_str(), node->GetName().c_str()); | |||
if (GraphUtils::AddEdge(end_of_sequence->GetOutControlAnchor(), node->GetInControlAnchor()) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add ctrl edge between %s and %s failed", end_of_sequence->GetName().c_str(), | |||
node->GetName().c_str()); | |||
GELOGE(FAILED, "[Add][CtrlEdge] between %s and %s failed", end_of_sequence->GetName().c_str(), | |||
node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", node->GetName().c_str()); | |||
} | |||
return SUCCESS; | |||
} | |||
inline NodePtr EndOfSequenceAddControlPass::GetEndOfSequence(const ComputeGraphPtr &graph) const {
  // Internal helper; the caller guarantees graph is non-null.
  // Returns the first direct node of type ENDOFSEQUENCE, or nullptr if none exists.
  NodePtr found = nullptr;
  for (auto &direct_node : graph->GetDirectNode()) {
    if (direct_node->GetType() != ENDOFSEQUENCE) {
      continue;
    }
    found = direct_node;
    break;
  }
  return found;
}
bool EndOfSequenceAddControlPass::IsDataLikeNode(const NodePtr &node) { | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGW("GELib not initialized"); | |||
return false; | |||
} | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
return false; | |||
} | |||
string engine_name = op_desc->GetOpEngineName(); | |||
if (engine_name.empty()) { | |||
engine_name = instance_ptr->DNNEngineManagerObj().GetDNNEngineName(node); | |||
} | |||
const map<string, SchedulerConf> schedulers = instance_ptr->DNNEngineManagerObj().GetSchedulers(); | |||
// Only one scheduler has been supported by now | |||
for (auto schedulers_iter = schedulers.begin(); schedulers_iter != schedulers.end(); ++schedulers_iter) { | |||
const map<string, EngineConfPtr> cal_engines = schedulers_iter->second.cal_engines; | |||
auto cal_engines_iter = cal_engines.find(engine_name); | |||
if (cal_engines_iter == cal_engines.end()) { | |||
GELOGW("No cal_engines found within engine %s, node name %s", engine_name.c_str(), node->GetName().c_str()); | |||
continue; | |||
} | |||
EngineConfPtr engine_conf_ptr = cal_engines_iter->second; | |||
if (engine_conf_ptr == nullptr) { | |||
GELOGW("engine_conf_ptr within engine %s, node name %s is null", engine_name.c_str(), node->GetName().c_str()); | |||
continue; | |||
} | |||
bool skip_assign_stream = engine_conf_ptr->skip_assign_stream; | |||
if (skip_assign_stream) { | |||
return true; | |||
} | |||
return false; | |||
} | |||
return false; | |||
GELOGI("EndOfSequenceAddControlPass end."); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -30,26 +30,6 @@ class EndOfSequenceAddControlPass : public GraphPass { | |||
~EndOfSequenceAddControlPass() override {} | |||
Status Run(ComputeGraphPtr graph) override; | |||
private: | |||
/** | |||
* Get EndOfSequence node in graph, nullptr if not exist. | |||
* @param graph | |||
* @return EndOfSequence node | |||
*/ | |||
inline NodePtr GetEndOfSequence(const ComputeGraphPtr &graph) const; | |||
/** | |||
* Check whether this node is a data-like node. | |||
* @param node | |||
* @return | |||
*/ | |||
bool IsDataLikeNode(const NodePtr &node); | |||
/** | |||
* Check whether this node is a data-like node. | |||
* @param node | |||
* @return | |||
*/ | |||
Status AddControlEdge(NodePtr &end_of_sequence, std::vector<NodePtr> &target_nodes); | |||
}; | |||
} // namespace ge | |||
@@ -23,7 +23,6 @@ | |||
#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | |||
#include "common/formats/format_transfers/format_transfer_transpose.h" | |||
#include "common/formats/utils/formats_trans_utils.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/helper/model_helper.h" | |||
#include "common/math/math_util.h" | |||
#include "framework/common/op/ge_op_utils.h" | |||
@@ -39,7 +38,6 @@ | |||
#include "graph/passes/addn_pass.h" | |||
#include "graph/passes/aicpu_constant_folding_pass.h" | |||
#include "graph/passes/assert_pass.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "graph/passes/common_subexpression_elimination_pass.h" | |||
#include "graph/passes/cond_pass.h" | |||
#include "graph/passes/cond_remove_pass.h" | |||
@@ -774,7 +772,12 @@ Status UpdateSubgraphDataOfCase(NodePtr &mbatch_node, DataType &dt_set, int32_t | |||
return SUCCESS; | |||
} | |||
auto subgraphs = NodeUtils::GetAllSubgraphs(*mbatch_node); | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (NodeUtils::GetSubgraphs(mbatch_node, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", mbatch_node->GetName().c_str()); | |||
GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", mbatch_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (const auto &subgraph : subgraphs) { | |||
GE_CHECK_NOTNULL(subgraph); | |||
for (auto &sub_node : subgraph->GetDirectNode()) { | |||
@@ -60,7 +60,6 @@ const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | |||
const char *const kForceInfershape = "_force_infershape_when_running"; | |||
const std::set<std::string> kExecutionDependentTypes{ IF, STATELESSIF, CASE, STREAMSWITCH }; | |||
const std::set<std::string> kMergeInputSkipTypes{ STREAMACTIVE, STREAMSWITCH, CONSTANT, CONSTANTOP }; | |||
const std::set<std::string> kStreamActiveTypes{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; | |||
Status SetOutputNameAttr(ComputeGraph &graph) { | |||
@@ -519,170 +518,6 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) { | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor,
                                               const InDataAnchorPtr &in_data_anchor) {
  // Remove the data edge out_data_anchor -> in_data_anchor, logging both endpoints.
  const auto &src_name = out_data_anchor->GetOwnerNode()->GetName();
  const auto &dst_name = in_data_anchor->GetOwnerNode()->GetName();
  const auto src_idx = out_data_anchor->GetIdx();
  const auto dst_idx = in_data_anchor->GetIdx();
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor),
                          "[Invoke][Unlink] failed to unlink %s:%d from %s:%d",
                          src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  GELOGD("Succeeded in unlinking %s:%d from %s:%d", src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  return SUCCESS;
}
Status HybridModelBuilder::DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor) {
  // Add a data edge out_data_anchor -> in_data_anchor, logging both endpoints.
  const auto &src_name = out_data_anchor->GetOwnerNode()->GetName();
  const auto &dst_name = in_data_anchor->GetOwnerNode()->GetName();
  const auto src_idx = out_data_anchor->GetIdx();
  const auto dst_idx = in_data_anchor->GetIdx();
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "[Invoke][LinkTo]Failed to link %s:%d to %s:%d",
                          src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  GELOGD("Succeeded in linking %s:%d to %s:%d", src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  return SUCCESS;
}
Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { | |||
// Fold the subgraph's Data nodes away: rewire each Data node's consumers
// directly to the corresponding input source of the parent (wrapped) node,
// then transfer the parent's incoming control edges to the subgraph's root
// nodes. Statement order is load-bearing; documented only, not restructured.
const auto &wrapped_node = graph.GetParentNode(); | |||
std::set<NodePtr> root_nodes; | |||
for (const auto &node : graph.GetDirectNode()) { | |||
GE_CHECK_NOTNULL(node); | |||
if (node->GetType() != DATA_TYPE) { | |||
// Non-Data node with no data inputs is a root of the subgraph.
if (node->GetInDataNodes().empty()) { | |||
root_nodes.emplace(node); | |||
} | |||
continue; | |||
} | |||
auto data_op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(data_op_desc); | |||
// The Data node's parent-index attr identifies which parent input it mirrors.
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGE(FAILED, "[Invoke][GetInt] failed, node:[%s] attr:[%s]", | |||
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||
REPORT_CALL_ERROR("E19999", "GetInt failed, node:[%s] attr:[%s]", | |||
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||
return FAILED; | |||
} | |||
auto wrapped_node_in_anchor = wrapped_node->GetInDataAnchor(parent_index); | |||
GE_CHECK_NOTNULL(wrapped_node_in_anchor); | |||
auto src_out_anchor = wrapped_node_in_anchor->GetPeerOutAnchor(); | |||
// Unconnected parent input: nothing to rewire for this Data node.
if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) { | |||
continue; | |||
} | |||
wrapped_node_in_anchor->UnlinkAll(); | |||
// link src to outputs of DataNode | |||
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||
GE_CHECK_NOTNULL(out_data_anchor); | |||
for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
auto dst_node = peer_in_data_anchor->GetOwnerNode(); | |||
GE_CHECK_NOTNULL(dst_node); | |||
// Consumers fed only by Data nodes also become roots (their real
// inputs now come from outside the subgraph).
const auto in_nodes = dst_node->GetInDataNodes(); | |||
if (std::all_of(in_nodes.begin(), in_nodes.end(), [](const NodePtr &n) { return n->GetType() == DATA; })) { | |||
root_nodes.emplace(dst_node); | |||
} | |||
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(out_data_anchor, peer_in_data_anchor)); | |||
GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, peer_in_data_anchor)); | |||
} | |||
} | |||
} | |||
// transfer in control edges to all root nodes | |||
for (auto &root_node : root_nodes) { | |||
auto in_nodes = root_node->GetInAllNodes(); | |||
std::set<NodePtr> in_node_set(in_nodes.begin(), in_nodes.end()); | |||
for (auto &in_control_node : wrapped_node->GetInControlNodes()) { | |||
// Skip edges that would duplicate an existing dependency, and skip
// node types listed in kMergeInputSkipTypes.
if (in_node_set.count(in_control_node) == 0 && kMergeInputSkipTypes.count(root_node->GetType()) == 0) { | |||
GELOGD("[%s] Restore control edge to [%s]", in_control_node->GetName().c_str(), root_node->GetName().c_str()); | |||
GE_CHECK_NOTNULL(in_control_node->GetOutControlAnchor()); | |||
(void) in_control_node->GetOutControlAnchor()->LinkTo(root_node->GetInControlAnchor()); | |||
} | |||
} | |||
} | |||
wrapped_node->GetInControlAnchor()->UnlinkAll(); | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { | |||
// Fold the subgraph's NetOutput away: reconnect each producer of a NetOutput
// input directly to the consumers of the corresponding parent-node output,
// then restore control-dependency ordering between the subgraph's producers
// and the parent's downstream nodes.
const auto &parent_node = graph.GetParentNode(); | |||
const NodePtr &net_output_node = graph.FindFirstNodeMatchType(NETOUTPUT); | |||
if (net_output_node == nullptr) { | |||
GELOGD("Graph has no netoutput no need to merge"); | |||
return SUCCESS; | |||
} | |||
const auto &net_output_desc = net_output_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(net_output_desc); | |||
// Snapshot neighbors BEFORE unlinking the control anchors below.
auto all_in_nodes = net_output_node->GetInAllNodes(); | |||
auto all_out_nodes = parent_node->GetOutAllNodes(); | |||
net_output_node->GetInControlAnchor()->UnlinkAll(); | |||
parent_node->GetOutControlAnchor()->UnlinkAll(); | |||
for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { | |||
auto src_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
GE_CHECK_NOTNULL(src_out_anchor); | |||
GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | |||
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(src_out_anchor, in_data_anchor)); | |||
auto index = in_data_anchor->GetIdx(); | |||
auto input_desc = net_output_desc->MutableInputDesc(index); | |||
if (input_desc == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "[Invoke][MutableInputDesc][%s] Failed to get input desc[%d]", | |||
net_output_desc->GetName().c_str(), index); | |||
REPORT_CALL_ERROR("E19999", "[%s] Failed to get input desc[%d].", net_output_desc->GetName().c_str(), index); | |||
return INTERNAL_ERROR; | |||
} | |||
// Missing parent-index attr is tolerated: that input has no parent mapping.
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGW("SubGraph: %s NetOutput input tensor %d, attr %s not found.", | |||
graph.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||
continue; | |||
} | |||
const OutDataAnchorPtr &parent_out_anchor = parent_node->GetOutDataAnchor(parent_index); | |||
GE_CHECK_NOTNULL(parent_out_anchor); | |||
for (InDataAnchorPtr &dst_in_anchor : parent_out_anchor->GetPeerInDataAnchors()) { | |||
if (dst_in_anchor == nullptr) { | |||
continue; | |||
} | |||
GE_CHECK_NOTNULL(dst_in_anchor->GetOwnerNode()); | |||
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(parent_out_anchor, dst_in_anchor)); | |||
GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, dst_in_anchor)); | |||
} | |||
} | |||
// transfer out control edges | |||
std::set<NodePtr> in_node_set(all_in_nodes.begin(), all_in_nodes.end()); | |||
std::set<NodePtr> out_node_set(all_out_nodes.begin(), all_out_nodes.end()); | |||
for (auto &src_node : in_node_set) { | |||
GELOGD("[%s] process in node.", src_node->GetName().c_str()); | |||
auto out_nodes = src_node->GetOutAllNodes(); | |||
std::set<NodePtr> node_set(out_nodes.begin(), out_nodes.end()); | |||
for (auto &dst_node : out_node_set) { | |||
// Only add the ctrl edge if no (data or ctrl) dependency already exists.
if (node_set.count(dst_node) == 0) { | |||
src_node->GetOutControlAnchor()->LinkTo(dst_node->GetInControlAnchor()); | |||
GELOGD("[%s] Restore control edge to [%s]", src_node->GetName().c_str(), dst_node->GetName().c_str()); | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph) { | |||
merged_graph = MakeShared<ComputeGraph>("MergedGraph"); | |||
merged_graph->SetGraphUnknownFlag(root_graph->GetGraphUnknownFlag()); | |||
@@ -716,9 +551,21 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG | |||
} | |||
} | |||
} | |||
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph), | |||
const auto &filter = [](const ComputeGraphPtr &graph) { | |||
const auto &parent_node = graph->GetParentNode(); | |||
if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) { | |||
return false; | |||
} | |||
if ((parent_node->GetType() != PARTITIONEDCALL) || | |||
(parent_node->GetOpDesc()->GetSubgraphInstanceNames().size() != 1)) { | |||
return false; | |||
} | |||
return graph->GetGraphUnknownFlag(); | |||
}; | |||
GE_CHK_GRAPH_STATUS_RET(GraphUtils::UnfoldSubgraph(subgraph, filter), | |||
"[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph.", | |||
subgraph->GetName().c_str()); | |||
subgraph->GetName().c_str()) | |||
} | |||
// invoke before adding subgraphs. in case modify node id in known-shaped subgraphs. | |||
@@ -744,56 +591,6 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph, | |||
ComputeGraphPtr &parent_graph, | |||
ComputeGraph &sub_graph) { | |||
// Flatten sub_graph into parent_graph: first merge its Data/NetOutput
// boundary nodes, then move every remaining node up, recursing into
// unknown-shape PARTITIONEDCALL subgraphs. Finally drop the (now empty)
// subgraph from root_graph.
auto parent_node = sub_graph.GetParentNode(); | |||
GE_CHECK_NOTNULL(parent_node); | |||
GE_CHK_STATUS_RET(MergeInputNodes(sub_graph), | |||
"[Invoke][MergeInputNodes][%s] Failed to merge data nodes for subgraph", | |||
sub_graph.GetName().c_str()); | |||
GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph), | |||
"[Invoke][MergeNetOutputNode][%s] Failed to merge net output nodes for subgraph", | |||
sub_graph.GetName().c_str()); | |||
GELOGD("[%s] Done merging subgraph inputs and outputs successfully", sub_graph.GetName().c_str()); | |||
for (auto &sub_node : sub_graph.GetDirectNode()) { | |||
auto sub_op_type = sub_node->GetType(); | |||
// Boundary nodes were already merged above; do not move them up.
if (sub_op_type == DATA_TYPE || sub_op_type == NETOUTPUT) { | |||
continue; | |||
} | |||
if (sub_op_type == PARTITIONEDCALL) { | |||
auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); | |||
GE_CHECK_NOTNULL(sub_sub_graph); | |||
// Unknown-shape nested subgraphs get flattened recursively; known-shape
// ones stay folded and fall through to the re-parenting below.
if (sub_sub_graph->GetGraphUnknownFlag()) { | |||
GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), | |||
"[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph", | |||
sub_sub_graph->GetName().c_str()); | |||
continue; | |||
} | |||
} | |||
// Keep subgraph ownership consistent for nodes that carry subgraphs.
if (!sub_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { | |||
for (size_t i = 0; i < sub_node->GetOpDesc()->GetSubgraphInstanceNames().size(); ++i) { | |||
auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, i); | |||
GE_CHECK_NOTNULL(sub_sub_graph); | |||
sub_sub_graph->SetParentGraph(parent_graph); | |||
} | |||
} | |||
parent_graph->AddNode(sub_node); | |||
GELOGD("[%s::%s] added to parent graph: [%s].", | |||
sub_graph.GetName().c_str(), | |||
sub_node->GetName().c_str(), | |||
parent_graph->GetName().c_str()); | |||
sub_node->SetOwnerComputeGraph(parent_graph); | |||
} | |||
GELOGD("[%s] Done merging subgraph. remove it from root graph", sub_graph.GetName().c_str()); | |||
root_graph->RemoveSubgraph(sub_graph.GetName()); | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, | |||
const NodeItem &node_item, | |||
bool is_root_graph) { | |||
@@ -39,16 +39,11 @@ class HybridModelBuilder { | |||
private: | |||
static Status UpdateAnchorStatus(const NodePtr &node); | |||
static Status DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor, const InDataAnchorPtr &in_data_anchor); | |||
static Status DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor); | |||
static NodePtr GetPeerNode(const InDataAnchorPtr &in_data_anchor); | |||
static Status GetParentNodeOutputIndex(const OpDesc &op_desc, int index, uint32_t &out_index); | |||
static Status GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, NodePtr &peer_node, int &peer_out_index); | |||
static Status HandleDtString(const GeTensor &tensor, void *var_addr); | |||
static Status MergeInputNodes(ComputeGraph &compute_graph); | |||
static Status MergeNetOutputNode(ComputeGraph &compute_graph); | |||
static Status UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph); | |||
static Status UnfoldSubgraph(ComputeGraphPtr &root_graph, ComputeGraphPtr &parent_graph, ComputeGraph &sub_graph); | |||
static Status BuildInputMapping(GraphItem &graph_item, | |||
std::vector<NodeItem *> &data_nodes, | |||
bool is_root_graph); | |||
@@ -81,6 +81,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
case aicpu::FWKAdapter::FWK_ADPT_EXT_TOPIC_TYPE: | |||
GE_CHK_STATUS_RET(ParseExtTopicType(aicpu_ext_info), "[Parse][ExtTopicType] failed."); | |||
break; | |||
case aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT: | |||
GE_CHK_STATUS_RET(ParseExtAsyncWait(aicpu_ext_info), "[Parse][ExtAsyncWait] failed."); | |||
break; | |||
default: | |||
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | |||
node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||
@@ -101,6 +104,22 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
return SUCCESS; | |||
} | |||
Status AicpuExtInfoHandler::ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info) {
  // Validate the async-wait ext segment and remember a pointer to its payload
  // (async_wait_), which UpdateEventId fills in later.
  const size_t expected_len = sizeof(AsyncWaitInfo);
  if (aicpu_ext_info->infoLen == expected_len) {
    async_wait_ = reinterpret_cast<AsyncWaitInfo *>(aicpu_ext_info->infoMsg);
    GELOGI("Node[%s] parse async wait info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
    return SUCCESS;
  }
  // Length mismatch: the segment cannot hold exactly one AsyncWaitInfo record.
  REPORT_INNER_ERROR("E19999",
                     "Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.",
                     node_name_.c_str(), expected_len, aicpu_ext_info->infoLen);
  GELOGE(ACL_ERROR_GE_PARAM_INVALID,
         "[Check][DataLen]Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.",
         node_name_.c_str(), expected_len, aicpu_ext_info->infoLen);
  return ACL_ERROR_GE_PARAM_INVALID;
}
Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { | |||
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(int32_t), | |||
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", | |||
@@ -280,6 +299,17 @@ Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kern | |||
return SUCCESS; | |||
} | |||
Status AicpuExtInfoHandler::UpdateEventId(uint32_t event_id) {
  // Write the event to wait on into the async-wait ext segment located by
  // ParseExtAsyncWait; fails if that segment was never parsed.
  if (async_wait_ == nullptr) {
    REPORT_INNER_ERROR("E19999", "async_wait_ is nullptr.");
    GELOGE(FAILED, "[Check][async_wait_] async_wait_ is nullptr.");
    return FAILED;
  }
  async_wait_->waitId = event_id;
  // NOTE(review): 1 presumably selects event-style waiting — confirm against
  // the aicpu::FWKAdapter::AsyncWait definition.
  async_wait_->waitType = 1;
  return SUCCESS;
}
Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) { | |||
if (session_info_ == nullptr) { | |||
GELOGD("There is no session info in ext_info, no need update."); | |||
@@ -27,6 +27,7 @@ namespace ge { | |||
namespace hybrid { | |||
using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; | |||
using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; | |||
using AsyncWaitInfo = aicpu::FWKAdapter::AsyncWait; | |||
using AicpuSessionInfo = SessionInfo; | |||
class AicpuExtInfoHandler { | |||
@@ -59,6 +60,8 @@ class AicpuExtInfoHandler { | |||
Status UpdateExecuteMode(bool flag); | |||
Status UpdateEventId(uint32_t event_id); | |||
Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | |||
bool IsNeedRefreshIOAddr(); | |||
@@ -73,6 +76,7 @@ class AicpuExtInfoHandler { | |||
Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtTopicType(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info); | |||
static Status UpdateShapeAndType(const GeShape &shape, | |||
DataType data_type, | |||
@@ -90,6 +94,7 @@ class AicpuExtInfoHandler { | |||
const uint32_t output_num_; | |||
UnknowShapeOpType unknown_type_; | |||
AicpuSessionInfo *session_info_ = nullptr; | |||
AsyncWaitInfo *async_wait_ = nullptr; | |||
uint64_t *bit_map_ = nullptr; | |||
uint32_t *update_addr_ = nullptr; | |||
int32_t topic_type_flag_ = -1; | |||
@@ -22,6 +22,7 @@ | |||
#include "graph/utils/node_utils.h" | |||
#include "hybrid/executor/hybrid_execution_context.h" | |||
#include "hybrid/model/hybrid_model.h" | |||
#include "runtime/rt.h" | |||
namespace ge { | |||
namespace hybrid { | |||
@@ -33,6 +34,12 @@ const char *const kAicpuAllshape = "_AllShape"; | |||
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor); | |||
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor); | |||
// Release the runtime event created for blocking aicpu ops, if one exists.
AicpuNodeTaskBase::~AicpuNodeTaskBase() {
  if (rt_event_ == nullptr) {
    return;
  }
  // Best effort: a destructor cannot propagate a runtime failure.
  (void)rtEventDestroy(rt_event_);
}
Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr<TensorBuffer> &tensor_buffer) { | |||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||
GE_CHECK_NOTNULL(allocator); | |||
@@ -64,6 +71,13 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_ | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | |||
"[Update][SessionInfoSessionId] failed, session_id:%ld.", session_id); | |||
if (is_blocking_aicpu_op_) { | |||
if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
// copy task args buf | |||
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | |||
"[Invoke][AllocTensorBuffer]Node[%s] alloc kernel_ext_info buf failed, size=%zu", | |||
@@ -230,6 +244,96 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
return SUCCESS; | |||
} | |||
// Create a runtime event for this blocking aicpu op, cache it in rt_event_
// (released by the destructor), and write its id into the kernel ext-info so
// the device side can signal completion. Silently succeeds when the device
// does not support blocking aicpu ops.
Status AicpuNodeTaskBase::UpdateEventIdForBlockingAicpuOp() {
  bool support_blocking = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(support_blocking) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!support_blocking) {
    GELOGD("Device not support blocking aicpu op process");
    return SUCCESS;
  }
  auto rt_err = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG);
  if (rt_err != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for node:%s, ret:0x%X", node_name_.c_str(),
                      rt_err);
    GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for node:%s, ret:0x%X", node_name_.c_str(), rt_err);
    return RT_ERROR_TO_GE_STATUS(rt_err);
  }
  uint32_t event_id = 0U;
  rt_err = rtGetEventID(rt_event_, &event_id);
  if (rt_err != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for node:%s, ret:0x%X", node_name_.c_str(), rt_err);
    GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for node:%s, ret:0x%X", node_name_.c_str(), rt_err);
    return RT_ERROR_TO_GE_STATUS(rt_err);
  }
  if (aicpu_ext_handle_.UpdateEventId(event_id) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Update event id failed for node:%s.", node_name_.c_str());
    GELOGE(FAILED, "[Update][EventId] Update event id failed for node:%s", node_name_.c_str());
    return FAILED;
  }
  GELOGI("Update event_id=%u success", event_id);
  return SUCCESS;
}
// Query whether the current device supports the blocking aicpu op feature.
// On SUCCESS `is_support` holds the answer; runtime-API failures and
// out-of-range capability values are reported and returned as errors.
Status AicpuNodeTaskBase::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
  int32_t device_id = 0;
  auto rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  int32_t value = 0;
  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // The capability must be one of the two documented values; anything else
  // means the query result is unreliable, so fail loudly instead of guessing.
  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    return FAILED;
  }
  // Fix: drop the redundant `? true : false` -- the comparison is already bool.
  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT);
  return SUCCESS;
}
// After the kernel launch, make `stream` wait on the blocking-op event and
// then reset the event so it can be reused by the next launch.
// No-op (SUCCESS) when the device does not support blocking aicpu ops.
Status AicpuNodeTaskBase::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) {
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("Device not support blocking aicpu op process.");
    return SUCCESS;
  }
  GELOGD("Distribute queue task begin");
  // The event must have been created by UpdateEventIdForBlockingAicpuOp first.
  if (rt_event_ == nullptr) {
    REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr");
    GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr");
    return FAILED;
  }
  auto rt_ret = rtStreamWaitEvent(stream, rt_event_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
    // Fix: name the failing API instead of the generic "[Call][RtApi]" text,
    // consistent with the other rt* error logs in this file.
    GELOGE(RT_FAILED, "[Call][rtStreamWaitEvent] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtEventReset(rt_event_, stream);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtEventReset] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
Status AicpuTfNodeTask::InitForDependComputeTask() { | |||
if ((unknown_type_ != DEPEND_COMPUTE) || (node_item_->num_outputs == 0)) { | |||
GELOGD("Node[%s] type[%s] unknown_type is %d, output num is %d.", | |||
@@ -325,6 +429,9 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { | |||
// init ext info | |||
uint64_t ext_session_id = model.GetSessionId(); | |||
const OpDescPtr op_desc = node_item_->GetOpDesc(); | |||
AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "[Init][ExtInfo] failed for Node[%s].", | |||
node_name_.c_str()); | |||
GE_CHK_STATUS_RET(InitForDependComputeTask(), "[Init][DependComputeTask] failed for Node[%s].", node_name_.c_str()); | |||
@@ -642,6 +749,12 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { | |||
kernel_buf_->GetSize(), flag, context.GetStream())); | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); | |||
GELOGD("Node[%s] launch end.", node_name_.c_str()); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
if (need_sync_) { | |||
GELOGD("[%s] Task needs sync", node_name_.c_str()); | |||
GE_CHK_STATUS_RET_NOLOG(context.Synchronize()); | |||
@@ -760,6 +873,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { | |||
return FAILED;); | |||
uint64_t ext_session_id = model.GetSessionId(); | |||
AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), | |||
"[Init][ExtInfo] failed for Node[%s].", node_name.c_str()); | |||
@@ -826,6 +941,12 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { | |||
args_.get(), args_size_, | |||
nullptr, context.GetStream(), flag); | |||
GE_CHK_RT_RET(rt_ret); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
GELOGD("Node[%s] launch task end.", node_name_.c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -35,7 +35,7 @@ class AicpuNodeTaskBase : public NodeTask { | |||
node_item->num_outputs, | |||
node_item->shape_inference_type) {} | |||
~AicpuNodeTaskBase() override = default; | |||
~AicpuNodeTaskBase() override; | |||
using NodeTask::Init; | |||
@@ -61,6 +61,10 @@ class AicpuNodeTaskBase : public NodeTask { | |||
static Status AllocTensorBuffer(size_t size, std::unique_ptr<TensorBuffer> &tensor_buffer); | |||
Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
Status UpdateEventIdForBlockingAicpuOp(); | |||
protected: | |||
const NodeItem *node_item_; | |||
// just reference. | |||
@@ -78,6 +82,10 @@ class AicpuNodeTaskBase : public NodeTask { | |||
// ext info addr, device mem | |||
std::unique_ptr<TensorBuffer> ext_info_addr_dev_; | |||
// for blocking aicpu op | |||
bool is_blocking_aicpu_op_ = false; | |||
rtEvent_t rt_event_ = nullptr; | |||
}; | |||
class AicpuTfNodeTask : public AicpuNodeTaskBase { | |||
@@ -89,7 +89,8 @@ map<string, DataType> kDataTypeDict = { | |||
{"float", DT_FLOAT}, | |||
{"float32", DT_FLOAT}, | |||
{"double", DT_DOUBLE}, | |||
{"complex64", DT_COMPLEX64} | |||
{"complex64", DT_COMPLEX64}, | |||
{"complex128", DT_COMPLEX128} | |||
}; | |||
map<string, Format> kFormatDict = { | |||
@@ -154,12 +154,16 @@ Status OpsKernelBuilderManager::CalcOpRunningParam(Node &node) const { | |||
return SUCCESS; | |||
} | |||
Status OpsKernelBuilderManager::GenerateTask(const Node &node, | |||
RunContext &context, | |||
std::vector<domi::TaskDef> &tasks) const { | |||
Status OpsKernelBuilderManager::GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks, | |||
bool atomic_engine_flag) const { | |||
auto op_desc = node.GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
const std::string &lib_name = op_desc->GetOpKernelLibName(); | |||
std::string lib_name; | |||
if (atomic_engine_flag) { | |||
lib_name = op_desc->GetOpKernelLibName(); | |||
} else { | |||
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, lib_name); | |||
} | |||
auto it = ops_kernel_builders_.find(lib_name); | |||
if (it == ops_kernel_builders_.end()) { | |||
GELOGE(INTERNAL_ERROR, "[Find][LibName]fail for libName = %s, node:%s", lib_name.c_str(), | |||
@@ -43,8 +43,8 @@ class GE_FUNC_VISIBILITY OpsKernelBuilderManager { | |||
Status CalcOpRunningParam(Node &node) const; | |||
Status GenerateTask(const Node &node, RunContext &context, | |||
std::vector<domi::TaskDef> &tasks) const; | |||
Status GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks, | |||
bool atomic_engine_flag = true) const; | |||
private: | |||
OpsKernelBuilderManager() = default; | |||
@@ -24,6 +24,7 @@ const char *const kInitialize = "Initialize"; | |||
const char *const kGetOpsKernelInfoStores = "GetOpsKernelInfoStores"; | |||
const char *const kGetGraphOptimizerObjs = "GetGraphOptimizerObjs"; | |||
const char *const kFinalize = "Finalize"; | |||
const char *const kGetCompoundEngineContains = "GetCompoundEngineContains"; | |||
std::mutex ops_kernel_info_mutex; | |||
} // namespace | |||
@@ -35,6 +36,12 @@ OpsKernelManager::OpsKernelManager() | |||
OpsKernelManager::~OpsKernelManager() { | |||
graph_optimizers_.clear(); | |||
ops_kernel_store_.clear(); | |||
atomic_graph_optimizers_.clear(); | |||
compound_graph_optimizers_.clear(); | |||
atomic_graph_optimizers_by_priority_.clear(); | |||
atomic_first_optimizers_by_priority_.clear(); | |||
compound_first_optimizers_by_priority_.clear(); | |||
compound_engine_contains_.clear(); | |||
ops_kernel_info_.clear(); | |||
} | |||
@@ -70,53 +77,48 @@ Status OpsKernelManager::Initialize(const map<string, string> &options_const) { | |||
GELOGI("OPTION_EXEC_EXTERN_PLUGIN_PATH=%s.", extern_engine_path.c_str()); | |||
op_tiling_manager_.LoadSo(); | |||
ret = plugin_manager_.LoadSo(extern_engine_path, func_check_list); | |||
if (ret == SUCCESS) { | |||
initialize_ = options; | |||
Status rst0 = plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_); | |||
if (rst0 == FAILED) { | |||
GELOGE(GE_OPS_GET_NO_VALID_SO, "[Invoke][OpsKernelInfo]PluginManager InvokeAll failed."); | |||
REPORT_INNER_ERROR("E19999", "PluginManager InvokeAll failed."); | |||
return GE_OPS_GET_NO_VALID_SO; | |||
} | |||
Status rst1 = | |||
plugin_manager_.InvokeAll<map<string, OpsKernelInfoStorePtr> &>(kGetOpsKernelInfoStores, ops_kernel_store_); | |||
if (rst1 != SUCCESS) { | |||
GELOGW("Initialize OpsKernelInfo failed."); | |||
} | |||
Status rst2 = | |||
plugin_manager_.InvokeAll<map<string, GraphOptimizerPtr> &>(kGetGraphOptimizerObjs, graph_optimizers_); | |||
if (rst2 != SUCCESS) { | |||
GELOGW("Initialize GraphOptimizerObjs failed."); | |||
} | |||
ret = CheckPluginPtr(); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = InitOpKernelInfoStores(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
InitOpsKernelInfo(); | |||
ret = InitGraphOptimzers(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = InitGraphOptimizerPriority(); | |||
if ((ret != SUCCESS)) { | |||
GELOGE(ret, "[Init][GraphOptimizerPriority] failed."); | |||
REPORT_CALL_ERROR("E19999", "InitGraphOptimizerPriority failed."); | |||
return ret; | |||
} | |||
init_flag_ = true; | |||
return SUCCESS; | |||
} else { | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Check][SoFile] not find any valid so file."); | |||
REPORT_INNER_ERROR("E19999", "OpsKernelManager::Initialize failed for not find any valid so file."); | |||
return ret; | |||
} | |||
initialize_ = options; | |||
if (plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_) == FAILED) { | |||
GELOGE(GE_OPS_GET_NO_VALID_SO, "[Invoke][OpsKernelInfo]PluginManager InvokeAll failed."); | |||
REPORT_INNER_ERROR("E19999", "PluginManager InvokeAll failed."); | |||
return GE_OPS_GET_NO_VALID_SO; | |||
} | |||
if (plugin_manager_.InvokeAll<map<string, OpsKernelInfoStorePtr> &>(kGetOpsKernelInfoStores, | |||
ops_kernel_store_) != SUCCESS) { | |||
GELOGW("Initialize OpsKernelInfo failed."); | |||
} | |||
if (plugin_manager_.InvokeAll<map<string, GraphOptimizerPtr> &>(kGetGraphOptimizerObjs, | |||
graph_optimizers_) != SUCCESS) { | |||
GELOGW("Initialize GraphOptimizerObjs failed."); | |||
} | |||
plugin_manager_. | |||
OptionalInvokeAll<std::map<std::string, std::set<std::string>> &, std::map<std::string, std::string> &>( | |||
kGetCompoundEngineContains, compound_engine_contains_, compound_engine_2_kernel_lib_name_); | |||
ret = CheckPluginPtr(); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = InitOpKernelInfoStores(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
InitOpsKernelInfo(); | |||
ret = InitGraphOptimizers(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ClassifyGraphOptimizers(); | |||
InitGraphOptimizerPriority(); | |||
init_flag_ = true; | |||
return SUCCESS; | |||
} | |||
void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, | |||
@@ -264,7 +266,7 @@ void OpsKernelManager::InitOpsKernelInfo() { | |||
REPORT_INNER_ERROR("E19999", "InitOpsKernelInfo failed for new GELib."); | |||
return; | |||
} | |||
// sort opinfo of ops_kernel_info_ | |||
// sort op_info of ops_kernel_info_ | |||
for (auto &it : ops_kernel_info_) { | |||
if (it.second.empty()) { | |||
continue; | |||
@@ -293,31 +295,30 @@ void OpsKernelManager::InitOpsKernelInfo() { | |||
GELOGI("Init opsKernelInfo finished, size is %zu", ops_kernel_info_.size()); | |||
} | |||
Status OpsKernelManager::InitGraphOptimzers(const map<string, string> &options) { | |||
Status OpsKernelManager::InitGraphOptimizers(const map<string, string> &options) { | |||
GELOGI("Init graph optimizers options count %zu", options.size()); | |||
for (const auto &option : options) { | |||
GELOGI("Init graph optimizers option %s: %s", option.first.c_str(), option.second.c_str()); | |||
} | |||
GELOGI("The number of GraphOptimzerObjs are %zu.", graph_optimizers_.size()); | |||
GELOGI("The number of GraphOptimizerObjs are %zu.", graph_optimizers_.size()); | |||
for (const auto &it : graph_optimizers_) { | |||
GELOGI("GraphOptimzer name: %s.", (it.first).c_str()); | |||
GELOGI("GraphOptimizer name: %s.", (it.first).c_str()); | |||
GraphOptimizerAttribute attrs; | |||
GE_CHK_STATUS_RET(it.second->GetAttributes(attrs)) | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib]malloc instance_ptr failed."); | |||
REPORT_INNER_ERROR("E19999", "InitGraphOptimzers failed for new GELib."); | |||
REPORT_INNER_ERROR("E19999", "InitGraphOptimizers failed for new GELib."); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
if (!instance_ptr->DNNEngineManagerObj().IsEngineRegistered(attrs.engineName)) { | |||
GELOGW("Engine: %s is not registered.", attrs.engineName.c_str()); | |||
continue; | |||
} | |||
Status ret = it.second->Initialize(options); | |||
if (ret != SUCCESS) { | |||
GELOGE(GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED, | |||
"[Init][GraphOptimzer]GraphOptimzer: %s initialize failed.", (it.first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "InitGraphOptimzers failed. %s initialize failed.", (it.first).c_str()); | |||
if (it.second->Initialize(options) != SUCCESS) { | |||
GELOGE(GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED, | |||
"[Init][GraphOptimizer] GraphOptimizer: %s initialize failed.", (it.first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "InitGraphOptimizers failed. %s initialize failed.", (it.first).c_str()); | |||
return GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED; | |||
} | |||
} | |||
@@ -340,11 +341,11 @@ Status OpsKernelManager::Finalize() { | |||
} | |||
} | |||
for (auto iter = graph_optimizers_.begin(); iter != graph_optimizers_.end(); ++iter) { | |||
GELOGI("GraphOptimzers finalize, name: %s.", (iter->first).c_str()); | |||
GELOGI("GraphOptimizer finalize, name: %s.", (iter->first).c_str()); | |||
Status status = iter->second->Finalize(); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "[Check][Status]GraphOptimzers finalize failed, name: %s.", (iter->first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "GraphOptimzers finalize failed, name: %s.", (iter->first).c_str()); | |||
GELOGE(status, "[Check][Status] GraphOptimizer finalize failed, name: %s.", (iter->first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "GraphOptimizer finalize failed, name: %s.", (iter->first).c_str()); | |||
return status; | |||
} | |||
} | |||
@@ -398,8 +399,12 @@ const map<string, OpsKernelInfoStorePtr> &OpsKernelManager::GetAllOpsKernelInfoS | |||
const map<string, GraphOptimizerPtr> &OpsKernelManager::GetAllGraphOptimizerObjs() const { return graph_optimizers_; } | |||
const vector<pair<string, GraphOptimizerPtr>> &OpsKernelManager::GetAllGraphOptimizerObjsByPriority() const { | |||
return graph_optimizers_by_priority_; | |||
// Return the ordered optimizer list, with atomic or compound engines first
// depending on the requested precedence.
const vector<pair<string, GraphOptimizerPtr>> &OpsKernelManager::GetAllGraphOptimizerObjsByPriority(
    bool atomic_first_flag) const {
  return atomic_first_flag ? atomic_first_optimizers_by_priority_ : compound_first_optimizers_by_priority_;
}
void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, | |||
@@ -407,11 +412,11 @@ void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, | |||
for (const auto &it : graph_optimizers_) { | |||
GraphOptimizerAttribute attrs; | |||
if (it.second->GetAttributes(attrs) != SUCCESS) { | |||
GELOGW("Get GraphOptimzer name: %s attributes failed.", (it.first).c_str()); | |||
GELOGW("Get GraphOptimizer name: %s attributes failed.", (it.first).c_str()); | |||
continue; | |||
} | |||
if (attrs.engineName == engine_name) { | |||
GELOGD("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(), | |||
GELOGD("GetGraphOptimizerByEngine GraphOptimizer name: %s, engineName: %s", (it.first).c_str(), | |||
attrs.engineName.c_str()); | |||
graph_optimizer.push_back(it.second); | |||
} | |||
@@ -428,39 +433,62 @@ bool OpsKernelManager::GetEnableAICPUFlag() const { return enable_aicpu_flag_; } | |||
bool OpsKernelManager::GetEnablePluginFlag() const { return (enable_fe_flag_ || enable_aicpu_flag_); } | |||
Status OpsKernelManager::InitGraphOptimizerPriority() { | |||
void OpsKernelManager::ClassifyGraphOptimizers() { | |||
if (compound_engine_contains_.empty()) { | |||
atomic_graph_optimizers_ = graph_optimizers_; | |||
compound_graph_optimizers_.clear(); | |||
return; | |||
} | |||
for (const auto &item : graph_optimizers_) { | |||
if (compound_engine_contains_.find(item.first) != compound_engine_contains_.end()) { | |||
GELOGI("Engine %s is a compound engine.", item.first.c_str()); | |||
compound_graph_optimizers_.emplace(item); | |||
} else { | |||
GELOGI("Engine %s is an atomic engine.", item.first.c_str()); | |||
atomic_graph_optimizers_.emplace(item); | |||
} | |||
} | |||
} | |||
void OpsKernelManager::InitGraphOptimizerPriority() { | |||
string priority_conf_path = "plugin/opskernel/optimizer_priority.pbtxt"; | |||
string path = PluginManager::GetPath(); | |||
path.append(priority_conf_path); | |||
optimizers::Priority optimizerPriority; | |||
bool ret = ReadProtoFromText(path.c_str(), &optimizerPriority); | |||
if (!ret) { | |||
if (!ReadProtoFromText(path.c_str(), &optimizerPriority)) { | |||
GELOGW("Read priority file failed. Follow loading sequence."); | |||
return SUCCESS; | |||
return; | |||
} | |||
auto priorities = optimizerPriority.optimizer(); | |||
if (priorities.empty()) { | |||
GELOGI("No priority file config. Follow loading sequence."); | |||
return SUCCESS; | |||
return; | |||
} | |||
// sort optimizer map by priority | |||
std::stringstream priority_seq; | |||
for (const auto optimizer_name : priorities) { | |||
auto name_to_optimizer_pair = graph_optimizers_.find(optimizer_name); | |||
if (name_to_optimizer_pair != graph_optimizers_.end()) { | |||
graph_optimizers_by_priority_.emplace_back(*name_to_optimizer_pair); | |||
auto name_to_optimizer_pair = atomic_graph_optimizers_.find(optimizer_name); | |||
if (name_to_optimizer_pair != atomic_graph_optimizers_.end()) { | |||
atomic_graph_optimizers_by_priority_.emplace_back(*name_to_optimizer_pair); | |||
priority_seq << optimizer_name.c_str() << ' '; | |||
} else { | |||
GELOGW("Unknown optimizer %s show up in priority config file. Please check.", optimizer_name.c_str()); | |||
} | |||
} | |||
GELOGI("Graph Optimizers priority initialized. The sequence will follow : %s.", priority_seq.str().c_str()); | |||
return SUCCESS; | |||
GELOGI("Atomic graph Optimizers priority initialized. The sequence will follow : %s.", priority_seq.str().c_str()); | |||
atomic_first_optimizers_by_priority_ = atomic_graph_optimizers_by_priority_; | |||
for (const auto &item : compound_graph_optimizers_) { | |||
atomic_first_optimizers_by_priority_.emplace_back(std::make_pair(item.first, item.second)); | |||
compound_first_optimizers_by_priority_.emplace_back(std::make_pair(item.first, item.second)); | |||
} | |||
for (const auto &item : atomic_graph_optimizers_by_priority_) { | |||
compound_first_optimizers_by_priority_.emplace_back(std::make_pair(item.first, item.second)); | |||
} | |||
} | |||
Status OpsKernelManager::FinalizeOpsKernel() { | |||
GELOGI("ge invoke ops kernal finalize."); | |||
GELOGI("ge invoke ops kernel finalize."); | |||
Status ret = plugin_manager_.InvokeAll<Status>(kFinalize); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Finalize][Check][Status] invoke Fe finalize failed."); | |||
@@ -18,10 +18,12 @@ | |||
#define GE_OPSKERNEL_MANAGER_OPS_KERNEL_MANAGER_H_ | |||
#include <map> | |||
#include <set> | |||
#include <memory> | |||
#include <string> | |||
#include <vector> | |||
#include <mutex> | |||
#include <set> | |||
#include "framework/common/debug/log.h" | |||
#include "common/ge/plugin_manager.h" | |||
@@ -61,7 +63,25 @@ class GE_FUNC_VISIBILITY OpsKernelManager { | |||
const map<string, GraphOptimizerPtr> &GetAllGraphOptimizerObjs() const; | |||
// get all graph_optimizer by priority | |||
const vector<pair<string, GraphOptimizerPtr>> &GetAllGraphOptimizerObjsByPriority() const; | |||
const vector<pair<string, GraphOptimizerPtr>> &GetAllGraphOptimizerObjsByPriority(bool atomic_first_flag = true) const; | |||
// get atomic_engine graph_optimizer by priority | |||
const vector<pair<string, GraphOptimizerPtr>> &GetAtomicGraphOptimizerObjsByPriority() const { | |||
return atomic_graph_optimizers_by_priority_; | |||
} | |||
// get compound_engine graph_optimizer | |||
const map<string, GraphOptimizerPtr> &GetCompoundGraphOptimizerObjs() const { | |||
return compound_graph_optimizers_; | |||
} | |||
const map<string, std::set<std::string>> &GetCompoundEngineContains() const { | |||
return compound_engine_contains_; | |||
} | |||
const std::map<std::string, std::string> &GetCompoundEngineKernelLibName() const { | |||
return compound_engine_2_kernel_lib_name_; | |||
} | |||
// get subgraphOptimizer by engine name | |||
void GetGraphOptimizerByEngine(const std::string &engine_name, vector<GraphOptimizerPtr> &graph_optimizer); | |||
@@ -93,15 +113,15 @@ class GE_FUNC_VISIBILITY OpsKernelManager { | |||
void InitOpsKernelInfo(); | |||
Status InitGraphOptimzers(const map<string, string> &options); | |||
Status InitGraphOptimizers(const map<string, string> &options); | |||
Status InitPluginOptions(const map<string, string> &options); | |||
Status ParsePluginOptions(const map<string, string> &options, const string &plugin_name, bool &enable_flag); | |||
Status LoadGEGraphOptimizer(map<string, GraphOptimizerPtr>& graphOptimizer); | |||
void ClassifyGraphOptimizers(); | |||
Status InitGraphOptimizerPriority(); | |||
void InitGraphOptimizerPriority(); | |||
// Finalize other ops kernel resource | |||
Status FinalizeOpsKernel(); | |||
@@ -112,8 +132,20 @@ class GE_FUNC_VISIBILITY OpsKernelManager { | |||
map<string, OpsKernelInfoStorePtr> ops_kernel_store_{}; | |||
// graph_optimizer | |||
map<string, GraphOptimizerPtr> graph_optimizers_{}; | |||
// ordered graph_optimzer | |||
vector<pair<string, GraphOptimizerPtr>> graph_optimizers_by_priority_{}; | |||
// compound_graph_optimizer | |||
map<string, GraphOptimizerPtr> compound_graph_optimizers_{}; | |||
// atomic_graph_optimizer | |||
map<string, GraphOptimizerPtr> atomic_graph_optimizers_{}; | |||
// ordered atomic_graph_optimizer | |||
vector<pair<string, GraphOptimizerPtr>> atomic_graph_optimizers_by_priority_{}; | |||
// atomic_first graph_optimizer | |||
vector<pair<string, GraphOptimizerPtr>> atomic_first_optimizers_by_priority_{}; | |||
// compound_first graph_optimizer | |||
vector<pair<string, GraphOptimizerPtr>> compound_first_optimizers_by_priority_{}; | |||
// {compound_engine, {containing atomic engines}} | |||
std::map<std::string, std::set<std::string>> compound_engine_contains_{}; | |||
// {compound_engine, compound_engine_kernel_lib_name} | |||
std::map<std::string, std::string> compound_engine_2_kernel_lib_name_{}; | |||
// opsKernelInfo | |||
map<string, vector<OpInfo>> ops_kernel_info_{}; | |||
@@ -16,9 +16,7 @@ | |||
#include "plugin/engine/dnnengines.h" | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
namespace ge { | |||
AICoreDNNEngine::AICoreDNNEngine(const std::string &engine_name) { | |||
@@ -29,14 +27,6 @@ AICoreDNNEngine::AICoreDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
AICoreDNNEngine::AICoreDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status AICoreDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status AICoreDNNEngine::Finalize() { return SUCCESS; } | |||
void AICoreDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
VectorCoreDNNEngine::VectorCoreDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_1; | |||
@@ -45,14 +35,6 @@ VectorCoreDNNEngine::VectorCoreDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
VectorCoreDNNEngine::VectorCoreDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status VectorCoreDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status VectorCoreDNNEngine::Finalize() { return SUCCESS; } | |||
void VectorCoreDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
AICpuDNNEngine::AICpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_2; | |||
@@ -61,14 +43,6 @@ AICpuDNNEngine::AICpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
AICpuDNNEngine::AICpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status AICpuDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status AICpuDNNEngine::Finalize() { return SUCCESS; } | |||
void AICpuDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
AICpuTFDNNEngine::AICpuTFDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_3; | |||
@@ -77,28 +51,12 @@ AICpuTFDNNEngine::AICpuTFDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
AICpuTFDNNEngine::AICpuTFDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status AICpuTFDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status AICpuTFDNNEngine::Finalize() { return SUCCESS; } | |||
void AICpuTFDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
// GE-local engine descriptor. Only the name is set; compute_cost keeps its
// default, unlike the AICore/AICpu engines above which assign an explicit COST_*.
GeLocalDNNEngine::GeLocalDNNEngine(const std::string &engine_name) {
  engine_attribute_.engine_name = engine_name;
  // FORMAT_RESERVED presumably means "no format constraint" -- TODO confirm.
  engine_attribute_.engine_input_format = FORMAT_RESERVED;
  engine_attribute_.engine_output_format = FORMAT_RESERVED;
}
GeLocalDNNEngine::GeLocalDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status GeLocalDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status GeLocalDNNEngine::Finalize() { return SUCCESS; } | |||
void GeLocalDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
HostCpuDNNEngine::HostCpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_10; | |||
@@ -107,39 +65,21 @@ HostCpuDNNEngine::HostCpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
HostCpuDNNEngine::HostCpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status HostCpuDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status HostCpuDNNEngine::Finalize() { return SUCCESS; } | |||
void HostCpuDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
RtsDNNEngine::RtsDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.engine_input_format = FORMAT_RESERVED; | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
RtsDNNEngine::RtsDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status RtsDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status RtsDNNEngine::Finalize() { return SUCCESS; } | |||
void RtsDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
HcclDNNEngine::HcclDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.engine_input_format = FORMAT_RESERVED; | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
HcclDNNEngine::HcclDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status HcclDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status HcclDNNEngine::Finalize() { return SUCCESS; } | |||
void HcclDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
FftsPlusDNNEngine::FftsPlusDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.engine_input_format = FORMAT_RESERVED; | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
} // namespace ge |
@@ -27,123 +27,66 @@ | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine { | |||
public: | |||
AICoreDNNEngine() = default; | |||
explicit AICoreDNNEngine(const std::string &engine_name); | |||
explicit AICoreDNNEngine(const DNNEngineAttribute &attrs); | |||
~AICoreDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit AICoreDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~AICoreDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine { | |||
public: | |||
VectorCoreDNNEngine() = default; | |||
explicit VectorCoreDNNEngine(const std::string &engine_name); | |||
explicit VectorCoreDNNEngine(const DNNEngineAttribute &attrs); | |||
~VectorCoreDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit VectorCoreDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~VectorCoreDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine { | |||
public: | |||
AICpuDNNEngine() = default; | |||
explicit AICpuDNNEngine(const std::string &engine_name); | |||
explicit AICpuDNNEngine(const DNNEngineAttribute &attrs); | |||
~AICpuDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit AICpuDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~AICpuDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine { | |||
public: | |||
AICpuTFDNNEngine() = default; | |||
explicit AICpuTFDNNEngine(const std::string &engine_name); | |||
explicit AICpuTFDNNEngine(const DNNEngineAttribute &attrs); | |||
~AICpuTFDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit AICpuTFDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~AICpuTFDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine { | |||
public: | |||
GeLocalDNNEngine() = default; | |||
explicit GeLocalDNNEngine(const std::string &engine_name); | |||
explicit GeLocalDNNEngine(const DNNEngineAttribute &attrs); | |||
~GeLocalDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit GeLocalDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~GeLocalDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine { | |||
public: | |||
HostCpuDNNEngine() = default; | |||
explicit HostCpuDNNEngine(const std::string &engine_name); | |||
explicit HostCpuDNNEngine(const DNNEngineAttribute &attrs); | |||
~HostCpuDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit HostCpuDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~HostCpuDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine { | |||
public: | |||
RtsDNNEngine() = default; | |||
explicit RtsDNNEngine(const std::string &engine_name); | |||
explicit RtsDNNEngine(const DNNEngineAttribute &attrs); | |||
~RtsDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit RtsDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~RtsDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine { | |||
public: | |||
HcclDNNEngine() = default; | |||
explicit HcclDNNEngine(const std::string &engine_name); | |||
explicit HcclDNNEngine(const DNNEngineAttribute &attrs); | |||
~HcclDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
explicit HcclDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~HcclDNNEngine() override = default; | |||
}; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
class GE_FUNC_VISIBILITY FftsPlusDNNEngine : public DNNEngine { | |||
public: | |||
explicit FftsPlusDNNEngine(const std::string &engine_name); | |||
explicit FftsPlusDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~FftsPlusDNNEngine() override = default; | |||
}; | |||
} // namespace ge | |||
#endif // GE_PLUGIN_ENGINE_DNNENGINES_H_ |
@@ -63,7 +63,13 @@ void RegisterAiCoreEngine() { | |||
const std::string ai_core = "AIcoreEngine"; | |||
std::vector<std::string> mem_type_aicore; | |||
mem_type_aicore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_aicore = {ai_core, mem_type_aicore, COST_0, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_aicore = { ai_core, | |||
mem_type_aicore, | |||
COST_0, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr aicore_engine_ptr = MakeShared<AICoreDNNEngine>(attr_aicore); | |||
if (aicore_engine_ptr == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][AiCoreEngine] failed, as malloc shared_ptr failed."); | |||
@@ -79,8 +85,13 @@ void RegisterVectorEngine() { | |||
const std::string vector_core = "VectorEngine"; | |||
std::vector<std::string> mem_type_aivcore; | |||
mem_type_aivcore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_vector_core = {vector_core, mem_type_aivcore, COST_1, | |||
DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_vector_core = { vector_core, | |||
mem_type_aivcore, | |||
COST_1, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr vectorcore_engine_ptr = MakeShared<VectorCoreDNNEngine>(attr_vector_core); | |||
if (vectorcore_engine_ptr == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][VectorEngine] failed, as malloc shared_ptr failed."); | |||
@@ -97,7 +108,13 @@ void RegisterAiCpuEngine() { | |||
std::vector<std::string> mem_type_aicpu; | |||
mem_type_aicpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_aicpu = { vm_aicpu, | |||
mem_type_aicpu, | |||
COST_2, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr vm_engine_ptr = MakeShared<AICpuDNNEngine>(attr_aicpu); | |||
if (vm_engine_ptr == nullptr) { | |||
@@ -115,8 +132,13 @@ void RegisterAiCpuTFEngine() { | |||
std::vector<std::string> mem_type_aicpu_tf; | |||
mem_type_aicpu_tf.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_aicpu_tf = { vm_aicpu_tf, | |||
mem_type_aicpu_tf, | |||
COST_3, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr vm_engine_ptr = MakeShared<AICpuTFDNNEngine>(attr_aicpu_tf); | |||
if (vm_engine_ptr == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][AiCpuTFEngine]make vm_engine_ptr failed"); | |||
@@ -133,7 +155,13 @@ void RegisterGeLocalEngine() { | |||
std::vector<std::string> mem_type_ge_local; | |||
mem_type_ge_local.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
// GeLocal use minimum priority, set it as 9 | |||
DNNEngineAttribute attr_ge_local = {vm_ge_local, mem_type_ge_local, COST_9, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_ge_local = { vm_ge_local, | |||
mem_type_ge_local, | |||
COST_9, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr ge_local_engine = MakeShared<GeLocalDNNEngine>(attr_ge_local); | |||
if (ge_local_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][GeLocalEngine] failed, as malloc shared_ptr failed."); | |||
@@ -150,8 +178,13 @@ void RegisterHostCpuEngine() { | |||
std::vector<std::string> mem_type_host_cpu; | |||
mem_type_host_cpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
// HostCpu use minimum priority, set it as 10 | |||
DNNEngineAttribute attr_host_cpu = {vm_host_cpu, mem_type_host_cpu, COST_10, | |||
HOST, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_host_cpu = { vm_host_cpu, | |||
mem_type_host_cpu, | |||
COST_10, | |||
HOST, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr host_cpu_engine = MakeShared<HostCpuDNNEngine>(attr_host_cpu); | |||
if (host_cpu_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][HostCpuEngine] failed, as malloc shared_ptr failed."); | |||
@@ -167,7 +200,13 @@ void RegisterRtsEngine() { | |||
const std::string vm_rts = "DNN_VM_RTS"; | |||
std::vector<std::string> mem_type_rts; | |||
mem_type_rts.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_rts = {vm_rts, mem_type_rts, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_rts = { vm_rts, | |||
mem_type_rts, | |||
COST_1, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr rts_engine = MakeShared<RtsDNNEngine>(attr_rts); | |||
if (rts_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][RtsEngine] failed, as malloc shared_ptr failed."); | |||
@@ -183,7 +222,13 @@ void RegisterHcclEngine() { | |||
const std::string dnn_hccl = "DNN_HCCL"; | |||
std::vector<std::string> mem_type_hccl; | |||
mem_type_hccl.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_hccl = {dnn_hccl, mem_type_hccl, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_hccl = { dnn_hccl, | |||
mem_type_hccl, | |||
COST_1, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr hccl_engine = MakeShared<HcclDNNEngine>(attr_hccl); | |||
if (hccl_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][HcclEngine] failed, as malloc shared_ptr failed."); | |||
@@ -195,6 +240,28 @@ void RegisterHcclEngine() { | |||
} | |||
} | |||
void RegisterFftsPlusEngine() { | |||
const std::string dnn_ffts_plus = "DNN_FFTS_PLUS"; | |||
std::vector<std::string> mem_type_ffts_plus; | |||
mem_type_ffts_plus.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_ffts_plus = { dnn_ffts_plus, | |||
mem_type_ffts_plus, | |||
COST_0, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
false }; | |||
DNNEnginePtr ffts_plus_engine = MakeShared<FftsPlusDNNEngine>(attr_ffts_plus); | |||
if (ffts_plus_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][FftsPlusDNNEngine] failed, as malloc shared_ptr failed."); | |||
REPORT_INNER_ERROR("E19999", "RegisterFftsPlusEngine failed for new DNNEnginePtr failed."); | |||
return; | |||
} | |||
if (EngineManager::RegisterEngine(dnn_ffts_plus, ffts_plus_engine) != SUCCESS) { | |||
GELOGW("register ffts_plus_engine failed"); | |||
} | |||
} | |||
void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines) { | |||
RegisterAiCoreEngine(); | |||
RegisterVectorEngine(); | |||
@@ -204,6 +271,7 @@ void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines) { | |||
RegisterHostCpuEngine(); | |||
RegisterRtsEngine(); | |||
RegisterHcclEngine(); | |||
RegisterFftsPlusEngine(); | |||
for (auto it = EngineManager::engine_map_->begin(); it != EngineManager::engine_map_->end(); ++it) { | |||
GELOGI("get engine %s from engine plugin.", it->first.c_str()); | |||
@@ -564,6 +564,41 @@ AiCpuBaseTask::~AiCpuBaseTask() { | |||
if (ext_info_addr_dev_ != nullptr) { | |||
(void)rtFree(ext_info_addr_dev_); | |||
} | |||
if (rt_event_ != nullptr) { | |||
(void)rtEventDestroy(rt_event_); | |||
} | |||
} | |||
Status AiCpuBaseTask::UpdateEventIdForBlockingAicpuOp() { | |||
bool is_support = false; | |||
if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); | |||
return FAILED; | |||
} | |||
if (!is_support) { | |||
GELOGD("Device not support blocking aicpu op process"); | |||
return SUCCESS; | |||
} | |||
uint32_t event_id = 0; | |||
auto rt_ret = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtGetEventID(rt_event_, &event_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtGetEventID] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (aicpu_ext_handle_->UpdateEventId(event_id) != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Update event id=%u failed.", event_id); | |||
GELOGE(FAILED, "[Update][EventId] Update event id failed", event_id); | |||
return FAILED; | |||
} | |||
GELOGI("Update event_id=%u success", event_id); | |||
return SUCCESS; | |||
} | |||
Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id) { | |||
@@ -577,6 +612,9 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||
GELOGD("Get unknown_type is %d.", unknown_shape_type_val); | |||
unknown_type_ = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_); | |||
aicpu_ext_handle_.reset(new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), | |||
num_inputs_, | |||
num_outputs_, | |||
@@ -595,6 +633,13 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | |||
"[Update][SessionInfo] failed."); | |||
if (is_blocking_aicpu_op_) { | |||
if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | |||
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | |||
aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), | |||
@@ -770,6 +815,63 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vecto | |||
return SUCCESS; | |||
} | |||
Status AiCpuBaseTask::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) { | |||
int32_t device_id = 0; | |||
auto rt_ret = rtGetDevice(&device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
int32_t value = 0; | |||
rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) { | |||
REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d", | |||
RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); | |||
GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d", | |||
RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); | |||
return FAILED; | |||
} | |||
is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? true : false); | |||
return SUCCESS; | |||
} | |||
Status AiCpuBaseTask::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) { | |||
bool is_support = false; | |||
if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); | |||
return FAILED; | |||
} | |||
if (!is_support) { | |||
GELOGD("Device not support blocking aicpu op process."); | |||
return SUCCESS; | |||
} | |||
GELOGI("Distribute queue task begin"); | |||
if (rt_event_ == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr"); | |||
GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr"); | |||
return FAILED; | |||
} | |||
auto rt_ret = rtStreamWaitEvent(stream, rt_event_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtEventReset(rt_event_, stream); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
} | |||
AiCpuTask::~AiCpuTask() { | |||
FreeHbm(args_); | |||
FreeHbm(io_addr_); | |||
@@ -813,6 +915,14 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { | |||
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | |||
GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -1089,6 +1199,13 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||
} | |||
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | |||
GELOGD("Invoke rtCpuKernelLaunch succeeded"); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -178,6 +178,10 @@ class AiCpuBaseTask : public OpTask { | |||
rtStream_t stream); | |||
Status UpdateOutputShape(vector<GeTensorDesc> &output_desc); | |||
Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); | |||
// for blocking aicpu op | |||
Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); | |||
Status UpdateEventIdForBlockingAicpuOp(); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
protected: | |||
size_t num_inputs_ = 0; | |||
@@ -186,6 +190,9 @@ class AiCpuBaseTask : public OpTask { | |||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> aicpu_ext_handle_; | |||
void *ext_info_addr_dev_ = nullptr; | |||
vector<bool> input_is_const_; | |||
// for blocking aicpu op | |||
bool is_blocking_aicpu_op_ = false; | |||
rtEvent_t rt_event_ = nullptr; | |||
}; | |||
class AiCpuTask : public AiCpuBaseTask { | |||
@@ -43,14 +43,21 @@ struct DNNEngineAttribute { | |||
// If engine input format must be specific, set this attribute, else set FORMAT_RESERVED | |||
Format engine_input_format; | |||
Format engine_output_format; | |||
bool atomic_engine_flag; | |||
}; | |||
class GE_FUNC_VISIBILITY DNNEngine { | |||
public: | |||
DNNEngine() = default; | |||
explicit DNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
virtual ~DNNEngine() = default; | |||
virtual Status Initialize(const std::map<std::string, std::string> &options) = 0; | |||
virtual Status Finalize() = 0; | |||
virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; | |||
Status Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status Finalize() { return SUCCESS; } | |||
void GetAttributes(DNNEngineAttribute &attr) const { attr = engine_attribute_; } | |||
bool IsAtomic() const { return engine_attribute_.atomic_engine_flag; } | |||
protected: | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
} // namespace ge | |||
@@ -1 +1 @@ | |||
Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd | |||
Subproject commit 8f2c4395c346af026c470b47a7c52f2ab5b51f90 |
@@ -1 +1 @@ | |||
Subproject commit 7a2daaa2625505e1a15e1faa46c90df1a23dd6fa | |||
Subproject commit 72d6fcd776ea2eba8000249fd02c8948042e9856 |
@@ -16,12 +16,94 @@ | |||
#include <cce/dnn.h> | |||
#include <securec.h> | |||
#include "runtime_stub.h" | |||
#include "runtime/rt.h" | |||
#define ADD_STUB_RETURN_VALUE(FUNC, TYPE) std::vector<TYPE> g_Stub_##FUNC##_RETURN | |||
#define GET_STUB_RETURN_VALUE(FUNC, TYPE, DEFAULT) ({ \ | |||
TYPE result = DEFAULT; \ | |||
if (!g_Stub_##FUNC##_RETURN.empty()) { \ | |||
result = g_Stub_##FUNC##_RETURN.back(); \ | |||
g_Stub_##FUNC##_RETURN.pop_back(); \ | |||
} \ | |||
result; \ | |||
}) | |||
#define DEL_STUB_RETURN_VALUE(FUNC, TYPE) \ | |||
do { \ | |||
extern std::vector<TYPE> g_Stub_##FUNC##_RETURN; \ | |||
g_Stub_##FUNC##_RETURN.clear(); \ | |||
} while (0) | |||
#define ADD_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME | |||
#define GET_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, DEFAULT) ({ \ | |||
TYPE value; \ | |||
if (!g_Stub_##FUNC##_OUT_##NAME.empty()) { \ | |||
value = g_Stub_##FUNC##_OUT_##NAME.back(); \ | |||
g_Stub_##FUNC##_OUT_##NAME.pop_back(); \ | |||
} else { \ | |||
value = DEFAULT; \ | |||
} \ | |||
value; \ | |||
}) | |||
#define DEL_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) \ | |||
do { \ | |||
extern std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME; \ | |||
g_Stub_##FUNC##_OUT_##NAME.clear(); \ | |||
} while (0) | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#define EVENT_LENTH 10 | |||
void rtStubTearDown() { | |||
DEL_STUB_RETURN_VALUE(rtGetDevice, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtEventReset, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtEventCreate, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtGetEventID, rtError_t); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtGetDevice, rtError_t); | |||
rtError_t rtGetDevice(int32_t *device) { | |||
return GET_STUB_RETURN_VALUE(rtGetDevice, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); | |||
ADD_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value); | |||
rtError_t rtGetDeviceCapability(int32_t device, int32_t moduleType, int32_t featureType, int32_t *value) { | |||
*value = GET_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT); | |||
return GET_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); | |||
rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { | |||
return GET_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtEventReset, rtError_t); | |||
rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { | |||
return GET_STUB_RETURN_VALUE(rtEventReset, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtEventCreate, rtError_t); | |||
rtError_t rtEventCreate(rtEvent_t *event) { | |||
*event = new int[EVENT_LENTH]; | |||
return GET_STUB_RETURN_VALUE(rtEventCreate, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtGetEventID, rtError_t); | |||
rtError_t rtGetEventID(rtEvent_t event, uint32_t *event_id) { | |||
*event_id = 0; | |||
return GET_STUB_RETURN_VALUE(rtEventCreate, rtError_t, RT_ERROR_NONE); | |||
} | |||
rtError_t rtCtxSetCurrent(rtContext_t ctx) { return RT_ERROR_NONE; } | |||
rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { | |||
@@ -42,11 +124,6 @@ rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtEventCreate(rtEvent_t *event) { | |||
*event = new int[EVENT_LENTH]; | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag) { | |||
return rtEventCreate(event); | |||
} | |||
@@ -112,8 +189,6 @@ rtError_t rtMemcpyAsync(void *dst, uint64_t dest_max, const void *src, uint64_t | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { return RT_ERROR_NONE; } | |||
rtError_t rtSetTSDevice(uint32_t tsId) { | |||
return RT_ERROR_NONE; | |||
} | |||
@@ -347,10 +422,6 @@ rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *value_ptr, | |||
rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream) { return RT_ERROR_NONE; } | |||
rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { return RT_ERROR_NONE; } | |||
rtError_t rtGetDevice(int32_t *device) { return RT_ERROR_NONE; } | |||
rtError_t rtDatadumpInfoLoad(const void *dump_info, uint32_t length) { return RT_ERROR_NONE; } | |||
rtError_t rtKernelLaunchWithFlag(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, | |||
@@ -467,6 +538,14 @@ rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) { | |||
return RT_ERROR_NONE; | |||
} | |||
@@ -0,0 +1,70 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __INC_LLT_RUNTIME_STUB_H | |||
#define __INC_LLT_RUNTIME_STUB_H | |||
#include <vector> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
void rtStubTearDown(); | |||
#define RTS_STUB_SETUP() \ | |||
do { \ | |||
rtStubTearDown(); \ | |||
} while (0) | |||
#define RTS_STUB_TEARDOWN() \ | |||
do { \ | |||
rtStubTearDown(); \ | |||
} while (0) | |||
#define RTS_STUB_RETURN_VALUE(FUNC, TYPE, VALUE) \ | |||
do { \ | |||
g_Stub_##FUNC##_RETURN.emplace(g_Stub_##FUNC##_RETURN.begin(), VALUE); \ | |||
} while (0) | |||
#define RTS_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, VALUE) \ | |||
do { \ | |||
g_Stub_##FUNC##_OUT_##NAME.emplace(g_Stub_##FUNC##_OUT_##NAME.begin(), VALUE); \ | |||
} while (0) | |||
#define RTS_STUB_RETURN_EXTERN(FUNC, TYPE) extern std::vector<TYPE> g_Stub_##FUNC##_RETURN; | |||
#define RTS_STUB_OUTBOUND_EXTERN(FUNC, TYPE, NAME) extern std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME; | |||
RTS_STUB_RETURN_EXTERN(rtGetDevice, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtGetDevice, int32_t, device) | |||
RTS_STUB_RETURN_EXTERN(rtGetDeviceCapability, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtGetDeviceCapability, int32_t, value); | |||
RTS_STUB_RETURN_EXTERN(rtStreamWaitEvent, rtError_t); | |||
RTS_STUB_RETURN_EXTERN(rtEventReset, rtError_t); | |||
RTS_STUB_RETURN_EXTERN(rtEventCreate, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, rtEvent_t, event); | |||
RTS_STUB_RETURN_EXTERN(rtGetEventID, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, uint32_t, event_id); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_LLT_RUNTIME_STUB_H |
@@ -45,7 +45,7 @@ file(GLOB_RECURSE METADEF_REGISTER_SRCS CONFIGURE_DEPENDS | |||
"${GE_CODE_DIR}/metadef/register/*.cpp" | |||
) | |||
file(GLOB_RECURSE PARSER_SRCS CONFIGURE_DEPENDS | |||
file(GLOB_RECURSE PARSER_SRCS CONFIGURE_DEPENDS | |||
"${GE_CODE_DIR}/parser/parser/common/*.cc" | |||
) | |||
@@ -114,7 +114,6 @@ list(APPEND INCLUDE_DIRECTORIES | |||
list(APPEND STUB_LIBS | |||
c_sec | |||
slog_stub | |||
cce_ge_stub | |||
runtime_stub | |||
profiler_stub | |||
hccl_stub | |||
@@ -226,7 +225,7 @@ add_custom_command( | |||
add_library(graphengine STATIC ${PARSER_SRCS} ${GE_SRCS}) | |||
target_include_directories(graphengine | |||
PUBLIC | |||
PUBLIC | |||
"${INCLUDE_DIRECTORIES}" | |||
"${GE_CODE_DIR}/ge/host_cpu_engine" | |||
) | |||
@@ -16,7 +16,6 @@ | |||
#include "ge_graph_dsl/op_desc/op_desc_cfg_repo.h" | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "ge_graph_dsl/op_desc/op_desc_cfg.h" | |||
GE_NS_BEGIN | |||
@@ -39,6 +38,8 @@ static std::map<OpType, OpDescCfg> cfg_repo{OP_CFG(DATA, 1, 1, FORMAT_NCHW, DT_F | |||
OP_CFG(EXIT, 1, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(NEXTITERATION, 1, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(NETOUTPUT, 2, 2, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(CONSTANTOP, 0, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(GETNEXT, 0, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(VARIABLE, 1, 1)}; | |||
} // namespace | |||
@@ -15,7 +15,6 @@ | |||
*/ | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "ge_graph_dsl/ge.h" | |||
GE_NS_BEGIN | |||
@@ -32,9 +31,10 @@ REGISTER_OPTYPE_DEFINE(ADD, "Add"); | |||
REGISTER_OPTYPE_DEFINE(WHILE, "While"); | |||
REGISTER_OPTYPE_DEFINE(ENTER, "Enter"); | |||
REGISTER_OPTYPE_DEFINE(MERGE, "Merge"); | |||
REGISTER_OPTYPE_DEFINE(LOOPCOND, "Loopcond"); | |||
REGISTER_OPTYPE_DEFINE(LOOPCOND, "LoopCond"); | |||
REGISTER_OPTYPE_DEFINE(SWITCH, "Switch"); | |||
REGISTER_OPTYPE_DEFINE(EXIT, "Exit"); | |||
REGISTER_OPTYPE_DEFINE(NEXTITERATION, "Nextiteration"); | |||
REGISTER_OPTYPE_DEFINE(NEXTITERATION, "NextIteration"); | |||
REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext"); | |||
GE_NS_END |
@@ -20,6 +20,7 @@ | |||
#include "fake_ns.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
#include "register/ops_kernel_builder_registry.h" | |||
#include "plugin/engine/engine_manage.h" | |||
FAKE_NS_BEGIN | |||
@@ -27,6 +28,9 @@ struct EnvInstaller { | |||
virtual void InstallTo(std::map<string, OpsKernelInfoStorePtr>&) const {} | |||
virtual void InstallTo(std::map<string, GraphOptimizerPtr>&) const {} | |||
virtual void InstallTo(std::map<string, OpsKernelBuilderPtr>&) const {} | |||
virtual void InstallTo(std::map<string, std::set<std::string>>&) const {} | |||
virtual void InstallTo(std::map<string, std::string>&) const {} | |||
virtual void InstallTo(std::map<string, DNNEnginePtr>&) const {} | |||
virtual void Install() const {} | |||
}; | |||
@@ -0,0 +1,40 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_4DCD71AA72F8492D8594C49094B92528 | |||
#define INC_4DCD71AA72F8492D8594C49094B92528 | |||
#include "ge_running_env/fake_ns.h" | |||
#include "common/optimizer/graph_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Test stub for a single-engine ("atomic") graph optimizer.  Every optimize
// hook is a no-op that reports SUCCESS (see fake_atomic_optimizer.cc); only
// GetAttributes carries real data: the engine name given at construction.
struct FakeAtomicOptimizer : GraphOptimizer {
  // @param engine_name  name reported back through GetAttributes().
  explicit FakeAtomicOptimizer(const std::string &engine_name) : engine_name_(engine_name) {}

 private:
  Status Initialize(const map<string, string> &options) override;
  Status Finalize() override;
  Status OptimizeOriginalGraph(ComputeGraph &graph) override;
  Status OptimizeFusedGraph(ComputeGraph &graph) override;
  Status OptimizeWholeGraph(ComputeGraph &graph) override;
  Status GetAttributes(GraphOptimizerAttribute &attrs) const override;

 protected:
  // Protected so subclasses (e.g. FakeCompoundOptimizer) can report it too.
  std::string engine_name_;
};
FAKE_NS_END | |||
#endif |
@@ -0,0 +1,40 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_897A92FE9414452E8912FC7204E018A8 | |||
#define INC_897A92FE9414452E8912FC7204E018A8 | |||
#include "ge_running_env/fake_ns.h" | |||
#include "ge_running_env/fake_engine.h" | |||
#include "common/optimizer/graph_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Test stub for a compound engine: a named engine composed of a set of
// sub-engine names.  Overrides the EnvInstaller hooks to register its
// optimizer, containment map, kernel-lib-name mapping and DNNEngine entry
// (see fake_compound_engine.cc).
struct FakeCompoundEngine : FakeEngine {
  // @param name         engine name used as the key in every installed map.
  // @param sub_engines  names of the atomic engines this compound engine wraps.
  FakeCompoundEngine(const std::string &name, const std::set<std::string> &sub_engines) : FakeEngine(name),
      sub_engines_(sub_engines) {}

 private:
  void InstallTo(std::map<std::string, GraphOptimizerPtr>&) const override;
  void InstallTo(std::map<std::string, OpsKernelInfoStorePtr>&) const override;
  void InstallTo(std::map<std::string, std::set<std::string>>&) const override;
  void InstallTo(std::map<std::string, std::string>&) const override;
  void InstallTo(std::map<std::string, DNNEnginePtr>&) const override;

 private:
  std::set<std::string> sub_engines_;
};
FAKE_NS_END | |||
#endif |
@@ -0,0 +1,34 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_8E85C90AF30E4DBF9EF50467846EDA88 | |||
#define INC_8E85C90AF30E4DBF9EF50467846EDA88 | |||
#include "ge_running_env/fake_ns.h" | |||
#include "ge_running_env/fake_atomic_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Optimizer stub for a compound (e.g. FFTS+) engine: its OptimizeFusedGraph
// wraps the graph's compute nodes into a PartitionedCall subgraph and tags
// them with a thread scope id (see fake_compound_optimizer.cc).
struct FakeCompoundOptimizer : FakeAtomicOptimizer {
 public:
  explicit FakeCompoundOptimizer(const std::string &engine_name) : FakeAtomicOptimizer(engine_name) {}

 private:
  Status OptimizeFusedGraph(ComputeGraph &graph) override;
  // Counter shared by ALL instances; incremented once per fused graph.
  // NOTE(review): not thread-safe — presumably test code runs single-threaded;
  // confirm before reusing in concurrent tests.
  static uint32_t thread_scope_id_;
};
FAKE_NS_END | |||
#endif |
@@ -39,14 +39,16 @@ struct FakeEngine : EnvInstaller { | |||
private: | |||
void InstallTo(std::map<string, OpsKernelInfoStorePtr>&) const override; | |||
void InstallTo(std::map<string, OpsKernelBuilderPtr>&) const override; | |||
void InstallTo(std::map<std::string, DNNEnginePtr>&) const override; | |||
private: | |||
template <typename BasePtr, typename SubClass> | |||
void InstallFor(std::map<string, BasePtr>& maps, const std::map<std::string, std::shared_ptr<SubClass>>&) const; | |||
private: | |||
protected: | |||
std::string engine_name_; | |||
std::set<std::string> info_store_names_; | |||
private: | |||
std::map<std::string, FakeOpsKernelBuilderPtr> custom_builders_; | |||
std::map<std::string, FakeOpsKernelInfoStorePtr> custom_info_stores_; | |||
}; | |||
@@ -38,6 +38,9 @@ struct GeRunningEnvFaker { | |||
std::map<string, OpsKernelInfoStorePtr> &ops_kernel_info_stores_; | |||
std::map<string, GraphOptimizerPtr> &ops_kernel_optimizers_; | |||
std::map<string, OpsKernelBuilderPtr> &ops_kernel_builders_; | |||
std::map<string, std::set<std::string>> &compound_engines_contains_; | |||
std::map<string, std::string> &compound_engine_2_kernel_lib_name_; | |||
std::map<std::string, DNNEnginePtr> &engine_map_; | |||
}; | |||
FAKE_NS_END | |||
@@ -0,0 +1,46 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "ge_running_env/fake_atomic_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Stub initialization: accepts any options, performs no work.
// @param options  unused configuration map.
// @return SUCCESS always.
Status FakeAtomicOptimizer::Initialize(const map<string, string> &options) {
  return SUCCESS;
}
// (Fixed: removed the stray ';' after the function body — an empty
// declaration that trips -pedantic builds and matches no sibling definition.)
// Stub teardown: nothing to release; always reports SUCCESS.
Status FakeAtomicOptimizer::Finalize() {
  return SUCCESS;
}
// Stub pass: leaves the original graph untouched; always reports SUCCESS.
Status FakeAtomicOptimizer::OptimizeOriginalGraph(ComputeGraph &graph) {
  return SUCCESS;
}
// Stub pass: leaves the fused graph untouched; always reports SUCCESS.
// (FakeCompoundOptimizer overrides this with real subgraph-building logic.)
Status FakeAtomicOptimizer::OptimizeFusedGraph(ComputeGraph& graph) {
  return SUCCESS;
}
// Stub pass: leaves the whole graph untouched; always reports SUCCESS.
Status FakeAtomicOptimizer::OptimizeWholeGraph(ComputeGraph &graph) {
  return SUCCESS;
}
// Reports this optimizer's identity; only the engine name is populated,
// all other attribute fields keep their defaults.
Status FakeAtomicOptimizer::GetAttributes(GraphOptimizerAttribute &attrs) const {
  attrs.engineName = engine_name_;
  return SUCCESS;
}
FAKE_NS_END |
@@ -0,0 +1,48 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "ge_running_env/fake_compound_engine.h" | |||
#include "ge_running_env/fake_compound_optimizer.h" | |||
FAKE_NS_BEGIN | |||
void FakeCompoundEngine::InstallTo(std::map<std::string, GraphOptimizerPtr> &graph_optimizers) const { | |||
auto optimizer = std::make_shared<FakeCompoundOptimizer>(engine_name_); | |||
graph_optimizers[engine_name_] = optimizer; | |||
} | |||
// Intentionally a no-op: this override installs nothing into the
// ops-kernel-info-store map.
void FakeCompoundEngine::InstallTo(std::map<std::string, OpsKernelInfoStorePtr>&) const {
}
// Publishes the set of sub-engine names this compound engine contains,
// keyed by the compound engine's own name (overwrites any existing entry).
void FakeCompoundEngine::InstallTo(std::map<std::string, std::set<std::string>> &compound_engine_contains) const {
  compound_engine_contains[engine_name_] = sub_engines_;
}
// Maps this engine's name to its kernel lib name — but only when exactly one
// info store is configured; with zero or several stores the mapping would be
// ambiguous, so nothing is installed.
void FakeCompoundEngine::InstallTo(std::map<std::string, std::string> &compound_engine_2_kernel_lib_name) const {
  if (info_store_names_.size() == 1) {
    compound_engine_2_kernel_lib_name[engine_name_] = *info_store_names_.begin();
  }
}
void FakeCompoundEngine::InstallTo(std::map<string, DNNEnginePtr> &engines) const { | |||
DNNEngineAttribute attr; | |||
attr.engine_name = engine_name_; | |||
attr.atomic_engine_flag = false; | |||
engines[engine_name_] = MakeShared<DNNEngine>(attr); | |||
} | |||
FAKE_NS_END |
@@ -0,0 +1,61 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "ge_running_env/fake_compound_optimizer.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "framework/common/types.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/util.h" | |||
FAKE_NS_BEGIN | |||
uint32_t FakeCompoundOptimizer::thread_scope_id_ = 0; | |||
// Wraps the graph's compute nodes into one "PartitionedCall_<id>" subgraph,
// marks the parent node with ATTR_NAME_FFTS_PLUS_SUB_GRAPH and tags every
// subgraph node with the current thread scope id.
// @param graph  graph to partition; PLACEHOLDER/END nodes are left outside.
// @return SUCCESS when there is nothing to fuse or the subgraph was built;
//         FAILED if BuildSubgraphWithNodes returns null.
Status FakeCompoundOptimizer::OptimizeFusedGraph(ComputeGraph& graph) {
  std::set<NodePtr> nodes;
  for (const auto &node : graph.GetDirectNode()) {
    const auto &type = NodeUtils::GetNodeType(node);
    if ((type != PLACEHOLDER) && (type != END)) {
      nodes.emplace(node);
    }
  }
  // Fix: also bail out on an EMPTY node set (original checked only == 1);
  // building a subgraph from no nodes would fail and return a spurious error.
  if (nodes.size() <= 1) {
    return SUCCESS;
  }
  // Plain value instead of a const-ref bound to a temporary.
  const std::string subgraph_name = "PartitionedCall_" + std::to_string(thread_scope_id_);
  const auto &subgraph = GraphUtils::BuildSubgraphWithNodes(graph, nodes, subgraph_name);
  if (subgraph == nullptr) {
    GELOGE(FAILED, "Build subgraph %s failed", subgraph_name.c_str());
    return FAILED;
  }
  const auto &parent_node = subgraph->GetParentNode();
  GE_CHECK_NOTNULL(parent_node);
  (void)AttrUtils::SetStr(parent_node->GetOpDesc(), ATTR_NAME_FFTS_PLUS_SUB_GRAPH, subgraph_name);
  // Every node inside the new subgraph shares the same thread scope id.
  for (const auto &node : subgraph->GetAllNodes()) {
    (void)AttrUtils::SetInt(node->GetOpDesc(), ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_);
  }
  thread_scope_id_++;
  return SUCCESS;
}
FAKE_NS_END |
@@ -15,9 +15,6 @@ | |||
*/ | |||
#include "ge_running_env/fake_engine.h" | |||
#include "ge_running_env/fake_ops_kernel_builder.h" | |||
#include "ge_running_env/fake_ops_kernel_info_store.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
FAKE_NS_BEGIN | |||
@@ -78,4 +75,11 @@ void FakeEngine::InstallTo(std::map<string, OpsKernelBuilderPtr> &ops_kernel_bui | |||
InstallFor<OpsKernelBuilderPtr, FakeOpsKernelBuilder>(ops_kernel_builders, custom_builders_); | |||
} | |||
void FakeEngine::InstallTo(std::map<string, DNNEnginePtr> &engines) const { | |||
DNNEngineAttribute attr; | |||
attr.engine_name = engine_name_; | |||
attr.atomic_engine_flag = true; | |||
engines[engine_name_] = MakeShared<DNNEngine>(attr); | |||
} | |||
FAKE_NS_END |
@@ -32,6 +32,8 @@ std::vector<FakeOp> fake_ops = { | |||
FakeOp(SWITCH).InfoStoreAndBuilder("RTSLib"), FakeOp(LOOPCOND).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(STREAMMERGE).InfoStoreAndBuilder("RTSLib"), FakeOp(STREAMSWITCH).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(STREAMACTIVE).InfoStoreAndBuilder("RTSLib"), FakeOp(EXIT).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(SEND).InfoStoreAndBuilder("RTSLib"), FakeOp(RECV).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(IDENTITY).InfoStoreAndBuilder("RTSLib"), FakeOp(IDENTITYN).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(LESS).InfoStoreAndBuilder("AiCoreLib"), FakeOp(NEXTITERATION).InfoStoreAndBuilder("AiCoreLib"), | |||
FakeOp(CAST).InfoStoreAndBuilder("AiCoreLib"), FakeOp(TRANSDATA).InfoStoreAndBuilder("AiCoreLib"), | |||
@@ -53,4 +55,4 @@ void GeDefaultRunningEnv::InstallTo(GeRunningEnvFaker& ge_env) { | |||
} | |||
} | |||
FAKE_NS_END | |||
FAKE_NS_END |
@@ -15,34 +15,41 @@ | |||
*/ | |||
#include <map> | |||
#include <algorithm> | |||
#include "external/ge/ge_api.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "init/gelib.h" | |||
#include "utility" | |||
#include "plugin/engine/engine_manage.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "ge_default_running_env.h" | |||
#include "ge_running_env/env_installer.h" | |||
#include "op/fake_op_repo.h" | |||
FAKE_NS_BEGIN | |||
namespace { | |||
OpsKernelManager& getKernelManger() { | |||
OpsKernelManager& getKernelManager() { | |||
std::shared_ptr<GELib> instancePtr = ge::GELib::GetInstance(); | |||
return instancePtr->OpsKernelManagerObj(); | |||
} | |||
DNNEngineManager& getDNNEngindManager() { | |||
std::shared_ptr<GELib> instancePtr = ge::GELib::GetInstance(); | |||
return instancePtr->DNNEngineManagerObj(); | |||
} | |||
struct InitEnv { | |||
static InitEnv& GetInstance() { | |||
static InitEnv instance; | |||
return instance; | |||
} | |||
void reset(std::map<string, OpsKernelInfoStorePtr>& ops_kernel_info_stores, | |||
std::map<string, OpsKernelBuilderPtr>& builders) { | |||
void reset(std::map<string, OpsKernelInfoStorePtr> &ops_kernel_info_stores, | |||
std::map<string, OpsKernelBuilderPtr> &builders, | |||
std::map<string, GraphOptimizerPtr> &ops_kernel_optimizers, | |||
std::map<string, std::set<std::string>> &compound_engines_contains, | |||
std::map<string, std::string> &compound_engine_2_kernel_lib_name, | |||
std::map<string, DNNEnginePtr> &engines) { | |||
std::set<string> remove_info_names; | |||
for (auto iter : ops_kernel_info_stores) { | |||
for (auto iter : builders) { | |||
if (kernel_info_names.find(iter.first) == kernel_info_names.end()) { | |||
remove_info_names.insert(iter.first); | |||
} | |||
@@ -50,12 +57,16 @@ struct InitEnv { | |||
for (auto info_name : remove_info_names) { | |||
ops_kernel_info_stores.erase(info_name); | |||
builders.erase(info_name); | |||
ops_kernel_optimizers.erase(info_name); | |||
compound_engines_contains.erase(info_name); | |||
compound_engine_2_kernel_lib_name.erase(info_name); | |||
engines.erase(info_name); | |||
} | |||
} | |||
private: | |||
InitEnv() { | |||
for (auto iter : getKernelManger().GetAllOpsKernelInfoStores()) { | |||
for (auto iter : getKernelManager().GetAllOpsKernelInfoStores()) { | |||
kernel_info_names.insert(iter.first); | |||
} | |||
} | |||
@@ -66,20 +77,26 @@ struct InitEnv { | |||
} // namespace | |||
GeRunningEnvFaker::GeRunningEnvFaker() | |||
: op_kernel_info_(const_cast<std::map<string, vector<OpInfo>>&>(getKernelManger().GetAllOpsKernelInfo())), | |||
: op_kernel_info_(const_cast<std::map<std::string, vector<OpInfo>>&>(getKernelManager().GetAllOpsKernelInfo())), | |||
ops_kernel_info_stores_( | |||
const_cast<std::map<string, OpsKernelInfoStorePtr>&>(getKernelManger().GetAllOpsKernelInfoStores())), | |||
const_cast<std::map<std::string, OpsKernelInfoStorePtr>&>(getKernelManager().GetAllOpsKernelInfoStores())), | |||
ops_kernel_optimizers_( | |||
const_cast<std::map<string, GraphOptimizerPtr>&>(getKernelManger().GetAllGraphOptimizerObjs())), | |||
ops_kernel_builders_(const_cast<std::map<string, OpsKernelBuilderPtr>&>( | |||
OpsKernelBuilderManager::Instance().GetAllOpsKernelBuilders())) { | |||
const_cast<std::map<std::string, GraphOptimizerPtr>&>(getKernelManager().GetAllGraphOptimizerObjs())), | |||
ops_kernel_builders_(const_cast<std::map<std::string, OpsKernelBuilderPtr>&>( | |||
OpsKernelBuilderManager::Instance().GetAllOpsKernelBuilders())), | |||
compound_engines_contains_( | |||
const_cast<std::map<std::string, std::set<std::string>>&>(getKernelManager().GetCompoundEngineContains())), | |||
compound_engine_2_kernel_lib_name_( | |||
const_cast<std::map<std::string, std::string>&>(getKernelManager().GetCompoundEngineKernelLibName())), | |||
engine_map_(const_cast<std::map<std::string, DNNEnginePtr>&>(getDNNEngindManager().GetAllEngines())) { | |||
Reset(); | |||
} | |||
GeRunningEnvFaker& GeRunningEnvFaker::Reset() { | |||
InitEnv& init_env = InitEnv::GetInstance(); | |||
FakeOpRepo::Reset(); | |||
init_env.reset(ops_kernel_info_stores_, ops_kernel_builders_); | |||
init_env.reset(ops_kernel_info_stores_, ops_kernel_builders_, ops_kernel_optimizers_, | |||
compound_engines_contains_, compound_engine_2_kernel_lib_name_, engine_map_); | |||
flush(); | |||
return *this; | |||
} | |||
@@ -91,13 +108,17 @@ GeRunningEnvFaker& GeRunningEnvFaker::Install(const EnvInstaller& installer) { | |||
installer.InstallTo(ops_kernel_info_stores_); | |||
installer.InstallTo(ops_kernel_optimizers_); | |||
installer.InstallTo(ops_kernel_builders_); | |||
installer.InstallTo(compound_engines_contains_); | |||
installer.InstallTo(compound_engine_2_kernel_lib_name_); | |||
installer.InstallTo(engine_map_); | |||
flush(); | |||
return *this; | |||
} | |||
void GeRunningEnvFaker::flush() { | |||
op_kernel_info_.clear(); | |||
getKernelManger().GetOpsKernelInfo(""); | |||
getKernelManager().GetOpsKernelInfo(""); | |||
} | |||
GeRunningEnvFaker& GeRunningEnvFaker::InstallDefault() { | |||
@@ -20,9 +20,10 @@ | |||
#include "external/ge/ge_api.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "ge_running_env/fake_ops_kernel_builder.h" | |||
#include "ge_running_env/fake_ns.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "ge_running_env/fake_op.h" | |||
#include "ge_running_env/fake_compound_engine.h" | |||
FAKE_NS_BEGIN | |||
#define ASSERT_OPS_LIST_SIZE(list_size) \ | |||
@@ -35,6 +36,7 @@ class GeRunningEvnFakerTest : public testing::Test { | |||
void SetUp() {} | |||
OpsKernelManager &kernel_manager = ge::GELib::GetInstance()->OpsKernelManagerObj(); | |||
OpsKernelBuilderManager &builder_manager = OpsKernelBuilderManager::Instance(); | |||
DNNEngineManager &dnnengine_manager = ge::GELib::GetInstance()->DNNEngineManagerObj(); | |||
}; | |||
TEST_F(GeRunningEvnFakerTest, test_reset_running_env_is_success) { | |||
@@ -142,7 +144,31 @@ TEST_F(GeRunningEvnFakerTest, test_install_default_fake_engine_success) { | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfoStores().size(), 7); | |||
ASSERT_EQ(builder_manager.GetAllOpsKernelBuilders().size(), 7); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfo().size(), 66); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfo().size(), 68); | |||
} | |||
TEST_F(GeRunningEvnFakerTest, test_install_fake_engine_with_optimizer_success) { | |||
GeRunningEnvFaker ge_env; | |||
ge_env.Install(FakeEngine("DNN_VM_AICPU")); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfoStores().size(), 2); | |||
ASSERT_EQ(kernel_manager.GetAllGraphOptimizerObjs().size(), 0); | |||
ASSERT_EQ(builder_manager.GetAllOpsKernelBuilders().size(), 2); | |||
} | |||
TEST_F(GeRunningEvnFakerTest, test_install_fake_engine_with_sub_engines_success) { | |||
GeRunningEnvFaker ge_env; | |||
ge_env.Install(FakeEngine("DNN_VM_AICPU")) | |||
.Install(FakeEngine("AIcoreEngine")) | |||
.Install(FakeCompoundEngine("FFTS+", {"DNN_VM_AICPU", "AIcoreEngine"}).KernelInfoStore("FFTS+")); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfoStores().size(), 3); | |||
ASSERT_EQ(kernel_manager.GetAllGraphOptimizerObjs().size(), 1); | |||
ASSERT_EQ(builder_manager.GetAllOpsKernelBuilders().size(), 4); | |||
ASSERT_EQ(kernel_manager.GetCompoundEngineContains().size(), 1); | |||
ASSERT_EQ(ge::GELib::GetInstance()->OpsKernelManagerObj().GetCompoundEngineContains().size(), 1); | |||
ASSERT_EQ(ge::GELib::GetInstance()->OpsKernelManagerObj().GetCompoundEngineKernelLibName().size(), 1); | |||
} | |||
FAKE_NS_END |
@@ -0,0 +1,151 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include "init/gelib.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "external/ge/ge_api.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "ge_graph_dsl/graph_dsl.h" | |||
#include "ge_running_env/fake_compound_engine.h" | |||
#include "ge_running_env/fake_op.h" | |||
#include "easy_graph/layout/graph_layout.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
#include "ge_graph_dsl/assert/graph_assert.h" | |||
using namespace std; | |||
using namespace ge; | |||
namespace { | |||
bool IfNodeExist(const ComputeGraphPtr &graph, std::function<bool(const NodePtr &)> filter, bool direct_node_flag = true) { | |||
for (const auto &node : graph->GetNodes(direct_node_flag)) { | |||
if (filter(node)) { | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
void GetSubgraphsWithFilter(const ComputeGraphPtr &graph, std::function<bool(const ComputeGraphPtr &)> filter, | |||
std::vector<ComputeGraphPtr> &subgraphs) { | |||
for (const auto &subgraph : graph->GetAllSubgraphs()) { | |||
if (filter(subgraph)) { | |||
subgraphs.emplace_back(subgraph); | |||
} | |||
} | |||
} | |||
bool IsAllNodeMatch(const ComputeGraphPtr &graph, std::function<bool(const NodePtr &)> filter) { | |||
for (const auto &node : graph->GetAllNodes()) { | |||
if (!filter(node)) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
} | |||
// Fixture for FFTS+ compound-engine tests: installs the default fake running
// environment plus an "FFTS+" compound engine wrapping AIcoreEngine and
// DNN_VM_AICPU, and fake GETNEXT/HCOMREDUCE ops.
class TestFftsPlus : public testing::Test {
 protected:
  GeRunningEnvFaker ge_env;
  EG_NS::GraphEasyExecutor executor;

  void SetUp() {
    // Configure the layout engine used by the graph DSL helpers.
    EG_NS::GraphLayout::GetInstance().Config(executor, nullptr);
    ge_env.InstallDefault()
        .Install(FakeCompoundEngine("FFTS+", {"AIcoreEngine", "DNN_VM_AICPU"}).KernelInfoStore("FFTS+"))
        .Install(FakeOp(GETNEXT).InfoStoreAndBuilder("AicpuLib"))
        .Install(FakeOp(HCOMREDUCE).InfoStoreAndBuilder("HcclLib"));
  }
  void TearDown() {}
};
/* | |||
* g1 | |||
* | |||
* ┌──────────┐ (0,1) ┌────────┐ (0,0) ┌────────┐ | |||
* │ const │ ───────> │ less │ ───────> │ reduce │ | |||
* └──────────┘ └────────┘ └────────┘ | |||
* ∧ | |||
* │ (0,0) | |||
* │ | |||
* ┌──────────┐ (0,0) ┌────────┐ (0,1) ┌────────┐ | |||
* │ get_next │ ───────> │ add │ <─────── │ data1 │ | |||
* └──────────┘ └────────┘ └────────┘ | |||
* | |||
*/ | |||
// Builds graph g1 (diagram above), compiles it through a Session, then checks
// that the FFTS+ optimizer collapsed the compute nodes into a single
// PartitionedCall subgraph whose nodes all carry a thread scope id and share
// the parent node's stream id.
TEST_F(TestFftsPlus, test_ffts_plus) {
  auto tensor = std::make_shared<GeTensor>();
  uint32_t value = 0;
  // Named cast instead of the original C-style cast.
  tensor->SetData(reinterpret_cast<uint8_t *>(&value), sizeof(uint32_t));

  DEF_GRAPH(g1) {
    CHAIN(NODE("get_next", GETNEXT)->NODE("add", ADD));
    CHAIN(NODE("data1", DATA)->NODE("add")->NODE("less", LESS)->NODE("reduce", HCOMREDUCE));
    // Fix: use the NODE macro here — the original wrote ->Node("less"),
    // inconsistent with every other link in these chains.
    CHAIN(NODE("const", OP_CFG(CONSTANTOP).Attr("value", tensor))->NODE("less"));
  };
  auto graph = ToGeGraph(g1);

  // new session & add graph
  map<AscendString, AscendString> options;
  Session session(options);
  auto ret = session.AddGraph(1, graph, options);
  EXPECT_EQ(ret, SUCCESS);
  // build input tensor
  std::vector<InputTensorInfo> inputs;
  // build_graph through session
  ret = session.BuildGraph(1, inputs);
  EXPECT_EQ(ret, SUCCESS);

  CHECK_GRAPH(PreRunAfterBuild) {
    // original compute nodes are gone; a PartitionedCall remains
    ASSERT_FALSE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetName() == "get_next"; }));
    ASSERT_FALSE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetName() == "add"; }));
    ASSERT_FALSE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetName() == "less"; }));
    ASSERT_TRUE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetType() == PARTITIONEDCALL; }));
    // exactly one subgraph exists, flagged as the FFTS+ subgraph
    ASSERT_EQ(graph->GetAllSubgraphs().size(), 1);
    std::vector<ComputeGraphPtr> subgraphs;
    GetSubgraphsWithFilter(graph,
                           [](const ComputeGraphPtr &graph) {
                             const auto &parent_node = graph->GetParentNode();
                             if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) {
                               return false;
                             }
                             return parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH);
                           },
                           subgraphs);
    ASSERT_EQ(subgraphs.size(), 1);
    // every subgraph node is tagged with a thread scope id
    const auto &subgraph = subgraphs[0];
    ASSERT_TRUE(subgraph != nullptr);
    ASSERT_TRUE(IsAllNodeMatch(subgraph,
                               [](const NodePtr &node) {
                                 return node->GetOpDesc()->HasAttr(ATTR_NAME_THREAD_SCOPE_ID);
                               }));
    // and shares the parent node's stream id
    const auto &parent_node = subgraph->GetParentNode();
    ASSERT_TRUE(parent_node != nullptr);
    ASSERT_TRUE(parent_node->GetOpDesc() != nullptr);
    int64_t stream_id = parent_node->GetOpDesc()->GetStreamId();
    ASSERT_TRUE(IsAllNodeMatch(subgraph,
                               [stream_id](const NodePtr &node) {
                                 return node->GetOpDesc()->GetStreamId() == stream_id;
                               }));
  };
}
@@ -19,6 +19,11 @@ | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "framework/common/types.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "easy_graph/layout/graph_layout.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
#include "ge_graph_dsl/graph_dsl.h" | |||
#include "ge_graph_dsl/assert/graph_assert.h" | |||
@@ -94,9 +99,13 @@ Graph BuildV1ControlFlowGraph() { | |||
} | |||
} // namespace | |||
class FrameworkTest : public testing::Test { | |||
EG_NS::GraphEasyExecutor executor; | |||
protected: | |||
GeRunningEnvFaker ge_env; | |||
void SetUp() { ge_env.InstallDefault(); } | |||
void SetUp() { | |||
ge_env.InstallDefault(); | |||
EG_NS::GraphLayout::GetInstance().Config(executor, nullptr); | |||
} | |||
void TearDown() {} | |||
}; | |||
@@ -21,11 +21,21 @@ | |||
#include "framework/common/types.h" | |||
#include "graph/ge_local_context.h" | |||
#include "ge_graph_dsl/graph_dsl.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "easy_graph/layout/graph_layout.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
namespace ge { | |||
class STEST_opt_info : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
GeRunningEnvFaker ge_env; | |||
EG_NS::GraphEasyExecutor executor; | |||
void SetUp() { | |||
EG_NS::GraphLayout::GetInstance().Config(executor, nullptr); | |||
ge_env.InstallDefault(); | |||
} | |||
void TearDown() {} | |||
}; | |||
@@ -670,6 +670,7 @@ set(MULTI_PARTS_TEST_FILES | |||
"graph/build/stream_allocator_unittest.cc" | |||
"graph/build/model_builder_unittest.cc" | |||
"graph/build/mem_assigner_unittest.cc" | |||
"graph/build/graph_mem_assigner_unittest.cc" | |||
"graph/build/task_generator_unittest.cc" | |||
"graph/build/buffer_pool_mem_assigner_unittest.cc" | |||
"graph/execute/graph_execute_unittest.cc" | |||
@@ -935,6 +936,7 @@ target_link_libraries(ge_single_op PRIVATE | |||
ascend_protobuf | |||
json | |||
c_sec | |||
runtime_stub | |||
) | |||
# ut binary | |||
@@ -0,0 +1,90 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include <memory> | |||
#include "graph/anchor.h" | |||
#include "graph/attr_value.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "omg/omg_inner_types.h" | |||
#include "../passes/graph_builder_utils.h" | |||
#define protected public | |||
#define private public | |||
#include "graph/build/memory/binary_block_mem_assigner.h" | |||
#include "graph/build/memory/graph_mem_assigner.h" | |||
#include "graph/build/memory/hybrid_mem_assigner.h" | |||
#include "graph/build/memory/max_block_mem_assigner.h" | |||
#include "graph/manager/graph_var_manager.h" | |||
#include "graph/manager/graph_mem_manager.h" | |||
#undef protected | |||
#undef private | |||
using namespace std; | |||
using namespace testing; | |||
using namespace ge; | |||
using domi::GetContext; | |||
class UtestGraphMemAssigner : public testing::Test { | |||
public: | |||
ge::ComputeGraphPtr BuildGraphWithVar(int64_t session_id) { | |||
// init | |||
MemManager::Instance().Initialize(std::vector<rtMemType_t>({RT_MEMORY_HBM})); | |||
VarManager::Instance(session_id)->Init(0, 0, 0, 0); | |||
ge::ut::GraphBuilder builder("graph"); | |||
auto var_input = builder.AddNode("var", "Variable", 1, 1); | |||
auto const_input = builder.AddNode("const", "Const", 1, 1); | |||
auto assign = builder.AddNode("assgin", "Assign", 2, 1); | |||
// add link | |||
builder.AddDataEdge(var_input, 0, assign, 0); | |||
builder.AddDataEdge(const_input, 0, assign, 1); | |||
// set offset | |||
var_input->GetOpDesc()->SetOutputOffset({10000}); | |||
const_input->GetOpDesc()->SetOutputOffset({1000}); | |||
assign->GetOpDesc()->SetInputOffset({10100, 1000}); | |||
assign->GetOpDesc()->SetOutputOffset({10100}); | |||
// set inner offset | |||
int64_t inner_offset = 100; | |||
ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableInputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); | |||
ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableOutputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); | |||
// add var addr | |||
VarManager::Instance(session_id)->var_resource_->var_offset_map_.emplace(10000, RT_MEMORY_HBM); | |||
return builder.GetGraph(); | |||
} | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
}; | |||
TEST_F(UtestGraphMemAssigner, graph_memory_assign_fail_case) { | |||
ge::ComputeGraphPtr compute_graph = make_shared<ge::ComputeGraph>(""); | |||
GraphMemoryAssigner graph_mem_assigner(compute_graph); | |||
MemoryOffset mem_offset(2, 10000); | |||
graph_mem_assigner.memory_offset_.insert({2, mem_offset}); | |||
VarManager::Instance(0)->graph_mem_max_size_ = 0; | |||
map<uint64_t, size_t> mem_type_to_offset = {}; | |||
Status ret = graph_mem_assigner.ReAssignMemory(false, mem_type_to_offset); | |||
EXPECT_EQ(ret, ACL_ERROR_GE_MEMORY_ALLOCATION); | |||
} | |||
@@ -23,15 +23,20 @@ | |||
#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
namespace ge { | |||
extern OpDescPtr CreateOpDesc(string name, string type); | |||
class UtestKernelExTaskInfo : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() {} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
// test kernel_ex_task_Release | |||
@@ -209,4 +214,136 @@ TEST_F(UtestKernelExTaskInfo, parse_topic_type_failed_2) { | |||
KernelExTaskInfo kernel_ex_task_info; | |||
EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); | |||
} | |||
TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
KernelExTaskInfo kernel_ex_task_info; | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
DavinciModel davinci_model(0, nullptr); | |||
kernel_ex_task_info.davinci_model_ = &davinci_model; | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||
} | |||
TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_01) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
KernelExTaskInfo kernel_ex_task_info; | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
DavinciModel davinci_model(0, nullptr); | |||
kernel_ex_task_info.davinci_model_ = &davinci_model; | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
kernel_ex_task_info.is_blocking_aicpu_op_ = true; | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
} | |||
TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_02) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
KernelExTaskInfo kernel_ex_task_info; | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
DavinciModel davinci_model(0, nullptr); | |||
kernel_ex_task_info.davinci_model_ = &davinci_model; | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||
} | |||
} // namespace ge |
@@ -22,15 +22,20 @@ | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "graph/load/model_manager/task_info/kernel_task_info.h" | |||
#include "graph/load/model_manager/task_info/hccl_task_info.h" | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
namespace ge { | |||
extern OpDescPtr CreateOpDesc(string name, string type); | |||
class UtestKernelTaskInfo : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() {} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
// test KernelTaskInfo Init. | |||
@@ -1240,4 +1245,135 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_super_kernel_info) { | |||
EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); | |||
} | |||
TEST_F(UtestKernelTaskInfo, blocking_aicpu_op) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
op_desc->SetId(0); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
DavinciModel davinci_model(0, nullptr); | |||
davinci_model.op_list_.emplace(0, op_desc); | |||
KernelTaskInfo kernel_task_info; | |||
kernel_task_info.op_desc_ = op_desc; | |||
kernel_task_info.davinci_model_ = &davinci_model; | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
kernel_task_info.op_desc_ = op_desc; | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
} | |||
TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_01) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
op_desc->SetId(0); | |||
DavinciModel davinci_model(0, nullptr); | |||
davinci_model.op_list_.emplace(0, op_desc); | |||
KernelTaskInfo kernel_task_info; | |||
kernel_task_info.davinci_model_ = &davinci_model; | |||
kernel_task_info.op_desc_ = op_desc; | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
kernel_task_info.is_blocking_aicpu_op_ = true; | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
} | |||
TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_02) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
op_desc->SetId(0); | |||
DavinciModel davinci_model(0, nullptr); | |||
davinci_model.op_list_.emplace(0, op_desc); | |||
KernelTaskInfo kernel_task_info; | |||
kernel_task_info.davinci_model_ = &davinci_model; | |||
kernel_task_info.op_desc_ = op_desc; | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
} | |||
} // namespace ge |
@@ -131,7 +131,7 @@ TEST_F(UtestGraphOptimizeTest, test_OptimizeAfterStage1_succ) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerSuccess>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test_graph"); | |||
GraphOptimize base_optimize; | |||
@@ -167,7 +167,7 @@ TEST_F(UtestGraphOptimizeTest, test_OptimizeAfterStage1_fail) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerFail>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ret = base_optimize.OptimizeAfterStage1(compute_graph); | |||
EXPECT_EQ(ret, FAILED); | |||
@@ -183,7 +183,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_succ) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerSuccess>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test_graph"); | |||
GraphOptimize base_optimize; | |||
@@ -197,7 +197,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_succ) { | |||
ret = base_optimize.OptimizeOriginalGraphForQuantize(compute_graph); | |||
EXPECT_EQ(ret, SUCCESS); | |||
ret = base_optimize.OptimizeGraphBeforeBuildForRts(compute_graph); | |||
ret = base_optimize.OptimizeGraphBeforeBuild(compute_graph); | |||
EXPECT_EQ(ret, SUCCESS); | |||
ret = base_optimize.OptimizeWholeGraph(compute_graph); | |||
@@ -215,7 +215,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_fail) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerFail>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test_graph"); | |||
GraphOptimize base_optimize; | |||
@@ -229,7 +229,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_fail) { | |||
ret = base_optimize.OptimizeOriginalGraphForQuantize(compute_graph); | |||
EXPECT_EQ(ret, FAILED); | |||
ret = base_optimize.OptimizeGraphBeforeBuildForRts(compute_graph); | |||
ret = base_optimize.OptimizeGraphBeforeBuild(compute_graph); | |||
EXPECT_EQ(ret, FAILED); | |||
ret = base_optimize.OptimizeWholeGraph(compute_graph); | |||
@@ -367,7 +367,7 @@ TEST(UtestIrBuild, check_data_op_attr_index_valid) { | |||
}; | |||
ModelBufferData model; | |||
graphStatus ret = aclgrphBuildModel(graph, build_options, model); | |||
EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||
EXPECT_EQ(ret, ge::FAILED); | |||
} | |||
// set attr index invalid, when not set input shape range | |||
@@ -377,7 +377,7 @@ TEST(UtestIrBuild, check_data_attr_index_succ_no_input_range) { | |||
const map<string, string> build_options; | |||
ModelBufferData model; | |||
graphStatus ret = aclgrphBuildModel(graph, build_options, model); | |||
EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||
EXPECT_EQ(ret, ge::FAILED); | |||
} | |||
TEST(UtestIrBuild, check_modify_mixlist_param) { | |||
@@ -27,7 +27,7 @@ | |||
#include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | |||
#undef protected | |||
#undef private | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
using namespace std; | |||
using namespace testing; | |||
@@ -43,8 +43,12 @@ using namespace hybrid; | |||
class UtestAicpuNodeExecutor : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { | |||
@@ -164,5 +168,222 @@ TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { | |||
} | |||
TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task) { | |||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||
ge_root_model->SetModelName("test_name"); | |||
HybridModel hybrid_model(ge_root_model); | |||
NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); | |||
ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); | |||
std::unique_ptr<NodeItem> new_node; | |||
ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||
NodeItem *node_item = new_node.get(); | |||
node_item->input_start = 0; | |||
node_item->output_start = 0; | |||
node_item->is_dynamic = true; | |||
node_item->shape_inference_type = DEPEND_SHAPE_RANGE; | |||
GraphItem graph_item; | |||
graph_item.node_items_.emplace_back(node_item); | |||
graph_item.total_inputs_ = 1; | |||
graph_item.total_outputs_ = 1; | |||
GraphExecutionContext graph_execution_context; | |||
SubgraphContext subgraph_context(&graph_item, &graph_execution_context); | |||
ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||
graph_execution_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||
auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||
ASSERT_NE(node_state, nullptr); | |||
uint64_t value_0 = 512; | |||
TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetInput(*node_item, 0, in_tensor0); | |||
TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
domi::TaskDef task_def; | |||
AicpuTaskStruct args; | |||
args.head.length = sizeof(args); | |||
args.head.ioAddrNum = 2; | |||
kernel_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_def.set_args_size(args.head.length); | |||
domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); | |||
*kernel_def_tmp = kernel_def; | |||
AicpuNodeTask aicpu_node_task(node_item, task_def); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
node_item->shape_inference_type = DEPEND_COMPUTE; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
kernel_ex_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_ex_def.set_args_size(args.head.length); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||
AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
} | |||
TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task_fail) { | |||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||
ge_root_model->SetModelName("test_name"); | |||
HybridModel hybrid_model(ge_root_model); | |||
NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); | |||
ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); | |||
std::unique_ptr<NodeItem> new_node; | |||
ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||
NodeItem *node_item = new_node.get(); | |||
node_item->input_start = 0; | |||
node_item->output_start = 0; | |||
node_item->is_dynamic = true; | |||
node_item->shape_inference_type = DEPEND_SHAPE_RANGE; | |||
GraphItem graph_item; | |||
graph_item.node_items_.emplace_back(node_item); | |||
graph_item.total_inputs_ = 1; | |||
graph_item.total_outputs_ = 1; | |||
GraphExecutionContext graph_execution_context; | |||
SubgraphContext subgraph_context(&graph_item, &graph_execution_context); | |||
ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||
graph_execution_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||
auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||
ASSERT_NE(node_state, nullptr); | |||
uint64_t value_0 = 512; | |||
TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetInput(*node_item, 0, in_tensor0); | |||
TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
domi::TaskDef task_def; | |||
AicpuTaskStruct args; | |||
args.head.length = sizeof(args); | |||
args.head.ioAddrNum = 2; | |||
kernel_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_def.set_args_size(args.head.length); | |||
domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); | |||
*kernel_def_tmp = kernel_def; | |||
AicpuNodeTask aicpu_node_task(node_item, task_def); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
node_item->shape_inference_type = DEPEND_COMPUTE; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
kernel_ex_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_ex_def.set_args_size(args.head.length); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||
AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
} | |||
} // namespace ge | |||
@@ -19,6 +19,7 @@ | |||
#include "graph/load/model_manager/model_utils.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
#include "runtime/rt.h" | |||
#define protected public | |||
@@ -30,6 +31,7 @@ | |||
#include "external/register/op_tiling_registry.h" | |||
#undef private | |||
#undef protected | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
using namespace std; | |||
using namespace testing; | |||
@@ -38,9 +40,13 @@ using namespace optiling; | |||
class UtestSingleOpTask : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() {} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
TEST_F(UtestSingleOpTask, test_build_kernel_task) { | |||
@@ -237,3 +243,124 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) { | |||
ASSERT_EQ(ret, PARAM_INVALID); | |||
} | |||
} | |||
// Happy path for a blocking AICPU op on AiCpuCCTask: a kernel_ext_info buffer
// carrying one ASYNCWAIT record must parse and the kernel must launch.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_01) {
  // Build the ext-info buffer: one AicpuExtInfo header followed by an
  // AsyncWaitInfo payload, all zero-initialized.
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;
  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);
  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuCCTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream = nullptr;  // initialize: rtStreamCreate is asserted, not guaranteed
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // Release the stream so repeated runs do not leak runtime resources.
  ASSERT_EQ(rtStreamDestroy(stream), RT_ERROR_NONE);
}
// Same scenario as test_blocking_aicpu_op_01 but for the TF-kernel path
// (AiCpuTask instead of AiCpuCCTask).
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_02) {
  // Ext-info buffer: AicpuExtInfo header + zeroed AsyncWaitInfo payload.
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;
  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);
  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream = nullptr;  // initialize: rtStreamCreate is asserted, not guaranteed
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // Release the stream so repeated runs do not leak runtime resources.
  ASSERT_EQ(rtStreamDestroy(stream), RT_ERROR_NONE);
}
// Failure injection for the blocking-AICPU-op path: each RTS_STUB_* forces a
// single runtime-API failure, and the corresponding SetExtInfoAndType /
// LaunchKernel call must surface FAILED. Ends on the "blocking not supported"
// capability, which must downgrade gracefully to SUCCESS.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
  // Ext-info buffer: AicpuExtInfo header + zeroed AsyncWaitInfo payload.
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;
  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);
  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream = nullptr;  // initialize: rtStreamCreate is asserted, not guaranteed
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  // Baseline: everything succeeds before any fault is injected.
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // rtGetDevice failure during ext-info parsing.
  RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  // rtGetDeviceCapability failure; injected twice because the stub is consumed
  // once per call and two distinct call sites are exercised (same pattern as
  // the hybrid-executor variant of this test).
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  // Capability query succeeds but reports an out-of-range value.
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  // rtGetDevice failure at launch time.
  RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  // Event wait / event reset failures at launch time.
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  // Device reports "blocking op not supported": both calls must still succeed.
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // Release the stream so repeated runs do not leak runtime resources.
  EXPECT_EQ(rtStreamDestroy(stream), RT_ERROR_NONE);
}
@@ -62,6 +62,7 @@ enum FWKTaskExtInfoType { | |||
FWK_ADPT_EXT_SESSION_INFO, | |||
FWK_ADPT_EXT_BITMAP, | |||
FWK_ADPT_EXT_TOPIC_TYPE, | |||
FWK_ADPT_EXT_ASYNCWAIT, | |||
FWK_ADPT_EXT_INVALID | |||
}; | |||
@@ -80,6 +81,12 @@ enum FWKExtUpdateAddrType { | |||
FWK_ADPT_UPDATE_INPUT_OUTPUT | |||
}; | |||
// Wait-type values carried in the ASYNCWAIT extended-info record.
enum FWKExtWaitType {
  FWK_ADPT_WAIT_TYPE_NULL = 0,    // no wait requested
  FWK_ADPT_WAIT_TYPE_EVENT = 1,   // wait on a runtime event
  FWK_ADPT_WAIT_TYPE_INVALID = 2  // sentinel: first invalid value
};
#pragma pack(push, 1) | |||
// API Parameter Structure | |||
struct StrFWKKernel { | |||
@@ -133,6 +140,15 @@ struct ResultSummary { | |||
uint64_t raw_data_size; // size of raw data | |||
}; | |||
#pragma pack(pop) | |||
#pragma pack(push, 1)
// Async-wait record appended to the kernel ext-info buffer; 1-byte packed
// because the layout is a wire format shared with the AICPU framework.
struct AsyncWait {
  uint8_t waitType;   // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
  uint32_t waitId;    // wait id, GE refresh
  uint32_t timeOut;   // reserved
  uint64_t reserved;
};
#pragma pack(pop)
} // end namespace FWKAdapter | |||
} // namespace aicpu | |||
@@ -52,6 +52,14 @@ typedef enum tagRtAicpuScheType { | |||
SCHEDULE_HARDWARE, /* HWTS Schedule */ | |||
} rtAicpuScheType; | |||
// Capability values reported by rtGetDeviceCapability().
typedef enum tagRtDeviceCapabilityType {
  RT_SCHEDULE_SOFTWARE = 0,              // SoftWare Schedule
  RT_SCHEDULE_SOFTWARE_OPT = 1,          // optimized software schedule
  RT_SCHEDULE_HARDWARE = 2,              // HWTS Schedule
  RT_AICPU_BLOCKING_OP_NOT_SUPPORT = 3,  // ts cannot block on AICPU ops
  RT_AICPU_BLOCKING_OP_SUPPORT = 4,      // 1910/1980/1951 ts support AICPU blocking operation
} rtDeviceCapabilityType;
typedef enum tagRtVersion { | |||
VER_BEGIN = 0, | |||
VER_NA = VER_BEGIN, | |||
@@ -65,6 +65,7 @@ typedef enum tagRtFeatureType { | |||
typedef enum tagRtDeviceFeatureType { | |||
FEATURE_TYPE_SCHE, | |||
FEATURE_TYPE_BLOCKING_OPERATOR, | |||
FEATURE_TYPE_END, | |||
} rtDeviceFeatureType_t; | |||
@@ -78,6 +79,17 @@ typedef enum tagMemoryInfo { | |||
MEMORY_INFO_RSV | |||
} rtMemoryInfo_t; | |||
// Hardware module selector used when querying per-module device information.
// NOTE(review): the typedef name keeps the "tag" prefix (tagRtDeviceModuleType_t),
// unlike sibling enums; renaming would break existing callers, so it is kept.
typedef enum tagRtDeviceModuleType {
  RT_MODULE_TYPE_SYSTEM = 0,       // whole-system scope
  RT_MODULE_TYPE_AICPU = 1,
  RT_MODULE_TYPE_CCPU = 2,
  RT_MODULE_TYPE_DCPU = 3,
  RT_MODULE_TYPE_AICORE = 4,
  RT_MODULE_TYPE_TSCPU = 5,
  RT_MODULE_TYPE_PCIE = 6,
  RT_MODULE_TYPE_VECTOR_CORE = 7
} tagRtDeviceModuleType_t;
/** | |||
* @ingroup dvrt_dev | |||
* @brief get total device number. | |||
@@ -356,7 +356,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -652,4 +652,3 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); | |||
#endif | |||
#endif // __CCE_RUNTIME_KERNEL_H__ | |||
@@ -28,5 +28,7 @@ | |||
#include "rt_model.h" | |||
#include "stream.h" | |||
#include "rt_ffts.h" | |||
#include "rt_ffts_plus.h" | |||
#include "rt_ffts_plus_define.h" | |||
#endif // __CCE_RUNTIME_RT_H__ |
@@ -0,0 +1,33 @@ | |||
/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 * Description: ffts plus interface
 */
#ifndef __CCE_RUNTIME_FFTS_PLUS_H
#define __CCE_RUNTIME_FFTS_PLUS_H
#include "base.h"
#include "rt_stars_define.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif
#pragma pack(push)
#pragma pack (1)
// Launch descriptor for an FFTS+ task: the submission queue entry plus the
// buffer that holds every context of the FFTS+ subgraph.
typedef struct tagFftsPlusTaskInfo {
  const rtFftsPlusSqe_t *fftsPlusSqe;
  const void *descBuf;  // include total context
  size_t descBufLen;    // the length of descBuf
} rtFftsPlusTaskInfo_t;
#pragma pack(pop)
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt);
RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif  // __CCE_RUNTIME_FFTS_PLUS_H
@@ -0,0 +1,682 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* Description: the definition of ffts plus | |||
*/ | |||
#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#include "base.h" | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
#pragma pack(push) | |||
#pragma pack (1) | |||
// hardware context type
// Value stored in the hardwareContextType byte of every FFTS+ context.
// Note: value 2 is intentionally unassigned in this enumeration.
typedef enum tagFftsPlusHwType {
  RT_HW_CTX_TYPE_AIC = 0,
  RT_HW_CTX_TYPE_AIV = 1,
  RT_HW_CTX_TYPE_NOTIFY_WAIT = 3,
  RT_HW_CTX_TYPE_NOTIFY_RECORD = 4,
  RT_HW_CTX_TYPE_WRITE_VALUE = 5,
  RT_HW_CTX_TYPE_MIX_AIC = 6,
  RT_HW_CTX_TYPE_MIX_AIV = 7,
  RT_HW_CTX_TYPE_SDMA = 8,
  RT_HW_CTX_TYPE_FLUSH_DATA = 9,
  RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
  RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
  RT_HW_CTX_TYPE_AICPU = 12,
  RT_HW_CTX_TYPE_LOAD = 13,
  RT_HW_CTX_TYPE_MAX,
}rtFftsPlusHwType_t;
// software context type (fixed: previous comment wrongly said "hardware")
// Value stored in the softwareContextType byte of every FFTS+ context.
typedef enum tagFftsPlusSoftType {
  RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
  RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
  RT_SOFT_CTX_TYPE_AT_START = 3,
  RT_SOFT_CTX_TYPE_AT_END = 4,
  RT_SOFT_CTX_TYPE_LABEL = 5,
  RT_SOFT_CTX_TYPE_MAX,
}rtFftsPlusSoftType_t;
// condition type
// Comparison operator used by the condition-switch context (cmpValue1 vs cmpValue2).
typedef enum tagFftsPlusCondType {
  RT_COND_TYPE_EQUAL = 0,
  RT_COND_TYPE_NOTEQUAL = 1,
  RT_COND_TYPE_GREATER = 2,
  RT_COND_TYPE_GREATER_OR_EQUAL = 3,
  RT_COND_TYPE_LESS = 4,
  RT_COND_TYPE_LESS_OR_EQUAL = 5,
  RT_COND_TYPE_MAX,
}rtFftsPlusCondType_t;
// the definition of ffts plus context | |||
#define RT_CTX_SUCCESSOR_NUM 26 | |||
// ffts plus common context | |||
typedef struct tagFftsPlusComCtx { | |||
// 0-3 bytes | |||
uint8_t hardwareContextType; | |||
uint8_t softwareContextType; | |||
uint8_t successorNum; | |||
uint8_t res1 : 7; | |||
uint8_t aten : 1; | |||
// 4-7 | |||
uint8_t res2; | |||
uint8_t res3; | |||
uint8_t predCntInit; | |||
uint8_t predCnt; | |||
// 8-11 | |||
uint32_t res4; | |||
// 12-63 | |||
uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; | |||
// 64-71 | |||
uint32_t res5[2]; | |||
// 72-75 | |||
uint16_t threadId; | |||
uint16_t threadDim; | |||
// 76-127 | |||
uint32_t res6[13]; | |||
} rtFftsPlusComCtx_t; | |||
// aic/aiv context
// 128-byte FFTS+ context for a pure AI-core (AIC) or AI-vector (AIV) kernel;
// carries task-param pointer and non-tail/tail start PCs and block dims.
typedef struct tagFftsPlusAicAivCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t stat: 1;
  uint16_t schem: 2;
  uint16_t icachePrefetchCnt: 5;
  uint16_t res5: 7;
  uint16_t atm: 1;
  uint16_t prefetchEnableBitmap: 4;
  uint16_t res6: 4;
  uint16_t prefetchOnceBitmap: 4;
  uint16_t res7: 4;
  // 68-71
  uint32_t res8;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t nonTailBlockdim;
  uint16_t tailBlockdim;
  // 80-83
  uint32_t taskParamPtrBaseL;
  // 84-87
  uint16_t taskParamPtrBaseH;
  uint16_t taskParamPtrOffset;
  // 88-95
  uint32_t res9;
  uint32_t res10;
  // 96-103
  uint32_t nonTailTaskStartPcL;
  uint16_t nonTailTaskStartPcH;
  uint16_t res11;
  // 104-111
  uint32_t tailTaskStartPcL;
  uint16_t tailTaskStartPcH;
  uint16_t res12;
  // 112-119
  uint32_t res13;
  uint32_t res14;
  // 120-127
  uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
} rtFftsPlusAicAivCtx_t;
// mix aic/aiv context
// 128-byte FFTS+ context for a mixed AIC+AIV kernel: separate task-param
// pointers and start PCs for the AIC and AIV parts, plus block ratios.
typedef struct tagFftsPlusMixAicAivCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t stat: 1;
  uint16_t schem: 2;
  uint16_t icachePrefetchCnt: 5;
  uint16_t res5: 7;
  uint16_t atm: 1;
  uint16_t prefetchEnableBitmap: 4;
  uint16_t res6: 4;
  uint16_t prefetchOnceBitmap: 4;
  uint16_t res7: 4;
  // 68-71
  uint16_t res8;
  uint8_t nonTailBlockRatioN;
  uint8_t tailBlockRatioN;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t nonTailBlockdim;
  uint16_t tailBlockdim;
  // 80-87
  uint32_t aicTaskParamPtrL;
  uint16_t aicTaskParamPtrH;
  uint16_t aicTaskParamPtrOffset;
  // 88-95
  uint32_t aivTaskParamPtrL;
  uint16_t aivTaskParamPtrH;
  uint16_t aivTaskParamPtrOffset;
  // 96-103
  uint32_t nonTailAicTaskStartPcL;
  uint16_t nonTailAicTaskStartPcH;
  uint16_t tailAicTaskStartPcH;
  // 104-111
  uint32_t tailAicTaskStartPcL;
  uint32_t nonTailAivTaskStartPcL;
  // 112-119
  uint16_t nontailAivTaskStartPcH;
  uint16_t tailAivTaskStartPcH;
  uint32_t tailAivTaskStartPcL;
  // 120-127
  uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
} rtFftsPlusMixAicAivCtx_t;
// sdma context (fixed typo: was "adma")
// 128-byte FFTS+ context describing an SDMA copy: source/destination
// stream ids, base addresses and non-tail/tail transfer lengths.
typedef struct tagFftsPlusSdmaCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint8_t sat: 1;
  uint8_t res5: 7;
  uint8_t res6: 7;
  uint8_t atm: 1;
  uint16_t res7;
  // 68-71
  uint32_t res8;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  // 80-83
  uint16_t sourceStreamId;
  uint16_t sourceSubstreamId;
  // 84-87
  uint16_t destinationStreamId;
  uint16_t destinationSubstreamId;
  // 88-127
  uint32_t sourceAddressBaseL;
  uint32_t sourceAddressBaseH;
  uint32_t sourceAddressOffset;
  uint32_t destinationAddressBaseL;
  uint32_t destinationAddressBaseH;
  uint32_t destinationAddressOffset;
  uint32_t nonTailDataLength;
  uint32_t tailDataLength;
  uint32_t res9[2];
} rtFftsPlusSdmaCtx_t;
// ffts plus notify record/wait context
// 128-byte FFTS+ context for notify record and notify wait operations;
// notifyIdBase selects the base notify id used by the scheduler.
typedef struct tagFftsPlusNotifyCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res5: 15;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71
  uint32_t res7;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t notifyIdBase;
  uint16_t res8;
  // 80-127
  uint32_t res9[12];
} rtFftsPlusNotifyCtx_t;
// write Value context
// 128-byte FFTS+ context that writes up to four 32-bit values to a target
// address; aw* bit-fields carry the AXI write attributes.
typedef struct tagFftsPlusWriteValueCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res5: 15;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71
  uint32_t res7;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint8_t awSize: 3;
  uint8_t snoop: 1;
  uint8_t res8: 4;
  uint8_t awCache: 4;
  uint8_t awProt: 3;
  uint8_t va: 1;
  uint16_t res9;
  // 80-83
  uint32_t writeAddressBaseL;
  // 84-87
  uint32_t writeAddressBaseH: 17;
  uint32_t res10: 15;
  // 88-91
  uint32_t writeAddressOffset;
  // 92-95
  uint32_t res11;
  // 96-111
  uint32_t writeValue[4]; // write_value_00 -> write_value_03
  // 112-127
  uint32_t res12[4];
} rtFftsPlusWriteValueCtx_t;
// ai cpu context
// 128-byte FFTS+ context for an AICPU kernel: topic routing (topicId /
// subtopicId / groupId), user data words and the task-param offset.
typedef struct tagFftsPlusAiCpuCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t sat: 1;
  uint16_t res5: 14;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71
  uint16_t sqeIndex;
  uint8_t kernelType: 7;
  uint8_t bm: 1;
  uint8_t topicType: 4;
  uint8_t qos: 3;
  uint8_t res7: 1;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t nonTailBlockdim;
  uint16_t tailBlockdim;
  // 80-115
  uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  // 116-119
  uint32_t res8;
  // 120-123
  uint32_t subtopicId: 12;
  uint32_t topicId: 6;
  uint32_t groupId: 6;
  uint32_t usrDataLength: 8;
  // 124-127
  uint32_t taskParamOffset;
} rtFftsPlusAiCpuCtx_t;
// data context
// 128-byte FFTS+ context for data movement (flush/invalidate/writeback):
// base address plus non-tail/tail inner/outer counts, lengths and strides.
// Note: cntInit/cnt double as consumer or producer counters depending on use.
typedef struct tagFftsPlusDataCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  uint8_t cnt; // cons_cnt / prod_cnt
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res5: 15;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71 (fixed: comment previously said 68-81)
  uint16_t origConsumerCounter;
  uint16_t runConsumerCounter;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint32_t res7;
  // 80-83
  uint32_t addressBaseL;
  // 84-87
  uint32_t addressBaseH;
  // 88-91
  uint32_t addressOffset;
  // 92-95
  uint32_t res8;
  // 96-99
  uint16_t nonTailNumOutter;
  uint16_t nonTailNumInner;
  // 100-103
  uint32_t nonTailLengthInner;
  // 104-107
  uint32_t nonTailStrideOutter;
  // 108-111
  uint32_t nonTailStrideInner;
  // 112-115
  uint16_t tailNumOutter;
  uint16_t tailNumInner;
  // 116-119
  uint32_t tailLengthInner;
  // 120-123
  uint32_t tailStrideOutter;
  // 124-127
  uint32_t tailStrideInner;
} rtFftsPlusDataCtx_t;
// at start context | |||
typedef struct tagFftsPlusAtStartCtx { | |||
// 0-3 bytes | |||
uint8_t hardwareContextType; | |||
uint8_t softwareContextType; | |||
uint8_t successorNum; | |||
uint8_t res1: 7; | |||
uint8_t aten: 1; | |||
// 4-7 | |||
uint8_t res2; | |||
uint8_t res3; | |||
uint8_t predCntInit; | |||
uint8_t predCnt; | |||
// 8-11 | |||
uint32_t res4; | |||
// 12-63 | |||
uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; | |||
// 64-67 | |||
uint16_t res5; | |||
uint16_t res6; | |||
// 68-71 | |||
uint16_t res7; | |||
uint16_t res8; | |||
// 72-75 | |||
uint16_t threadId; | |||
uint16_t threadDim; | |||
// 76-79 | |||
uint16_t threadIdInit; | |||
uint16_t threadWindowSize; | |||
// 80-127 | |||
uint16_t res9[12]; | |||
} rtFftsPlusAtStartCtx_t; | |||
// at end context
// 128-byte FFTS+ context closing an auto-thread region; on completion it
// signals at-start slots and out-label slots instead of a plain successor list.
#define RT_CTX_SUCC_AT_START_SLOT_NUM 12
#define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12
typedef struct tagFftsPlusAtEndCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t atStartSlotNumber;
  uint8_t outLabelSlotNumber: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res1;
  uint8_t res2;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res3;
  // 12-59
  uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
  uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
  // 60-63
  uint16_t res4;
  uint16_t res5;
  // 64-67
  uint16_t res6;
  uint16_t res7;
  // 68-71
  uint16_t res8;
  uint16_t res9;
  // 72-75
  uint16_t threadId;
  uint16_t res10;
  // 76-79
  uint16_t res11;
  uint16_t res12;
  // 80-127
  uint32_t res13[12];
} rtFftsPlusAtEndCtx_t;
// label context
// 128-byte FFTS+ software context acting as a jump target / join point;
// mostly reserved space besides the common header and successor list.
typedef struct tagFftsPlusLabelCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-79
  uint16_t res5[8];
  // 80-127
  uint32_t res6[12];
} rtFftsPlusLabelCtx_t;
// case switch context
// 128-byte FFTS+ software context for multi-way (case) branching; loads the
// selector from up to two addresses (ld0En/ld1En) and dispatches into the
// label range [startLabelId, startLabelId + labelListLen).
typedef struct tagFftsPlusCaseSwitchCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t startLabelId;
  uint8_t labelListLen;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res2;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res3: 15;
  uint16_t atm: 1;
  uint16_t res4;
  // 68-71
  uint32_t res5;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint8_t arSize: 3;
  uint8_t snoop: 1;
  uint8_t res6: 4;
  uint8_t arCache: 4;
  uint8_t arProt: 3;
  uint8_t va: 1;
  uint16_t res7;
  // 80-83
  uint32_t loadAddress0BaseL;
  // 84-87
  uint32_t loadAddress0BaseH: 17;
  uint32_t res8: 14;
  uint32_t ld0En: 1;
  // 88-91
  uint32_t loadAddress0Offset;
  // 92-95
  uint32_t res9;
  // 96-99
  uint32_t loadAddress1BaseL;
  // 100-103
  uint32_t loadAddress1BaseH: 17;
  uint32_t res10: 14;
  uint32_t ld1En: 1;
  // 104-107
  uint32_t loadAddress1Offset;
  // 108-127
  uint32_t res11[5];
} rtFftsPlusCaseSwitchCtx_t;
// case default context
// 128-byte FFTS+ software context for the default arm of a case switch;
// shares the startLabelId/labelListLen header with the case-switch context.
typedef struct tagFftsPlusCaseDefCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t startLabelId;
  uint8_t labelListLen;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res2;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res3;
  uint16_t res4;
  // 68-127
  uint32_t res5[15];
} rtFftsPlusCaseDefCtx_t;
// condition switch context
// 128-byte FFTS+ software context for two-way branching: compares loaded
// value(s) against cmpValue1/cmpValue2 using `condition` (rtFftsPlusCondType_t)
// and follows the true or false successor list.
#define RT_CTX_TRUE_SUCCESSOR_NUM 12
#define RT_CTX_FALSE_SUCCESSOR_NUM 14
typedef struct tagFftsPlusCondSwitchCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t trueSuccessorNum;
  uint8_t falseSuccessorNum: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t condition;
  uint8_t res1;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res2;
  // 12-63
  uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
  uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
  // 64-67
  uint16_t res3: 15;
  uint16_t atm: 1;
  uint16_t res4;
  // 68-71
  uint32_t res5;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint8_t arSize: 3;
  uint8_t snoop: 1;
  uint8_t res6: 4;
  uint8_t arCache: 4;
  uint8_t arProt: 3;
  uint8_t va: 1;
  uint16_t res7;
  // 80-83
  uint32_t loadAddress0BaseL;
  // 84-87
  uint32_t loadAddress0BaseH: 17;
  uint32_t res8: 14;
  uint32_t ld0En: 1;
  // 88-91
  uint32_t loadAddress0Offset;
  // 92-95
  uint32_t res9;
  // 96-99
  uint32_t loadAddress1BaseL;
  // 100-103
  uint32_t loadAddress1BaseH: 17;
  uint32_t res10: 14;
  uint32_t ld1En: 1;
  // 104-107
  uint32_t loadAddress1Offset;
  // 108-127
  uint32_t res11[3];
  uint32_t cmpValue1;
  uint32_t cmpValue2;
} rtFftsPlusCondSwitchCtx_t;
#pragma pack(pop) | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H |
@@ -53,6 +53,7 @@ typedef enum tagModelTaskType { | |||
RT_MODEL_TASK_ALL_KERNEL, | |||
RT_MODEL_TASK_PROFILER_TRACE_EX, | |||
RT_MODEL_TASK_FFTS_TASK, | |||
RT_MODEL_TASK_FFTS_PLUS_TASK, | |||
} rtModelTaskType_t; | |||
typedef enum tagModelStreamType { | |||
@@ -0,0 +1,97 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* Description: the definition of stars | |||
*/ | |||
#ifndef __CCE_RUNTIME_STARS_DEFINE__H | |||
#define __CCE_RUNTIME_STARS_DEFINE__H | |||
#include "base.h" | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
#pragma pack(push) | |||
#pragma pack (1) | |||
// Common 8-byte header shared by STARS submission queue entries; the two
// leading bytes are flag bit-fields, followed by block dim, stream and task id.
typedef struct tagStarsSqeHeader {
  uint8_t type: 6;
  uint8_t l1Lock: 1;
  uint8_t l1Unlock: 1;
  uint8_t ie: 2;
  uint8_t preP: 2;
  uint8_t postP: 2;
  uint8_t wrCqe: 1;
  uint8_t reserved: 1;
  uint16_t blockDim;
  uint16_t rtStreamId;
  uint16_t taskId;
} rtStarsSqeHeader_t;
// ffts+ type
// fftsType value carried in the FFTS+ SQE; values 2 and 3 are reserved.
typedef enum tagFftsPlusType {
  RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved
  RT_FFTS_PLUS_TYPE_RES2 = 3, // Reserved
  RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode
} rtFftsPlusType_t;
// ffts+ sqe
// 64-byte FFTS+ submission queue entry: STARS header, context counts, the
// stack physical base and the base address of the context buffer.
// NOTE(review): Reserved12/13/14 break the lower-case "reservedN" naming of
// the other fields; renaming would change the public struct so it is kept.
typedef struct tagFftsPlusSqe {
  // 0-7 bytes
  rtStarsSqeHeader_t sqeHeader;
  // 8-11 bytes
  uint16_t fftsType: 3;
  uint16_t reserved1: 13;
  uint16_t reserved2;
  // 12-15 bytes
  uint16_t pmg: 2;
  uint16_t ns: 1;
  uint16_t partId: 8;
  uint16_t reserved3: 1;
  uint16_t qos: 4;
  uint8_t kernelCredit;
  uint8_t reserved4;
  // 16-23 bytes
  uint32_t stackPhyBaseL;
  uint32_t stackPhyBaseH;
  // 24-31 bytes
  uint16_t totalContextNum;
  uint16_t readyContextNum;
  uint16_t preloadContextNum;
  uint16_t reserved5;
  // 32-35 bytes
  uint16_t reserved6: 8;
  uint16_t reserved7: 4;
  uint16_t dsplitUnit: 3;
  uint16_t reserved8: 1;
  uint16_t prefetchOstNum: 5;
  uint16_t reserved9: 3;
  uint16_t cmaintOstNum: 5;
  uint16_t reserved10: 3;
  // 36-39 bytes
  uint16_t aicPrefetchLower: 5;
  uint16_t reserved11: 3;
  uint16_t aicPrefetchUpper: 5;
  uint16_t Reserved12: 3;
  uint16_t aivPrefetchLower: 5;
  uint16_t Reserved13: 3;
  uint16_t aivPrefetchUpper: 5;
  uint16_t Reserved14: 3;
  // 40-47 bytes
  uint32_t contextAddressBaseL;
  uint32_t contextAddressBaseH:17;
  uint32_t reserved15:15;
  // 48-63 bytes
  uint32_t reserved16[4];
} rtFftsPlusSqe_t;
#pragma pack(pop) | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STARS_DEFINE__H |