From: @nicholas_yhr | Reviewed-by: @liujunzhu, @youui | Signed-off-by: @youui | pull/937/MERGE
@@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
} // namespace | |||
namespace ge { | |||
ProfilingManager::ProfilingManager() : is_load_profiling_(false), | |||
is_execute_profiling_(false), | |||
is_training_trace_(false), | |||
subscribe_count_(0) { | |||
ProfilingManager::ProfilingManager() | |||
: is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) { | |||
prof_cb_.msprofCtrlCallback = nullptr; | |||
prof_cb_.msprofReporterCallback = nullptr; | |||
} | |||
@@ -91,19 +89,18 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
// enable profiling by env | |||
char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 }; | |||
is_load_profiling_ = false; // Change in ProfInit | |||
is_execute_profiling_ = false; | |||
if (options.profiling_mode == "1" && !options.profiling_options.empty()) { | |||
// enable profiling by ge option | |||
if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), | |||
options.profiling_options.size()) != EOK) { | |||
if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), | |||
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { | |||
GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
is_execute_profiling_ = true; | |||
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), | |||
prof_conf.options, options.profiling_options.c_str()); | |||
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options, | |||
options.profiling_options.c_str()); | |||
} else { | |||
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); | |||
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); | |||
@@ -127,11 +124,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||
return ge::PARAM_INVALID; | |||
} | |||
if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(), | |||
sizeof(options.job_id.c_str())) != EOK) { | |||
if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), | |||
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { | |||
GELOGE(INTERNAL_ERROR, "copy job_id failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str()); | |||
#endif | |||
return ge::SUCCESS; | |||
} | |||
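[Review note] The replaced call had a genuine bug: sizeof(options.job_id.c_str()) is sizeof(const char *), i.e. 8 on a 64-bit build, so at most 8 bytes of the job id were copied, with no terminator guaranteed. Illustrative sketch (64-bit target assumed):

    std::string job_id = "20210101123456789";  // 17 characters
    sizeof(job_id.c_str());                    // == sizeof(const char *) == 8
    // memcpy_s(dst, ..., job_id.c_str(), 8) silently truncated the id;
    // strncpy_s with MSPROF_OPTIONS_DEF_LEN_MAX - 1 copies it whole and terminates.

One question for the authors: destMax is now MSPROF_OPTIONS_DEF_LEN_MAX rather than sizeof(prof_conf.jobId); worth confirming the jobId buffer is at least that large.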
@@ -143,6 +141,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||
} | |||
try { | |||
Json prof_options = Json::parse(options); | |||
if (options.find(kTrainingTrace) == std::string::npos) { | |||
return ge::SUCCESS; | |||
} | |||
const std::string training_trace = prof_options[kTrainingTrace]; | |||
if (training_trace.empty()) { | |||
GELOGI("Training trace will not take effect."); | |||
@@ -158,6 +159,7 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||
if (!fp_point_.empty() && !bp_point_.empty()) { | |||
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||
} | |||
is_training_trace_ = true; | |||
} catch (...) { | |||
GELOGE(FAILED, "Json prof_conf options is invalid."); | |||
return ge::PARAM_INVALID; | |||
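[Review note] The early-return guard added above prevents a spurious parse failure: with nlohmann-style json (as the Json alias suggests), operator[] on a missing key inserts a null value, and converting null to std::string throws, so an absent training_trace key used to fall into catch (...) and be reported as invalid options. Sketch of the failure mode:

    Json prof_options = Json::parse(R"({"output":"/tmp/profiling"})");
    // Without the guard:
    //   const std::string s = prof_options[kTrainingTrace];  // null -> throws
    //   -> caught by catch (...) -> ge::PARAM_INVALID
    // The patch tests the raw option string; a stricter variant would be
    //   prof_options.find(kTrainingTrace) == prof_options.end()
    // since a substring match also fires when the key text appears in a value.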
@@ -627,6 +629,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
uint64_t module, const std::map<std::string, std::string> &config_para) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK; | |||
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { | |||
is_training_trace_ = true; | |||
} | |||
int32_t device_num = 0; | |||
vector<int32_t> device_list; | |||
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { | |||
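[Review note] The new check enables training trace only when every bit of PROF_TRAINING_TRACE_MASK is present in module, not merely any bit. Sketch of the distinction (mask value illustrative):

    constexpr uint64_t kDemoMask = 0x3U;  // assumed two-bit mask
    bool AllSet(uint64_t module) { return (module & kDemoMask) == kDemoMask; }
    bool AnySet(uint64_t module) { return (module & kDemoMask) != 0U; }
    // module == 0x1: AnySet true, AllSet false; module == 0x3: both true.

If the mask is a single bit the two forms coincide, so the == comparison is the safe choice either way.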
@@ -402,6 +402,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
return FAILED; | |||
} | |||
continuous_mem_start = iter->second.mem_offset_; | |||
for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); | |||
@@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_ | |||
continue; | |||
} | |||
string op_type = op_desc->GetType(); | |||
if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) { | |||
if ((!is_single_stream && !op_desc->GetSubgraphInstanceNames().empty()) || separator_types.count(op_type) != 0) { | |||
continuous_op_lists.emplace_back(vector<OpDescPtr>()); | |||
} else { | |||
continuous_op_lists.back().emplace_back(op_desc); | |||
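[Review note] After this change an op that owns subgraphs starts a new continuous-op list only in multi-stream mode; in single-stream mode the sequence stays unbroken, so the first/last profiling marks cover the whole stream. The condition reads more clearly as a helper (hypothetical, not part of this PR):

    bool StartsNewList(const OpDescPtr &op_desc, bool is_single_stream,
                       const std::set<std::string> &separator_types) {
      const bool has_subgraph = !op_desc->GetSubgraphInstanceNames().empty();
      return (!is_single_stream && has_subgraph) ||
             (separator_types.count(op_desc->GetType()) != 0);
    }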
@@ -23,7 +23,10 @@ | |||
namespace { | |||
const int kInvalidTransopDataIndex = -1; | |||
const int kTransOpOutIndex = 0; | |||
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}}; | |||
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = { | |||
{ge::DT_FLOAT, ge::DT_BOOL}, | |||
{ge::DT_INT64, ge::DT_BOOL} | |||
}; | |||
} // namespace | |||
namespace ge { | |||
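[Review note] precision_loss_transfer_map lists casts that can lose information; this PR adds DT_INT64 -> DT_BOOL alongside the existing DT_FLOAT -> DT_BOOL. A sketch of how such a table is typically consulted (call site assumed, not shown in this diff):

    auto it = precision_loss_transfer_map.find(src_dtype);
    if (it != precision_loss_transfer_map.end() && it->second == dst_dtype) {
      GELOGW("Transfer from %s to %s may lose precision.",
             TypeUtils::DataTypeToSerialString(src_dtype).c_str(),
             TypeUtils::DataTypeToSerialString(dst_dtype).c_str());
    }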
@@ -320,10 +320,10 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { | |||
return SUCCESS; | |||
} | |||
Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | |||
Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) { | |||
auto model_manager = ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id); | |||
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id, sub_model_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Destroy aicpu kernel failed."); | |||
return ret; | |||
@@ -68,7 +68,7 @@ class GraphLoader { | |||
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data, | |||
std::vector<GeTensorDesc> &output_desc); | |||
static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); | |||
static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id); | |||
static Status DestroyAicpuSessionForInfer(uint32_t model_id); | |||
@@ -734,7 +734,6 @@ Status DavinciModel::ReportProfilingData() { | |||
} | |||
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | |||
op_list_.clear(); | |||
return SUCCESS; | |||
} | |||
@@ -136,6 +136,20 @@ class DavinciModel { | |||
/// | |||
void SetId(uint32_t model_id) { model_id_ = model_id; } | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get SubModelId | |||
/// @return sub model ID | |||
/// | |||
uint32_t SubModelId() const { return sub_model_id_; } | |||
/// | |||
/// @ingroup ge | |||
/// @brief Set SubModelId | |||
/// @param sub_model_id: sub model ID
/// | |||
void SetSubModelId(uint32_t sub_model_id) { sub_model_id_ = sub_model_id; } | |||
static void *Run(DavinciModel *model_pointer); | |||
/// | |||
@@ -815,6 +829,7 @@ class DavinciModel { | |||
uint32_t model_id_; | |||
uint32_t runtime_model_id_; | |||
uint32_t sub_model_id_ = 0; | |||
string name_; | |||
// used for inference data dump | |||
@@ -81,7 +81,8 @@ ModelManager::ModelManager() { | |||
session_id_bias_ = 0; | |||
} | |||
Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) { | |||
Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id, | |||
uint32_t sub_model_id) { | |||
STR_FWK_OP_KERNEL param_base = {}; | |||
void *devicebase = nullptr; | |||
void *aicpu_kernel_addr = nullptr; | |||
@@ -91,10 +92,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||
param_base.fwkKernelBase.fwk_kernel.sessionID = session_id; | |||
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { | |||
std::vector<uint64_t> v_aicpu_kernel; | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" + | |||
std::to_string(sub_model_id); | |||
auto iter = model_aicpu_kernel_.find(model_key); | |||
if (iter != model_aicpu_kernel_.end()) { | |||
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); | |||
GELOGD("kernel destroy session_id %lu, model_id %u, sub_model_id %u..", session_id, model_id, sub_model_id); | |||
v_aicpu_kernel = model_aicpu_kernel_.at(model_key); | |||
// Insert size of aicpu kernel vector in the first element | |||
v_aicpu_kernel.insert(v_aicpu_kernel.begin(), v_aicpu_kernel.size()); | |||
@@ -192,7 +194,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||
GE_CHK_RT(rtSetDevice(static_cast<int32_t>(GetContext().DeviceId()))); | |||
} | |||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0); | |||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0, 0); | |||
if (ret != SUCCESS) { | |||
GELOGW("The session: %lu destroy failed.", session_id); | |||
} else { | |||
@@ -218,20 +220,22 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
auto it = model_map_.find(model_id); | |||
if (it == model_map_.end()) { | |||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||
return GE_EXEC_MODEL_ID_INVALID; | |||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exist.", model_id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
} | |||
uint64_t session_id = it->second->GetSessionId(); | |||
DestroyAicpuSession(session_id); | |||
return SUCCESS; | |||
} | |||
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | |||
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) { | |||
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" + | |||
std::to_string(sub_model_id); | |||
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | |||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); | |||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id, | |||
sub_model_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Destroy aicpu kernel failed."); | |||
return FAILED; | |||
@@ -240,10 +244,12 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ | |||
return SUCCESS; | |||
} | |||
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { | |||
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id, | |||
uint64_t kernel_id) { | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
std::vector<uint64_t> v_aicpu_kernel; | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" + | |||
std::to_string(sub_model_id); | |||
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | |||
v_aicpu_kernel = model_aicpu_kernel_.at(model_key); | |||
} | |||
@@ -378,7 +384,8 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||
auto hybrid_model_it = hybrid_model_map_.find(id); | |||
if (it != model_map_.end()) { | |||
uint64_t session_id = it->second->GetSessionId(); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id) + "_" + | |||
std::to_string(it->second->SubModelId()); | |||
auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key); | |||
if (iter_aicpu_kernel != model_aicpu_kernel_.end()) { | |||
(void)model_aicpu_kernel_.erase(iter_aicpu_kernel); | |||
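[Review note] The composite key "<session_id>_<model_id>_<sub_model_id>" is now assembled by hand in four places (KernelLaunchEx, DestroyAicpuKernel, CreateAicpuKernel, DeleteModel). A small helper (hypothetical, not in this PR) would keep the format in one spot:

    static std::string GenModelKey(uint64_t session_id, uint32_t model_id,
                                   uint32_t sub_model_id) {
      return std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
             std::to_string(sub_model_id);
    }
    // e.g. auto iter = model_aicpu_kernel_.find(GenModelKey(session_id, model_id, sub_model_id));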
@@ -905,7 +912,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||
} | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||
davinci_model->SetModelDescVersion(new_model_desc); | |||
@@ -1224,7 +1231,8 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||
// Zero copy is enabled by default, no need to judge. | |||
uint64_t session_id_davinci = davinci_model->GetSessionId(); | |||
uint32_t model_id_davinci = davinci_model->GetModelId(); | |||
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci); | |||
uint32_t sub_model_id = davinci_model->SubModelId(); | |||
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci, sub_model_id); | |||
if (status != SUCCESS) { | |||
GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci, | |||
model_id_davinci); | |||
@@ -1244,7 +1252,7 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||
auto it = sess_ids_.find(session_id); | |||
// never been created by any model | |||
if (it == sess_ids_.end()) { | |||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0); | |||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0, 0); | |||
if (ret == SUCCESS) { | |||
(void)sess_ids_.insert(session_id); | |||
GELOGI("The session: %lu create success.", session_id); | |||
@@ -1558,6 +1566,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||
size_t aicpu_op_nums = aicpu_optype_list.size(); | |||
size_t tf_op_nums = aicpu_tf_optype_list.size(); | |||
size_t op_nums = aicpu_op_nums + tf_op_nums; | |||
std::function<void()> callback = [&]() { | |||
for (auto mem : allocated_mem) { | |||
GE_CHK_RT(rtFree(mem)); | |||
} | |||
}; | |||
GE_MAKE_GUARD(release, callback); | |||
// malloc sysOpInfoList in SysOpCheckInfo | |||
status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
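[Review note] Hoisting the rtFree lambda and GE_MAKE_GUARD above the first rtMalloc means every early return below, including the new rtStreamSynchronize failure path, releases allocated_mem exactly once; the per-exit GE_MAKE_GUARD calls further down become redundant and are removed. The stream is destroyed explicitly at each exit because it is created after the guard. Minimal sketch of the idea (GE_MAKE_GUARD semantics assumed: run the callable on scope exit):

    #include <functional>
    class ScopeGuard {
     public:
      explicit ScopeGuard(std::function<void()> fn) : fn_(std::move(fn)) {}
      ~ScopeGuard() { if (fn_ != nullptr) { fn_(); } }
     private:
      std::function<void()> fn_;
    };
    // ScopeGuard release([&]() { for (auto mem : allocated_mem) rtFree(mem); });
    // Any return after this point still runs the cleanup exactly once.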
@@ -1637,8 +1651,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
allocated_mem.push_back(args); | |||
GE_CHK_RT( | |||
rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); | |||
GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo), | |||
RT_MEMCPY_HOST_TO_DEVICE)); | |||
GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)), | |||
sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); | |||
GE_CHK_RT(rtStreamCreate(&stream, 0)); | |||
@@ -1647,24 +1661,21 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||
status = rtStreamSynchronize(stream); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
// Check the response | |||
SysOpCheckResp *d_op_check_info_res = reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen))); | |||
SysOpCheckResp *d_op_check_info_res = | |||
reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>( | |||
reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen))); | |||
(void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp)); | |||
GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), | |||
RT_MEMCPY_DEVICE_TO_HOST)); | |||
std::function<void()> callback = [&]() { | |||
for (auto mem : allocated_mem) { | |||
GE_CHK_RT(rtFree(mem)); | |||
} | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
}; | |||
if (op_check_info_res.isWithoutJson) { | |||
GELOGI("No need to check aicpu in this scenoria."); | |||
GE_MAKE_GUARD(release, callback); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return SUCCESS; | |||
} | |||
uint64_t res_op_nums = op_check_info_res.opListNum; | |||
@@ -1682,7 +1693,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||
sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); | |||
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { | |||
GELOGE(FAILED, "Number of retcode is not equal to number of op type."); | |||
GE_MAKE_GUARD(release, callback); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return FAILED; | |||
} | |||
std::string fail_reason; | |||
@@ -1705,11 +1716,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||
} | |||
fail_reason += "not support."; | |||
GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); | |||
GE_MAKE_GUARD(release, callback); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return FAILED; | |||
} | |||
GE_MAKE_GUARD(release, callback); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
GELOGI("Cpu kernel launch check optype task success."); | |||
return SUCCESS; | |||
} | |||
@@ -273,7 +273,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
std::shared_ptr<hybrid::HybridDavinciModel> GetHybridModel(uint32_t id); | |||
ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id); | |||
ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id, | |||
uint32_t sub_model_id); | |||
ge::Status CreateAicpuSession(uint64_t session_id); | |||
@@ -281,9 +282,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
void DestroyAicpuSession(uint64_t session_id); | |||
ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); | |||
ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id); | |||
ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id); | |||
ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id, uint64_t kernel_id); | |||
ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | |||
@@ -97,14 +97,16 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
// 2.2 Collect aicpu kernel | |||
uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID; | |||
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, | |||
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), | |||
davinci_model->SubModelId(), kernel_id) != SUCCESS, | |||
GELOGE(FAILED, "CreateAicpuKernel error."); | |||
return FAILED;) | |||
// 2.3 Create session | |||
GE_CHECK_NOTNULL(ModelManager::GetInstance()); | |||
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, | |||
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); | |||
return FAILED;) | |||
ret = ModelManager::GetInstance()->CreateAicpuSession(session_id); | |||
GE_IF_BOOL_EXEC(ret != SUCCESS, | |||
GELOGE(ret, "CreateAicpuSession error. session id: %lu", session_id); | |||
return ret;) | |||
kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL); | |||
if (davinci_model_->IsKnownNode()) { | |||
@@ -132,6 +134,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
InitDumpTask(input_output_addr, op_desc); | |||
GELOGI("KernelExTaskInfo knonw node Init Success."); | |||
return SUCCESS; | |||
} | |||
@@ -166,11 +169,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = input_output_addr_; | |||
} | |||
InitDumpTask(input_output_addr_, op_desc); | |||
if (davinci_model_->GetOpDugReg()) { | |||
GELOGI("Op debug is open in kernel ex task info"); | |||
dump_args_ = input_output_addr_; | |||
@@ -200,6 +199,14 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
return SUCCESS; | |||
} | |||
void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = input_output_addr_; | |||
} | |||
} | |||
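[Review note] Factoring the dump setup into InitDumpTask removes the duplicated block, but the addr parameter is unused: the body always assigns input_output_addr_. If the known-node path, which passes a locally computed input_output_addr, is meant to dump from that address, the body would need to be (sketch of the suggested fix, assuming that intent):

    void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
      if (davinci_model_->GetDumpProperties().IsLayerNeedDump(
              davinci_model_->Name(), davinci_model_->OmName(), op_desc->GetName())) {
        dump_flag_ = RT_KERNEL_DUMPFLAG;
        dump_args_ = addr;  // was: input_output_addr_, which ignores the parameter
      }
    }

Worth confirming which behavior is intended before merge.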
Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
auto kernel_ex_def = task_def.kernel_ex(); | |||
uint32_t op_index = kernel_ex_def.op_index(); | |||
@@ -60,6 +60,8 @@ class KernelExTaskInfo : public TaskInfo { | |||
private: | |||
Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); | |||
void InitDumpTask(void *addr, const OpDescPtr &op_desc); | |||
uint32_t task_id_; | |||
uint32_t stream_id_; | |||
uint32_t dump_flag_; | |||
@@ -571,6 +571,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); | |||
GE_CHECK_NOTNULL(op_desc); | |||
if (davinci_model_->IsKnownNode()) { | |||
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
InitDumpTask(offset); | |||
return SUCCESS; | |||
} | |||
@@ -635,15 +637,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
return FAILED; | |||
} | |||
skt_dump_args_ = static_cast<char *>(args_) + offset; | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
if (IsL1FusionOp(op_desc)) { | |||
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
} else { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
} | |||
dump_args_ = static_cast<char *>(args_) + offset; | |||
} | |||
InitDumpTask(offset); | |||
GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset, | |||
"Op debug is open in TVM task info"); | |||
@@ -941,16 +935,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
if (IsL1FusionOp(op_desc)) { | |||
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
} else { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
} | |||
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | |||
} | |||
InitDumpTask(sizeof(aicpu::AicpuParamHead)); | |||
if (davinci_model_->GetOpDugReg()) { | |||
GELOGI("Op debug is open in aicpu task info"); | |||
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | |||
@@ -964,6 +949,18 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
return SUCCESS; | |||
} | |||
void KernelTaskInfo::InitDumpTask(uint32_t offset) { | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc_->GetName())) { | |||
if (IsL1FusionOp(op_desc_)) { | |||
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
} else { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
} | |||
dump_args_ = static_cast<char *>(args_) + offset; | |||
} | |||
} | |||
Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
if (ext_info.empty()) { | |||
return SUCCESS; | |||
@@ -129,7 +129,9 @@ class KernelTaskInfo : public TaskInfo { | |||
Status SuperKernelDistribute(); | |||
bool IsL1FusionOp(const OpDescPtr &op_desc); | |||
// For super kernel | |||
void InitDumpTask(uint32_t offset); | |||
// For super kernel | |||
Status SaveSKTDumpInfo(); | |||
void UpdateTaskId(); | |||
void UpdateSKTTaskId(); | |||
@@ -536,7 +536,7 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr | |||
return SUCCESS; | |||
} | |||
Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, | |||
Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, | |||
Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { | |||
GE_CHECK_NOTNULL(compute_graph); | |||
// use default 16 multi thread | |||
@@ -737,6 +737,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||
GeRootModelPtr &ge_root_model, uint64_t session_id) { | |||
GE_CHECK_NOTNULL(graph_node); | |||
GE_CHECK_NOTNULL(compute_graph); | |||
CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | |||
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", | |||
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, | |||
@@ -2439,6 +2442,13 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||
continue; | |||
} | |||
auto model_id = model->GetModelId(); | |||
// unknown-shape model, do not release memory | |||
bool is_unknown_shape = false; | |||
GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape)); | |||
if (is_unknown_shape) { | |||
GELOGD("model_id[%u] graph_id[%u] is unknown model, not release memory", model_id, graph_id); | |||
continue; | |||
} | |||
// not loaded, no need to unload | |||
if (!it.second->GetLoadFlag()) { | |||
GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id); | |||
@@ -2456,7 +2466,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
continue; | |||
} | |||
result = GraphLoader::DestroyAicpuKernel(session_id, model_id); | |||
result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); | |||
if (result != SUCCESS) { | |||
GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
graph_id); | |||
@@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { | |||
if (compute_graph == nullptr) { | |||
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr."); | |||
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed."); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | |||
GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", graph_optimizer.size()); | |||
Status ret = SUCCESS; | |||
string exclude_core_type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | |||
GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str()); | |||
if (!graph_optimizer.empty()) { | |||
for (auto &iter : graph_optimizer) { | |||
if (iter.first == exclude_core_type || iter.second == nullptr) { | |||
continue; | |||
} | |||
GELOGI("Begin to optimize whole graph by engine %s", iter.first.c_str()); | |||
ret = iter.second->OptimizeWholeGraph(*compute_graph); | |||
GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret); | |||
return ret; | |||
} | |||
} | |||
} | |||
return ret; | |||
} | |||
} // namespace ge |
@@ -52,6 +52,9 @@ class GraphOptimize { | |||
// for fe prepare optimize in quantize scene | |||
Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph); | |||
// for engine to optimize merged whole graph before ge Optimize2 | |||
Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); | |||
// for rts optimize before build to add attr and insert memcpy op | |||
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); | |||
@@ -22,6 +22,8 @@ | |||
#include "graph/preprocess/multi_batch_options.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "register/op_registry.h" | |||
namespace ge { | |||
@@ -478,8 +480,28 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { | |||
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { | |||
return SUCCESS; | |||
} | |||
(void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); | |||
GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); | |||
std::vector<std::string> input_dims_str; | |||
for (size_t i = 0; i < batch_shapes_.size(); ++i) { | |||
auto shape = data_shape; | |||
auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str()); | |||
return ret; | |||
} | |||
tensor.SetShape(shape); | |||
int64_t tensor_size = 0; | |||
(void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size); | |||
string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" + | |||
TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" + | |||
std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" + | |||
formats::JoinToString(tensor.GetShape().GetDims()); | |||
input_dims_str.emplace_back(input_str); | |||
} | |||
(void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); | |||
size_t max_shape_index = 0; | |||
int64_t max_size = 0; | |||
for (size_t i = 0; i < batch_shapes_.size(); ++i) { | |||
@@ -20,11 +20,12 @@ | |||
#include "graph/passes/folding_pass.h" | |||
namespace ge { | |||
constexpr uint32_t kDataOutIndex = 0; | |||
constexpr uint32_t kZeroIndex = 0; | |||
constexpr uint32_t kCaseInputBase = 1; | |||
constexpr uint32_t kInvalidParent = 0x7fffffffU; | |||
const string kMbatchNodeNameMark = "_ascend_mbatch_batch_"; | |||
bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) { | |||
bool IsSameConstNode(const NodePtr &src_node, const NodePtr &dst_node) { | |||
if ((src_node == nullptr) && (dst_node == nullptr)) { | |||
return true; | |||
} | |||
@@ -37,35 +38,9 @@ bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) { | |||
return false; | |||
} | |||
if ((src_node->GetInControlNodes().size() != dst_node->GetInControlNodes().size()) || | |||
(src_node->GetOutDataNodesSize() != dst_node->GetOutDataNodesSize())) { | |||
return false; | |||
} | |||
set<uint32_t> related_parent; | |||
const auto in_nodes = src_node->GetInControlNodes(); | |||
for (uint32_t i = 0; i < in_nodes.size(); ++i) { | |||
const auto owner_node = in_nodes.at(i); | |||
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
return false; | |||
} | |||
related_parent.insert(parent_index); | |||
} | |||
for (const auto &in_node : dst_node->GetInControlNodes()) { | |||
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
return false; | |||
} | |||
if (related_parent.count(parent_index) == 0) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
const GeTensorDesc &src_desc = src_node->GetOpDesc()->GetOutputDesc(kZeroIndex); | |||
const GeTensorDesc &dst_desc = dst_node->GetOpDesc()->GetOutputDesc(kZeroIndex); | |||
return (src_desc == dst_desc); | |||
} | |||
/*********************************************************************************************************************** | |||
@@ -89,12 +64,12 @@ bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) { | |||
+-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ | |||
| Data | | Data | | Data | | Data | | Data | | Data | | Conv2D | | |||
+-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ | |||
\ \ | / / | | | |||
\ \ | / / | | | |||
\ \ | / / | | | |||
\ \ | / / | | | |||
\ +-----------+ / | +-----------+ | |||
+---------------| Const |----------------+ | | Pooling | | |||
\ \ | / / | | +-----------+ | |||
\ \ | / / | | | Const | | |||
\ \ | / / | | +-----------+ | |||
\ \ | / / | | / | |||
\ +-----------+ / | +-----------+ / | |||
+---------------| Const |----------------+ | | Pooling |-----+ | |||
+-----------+ | +-----------+ | |||
\ | / | |||
\ | / | |||
@@ -126,28 +101,26 @@ Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) { | |||
continue; | |||
} | |||
do { | |||
migration_append_ = false; | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> graph_datas; | |||
if (ClassifyDataNodes(graph, func_desc, graph_datas) != SUCCESS) { | |||
return FAILED; | |||
} | |||
map<ComputeGraphPtr, map<string, NodePtr>> all_const_nodes; | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> all_data_nodes; | |||
if (ClassifyGraphNodes(graph, func_desc, all_const_nodes, all_data_nodes) != SUCCESS) { | |||
return FAILED; | |||
} | |||
if (graph_datas.empty()) { | |||
GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str()); | |||
break; | |||
} | |||
if (all_const_nodes.empty()) { | |||
GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str()); | |||
break; | |||
} | |||
// {subgraph0, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} | |||
// {subgraph1, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} | |||
// {subgraph2, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} | |||
const auto base_nodes = graph_datas.begin()->second; // Need copy. | |||
for (const auto &node_item : base_nodes) { | |||
if (GraphNodeMigration(graph, node, graph_datas, node_item.second, node_item.first) != SUCCESS) { | |||
return FAILED; | |||
} | |||
// {subgraph0, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}} | |||
// {subgraph1, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}} | |||
// {subgraph2, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}} | |||
const auto &const_nodes = all_const_nodes.begin()->second; | |||
for (const auto &item : const_nodes) { | |||
if (GraphNodeMigration(graph, node, all_const_nodes, all_data_nodes, item.second, item.first) != SUCCESS) { | |||
return FAILED; | |||
} | |||
} while (migration_append_); | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -155,14 +128,16 @@ Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) { | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get all Data nodes for all subgraph. | |||
/// @brief Get all Const/Data nodes for all subgraph. | |||
/// @param [in] graph: Root compute graph. | |||
/// @param [in] func_desc: functional OpDesc of Case. | |||
/// @param [out] graph_datas: Data groups of subgraph. | |||
/// @param [out] all_const_nodes: Const groups of subgraph. | |||
/// @param [out] all_data_nodes: Data groups of subgraph. | |||
/// @return 0: SUCCESS / others: FAILED | |||
/// | |||
Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc, | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas) { | |||
Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc, | |||
map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes, | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes) { | |||
for (const auto &name : func_desc->GetSubgraphInstanceNames()) { | |||
const auto &subgraph = graph->GetSubgraph(name); | |||
if (subgraph == nullptr) { | |||
@@ -170,32 +145,47 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap | |||
return GE_GRAPH_EMPTY_SUBGRAPH; | |||
} | |||
auto &data_nodes = graph_datas[subgraph]; | |||
for (auto &data : subgraph->GetDirectNode()) { | |||
if (data->GetType() != DATA) { | |||
continue; | |||
} | |||
auto &data_nodes = all_data_nodes[subgraph]; | |||
auto &const_nodes = all_const_nodes[subgraph]; | |||
for (auto &node : subgraph->GetDirectNode()) { | |||
if (node->GetType() == DATA) { | |||
uint32_t parent_index = kInvalidParent; | |||
if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
return FAILED; | |||
} | |||
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGE(FAILED, "Parent index not found, name: %s", data->GetName().c_str()); | |||
return FAILED; | |||
} | |||
data_nodes[parent_index] = node; | |||
GELOGD("%s, index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, node->GetName().c_str()); | |||
} else if ((node->GetType() == CONSTANT) && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) { | |||
set<string> peer_name_list; | |||
const auto &out_anchor = node->GetOutDataAnchor(kZeroIndex); | |||
for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||
const auto &peer_node = in_anchor->GetOwnerNode(); | |||
// Trim subgraph node name prefix. | |||
string node_full_name = peer_node->GetName(); | |||
size_t pos = node_full_name.find(kMbatchNodeNameMark); | |||
if (pos == string::npos) { | |||
GELOGE(FAILED, "find: %s of multi-batch in node: %s", kMbatchNodeNameMark.c_str(), node_full_name.c_str()); | |||
return FAILED; | |||
} | |||
string fixed_name = node_full_name.substr(0, pos); | |||
pos = node_full_name.find("_", pos + kMbatchNodeNameMark.length()); | |||
if (pos != string::npos) { | |||
fixed_name += node_full_name.substr(pos); | |||
} | |||
peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx())); | |||
} | |||
data_nodes[parent_index] = data; | |||
GELOGD("%s, Parent index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, data->GetName().c_str()); | |||
} | |||
} | |||
string key_of_const; | |||
for (const string &name : peer_name_list) { | |||
key_of_const += (key_of_const.empty() ? name : "_" + name); | |||
} | |||
auto iter = graph_datas.begin(); | |||
if (iter == graph_datas.end()) { | |||
return SUCCESS; | |||
} | |||
for (const auto &data_nodes : graph_datas) { | |||
if (data_nodes.second.size() != iter->second.size()) { | |||
GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", | |||
data_nodes.first->GetName().c_str(), data_nodes.second.size(), iter->second.size()); | |||
return FAILED; | |||
const_nodes[key_of_const] = node; | |||
GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str()); | |||
} | |||
} | |||
} | |||
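[Review note] The Const key is derived from its peer inputs with the per-batch name suffix stripped, so the same Const cloned into each batch subgraph maps to one key. Worked example of the trimming (names illustrative):

    // peer node name : "conv1_ascend_mbatch_batch_1_relu", in-anchor index 0
    // mark position  : at "_ascend_mbatch_batch_"
    // fixed_name     : "conv1" + "_relu"  ->  "conv1_relu"
    // set entry      : "conv1_relu:0"
    // key_of_const   : sorted entries joined with '_', e.g. "conv1_relu:0_pool2:1"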
@@ -204,36 +194,27 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get all Data nodes for all subgraph. | |||
/// @param [in] node: Const node of subgraph. | |||
/// @param [out] inputs: parent index to Const. | |||
/// @param [out] outputs: Data groups of subgraph. | |||
/// @brief Get parent_index for Const node migration. | |||
/// @param [in] all_data_nodes: Data groups of subgraph. | |||
/// @param [in] const_node: Const node will process. | |||
/// @param [out] parent_index: parent index for replace Data. | |||
/// @return true: SUCCESS / false: FAILED | |||
/// | |||
bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map<uint32_t, uint32_t> &inputs, | |||
map<uint32_t, uint32_t> &outputs) { | |||
for (uint32_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) { | |||
outputs[i] = kInvalidParent; | |||
} | |||
uint32_t out_index = 0; | |||
const auto in_nodes = node->GetInAllNodes(); | |||
for (size_t i = 0; i < in_nodes.size(); ++i) { | |||
const auto owner_node = in_nodes.at(i); | |||
if (owner_node->GetType() != DATA) { | |||
bool SubgraphConstMigrationPass::GetAssociatedNodes(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes, | |||
const NodePtr &const_node, uint32_t &parent_index) { | |||
for (const auto in_node : const_node->GetInAllNodes()) { | |||
if (in_node->GetType() != DATA) { | |||
return false; | |||
} | |||
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
uint32_t node_index = 0; | |||
if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, node_index)) { | |||
return false; | |||
} | |||
// Input Data feed other Node, need add new Data. | |||
inputs[i] = parent_index; | |||
if ((out_index == outputs.size()) && owner_node->GetOutDataNodes().empty()) { | |||
outputs[out_index] = parent_index; | |||
++out_index; | |||
if ((parent_index == kInvalidParent) && in_node->GetOutDataNodes().empty()) { | |||
parent_index = node_index; | |||
} | |||
} | |||
@@ -242,43 +223,26 @@ bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map<uin | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get all Data nodes for all subgraph. | |||
/// @param [in] graph_nodes: Data groups of subgraph. | |||
/// @param [in] data_base: Data Node for migration. | |||
/// @param [in] data_idx: Data groups of subgraph. | |||
/// @param [in] data_idx: Data groups of subgraph. | |||
/// @brief Check parallel node is same for all subgraph. | |||
/// @param [in] all_const_nodes: Const groups of subgraph. | |||
/// @param [in] const_node: Const Node for migration. | |||
/// @param [in] node_key: Key of Const node. | |||
/// @return true: Same / false: not same | |||
/// | |||
bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas, | |||
const NodePtr &const_node, uint32_t parent_index, size_t index) { | |||
auto it = graph_datas.begin(); | |||
for (++it; it != graph_datas.end(); ++it) { | |||
const auto &data_nodes = it->second; | |||
auto data_it = data_nodes.find(parent_index); | |||
if (data_it == data_nodes.end()) { | |||
GELOGE(FAILED, "Data: %s not fount, index: %u", const_node->GetName().c_str(), parent_index); | |||
return false; | |||
} | |||
const auto &work_data = data_it->second; | |||
const auto &out_anchor = work_data->GetOutControlAnchor(); | |||
const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); | |||
if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) { | |||
GELOGW("Node anchors not same, Data: %s -> %s anchor size: %zu, index: %zu", | |||
work_data->GetName().c_str(), const_node->GetName().c_str(), in_anchors.size(), index); | |||
return false; | |||
} | |||
const auto &in_anchor = in_anchors.at(index); | |||
const auto &work_node = in_anchor->GetOwnerNode(); | |||
if (work_node == nullptr) { | |||
GELOGE(FAILED, "Data: %s not found, parent: %u, index: %zu", const_node->GetName().c_str(), parent_index, index); | |||
bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes, | |||
const NodePtr &const_node, const string &node_key) { | |||
auto it = all_const_nodes.begin(); | |||
for (++it; it != all_const_nodes.end(); ++it) { | |||
const auto &const_nodes = it->second; | |||
auto node_it = const_nodes.find(node_key); | |||
if (node_it == const_nodes.end()) { | |||
GELOGW("Const node: %s not fount, key: %s", const_node->GetName().c_str(), node_key.c_str()); | |||
return false; | |||
} | |||
if (!IsSameOpNode(const_node, work_node)) { | |||
GELOGI("OpDesc not same: %s %s, parent: %u, index: %zu", | |||
const_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, index); | |||
const auto &work_node = node_it->second; | |||
if (!IsSameConstNode(const_node, work_node)) { | |||
GELOGI("Not same: %s %s, key: %s", const_node->GetName().c_str(), work_node->GetName().c_str(), node_key.c_str()); | |||
return false; | |||
} | |||
} | |||
@@ -291,51 +255,34 @@ bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, m | |||
/// @brief Migration subgraph Node to Root | |||
/// @param [in] graph: Root compute graph. | |||
/// @param [in] func_node: functional Node of Case. | |||
/// @param [in] graph_nodes: Data groups of subgraph. | |||
/// @param [in] data_base: Data Node for migration. | |||
/// @param [in] data_idx: Data groups of subgraph. | |||
/// @param [in] all_const_nodes: Const groups of subgraph. | |||
/// @param [in] all_data_nodes: Data groups of subgraph. | |||
/// @param [in] const_node: Const Node for migration. | |||
/// @param [in] node_key: Key of Const node for migration. | |||
/// @return 0: SUCCESS / others: FAILED | |||
/// | |||
Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas, | |||
const NodePtr &data_node, uint32_t parent_index) { | |||
bool can_extrapolation = false; | |||
do { | |||
can_extrapolation = false; | |||
const auto &out_anchor = data_node->GetOutControlAnchor(); | |||
const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); | |||
for (size_t i = in_anchors.size(); i > 0; --i) { | |||
const auto &in_anchor = in_anchors.at(i - 1); | |||
const auto &work_node = in_anchor->GetOwnerNode(); | |||
GELOGD("Data: %s, node: %s, parent: %u, index: %zu", | |||
data_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, i); | |||
if (work_node->GetType() != CONSTANT) { | |||
continue; | |||
} | |||
// Get associated Data, if Data feed other nodes, need append new Data. | |||
map<uint32_t, uint32_t> inputs; | |||
map<uint32_t, uint32_t> outputs; | |||
if (!GetAssociatedNodes(work_node, inputs, outputs)) { | |||
continue; | |||
} | |||
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes, | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes, | |||
const NodePtr &const_node, const string &node_key) { | |||
if (!IsParallelNodeSame(all_const_nodes, const_node, node_key)) { | |||
return SUCCESS; | |||
} | |||
if (!IsParallelNodeSame(graph_datas, work_node, parent_index, i - 1)) { | |||
continue; | |||
} | |||
// Get associated Data, if Data feed other nodes, need append new Data. | |||
uint32_t parent_index = kInvalidParent; | |||
if (!GetAssociatedNodes(all_data_nodes, const_node, parent_index)) { | |||
return SUCCESS; | |||
} | |||
GELOGI("Move node: %s, parent: %u, index: %zu", work_node->GetName().c_str(), parent_index, i); | |||
if (AppendParallelNode(graph_datas, func_node, outputs) != SUCCESS) { | |||
return FAILED; | |||
} | |||
GELOGI("Move node: %s, parent index: %u", const_node->GetName().c_str(), parent_index); | |||
if (AppendParallelNode(func_node, parent_index, all_data_nodes) != SUCCESS) { | |||
return FAILED; | |||
} | |||
if (MoveNodeToParent(graph, func_node, graph_datas, parent_index, i - 1, inputs, outputs) != SUCCESS) { | |||
return FAILED; | |||
} | |||
can_extrapolation = true; | |||
break; | |||
} | |||
} while (can_extrapolation); | |||
if (MoveNodeToParent(graph, func_node, all_const_nodes, all_data_nodes, node_key, parent_index) != SUCCESS) { | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -343,114 +290,100 @@ Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra | |||
/// | |||
/// @ingroup ge | |||
/// @brief Append Input Tensor for functional node. | |||
/// @param [in] graph_nodes: Data groups of subgraph. | |||
/// @param [in] func_node: functional Node of Case. | |||
/// @param [in] outputs: Parent index of Node output. | |||
/// @param [in/out] parent_index: Parent index for migration. | |||
/// @param [in/out] all_data_nodes: Data groups of subgraph. | |||
/// @return 0: SUCCESS / others: FAILED | |||
/// | |||
Status SubgraphConstMigrationPass::AppendParallelNode(map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas, | |||
const NodePtr &func_node, map<uint32_t, uint32_t> &outputs) { | |||
Status SubgraphConstMigrationPass::AppendParallelNode(const NodePtr &func_node, uint32_t &parent_index, | |||
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes) { | |||
// If outputs index invalid, add Data and Input Tensor. | |||
for (auto &item : outputs) { | |||
if (item.second != kInvalidParent) { | |||
continue; | |||
} | |||
// Add Data to subgraph. | |||
map<ComputeGraphPtr, uint32_t> append_num; | |||
for (auto &groups : graph_datas) { | |||
const auto &subgraph = groups.first; | |||
auto &data_nodes = groups.second; | |||
item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index. | |||
const auto data_name = subgraph->GetName() + "_data_" + std::to_string(item.second); | |||
OpDescBuilder op_builder(data_name, DATA); | |||
const OpDescPtr op_desc = op_builder.AddInput("x").AddOutput("y").Build(); | |||
if (op_desc == nullptr) { | |||
GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed"); | |||
return OUT_OF_MEMORY; | |||
} | |||
if (parent_index != kInvalidParent) { | |||
return SUCCESS; | |||
} | |||
uint32_t data_index = item.second - kCaseInputBase; | |||
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// Add Data to subgraph. | |||
parent_index = func_node->GetAllInDataAnchorsSize(); // Update to valid parent index. | |||
for (auto &item : all_data_nodes) { | |||
const auto &subgraph = item.first; | |||
const auto data_name = subgraph->GetName() + "_data_" + std::to_string(parent_index); | |||
OpDescBuilder op_builder(data_name, DATA); | |||
const auto op_desc = op_builder.AddInput("x").AddOutput("y").Build(); | |||
if (op_desc == nullptr) { | |||
GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed"); | |||
return OUT_OF_MEMORY; | |||
} | |||
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, item.second)) { | |||
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
uint32_t data_index = parent_index - kCaseInputBase; | |||
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
append_num[subgraph]++; | |||
data_nodes[item.second] = subgraph->AddNode(op_desc); | |||
GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second); | |||
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// Add InputTensor to functional Node. | |||
NodeUtils::AppendInputAnchor(func_node, item.second + 1); | |||
item.second[parent_index] = subgraph->AddNode(op_desc); | |||
GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), parent_index); | |||
} | |||
// Add InputTensor to functional Node. | |||
NodeUtils::AppendInputAnchor(func_node, parent_index + 1); | |||
return SUCCESS; | |||
} | |||
/// | |||
/// @ingroup ge
/// @brief Delete Node from subgraph.
/// @param [in] graph: subgraph for process.
/// @param [in] const_node: Node will move to parent.
/// @param [in] data_node: Place holder for Const.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &graph, const NodePtr &const_node,
                                                      const NodePtr &data_node) {
  // Break Data and Move node.
  const auto &in_anchor = const_node->GetInControlAnchor();
  const auto out_anchors = in_anchor->GetPeerOutControlAnchors();
  for (const auto out_anchor : out_anchors) {
    GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
    const auto owner_node = out_anchor->GetOwnerNode();
    GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str());
    if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) {
      graph->RemoveNode(owner_node);
    }
  }

  const auto &ctrl_anchor = const_node->GetOutControlAnchor();
  const auto ctrl_anchors = ctrl_anchor->GetPeerInControlAnchors();
  for (const auto in_anchor : ctrl_anchors) {
    GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(ctrl_anchor, in_anchor), "Remove edge failed");
    GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());

    GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_node->GetOutControlAnchor(), in_anchor), "Add edge failed");
    GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
  }

  // Break Move and follow, Link Data and follow.
  const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex);
  const auto in_anchors = out_anchor->GetPeerInDataAnchors();
  for (const auto in_anchor : in_anchors) {
    GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
    GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());

    GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
    GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
  }

  // Update Data op DataType.
  const auto &const_desc = const_node->GetOpDesc();
  const auto &tensor_desc = const_desc->GetOutputDesc(kZeroIndex);
  const auto &data_desc = data_node->GetOpDesc();
  (void)data_desc->UpdateInputDesc(kZeroIndex, tensor_desc);   // Set Data Input to new connect Node.
  (void)data_desc->UpdateOutputDesc(kZeroIndex, tensor_desc);  // Set Data Output to new connect Node.

  return SUCCESS;
}
@@ -459,47 +392,37 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const map<uint32_t, NodePt | |||
/// @brief Move Node to Parent Graph.
/// @param [in] graph: Parent compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] const_node: Node will move to parent.
/// @param [in] parent_index: Parent index of Node input.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node,
                                                      const NodePtr &const_node, uint32_t parent_index) {
  GE_CHECK_NOTNULL(const_node);
  if (parent_index == kInvalidParent) {
    return INTERNAL_ERROR;
  }

  const auto &func_desc = func_node->GetOpDesc();
  const auto &tensor_desc = const_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
  (void)func_desc->UpdateInputDesc(parent_index, tensor_desc);  // Set Data Input to new connect Node.

  const auto &in_anchor = func_node->GetInDataAnchor(parent_index);
  const auto &out_anchor = in_anchor->GetPeerOutAnchor();
  if (out_anchor != nullptr) {  // Break useless old link.
    GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
    const auto owner_node = out_anchor->GetOwnerNode();
    GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str());
    if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) {
      graph->RemoveNode(owner_node);
    }
  }

  GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
  GELOGI("Add Edge: %s %s, index: %u", const_node->GetName().c_str(), func_node->GetName().c_str(), parent_index);

  (void)graph->AddNode(const_node);
  (void)const_node->SetOwnerComputeGraph(graph);
  GELOGI("Add Node: %s %s", graph->GetName().c_str(), const_node->GetName().c_str());
  return SUCCESS;
}
@@ -515,43 +438,37 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra | |||
/// @return 0: SUCCESS / others: FAILED | |||
/// | |||
Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node,
                                                    const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
                                                    const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
                                                    const string &node_key, uint32_t parent_index) {
  if (node_key.empty() || parent_index == kInvalidParent) {
    GELOGE(FAILED, "Graph: %s, node key is empty or parent index is invalid", graph->GetName().c_str());
    return FAILED;
  }

  NodePtr move_node;
  for (auto &item : all_const_nodes) {
    const auto &subgraph = item.first;
    const auto it_const = item.second.find(node_key);
    if (it_const == item.second.end()) {
      GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
      return FAILED;
    }
    move_node = it_const->second;

    const auto it_nodes = all_data_nodes.find(subgraph);
    if (it_nodes == all_data_nodes.end()) {
      GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
      return FAILED;
    }
    const auto it_data = it_nodes->second.find(parent_index);
    if (it_data == it_nodes->second.end()) {
      GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
      return FAILED;
    }

    if (DetachParallelNode(subgraph, move_node, it_data->second) != SUCCESS) {
      GELOGE(FAILED, "Data: %s not found, index: %u", move_node->GetName().c_str(), parent_index);
      return FAILED;
    }
@@ -559,11 +476,10 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph | |||
GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str()); | |||
} | |||
  if (AttachParallelNode(graph, func_node, move_node, parent_index) != SUCCESS) {
return FAILED; | |||
} | |||
migration_append_ = true; | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
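
To make the detach/attach choreography above easier to follow, here is a hypothetical, self-contained C++ model of the migration; the Graph/Node types are simplified stand-ins, not the real GE classes:

#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node {
  std::string name;
  std::vector<std::shared_ptr<Node>> inputs;  // data inputs, by index
};
using NodePtr = std::shared_ptr<Node>;

struct Graph {
  std::string name;
  std::vector<NodePtr> nodes;
};

// Detach: re-link every consumer of the Const inside the subgraph to the Data
// placeholder, then drop the Const from the subgraph's node list.
void DetachParallelNode(Graph &subgraph, const NodePtr &const_node, const NodePtr &data_node) {
  for (auto &node : subgraph.nodes) {
    for (auto &input : node->inputs) {
      if (input == const_node) {
        input = data_node;
      }
    }
  }
  subgraph.nodes.erase(std::remove(subgraph.nodes.begin(), subgraph.nodes.end(), const_node),
                       subgraph.nodes.end());
}

// Attach: add the migrated Const to the parent graph and wire it into the
// functional (Case) node at the chosen parent index.
void AttachParallelNode(Graph &parent, const NodePtr &func_node, const NodePtr &const_node,
                        size_t parent_index) {
  parent.nodes.push_back(const_node);
  if (func_node->inputs.size() <= parent_index) {
    func_node->inputs.resize(parent_index + 1);
  }
  func_node->inputs[parent_index] = const_node;
}

int main() {
  auto const_node = std::make_shared<Node>(Node{"weight_const", {}});
  auto data_node = std::make_shared<Node>(Node{"weight_data", {}});
  auto user = std::make_shared<Node>(Node{"matmul", {const_node}});
  auto case_node = std::make_shared<Node>(Node{"case", {}});
  Graph branch{"branch_0", {const_node, data_node, user}};
  Graph parent{"parent", {case_node}};

  DetachParallelNode(branch, const_node, data_node);
  AttachParallelNode(parent, case_node, const_node, 1);
  std::cout << "matmul input: " << user->inputs[0]->name << std::endl;       // weight_data
  std::cout << "case input 1: " << case_node->inputs[1]->name << std::endl;  // weight_const
}
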
@@ -36,50 +36,54 @@ class SubgraphConstMigrationPass : public GraphPass { | |||
private: | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get all Const/Data nodes for all subgraph.
/// @param [in] graph: Root compute graph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] all_const_nodes: Const groups of subgraph.
/// @param [out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status ClassifyGraphNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
                          map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
                          map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes);
/// | |||
/// @ingroup ge | |||
/// @brief Get parent_index for Const node migration.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const node will process.
/// @param [out] parent_index: parent index for replace Data.
/// @return true: SUCCESS / false: FAILED
///
bool GetAssociatedNodes(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
                        const NodePtr &const_node, uint32_t &parent_index);
/// | |||
/// @ingroup ge | |||
/// @brief Check parallel node is same for all subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node.
/// @return true: Same / false: not same
///
bool IsParallelNodeSame(const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
                        const NodePtr &const_node, const string &node_key);
/// | |||
/// @ingroup ge | |||
/// @brief Migration subgraph Node to Root | |||
/// @param [in] graph: Root compute graph. | |||
/// @param [in] func_node: functional Node of Case. | |||
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node for migration.
/// @return 0: SUCCESS / others: FAILED
///
Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node,
                          const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
                          map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
                          const NodePtr &const_node, const string &node_key);
/// | |||
/// @ingroup ge | |||
@@ -93,46 +97,42 @@ class SubgraphConstMigrationPass : public GraphPass { | |||
/// @return 0: SUCCESS / others: FAILED | |||
/// | |||
Status MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node,
                        const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
                        const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
                        const string &node_key, uint32_t parent_index);
/// | |||
/// @ingroup ge | |||
/// @brief Append Input Tensor for functional node. | |||
/// @param [in] func_node: functional Node of Case.
/// @param [in/out] parent_index: Parent index for migration.
/// @param [in/out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status AppendParallelNode(const NodePtr &func_node, uint32_t &parent_index,
                          map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes);
/// | |||
/// @ingroup ge | |||
/// @brief Delete Node from subgraph.
/// @param [in] graph: subgraph for process.
/// @param [in] const_node: Node will move to parent.
/// @param [in] data_node: Place holder for Const.
/// @return 0: SUCCESS / others: FAILED
///
Status DetachParallelNode(const ComputeGraphPtr &graph, const NodePtr &const_node, const NodePtr &data_node);
/// | |||
/// @ingroup ge | |||
/// @brief Move Node to Parent Graph. | |||
/// @param [in] graph: Parent compute graph. | |||
/// @param [in] func_node: functional Node of Case. | |||
/// @param [in] const_node: Node will move to parent.
/// @param [in] parent_index: Parent index of Node input.
/// @return 0: SUCCESS / others: FAILED
///
Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node,
                          const NodePtr &const_node, uint32_t parent_index);

bool migration_append_{false};
};
} // namespace ge | |||
#endif // GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ |
@@ -1646,6 +1646,10 @@ Status GraphPrepare::InferShapeForPreprocess() { | |||
if (!options_.train_graph_flag) { | |||
names_to_passes.emplace_back("AssertPass", &assert_pass); | |||
} | |||
SwitchDeadBranchElimination switch_dead_branch_elimination; | |||
names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); | |||
MergePass merge_pass; | |||
names_to_passes.emplace_back("MergePass", &merge_pass); | |||
InferShapePass infer_shape_pass; | |||
names_to_passes.emplace_back("InferShapePass", &infer_shape_pass); | |||
ReplaceWithEmptyConstPass replace_with_empty_const_pass; | |||
@@ -123,11 +123,22 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||
davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); | |||
} | |||
if (!load_flag_) { | |||
auto dump_properties = context.GetDumpProperties(); | |||
if (dump_properties.IsDumpOpen()) { | |||
davinci_model_->SetDumpProperties(dump_properties); | |||
} | |||
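// Query the current runtime device and record it on the model before Init.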
int32_t device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&device_id); | |||
if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
davinci_model_->SetDeviceId(device_id); | |||
GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel failed."); | |||
load_flag_ = true; | |||
} else { | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
    davinci_model_->Id(), davinci_model_->SubModelId()), "KnownNodeTask::Init destroy aicpu kernel failed.");
} | |||
GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName()); | |||
return SUCCESS; | |||
@@ -161,8 +172,9 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node | |||
// set known node flag as true | |||
davinci_model->SetKnownNode(true); | |||
davinci_model->SetId(model.GetModelId()); | |||
davinci_model->SetSubModelId(node->GetOpDesc()->GetId());
GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId()); | |||
GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed."); | |||
@@ -581,42 +581,6 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat | |||
return GRAPH_SUCCESS; | |||
} | |||
graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { | |||
auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
GE_CHECK_NOTNULL(compute_graph); | |||
auto root_graph = compute_graph->GetParentGraph(); | |||
if (root_graph != nullptr) { | |||
GELOGE(GRAPH_PARAM_INVALID, "Input param should not be subgraph"); | |||
return GRAPH_PARAM_INVALID; | |||
} | |||
auto ret = compute_graph->TopologicalSorting(); | |||
if (ret != GRAPH_SUCCESS) { | |||
GELOGE(ret, "Acl topo logical sort failed."); | |||
return ret; | |||
} | |||
ret = compute_graph->InferOriginFormat(); | |||
if (ret != GRAPH_SUCCESS) { | |||
GELOGE(ret, "Acl InferOriginFormat failed."); | |||
return ret; | |||
} | |||
for (auto &node: compute_graph->GetAllNodes()) { | |||
graphStatus ret = ShapeRefiner::InferShapeAndType(node); | |||
if (ret == GRAPH_PARAM_INVALID) { | |||
GELOGW("Can not find infershape func."); | |||
continue; | |||
} else if (ret != GRAPH_SUCCESS) { | |||
GELOGE(ret, "Acl infershape failed."); | |||
return ret; | |||
} | |||
} | |||
return GRAPH_SUCCESS; | |||
} | |||
graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len) { | |||
GE_CHECK_NOTNULL(file); | |||
@@ -78,8 +78,8 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) { | |||
Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode, | |||
ErrorModule error_module) { | |||
if (!is_init_) { | |||
GELOGE(INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState"); | |||
return INTERNAL_ERROR; | |||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState"); | |||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||
} | |||
if ((curr_state_ == JOBSTATE_FAILED) || (curr_state_ == JOBSTATE_KILLED)) { | |||
return SUCCESS; | |||
@@ -26,7 +26,7 @@ extern "C" { | |||
#define ACL_PROF_ACL_API 0x0001 | |||
#define ACL_PROF_TASK_TIME 0x0002 | |||
#define ACL_PROF_AICORE_METRICS 0x0004 | |||
#define ACL_PROF_AICPU_TRACE 0x0008 | |||
#define ACL_PROF_AICPU 0x0008 | |||
#define ACL_PROF_MAX_OP_NAME_LEN 257 | |||
#define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||
@@ -289,34 +289,8 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI | |||
*/ | |||
ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get cube ops from subscription data | |||
* | |||
* @param opInfo [IN] pointer to subscription data | |||
* @param opInfoLen [IN] memory size of subscription data | |||
* @param index [IN] index of op array in opInfo | |||
* | |||
* @retval cube ops of subscription data | |||
* @retval 0 for failed | |||
*/ | |||
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpCubeOps(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get vector ops from subscription data | |||
* | |||
* @param opInfo [IN] pointer to subscription data | |||
* @param opInfoLen [IN] memory size of subscription data | |||
* @param index [IN] index of op array in opInfo | |||
* | |||
* @retval vector ops of subscription data | |||
* @retval 0 for failed | |||
*/ | |||
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpVectorOps(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_PROF_H_ | |||
#endif // INC_EXTERNAL_ACL_PROF_H_ |
@@ -13,6 +13,7 @@ | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||
#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||
@@ -394,7 +394,7 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | |||
// for interface: aclgrphParse | |||
const std::set<std::string> ir_parser_suppported_options = { | |||
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, | |||
OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES}; | |||
OUT_NODES, ENABLE_SCOPE_FUSION_PASSES}; | |||
// for interface: aclgrphBuildInitialize | |||
const std::set<std::string> global_options = {CORE_TYPE, | |||
@@ -102,16 +102,6 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat | |||
/** | |||
* @ingroup AscendCL | |||
* @brief infer shape and data type | |||
* | |||
* @param graph[IN] the graph ready to build | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
graphStatus aclgrphInferShapeAndType(ge::Graph &graph); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief dump graph | |||
* | |||
* @param graph[IN] the graph ready to build | |||
@@ -1,11 +1,17 @@ | |||
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
@@ -28,7 +28,7 @@ | |||
#include "ge/ge_api_error_codes.h" | |||
#if !defined(__ANDROID__) && !defined(ANDROID) | |||
#define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) | |||
#define DOMI_LOGE(fmt, ...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, fmt, ##__VA_ARGS__) | |||
#else | |||
#include <android/log.h> | |||
#if defined(BUILD_VERSION_PERF) | |||
@@ -20,7 +20,7 @@ | |||
#include "graph/ge_error_codes.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#define GE_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) | |||
#define GE_LOGE(fmt, ...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, fmt, ##__VA_ARGS__) | |||
#define GE_LOGI_IF(condition, ...) \ | |||
if ((condition)) { \ | |||
@@ -108,4 +108,5 @@ message DumpData{ | |||
repeated OpOutput output = 3; | |||
repeated OpInput input = 4; | |||
repeated OpBuffer buffer = 5; | |||
string op_name = 6; | |||
} |
@@ -766,6 +766,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node, bool before_sub | |||
TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), | |||
TypeUtils::DataTypeToSerialString(output_tensor->GetOriginDataType()).c_str()); | |||
} | |||
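// Shape inference succeeded: propagate the refreshed output desc to peer input descs right away.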
GE_CHK_STATUS_RET_NOLOG(NodeUtils::UpdatePeerNodeInputDesc(node)); | |||
} else { | |||
GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | |||
return GRAPH_FAILED; | |||
@@ -318,10 +318,17 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
TypeUtils::DataTypeToSerialString(output_tensor->GetOriginDataType()).c_str()); | |||
for (const auto &peer_anchor : out_anchor->GetPeerInDataAnchors()) { | |||
auto peer_anchor_opdesc = peer_anchor->GetOwnerNode()->GetOpDesc();
if (peer_anchor_opdesc == nullptr) {
GELOGE(GRAPH_FAILED, "peer_anchor opdesc is null"); | |||
continue; | |||
} | |||
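// No need to flush when the peer op was created after this node, or when the peer is a Const/ConstOp.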
if (op_desc->GetId() < peer_anchor_opdesc->GetId() || | |||
peer_anchor_opdesc->GetType() == CONSTANT || | |||
peer_anchor_opdesc->GetType() == CONSTANTOP) { | |||
GELOGD("no need to UpdatePeerNodeInputDesc"); | |||
continue; | |||
} | |||
auto peer_input_desc = peer_anchor->GetOwnerNode()->GetOpDesc()->MutableInputDesc(peer_anchor->GetIdx()); | |||
if (peer_input_desc == nullptr) { | |||
GELOGE(GRAPH_FAILED, "peer_input_desc is nullptr"); | |||
@@ -337,22 +344,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
peer_anchor->GetOwnerNode()->GetName().c_str(), peer_anchor->GetIdx(), | |||
TypeUtils::DataTypeToSerialString(peer_input_dtype).c_str()); | |||
} else if ((!peer_input_dims.empty()) && (out_dims != peer_input_dims)) { | |||
GELOGW("current node [%s] [%d]\'th out_shape is [%s].peer input node [%s] [%d]\'th "
       "input_shape is [%s].The two shape should be same! Please check graph and fix it",
       node_ptr->GetName().c_str(), out_anchor->GetIdx(), output_tensor->GetShape().ToString().c_str(),
       peer_anchor->GetOwnerNode()->GetName().c_str(), peer_anchor->GetIdx(),
       peer_input_desc->GetShape().ToString().c_str());
} | |||
GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | |||
peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), | |||
@@ -108,4 +108,5 @@ message DumpData{ | |||
repeated OpOutput output = 3; | |||
repeated OpInput input = 4; | |||
repeated OpBuffer buffer = 5; | |||
string op_name = 6; | |||
} |
@@ -108,4 +108,5 @@ message DumpData{ | |||
repeated OpOutput output = 3; | |||
repeated OpInput input = 4; | |||
repeated OpBuffer buffer = 5; | |||
string op_name = 6;
} |
@@ -108,4 +108,5 @@ message DumpData{ | |||
repeated OpOutput output = 3; | |||
repeated OpInput input = 4; | |||
repeated OpBuffer buffer = 5; | |||
string op_name = 6; | |||
} |
@@ -13,6 +13,7 @@ | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_ENGINE_H__ | |||
#define AICPU_ENGINE_H__ | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -365,6 +365,25 @@ REG_OP(BiasAddGrad) | |||
* 4-D with shape [batch, out_height, out_width, out_channels] | |||
* or [batch, out_channels, out_height, out_width]. | |||
* Gradients with respect to the output of the convolution. | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | out_bckprop | filter | y | |||
------------|-------------|---------|-------- | |||
| Data Type | float16 | float16 | float16 | |||
| |-------------|---------|-------- | |||
| | float32 | float32 | float32 | |||
| |-------------|---------|-------- | |||
| | float64 | float64 | float64 | |||
------------|-------------|---------|-------- | |||
| Format | NCHW | NCHW | NCHW | |||
| | NHWC | HWCN | NHWC | |||
@endverbatim | |||
* For float32 and float64 type, the actual calculation on the chip is based on | |||
* float16. | |||
*\n | |||
* | |||
*@par Attributes: | |||
* Five attributes: | |||
* @li strides: A tuple/list of 4 integers. The stride of the sliding window | |||
@@ -377,8 +396,52 @@ REG_OP(BiasAddGrad) | |||
* channels. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | |||
* "NHWC". Specify the data format of the input and output data. | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| input_size | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Filter | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| out_backprop | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| y(fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
@endverbatim | |||
* In Ascend910, fmap or out_backprop's H and W do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as filter, and has the same format as input_size.
*\n | |||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv2d_backprop_input | |||
*/ | |||
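
As a quick numeric check of the out_backprop size formula above, a small standalone helper (a hypothetical sketch, not part of the op registration):

#include <cstdio>

// One spatial dimension of the documented formula:
// out = (fmap + pad_begin + pad_end - (dilation * (filter - 1) + 1)) / stride + 1
static int BackpropExtent(int fmap, int pad_begin, int pad_end, int filter, int dilation, int stride) {
  return (fmap + pad_begin + pad_end - (dilation * (filter - 1) + 1)) / stride + 1;
}

int main() {
  // 224x224 fmap, 3x3 filter, stride 2, pad 1 on each side, dilation 1 -> 112x112
  std::printf("H = %d, W = %d\n",
              BackpropExtent(224, 1, 1, 3, 1, 2),
              BackpropExtent(224, 1, 1, 3, 1, 2));
  return 0;
}
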
@@ -454,6 +517,21 @@ REG_OP(Conv2DBackpropInputD) | |||
* @li bias: An optional tensor. Must have the same type as "y". | |||
* @li offset_w: An optional 1D tensor for quantized deconvolution. | |||
* Type is int8. Reserved.\n | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | x | filter | bias | y | |||
------------|---------|---------|---------|-------- | |||
| Data Type | float16 | float16 | float16 | float16 | |||
| |---------|---------|---------|-------- | |||
| | int8 | int8 | int32 | int32 | |||
------------|---------|---------|---------|-------- | |||
| Format | NCHW | NCHW | ND | NCHW | |||
@endverbatim | |||
* For int8, a dequant or requant operator must be followed. | |||
*\n | |||
* | |||
*@par Attributes: | |||
* Six attributes: | |||
* @li strides: A tuple or list of 2 integers. The stride of the sliding window | |||
@@ -468,8 +546,51 @@ REG_OP(Conv2DBackpropInputD) | |||
Specify the data format of the input and output data. | |||
* @li offset_x: An optional integer for quantized deconvolution. | |||
* Defaults to "0". | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| x (out_backprop) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Filter | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| y (fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| Offset_x | | [-128, 127] | |||
@endverbatim | |||
* In Ascend910, fmap or out_backprop's H and W do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | |||
*\n | |||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
* When type of x is float16, the type of y must be float16. | |||
* When type of x is int8, the type of y must be int32. | |||
*/ | |||
@@ -502,6 +623,25 @@ REG_OP(Deconvolution) | |||
* [batch, out_height, out_width, out_channels] or [batch, out_channels, | |||
* out_height, out_width]. Gradients with respect to the output of the | |||
* convolution. | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | x | out_backprop | y | |||
------------|---------|--------------|--------- | |||
| Data Type | float16 | float16 | float16 | |||
| |---------|--------------|--------- | |||
| | float32 | float32 | float32 | |||
| |---------|--------------|--------- | |||
| | float64 | float64 | float64 | |||
|-----------|---------|--------------|--------- | |||
| Format | NCHW | NCHW | NCHW | |||
| | NHWC | NHWC | HWCN | |||
@endverbatim | |||
* For float32 and float64 type of x and out_backprop, the actual calculation on the chip
* is based on float16. | |||
*\n | |||
* | |||
*@par Attributes: | |||
* Five attributes: | |||
* @li strides: A tuple/list of 4 integers. The stride of the sliding window | |||
@@ -514,8 +654,52 @@ REG_OP(Deconvolution) | |||
* channels. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | |||
* "NHWC". Specify the data format of the input and output data. | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| x(fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Filter Size | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| out_backprop | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| y | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
@endverbatim | |||
* In Ascend910, out_backprop's H and W do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as x, and has the same format as filter_size.
*\n | |||
* out_backprop_height = (in_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (in_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv2d_backprop_filter | |||
*/ | |||
@@ -1031,9 +1215,7 @@ REG_OP(Conv3DBackpropInputD) | |||
* @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n | |||
*@par Third-party framework compatibility: | |||
* Compatible with the Caffe operator LSTM.
*/ | |||
REG_OP(LSTM) | |||
.INPUT(x, TensorType({DT_FLOAT16})) | |||
@@ -1275,6 +1457,22 @@ REG_OP(Conv3DTransposeD) | |||
* or [out_channels, in_channel, filter_height, filter_width]. | |||
* @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | |||
* @li offset_w: An optional 1D tensor for quantized inference. Reserved. | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | x | filter | bias | y | |||
------------|---------|---------|---------|-------- | |||
| Data Type | float16 | float16 | float16 | float16 | |||
| |---------|---------|---------|-------- | |||
| | int8 | int8 | int32 | int32 | |||
------------|---------|---------|---------|-------- | |||
| Format | NCHW | NCHW | ND | NCHW | |||
| | NHWC | HWCN | | NHWC | |||
@endverbatim | |||
* For int8, a dequant or requant operator must be followed. | |||
*\n | |||
* | |||
*@par Required Attributes: | |||
* @li strides: A required tuple/list of 4 integers. The stride of the sliding | |||
* window for H/W dimension. The index of H/W is same as data_format. | |||
@@ -1293,9 +1491,55 @@ REG_OP(Conv3DTransposeD) | |||
* to [0, 0, 0, 0]. | |||
* @li offset_x: An optional int. Input offset, used for quantized inference. | |||
* Defaults to "0". | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| input_size | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| x (out_backprop) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| filter | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| y (fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| Offset_x | | [-128, 127] | |||
@endverbatim | |||
* In Ascend910, fmap or out_backprop's H and W do not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor of type float16 or int32, and has the same format as
* input_size. | |||
*\n | |||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
*/ | |||
REG_OP(Conv2DTranspose) | |||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1487,6 +1487,9 @@ REG_OP(DecodeBboxV2) | |||
*@par Outputs: | |||
* @li y1: A Tensor. Must have the same type as x. | |||
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
* | |||
*@attention Constraints: | |||
* The upper limit of data on the direction axis is 7040. | |||
*/ | |||
REG_OP(Sort) | |||
.INPUT(x, TensorType({ DT_FLOAT16 })) | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -223,6 +223,83 @@ REG_OP(AvgPool3DD) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3DD) | |||
/** | |||
* @brief Computes AvgPool3DGrad function. | |||
* @par Inputs: | |||
* @li orig_input_shape: An NDHWC tensor of type float16, float32, or double. | |||
* @li grads: An NDHWC tensor of type int32. | |||
* @par Attributes: | |||
* @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||
* @li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor.
* @li pads: List of ints, implicit zero paddings on both sides of the input. | |||
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||
* @li data_format: A string, format of input data. | |||
* @par Outputs: | |||
* @output: A mutable tensor with the same shape and type as "orig_input". | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator AvgPoolGrad. | |||
*/ | |||
REG_OP(AvgPool3DGrad) | |||
.INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.INPUT(grads, TensorType({DT_INT32})) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(ceil_mode, Bool, false) | |||
.ATTR(count_include_pad, Bool, true) | |||
.ATTR(divisor_override, Int, 0) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3DGrad) | |||
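
To make the divisor_override semantics documented above concrete, a tiny standalone sketch (a hypothetical helper, not the actual kernel):

#include <cstdio>
#include <vector>

// Average over one pooling window; divisor_override == 0 falls back to the
// window size, matching the attribute description above.
static float WindowAvg(const std::vector<float> &window, int divisor_override) {
  float sum = 0.0f;
  for (float v : window) {
    sum += v;
  }
  const int divisor = divisor_override > 0 ? divisor_override : static_cast<int>(window.size());
  return sum / static_cast<float>(divisor);
}

int main() {
  const std::vector<float> window = {1.0f, 2.0f, 3.0f, 4.0f};
  std::printf("%.2f %.2f\n", WindowAvg(window, 0), WindowAvg(window, 8));  // 2.50 1.25
  return 0;
}
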
/** | |||
* @brief Performs average pooling on the input. | |||
* @par Inputs: | |||
* @li grads: An NDHWC tensor of type float16. | |||
* @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||
* @li multiplier: An optional tensor of float16. | |||
* @par Attributes: | |||
* @li orig_input_shape: List of ints that has length 5. The size of the original input tensor.
* @li ksize: List of ints that has length 3. The size of the window for each dimension of the input tensor. | |||
* @li strides: List of ints that has length 3. The stride of the sliding window for each dimension of the input tensor.
* @li pads: List of ints, implicit zero paddings on both sides of the input. | |||
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||
* @li data_format: A string, format of input data . \n | |||
* @par Outputs: | |||
* @output: The average pooled output tensor . \n | |||
* @attention Constraints: | |||
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator AvgPool3DGradD. | |||
*/ | |||
REG_OP(AvgPool3DGradD) | |||
.INPUT(grads, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.REQUIRED_ATTR(orig_input_shape, ListInt) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(ceil_mode, Bool, false) | |||
.ATTR(count_include_pad, Bool, true) | |||
.ATTR(divisor_override, Int, 0) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3DGradD) | |||
/** | |||
*@brief Performs max_pool_ext2 on the input . \n | |||
@@ -350,6 +427,31 @@ REG_OP(MaxPool3D) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(MaxPool3D) | |||
/** | |||
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
* The output is of size H x W, for any input size. | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: A Tensor. Must be one of the following data types: | |||
* float16, float32, float64. \n | |||
* @par Attributes: | |||
* @li output_size: A required list of 2 ints | |||
* specifying the size (H,W) of the output tensor. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same data type as "x" \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator AdaptiveMaxPool2d. | |||
*/ | |||
REG_OP(AdaptiveMaxPool2d) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OUTPUT(argmax, TensorType::IndexNumberType()) | |||
.REQUIRED_ATTR(output_size, ListInt) | |||
.OP_END_FACTORY_REG(AdaptiveMaxPool2d) | |||
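
For intuition on how AdaptiveMaxPool2d yields a fixed H x W output for any input size, a minimal sketch of the usual adaptive window mapping (the floor/ceil split used by the PyTorch operator it is compatible with; an assumption here, not taken from this header):

#include <cstdio>

// Half-open window [start, end) of output cell i along one axis.
static int WinStart(int i, int in_len, int out_len) { return (i * in_len) / out_len; }
static int WinEnd(int i, int in_len, int out_len) { return ((i + 1) * in_len + out_len - 1) / out_len; }

int main() {
  const int in_len = 10, out_len = 4;  // any in_len maps onto exactly out_len windows
  for (int i = 0; i < out_len; ++i) {
    std::printf("window %d: [%d, %d)\n", i, WinStart(i, in_len, out_len), WinEnd(i, in_len, out_len));
  }
  return 0;
}
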
/** | |||
* @brief Computes second-order gradients of the maxpooling3d function . \n | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad) | |||
*@brief: DynamicRNN calculation. | |||
*@par Inputs: | |||
*ten inputs: | |||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@par Attributes: | |||
*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
@@ -221,6 +221,8 @@ REG_OP(DynamicRNNGrad) | |||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Third-party framework compatibility: | |||
* Compatible with the TF operator LSTM. | |||
*/ | |||
REG_OP(DynamicRNN) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -258,17 +260,17 @@ REG_OP(DynamicRNN) | |||
*@brief: DynamicLSTMV2 calculation. | |||
*@par Inputs: | |||
*ten inputs: | |||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li cont:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li w_xc_x_static:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li h0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li c0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND .
*@par Attributes: | |||
*@li num_output:An integer identifying the number of projections in the op. Defaults to 0.
@@ -283,6 +285,10 @@ REG_OP(DynamicRNN) | |||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Third-party framework compatibility: | |||
* Compatible with the Caffe operator LSTM. | |||
*@par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicLSTMV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -854,6 +860,56 @@ REG_OP(EmbeddingDenseGrad) | |||
.ATTR(padding_idx, Int, -1) | |||
.ATTR(scale_grad_by_freq, Bool, false) | |||
.OP_END_FACTORY_REG(EmbeddingDenseGrad) | |||
/** | |||
*@brief CommonLSTM calculation. | |||
*@par Inputs: | |||
*eight inputs: \n | |||
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@par Attributes: | |||
*@li activation_alpha:Optional scaling values used by some activation functions. Only an empty list is currently supported.
*@li activation_beta:Optional scaling values used by some activation functions. Only an empty list is currently supported.
*@li activations:The list of activation functions. Only an empty list is currently supported.
*@li clip:A float identifying the cell clip in the op. Defaults to -1.
*@li direction:Specifies whether the RNN is forward, reverse, or bidirectional. Must be one of forward (default), reverse, or bidirectional.
*@li hidden_size:Number of neurons in the hidden layer. Reserved. | |||
*@li input_forget:Couple the input and forget gates if 1. Reserved. | |||
*@par Outputs: | |||
*three outputs: \n | |||
*@li y:The first dimension is the time step and the second dimension is the direction; the remaining dimensions form a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*/ | |||
REG_OP(CommonLSTM) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(activation_alpha, ListFloat, {}) | |||
.ATTR(activation_beta, ListFloat, {}) | |||
.ATTR(activations, ListString, {}) | |||
.ATTR(clip, Float, -1.0) | |||
.ATTR(direction, String, "forward") | |||
.REQUIRED_ATTR(hidden_size, Int) | |||
.ATTR(input_forget, Int, 0) | |||
.OP_END_FACTORY_REG(CommonLSTM) | |||
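For reference, the recurrence each time step of CommonLSTM evaluates can be sketched as below. This assumes ONNX-style peephole ordering (i, o, f) for the optional input "p" and that the gate preactivations already include the x/w, h/r, and bias contributions; it is an illustrative host-side sketch, not the device kernel:

#include <cmath>

struct LstmGates { float i, o, f, c; };  // preactivations for one unit

// One LSTM step for one unit: updates cell state `c` and hidden state `h`.
inline void LstmStep(const LstmGates &g, const float p[3], float &c, float &h) {
  auto sig = [](float v) { return 1.0f / (1.0f + std::exp(-v)); };
  const float it = sig(g.i + p[0] * c);        // input gate (with peephole)
  const float ft = sig(g.f + p[2] * c);        // forget gate (with peephole)
  const float ct = ft * c + it * std::tanh(g.c);
  const float ot = sig(g.o + p[1] * ct);       // output gate (with peephole)
  c = ct;
  h = ot * std::tanh(ct);
}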
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -519,7 +519,8 @@ REG_OP(Unpack) | |||
* @par Inputs: | |||
 * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. Must be one of the
 * following types: float32, double, int32, uint8, int16, int8, int64, uint16,
 * float16, uint32, uint64.
 * float16, uint32, uint64. The input must have a data_format of one of the
 * following: NHWC, NCHW.
* @par Attributes: | |||
* @li ksizes: A required list or tuple. The size of the sliding window for each | |||
@@ -534,7 +535,6 @@ REG_OP(Unpack) | |||
* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | |||
* @li padding: A required string. The type of padding algorithm to use, | |||
support "SAME" or "VALID". \n | |||
* @li data_format: A required string. The format of input, only supported NHWC. \n | |||
* @par Outputs: | |||
* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | |||
@@ -555,7 +555,6 @@ REG_OP(ExtractImagePatches) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(rates, ListInt) | |||
.REQUIRED_ATTR(padding, String) | |||
.ATTR(data_format, String, "NHWC") | |||
.OP_END_FACTORY_REG(ExtractImagePatches) | |||
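The interaction of ksizes, strides, rates, and padding on the output extent follows the usual TensorFlow-style rules, sketched below for one dimension; the helper is hypothetical and for illustration only:

// `rate` dilates the effective window; padding "SAME" vs "VALID" selects
// the output-size formula. Not GE API.
inline int PatchOutDim(int in, int ksize, int stride, int rate, bool same_padding) {
  const int eff = ksize + (ksize - 1) * (rate - 1);  // dilated window extent
  if (same_padding) {
    return (in + stride - 1) / stride;               // ceil(in / stride)
  }
  return (in - eff + stride) / stride;               // ceil((in - eff + 1) / stride), requires in >= eff
}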
/** | |||
@@ -564,6 +563,7 @@ REG_OP(ExtractImagePatches) | |||
* @par Inputs: | |||
* x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n | |||
 * The input must have a data_format of one of the following: NDHWC, NCDHW. \n
* @par Attributes: | |||
* @li ksizes: A required list or tuple. The size of the sliding window for each | |||
@@ -572,7 +572,6 @@ REG_OP(ExtractImagePatches) | |||
* patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. | |||
 * @li padding: A required string. The type of padding algorithm to use,
* support "SAME" or "VALID" . \n | |||
* @li data_format: An optional string. The format of input, only supported NDHWC. \n | |||
* @par Outputs: | |||
* Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * | |||
@@ -591,7 +590,6 @@ REG_OP(ExtractVolumePatches) | |||
.REQUIRED_ATTR(ksizes, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(padding, String) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(ExtractVolumePatches) | |||
/** | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
*/ | |||
#ifndef __CCE_RUNTIME_BASE_H__ | |||
#define __CCE_RUNTIME_BASE_H__ | |||
@@ -81,11 +81,11 @@ typedef enum tagRtLimitType { | |||
} rtLimitType_t; | |||
typedef struct rtExceptionInfo { | |||
uint32_t taskid; | |||
uint32_t streamid; | |||
uint32_t tid; | |||
uint32_t deviceid; | |||
uint32_t retcode; | |||
uint32_t taskid; | |||
uint32_t streamid; | |||
uint32_t tid; | |||
uint32_t deviceid; | |||
uint32_t retcode; | |||
} rtExceptionInfo; | |||
typedef void (*rtErrorCallback)(rtExceptionType); | |||
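A handler consuming the rtExceptionInfo fields above might look like the sketch below. The registration entry point (rtSetTaskFailCallback here) is an assumption; check the runtime headers for the exact API:

#include <cstdio>
// Assumes the runtime header defining rtExceptionInfo is included.
static void OnTaskException(rtExceptionInfo *info) {
  if (info == nullptr) return;
  std::printf("task exception: device=%u stream=%u task=%u tid=%u retcode=%u\n",
              info->deviceid, info->streamid, info->taskid, info->tid, info->retcode);
}
// Registration (hypothetical entry point): rtSetTaskFailCallback(OnTaskException);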