
Fix code check

pull/595/head
dongduo, 4 years ago
commit e9c4f2d926
16 changed files with 528 additions and 232 deletions
  1. ge/CMakeLists.txt (+2, -0)
  2. ge/ge_inference.mk (+1, -0)
  3. ge/ge_runner.mk (+1, -0)
  4. ge/graph/build/memory/binary_block_mem_assigner.cc (+6, -4)
  5. ge/graph/build/memory/block_mem_assigner.cc (+322, -189)
  6. ge/graph/build/memory/block_mem_assigner.h (+30, -6)
  7. ge/graph/build/memory/graph_mem_assigner.cc (+2, -2)
  8. ge/graph/load/new_model_manager/davinci_model.cc (+1, -1)
  9. ge/graph/load/new_model_manager/model_utils.cc (+2, -2)
  10. ge/graph/manager/graph_manager.cc (+4, -1)
  11. ge/graph/passes/atomic_addr_clean_pass.cc (+79, -24)
  12. ge/graph/passes/atomic_addr_clean_pass.h (+5, -0)
  13. ge/ir_build/ge_ir_build.cc (+46, -2)
  14. ge/offline/single_op_parser.cc (+20, -0)
  15. ge/offline/single_op_parser.h (+6, -0)
  16. metadef (+1, -1)

ge/CMakeLists.txt (+2, -0)

@@ -201,6 +201,7 @@ set(TRAIN_SRC_LIST
"host_kernels/sub_kernel.cc"
"host_kernels/transdata_kernel.cc"
"host_kernels/unpack_kernel.cc"
"host_kernels/reformat_kernel.cc"
"graph/passes/folding_pass.cc"
"graph/passes/get_original_format_pass.cc"
"graph/passes/guarantee_const_pass.cc"
@@ -487,6 +488,7 @@ set(INFER_SRC_LIST
"host_kernels/slice_d_kernel.cc"
"host_kernels/dynamic_stitch_kernel.cc"
"host_kernels/identity_kernel.cc"
"host_kernels/reformat_kernel.cc"
"graph/passes/stop_gradient_pass.cc"
"graph/passes/prevent_gradient_pass.cc"
"graph/passes/identity_pass.cc"


ge/ge_inference.mk (+1, -0)

@@ -164,6 +164,7 @@ OMG_HOST_SRC_FILES := \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
host_kernels/identity_kernel.cc \
host_kernels/reformat_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \


ge/ge_runner.mk (+1, -0)

@@ -170,6 +170,7 @@ LIBGE_LOCAL_SRC_FILES := \
host_kernels/sub_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/reformat_kernel.cc \
graph/passes/folding_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/guarantee_const_pass.cc \


ge/graph/build/memory/binary_block_mem_assigner.cc (+6, -4)

@@ -22,7 +22,7 @@ namespace {
const uint32_t kRangeCeilInterval = 2;
const uint32_t kLogBase = 2;
const int64_t kLargeBlockSize = 8388608; // 8 * 1024 * 1024
const int64_t kLargeBlockRangeSize = 10;
const int64_t kLargeBlockRangeSize = 2;
} // namespace

namespace ge {
@@ -73,15 +73,17 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
GELOGE(FAILED, "dividend is 0!");
return FAILED;
}
// Memory sizes are 512-aligned, so the smallest range ceiling never needs to be below 512
int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front();
auto range_number = static_cast<size_t>(
ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase)));
ceil(log(all_memory_size.back() / static_cast<double>(min_memory_size)) / log(kLogBase)));
range_number = (range_number == 0) ? 1 : range_number;
GELOGD("Range number: %zu", range_number);

vector<vector<int64_t>> ranges(range_number);
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
size_t range_number_limit = all_memory_size.size() / range_number;
int64_t range_ceil = all_memory_size[0];
int64_t range_ceil = min_memory_size;
for (size_t i = 1; i <= range_number; i++) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
GELOGE(FAILED, "Multiply result is out of range.");
@@ -114,7 +116,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
range_ceils.push_back(range.back());
}
}
GELOGD("Range ceils: %s", ToString(range_ceils).c_str());
GELOGI("Range ceils: %s", ToString(range_ceils).c_str());

return SUCCESS;
}
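Worked example of the patched range math: GetMemoryRanges now clamps the smallest range ceiling to the 512-byte alignment unit instead of the smallest tensor, which drops tiny, never-hit low ranges. A minimal standalone sketch of the new computation (a sketch only; kMemAlignSize stands in for GE's MEM_ALIGN_SIZE and RangeNumber is an illustrative name):

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Number of binary reuse ranges after this change. all_memory_size is sorted
// ascending and non-empty; sizes are 512-aligned downstream, so a range
// ceiling below MEM_ALIGN_SIZE would never be used.
size_t RangeNumber(const std::vector<int64_t> &all_memory_size) {
  const int64_t kMemAlignSize = 512;  // assumption: GE's MEM_ALIGN_SIZE
  int64_t min_memory_size =
      (all_memory_size.back() > kMemAlignSize) ? kMemAlignSize : all_memory_size.front();
  auto range_number = static_cast<size_t>(
      std::ceil(std::log2(all_memory_size.back() / static_cast<double>(min_memory_size))));
  return (range_number == 0) ? 1 : range_number;
}

// e.g. sizes {300, 8192}: the old code gave ceil(log2(8192 / 300)) = 5 ranges
// starting at 300; the new code gives ceil(log2(8192 / 512)) = 4, starting at 512.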


ge/graph/build/memory/block_mem_assigner.cc (+322, -189)

@@ -65,6 +65,98 @@ void AlignMemOffset(size_t &mem_align_size) {
mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
}

static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
auto left_node_op_desc = left.node->GetOpDesc();
auto right_node_op_desc = right.node->GetOpDesc();
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)
&& (left_node_op_desc->GetId() < right_node_op_desc->GetId())) {
return true;
}
return false;
}

void GetLifeList(const MemoryBlock &block, std::vector<NodeTypeIndex> &life_list, bool child) {
for (auto &node : block.NodeTypeIndexList()) {
life_list.emplace_back(node);
}

if (child) {
for (auto child_block : block.ChildBlockList()) {
if (child_block == nullptr) {
continue;
}
if (block.stream_id_ != child_block->stream_id_ || !block.same_stream_ || !child_block->same_stream_) {
life_list.clear();
return;
}
GetLifeList(*child_block, life_list, child);
}
}
}

bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
if ((left.node == nullptr) || (right.node == nullptr)) {
return true;
}
auto left_node_op_desc = left.node->GetOpDesc();
auto right_node_op_desc = right.node->GetOpDesc();
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) {
if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) {
if (left.life_time_end >= static_cast<size_t>(right_node_op_desc->GetId())) {
return true;
}
} else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) {
return true;
} else {
if (right.life_time_end >= static_cast<size_t>(left_node_op_desc->GetId())) {
return true;
}
}
}
return false;
}

///
/// When the child blocks' life times do not cross the parent block's, they can be reused (same stream only).
/// |-----------------------------parent block---------------------|
/// |------child block1--------------||------child block2------|
/// |--child block1-1-|
///
bool CanIntervalLifeReuse(MemoryBlock &parent_block, MemoryBlock &child_block) {
// judge by interval life time; only blocks on the same stream can be judged this way
if (parent_block.stream_id_ != child_block.stream_id_ || !parent_block.same_stream_ || !child_block.same_stream_
|| parent_block.NodeTypeIndexList().empty() || child_block.NodeTypeIndexList().empty()) {
return false;
}

// quick pre-check using the first and last nodes
if (CrossLifeTime(parent_block.NodeTypeIndexList().front(), child_block.NodeTypeIndexList().front())) {
return false;
}
if (CrossLifeTime(parent_block.NodeTypeIndexList().back(), child_block.NodeTypeIndexList().back())) {
return false;
}

std::vector<NodeTypeIndex> life_list;
GetLifeList(parent_block, life_list, false);
GetLifeList(child_block, life_list, true);
if (life_list.empty()) {
return false;
}
std::sort(life_list.begin(), life_list.end(), CompareLifeTime);
size_t pre_life_end = 0;
for (auto &node : life_list) {
auto node_op_desc = node.node->GetOpDesc();
if (node_op_desc != nullptr && pre_life_end >= static_cast<size_t>(node_op_desc->GetId())) {
// life time cross
return false;
}
pre_life_end = node.life_time_end;
}
GELOGI("Block size[%zu, %zu] life time are not cross.", parent_block.Size(), child_block.Size());
return true;
}

void MemoryBlock::SetHeadOffset(size_t offset) {
head_offset_ = offset;
size_t child_offset = head_offset_;
@@ -125,20 +217,12 @@ size_t MemoryBlock::AlignSize() const {
return align_block_size;
}

bool MemoryBlock::IsSameLabel(std::string &first_batch_label) {
if (node_type_index_list_.empty()) {
bool MemoryBlock::IsSameBatchLabel() {
// only same batch label can reuse
if (batch_label_.empty() || node_type_index_list_.empty()) {
return false;
}

auto node_op_desc = node_type_index_list_[0].node->GetOpDesc();
if (node_op_desc == nullptr) {
return false;
}
// not every op has ATTR_NAME_BATCH_LABEL; no need to check the return value, only the out parameter
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, first_batch_label);
if (first_batch_label.empty()) {
return false;
}
bool all_same_label = true;
for (size_t index = 1; index < node_type_index_list_.size(); ++index) {
if (node_type_index_list_[index].node == nullptr) {
@@ -147,8 +231,9 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) {
std::string batch_label;
auto index_op_desc = node_type_index_list_[index].node->GetOpDesc();
GE_IF_BOOL_EXEC(index_op_desc == nullptr, continue);
// not every op has ATTR_NAME_BATCH_LABEL; no need to check the return value, only the out parameter
(void)ge::AttrUtils::GetStr(index_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
if (first_batch_label != batch_label) {
if (batch_label_ != batch_label) {
all_same_label = false;
break;
}
@@ -197,7 +282,7 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi
}

void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) {
if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) {
if (CanNotLifeReuse(this) || CanNotLifeReuse(block) || (batch_label_ != block->batch_label_)) {
return;
}
if (block->continuous_block_) {
@@ -207,16 +292,27 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_
MemoryBlock *parent = nullptr;
MemoryBlock *child = nullptr;
// merge small block to large block
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) {
if ((child_offset_ + block->AlignSize()) <= AlignSize()) {
parent = this;
child = block;
} else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) {
parent = block;
child = this;
// no-align sizes: 802816 + 802816 = 1605632, can reuse
// after 32-byte align: 802848 + 802848 > 1605664, can't reuse
// after 512-byte align: 803328 + 803328 > 1606144, can't reuse
// so with one extra MEM_ALIGN_SIZE, 803328 + 803328 = 1606144 + 512, can reuse
if ((child_offset_ + block->AlignSize()) <= (AlignSize() + MEM_ALIGN_SIZE)) {
parent = this;
child = block;
} else if ((block->child_offset_ + AlignSize()) <= (block->AlignSize() + MEM_ALIGN_SIZE)) {
parent = block;
child = this;
}

if ((parent != nullptr) && (child != nullptr)) {
// Different streams must use stream dependency to judge the life cycle
// For the same stream, a block with child blocks has all of their life times judged in CanIntervalLifeReuse
bool can_block_life_reuse = (child->child_blocks_.empty()
&& (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()));
if (!can_block_life_reuse && !CanIntervalLifeReuse(*parent, *child)) {
return;
}
}
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) {

parent->child_blocks_.emplace_back(child);
parent->child_offset_ += child->AlignSize();
child->deleted_block_ = true;
@@ -261,6 +357,7 @@ size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &tota
void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id,
std::map<int64_t, size_t> &depend_stream_life, DependStreamLife &total_node_depend_stream_life) {
GE_CHECK_NOTNULL_EXEC(node, return);
GE_CHECK_NOTNULL_EXEC(org_node, return);
auto node_desc = node->GetOpDesc();
GE_CHECK_NOTNULL_EXEC(node_desc, return);
auto node_id = node_desc->GetId();
@@ -415,12 +512,60 @@ BlockMemAssigner::~BlockMemAssigner() {
}
}

void GetMaxBatchAllMemorySize(std::map<std::string, vector<int64_t>> &batch_all_memory_size,
std::map<std::string, int64_t> batch_total_size, vector<int64_t> &all_memory_size,
std::string &max_batch_label) {
// use the max batch's memory sizes to build the reuse ranges
int64_t max_batch_size = 0;
for (const auto &it : batch_total_size) {
GELOGI("Batch[%s] total memory size[%ld]", it.first.c_str(), it.second);
// no batch label
if (it.first.empty()) {
continue;
}
if (it.second > max_batch_size) {
max_batch_size = it.second;
max_batch_label = it.first;
}
}
GELOGI("Max batch[%s] total memory size[%ld]", max_batch_label.c_str(), max_batch_size);

for (const auto &it : batch_all_memory_size) {
if (it.first.empty() || (it.first == max_batch_label)) {
all_memory_size.insert(all_memory_size.end(), it.second.begin(), it.second.end());
}
}
// all_memory_size can't be empty
if (all_memory_size.empty()) {
all_memory_size.emplace_back(MEM_ALIGN_SIZE);
}
sort(all_memory_size.begin(), all_memory_size.end());
GELOGD("All memory size: %s", ToString(all_memory_size).c_str());

for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
if (*iter == 0) {
iter = all_memory_size.erase(iter);
} else {
++iter;
}
}
}

void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
vector<int64_t> temp;
std::map<std::string, vector<int64_t>> batch_all_memory_size;
std::map<std::string, int64_t> batch_total_size;
for (const NodePtr &n : compute_graph_->GetAllNodes()) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);

if (CheckIsZeroMemNodeType(node_op_desc->GetType())) {
continue;
}

std::string batch_label;
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);

if (node_op_desc->GetType() == ATOMICADDRCLEAN) {
atomic_addr_clean_id_ = node_op_desc->GetId();
}
@@ -434,9 +579,14 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
if (!reuse_input) {
int64_t size = 0;
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
if (anchor_to_symbol_.empty()) {
all_memory_size.emplace_back(size);
batch_all_memory_size[batch_label].emplace_back(size);
if (batch_total_size.find(batch_label) == batch_total_size.end()) {
batch_total_size[batch_label] = size;
} else {
batch_total_size[batch_label] += size;
}

if (!anchor_to_symbol_.empty()) {
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString());
if (iter1 == anchor_to_symbol_.end()) {
continue;
@@ -452,23 +602,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
}
}
temp.clear();
GetNodeWorkSpaceSize(n, temp);
all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end());
}
for (const auto &pair : symbol_size_) {
all_memory_size.emplace_back(pair.second);
}
sort(all_memory_size.begin(), all_memory_size.end());
GELOGD("All memory size: %s", ToString(all_memory_size).c_str());

for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
if (*iter == 0) {
iter = all_memory_size.erase(iter);
} else {
++iter;
}
GetNodeWorkSpaceSize(n, temp, batch_total_size[batch_label]);
batch_all_memory_size[batch_label].insert(batch_all_memory_size[batch_label].end(), temp.begin(), temp.end());
}

GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_);
GetMaxBatchAllMemorySize(batch_all_memory_size, batch_total_size, all_memory_size, max_batch_label_);
InitReuseFlag();
PrintSymbolMap();
}
@@ -529,16 +667,6 @@ bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const Me
bool can_reuse = false;
if (reusable_block.Size() == block_size) {
can_reuse = true;
} else {
string key = std::to_string(reusable_block.Size());
key += "_" + std::to_string(reusable_block.stream_id_);
key += "_" + std::to_string(reusable_block.memory_type_);
auto it = reusable_block_counts.find(key);
GE_IF_BOOL_EXEC((it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) &&
(reusable_block.Size() > block_size),
can_reuse = true;
GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu",
reusable_block.Size(), block_size););
}
return can_reuse;
}
@@ -860,17 +988,26 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null.");
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr);
std::string batch_label;
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
if (batch_label.empty() || (batch_label == max_batch_label_)) {
size_t align_size = real_size;
AlignMemOffset(align_size);
theory_memory_size_ += align_size;
if (theory_memory_size_ > theory_min_memory_size_) {
theory_min_memory_size_ = theory_memory_size_;
}
}

bool is_reuse_memory = false;
string ge_disable_reuse_mem_env = "0";
(void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env);
if (ge_disable_reuse_mem_env != "1") {
if (ge_disable_reuse_mem_env_ != "1") {
bool reuse_mem_flag = (mem_type == kOutput) ? IsPreReuse(n, out_index) :
!((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]);
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) &&
!node_op_desc->HasAttr(kOpNoReuseMem) && reuse_mem_flag && is_op_reuse_mem;
auto stream_id = node_op_desc->GetStreamId();
if (is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty()) {
bool do_reuse = is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty();
if (do_reuse) {
auto stream_id = node_op_desc->GetStreamId();
for (auto it = reusable_blocks_[memory_type][stream_id].rbegin();
it != reusable_blocks_[memory_type][stream_id].rend(); ++it) {
MemoryBlock *reusable_block = *it;
@@ -879,15 +1016,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
GELOGI("Unreusable block.");
continue;
}
std::string batch_label;
if (reusable_block->IsSameLabel(batch_label)) {
std::string op_label;
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label);
if (batch_label != op_label) {
GELOGI("label diff, op name %s", node_op_desc->GetName().c_str());
continue;
}
}
GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue);

// A node can reuse blocks of the same stream and preorder streams
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) {
@@ -914,10 +1043,11 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
// Data and netoutput need zero copy block
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);

block->Init(real_size, mem_type, n, out_index, no_align_size);
block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId());
block->stream_id_ = node_op_desc->GetStreamId();
block->ref_count_++;
block->continuous_block_ = continuous;
block->batch_label_ = batch_label;
if (mem_type == kOutput) {
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString());
if (iter != anchor_to_symbol_.end()) {
@@ -945,6 +1075,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
return nullptr;
}

if (CheckIsZeroMemNodeType(n->GetType())) {
zero_memory_list_.emplace_back(n, kOutput, index);
continue;
}

int64_t size = 0;
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) {
GELOGI("Get size failed");
@@ -957,9 +1092,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
// only apply total size in first block
if (index != 0) {
zero_memory_list_.emplace_back(n, kOutput, index);
}

if (index == 0) {
} else {
NodeIndexIO node_index_io(n, index, kOut);
auto iter = anchor_to_symbol_.find(node_index_io.ToString());
if (iter != anchor_to_symbol_.end()) {
@@ -972,6 +1105,10 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
}
}

if (total_size == 0) {
return nullptr;
}

auto block_size = GetBlockSize(total_size, ranges);
GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(),
total_size, block_size);
@@ -1119,15 +1256,28 @@ bool IsKnownSubgraphData(const NodePtr &node) {
return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX);
}

void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) {
void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory,
bool same_stream) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null.");
GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory");
GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory");
--to_release->ref_count_;
if (!same_stream) {
to_release->same_stream_ = false;
}
if (to_release->ref_count_ == 0) {
to_release->SetLifeTimeEnd(life_time_);
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
if (to_release->reuse_mem_ && !to_release->RealSizeList().empty()) {
if (to_release->batch_label_.empty() || (to_release->batch_label_ == max_batch_label_)) {
size_t align_size = to_release->RealSizeList().back();
AlignMemOffset(align_size);
theory_memory_size_ -= align_size;
}
}
if (to_release->same_stream_) {
to_release->SetLifeTimeEnd(life_time_);
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
}
}
}

@@ -1167,10 +1317,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
node_type_indexs.back().node->GetName().c_str());

if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())) &&
(node->GetOpDesc()->GetStreamId() == block->stream_id_)) {
ReleaseMemory(block, reusable_memory);
if (block->ref_count_ == 0) {
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
if (block->ref_count_ == 0 && block->same_stream_) {
SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
}
}
@@ -1328,7 +1477,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
iter->second[stream_id].clear();
}
vector<int64_t> temp;
GetNodeWorkSpaceSize(n, temp);
int64_t total_size = 0;
GetNodeWorkSpaceSize(n, temp, total_size);
vector<int64_t> workspace_bytes;
vector<int64_t> tvm_workspace_memory_type;
bool has_tvm_workspace_mem_type_attr =
@@ -1380,9 +1530,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
(void)mem_block; // Fix warning
}

bool merge_dynamic_batch = false;
GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks());
GE_IF_BOOL_EXEC((!(ge_disable_reuse_mem_env_ == "1") && !merge_dynamic_batch), ReuseBlocksByLifeTime(ranges.size()));
GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), ReuseBlocksByLifeTime(ranges.size()));
AssignContinuousBlocks();
ResizeMemoryBlocks();

@@ -1402,92 +1550,19 @@ void BlockMemAssigner::CheckWorkspaceReuse(const vector<bool> &workspace_reuse_f
}
}

void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector<int64_t> &workspace_memory) {
void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector<int64_t> &workspace_memory,
int64_t &total_size) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null.");
vector<int64_t> workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes();

GELOGD("node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size());
for (int64_t byte_size : workspace_byte_nums) {
workspace_memory.emplace_back(byte_size);
total_size += byte_size;
GELOGD("push back size:%ld", byte_size);
}
}

// descending order
static bool CompareBlockMaxSize(MemoryBlock *left, MemoryBlock *right) {
if (left == nullptr || right == nullptr) {
return false;
}
auto left_max_size = std::max_element(left->RealSizeList().begin(), left->RealSizeList().end());
if (left_max_size != left->RealSizeList().end()) {
auto right_max_size = std::max_element(right->RealSizeList().begin(), right->RealSizeList().end());
if (right_max_size == right->RealSizeList().end() || (*left_max_size > *right_max_size)) {
return true;
}
}
return false;
}

void MergeBlocks(std::vector<MemoryBlock *> &dest, std::vector<MemoryBlock *> &src) {
for (size_t i = 0; i < dest.size(); ++i) {
if (i >= src.size()) {
return;
}
if (dest[i] != nullptr && src[i] != nullptr) {
if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) {
GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.",
i, dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_);
continue;
}
for (auto &symbol : src[i]->SymbolList()) {
dest[i]->AddSymbol(symbol);
}
for (size_t j = 0; j < src[i]->NodeTypeIndexList().size(); ++j) {
dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j],
src[i]->RealSizeList()[j],
src[i]->NoAlignSizeList()[j]);
src[i]->deleted_block_ = true;
}
}
}
}

bool BlockMemAssigner::MergeDynamicBatchBlocks() {
bool merged = false;
std::map<std::string, std::vector<MemoryBlock *>> dynamic_batch_blocks;
for (auto block : memory_blocks_) {
if (block == nullptr) {
continue;
}
std::string batch_label;
if (block->IsSameLabel(batch_label)) {
dynamic_batch_blocks[batch_label].emplace_back(block);
}
}

auto it = dynamic_batch_blocks.begin();
auto it_max = it;

// find max block counts
for (; it != dynamic_batch_blocks.end(); ++it) {
if (it->second.size() > it_max->second.size()) {
it_max = it;
}
std::sort(it->second.begin(), it->second.end(), CompareBlockMaxSize);
}
if (it_max != dynamic_batch_blocks.end()) {
GELOGD("MergeDynamicBatch %s block counts %zu", it_max->first.c_str(), it_max->second.size());
}
for (it = dynamic_batch_blocks.begin(); it != dynamic_batch_blocks.end(); ++it) {
if (it != it_max) {
GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str());
MergeBlocks(it_max->second, it->second);
merged = true;
}
}
return merged;
}

// ascending order
static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) {
if (left == nullptr || right == nullptr) {
@@ -1597,38 +1672,93 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) {
}
}

void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) {
if (block.memory_type_ == RT_MEMORY_HBM) {
if (block.first_continuous_block_) {
mem_offset += MEM_ALIGN_SIZE;
}
block.Resize();
block.SetHeadOffset(mem_offset);
mem_offset += block.Size();
block.SetTailOffset(mem_offset - 1);
} else if (block.memory_type_ == RT_MEMORY_P2P_DDR) {
if (block.first_continuous_block_) {
p2p_mem_offset += MEM_ALIGN_SIZE;
}
block.Resize();
block.SetHeadOffset(p2p_mem_offset);
p2p_mem_offset += block.Size();
block.SetTailOffset(p2p_mem_offset - 1);
}
}

bool DynamicBatchBlockReuse(MemoryBlock &block) {
return (block.IsSameBatchLabel() && block.reuse_mem_);
}

///
/// @ingroup domi_omg
/// @brief traverse memory size, resize, calculate offset
/// @param [in&out] memory_blocks_ memory block, after calculating offset
///
void BlockMemAssigner::ResizeMemoryBlocks() {
for (auto &memory_block : memory_blocks_) {
if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) {
continue;
}
if (memory_block->memory_type_ == RT_MEMORY_HBM) {
if (memory_block->first_continuous_block_) {
mem_offset_ += MEM_ALIGN_SIZE;
}
memory_block->Resize();
memory_block->SetHeadOffset(mem_offset_);
mem_offset_ += memory_block->Size();
memory_block->SetTailOffset(mem_offset_ - 1);
} else if (memory_block->memory_type_ == RT_MEMORY_P2P_DDR) {
if (memory_block->first_continuous_block_) {
p2p_mem_offset_ += MEM_ALIGN_SIZE;
}
memory_block->Resize();
memory_block->SetHeadOffset(p2p_mem_offset_);
p2p_mem_offset_ += memory_block->Size();
memory_block->SetTailOffset(p2p_mem_offset_ - 1);
}
}
GELOGD("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.",
mem_offset_, p2p_mem_offset_);
}

///
/// @ingroup domi_omg
/// @brief lay out the max batch's blocks; other batches reuse that memory
/// @param [in&out] memory_blocks_ memory block, after calculating offset
///        |-dynamic batch block batch1|
///        |-dynamic batch block batch2----|
///        |-dynamic batch block batch3--|
///
void BlockMemAssigner::ResizeDynamicBatchBlocks() {
std::map<std::string, std::vector<MemoryBlock *>> dynamic_batch_blocks;
for (auto block : memory_blocks_) {
if (block == nullptr) {
continue;
}
// when memory is not reusable, it can't be reused across different branches
if (DynamicBatchBlockReuse(*block)) {
dynamic_batch_blocks[block->batch_label_].emplace_back(block);
}
}

size_t max_mem_offset = mem_offset_;
size_t max_p2p_mem_offset = p2p_mem_offset_;
for (auto &batch_blocks : dynamic_batch_blocks) {
size_t mem_offset = mem_offset_;
size_t p2p_mem_offset = p2p_mem_offset_;
for (auto block : batch_blocks.second) {
if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) {
continue;
}
AddBlockMemOffset(mem_offset, p2p_mem_offset, *block);
}
if (mem_offset > max_mem_offset) {
max_mem_offset = mem_offset;
}
if (p2p_mem_offset > max_p2p_mem_offset) {
max_p2p_mem_offset = p2p_mem_offset;
}
GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset);
}
mem_offset_ = max_mem_offset;
p2p_mem_offset_ = max_p2p_mem_offset;
}

///
/// @ingroup domi_omg
/// @brief traverse memory size, resize, calculate offset
/// @param [in&out] memory_blocks_ memory block, after calculating offset
///        |-not dynamic batch block-||-dynamic batch block batch1| |-zero copy block-|
///        |-not dynamic batch block-||-dynamic batch block batch2----||-zero copy block-|
///        |-not dynamic batch block-||-dynamic batch block batch3--| |-zero copy block-|
///
void BlockMemAssigner::ResizeMemoryBlocks() {
for (auto &memory_block : memory_blocks_) {
if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_
|| DynamicBatchBlockReuse(*memory_block)) {
continue;
}

AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block);
}
ResizeDynamicBatchBlocks();
GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu, "
"theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_);
}

///
@@ -1641,7 +1771,7 @@ void BlockMemAssigner::ResizeMemoryBlocks() {
/// @return Status result
///
void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
size_t real_size, size_t no_align_size, bool child_block) {
size_t real_size, size_t no_align_size, int32_t child_block_level) {
ge::OpDescPtr op_desc = node_type.node->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null.");
string graph_name = node_type.node->GetOwnerComputeGraph()->GetName();
@@ -1689,14 +1819,15 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
}
op_desc->SetWorkspace(workspace_list);
}
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]"
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] "
"life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(),
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_,
block->continuous_block_, block->deleted_block_, node_type.ref_input);
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_,
block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input,
block->batch_label_.c_str());
}

void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) {
if (block == nullptr) {
return;
}
@@ -1709,9 +1840,14 @@ void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
real_size = block->RealSizeList()[index];
no_align_size = block->NoAlignSizeList()[index];
}
SetOffsetSize(node_type_index, block, real_size, no_align_size, child_block);
SetOffsetSize(node_type_index, block, real_size, no_align_size, child_block_level);
index++;
}

child_block_level++;
for (MemoryBlock *child_block : block->ChildBlockList()) {
SetBlockOpMemOffset(child_block, child_block_level);
}
}

void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) {
@@ -1724,16 +1860,13 @@ void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) {
continue;
}

SetBlockOpMemOffset(memory_block, false);
for (MemoryBlock *child_block : memory_block->ChildBlockList()) {
SetBlockOpMemOffset(child_block, true);
}
SetBlockOpMemOffset(memory_block, 0);
}

if (!is_zero_copy) {
for (const NodeTypeIndex &node_type_index : zero_memory_list_) {
MemoryBlock block(0, 0);
SetOffsetSize(node_type_index, &block, 0, 0, false);
SetOffsetSize(node_type_index, &block, 0, 0, 0);
}
}
}
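The heart of the new CanIntervalLifeReuse is its overlap scan: merge the parent's and children's life times, sort by the id of the op that allocates each block, and reject reuse if any allocation starts before the previous life time ends. A minimal sketch of that scan with simplified types (a sketch only; LifeTimesDisjoint and the (start, end) pairs are illustrative, not GE's API):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Each pair is (id of the allocating op, id of the last op using the block).
// After sorting by start id, reuse is possible only if every interval begins
// strictly after the previous one ended, mirroring the loop in
// CanIntervalLifeReuse.
bool LifeTimesDisjoint(std::vector<std::pair<size_t, size_t>> life_list) {
  std::sort(life_list.begin(), life_list.end());
  size_t pre_life_end = 0;
  for (const auto &life : life_list) {
    if (pre_life_end >= life.first) {
      return false;  // life time cross
    }
    pre_life_end = life.second;
  }
  return true;
}

// e.g. a parent living over ops [1,5] and children over [6,8] and [9,12] are
// disjoint and may share memory; a child over [4,7] crosses the parent.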


ge/graph/build/memory/block_mem_assigner.h (+30, -6)

@@ -65,6 +65,7 @@ class MemoryBlock {
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
same_stream_(true),
input_index_(0),
continuous_block_(false),
first_continuous_block_(false),
@@ -85,10 +86,14 @@ class MemoryBlock {
symbol_list_.clear();
}

void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) {
void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size,
int64_t stream_id) {
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
node_type_index_list_.emplace_back(node, type, out_index, false);
if (stream_id != stream_id_) {
same_stream_ = false;
}
}
size_t Size() const { return block_size_; }

@@ -106,6 +111,12 @@ class MemoryBlock {
node_type_index_list_.emplace_back(node_type_index);
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
if ((node_type_index.node != nullptr) && (node_type_index.node->GetOpDesc() != nullptr)) {
auto stream_id = node_type_index.node->GetOpDesc()->GetStreamId();
if (stream_id != stream_id_) {
same_stream_ = false;
}
}
}

void AddSymbol(const std::string &symbol) {
@@ -122,7 +133,7 @@ class MemoryBlock {

std::string String();

bool IsSameLabel(std::string &first_batch_label);
bool IsSameBatchLabel();

void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life);

@@ -142,6 +153,7 @@ class MemoryBlock {
int64_t stream_id_;
bool deleted_block_;
bool reuse_mem_;
bool same_stream_;
uint32_t input_index_;
bool continuous_block_;
bool first_continuous_block_;
@@ -149,6 +161,7 @@ class MemoryBlock {
bool is_zero_copy_;
std::map<int64_t, size_t> depend_stream_life_;
int64_t memory_type_;
std::string batch_label_;
private:
size_t block_size_;
std::vector<size_t> real_size_list_;
@@ -209,7 +222,7 @@ class BlockMemAssigner : public MemAssigner {

void GetOutAndWorkSpaceMem(std::vector<int64_t> &all_memory_size);

void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory);
void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory, int64_t &total_size);

///
/// @ingroup GE
@@ -353,7 +366,7 @@ class BlockMemAssigner : public MemAssigner {
/// @return void
/// @author
///
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory, bool same_stream = true);

///
/// @ingroup GE
@@ -379,11 +392,11 @@ class BlockMemAssigner : public MemAssigner {

///
/// @ingroup GE
/// @brief Merge memory blocks between different batches
/// @brief Resize memory blocks for each batch
/// @return void
/// @author
///
bool MergeDynamicBatchBlocks();
void ResizeDynamicBatchBlocks();

void AssignContinuousBlocks();

@@ -436,6 +449,17 @@ class BlockMemAssigner : public MemAssigner {

int64_t atomic_addr_clean_id_ = 0;

size_t theory_min_memory_size_ = 0;

size_t theory_memory_size_ = 0;

std::string max_batch_label_;

///
/// @ [stream1][nodeid]
/// @[nodeid] [stream2][nodeid]
/// @ [stream2][nodeid]
///
DependStreamLife total_node_depend_stream_life_;
};
} // namespace ge
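The two counters added above implement a lower-bound statistic: ApplyMemory adds each 512-aligned real size to theory_memory_size_ and records the peak in theory_min_memory_size_, while ReleaseMemory subtracts on free, so the peak of the running sum is the least memory any allocator could achieve for this schedule. A toy sketch of the invariant (a sketch only; TheoryMemCounter is illustrative and kMemAlignSize stands in for GE's MEM_ALIGN_SIZE):

#include <algorithm>
#include <cstddef>

// Peak-of-running-sum bookkeeping behind theory_min_memory_size_.
struct TheoryMemCounter {
  size_t live = 0;  // theory_memory_size_: sum of live aligned sizes
  size_t peak = 0;  // theory_min_memory_size_: maximum of `live` over time

  static size_t Align(size_t size) {
    const size_t kMemAlignSize = 512;
    return (size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize;
  }
  void OnApply(size_t real_size) {  // mirrors the ApplyMemory-side update
    live += Align(real_size);
    peak = std::max(peak, live);
  }
  void OnRelease(size_t real_size) {  // mirrors the ReleaseMemory-side update
    live -= Align(real_size);
  }
};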


ge/graph/build/memory/graph_mem_assigner.cc (+2, -2)

@@ -1646,9 +1646,9 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve
}
string atomic_mem_size_str = ss.str();

GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]",
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str());
}
return SUCCESS;
}


ge/graph/load/new_model_manager/davinci_model.cc (+1, -1)

@@ -2203,7 +2203,7 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data
void *mem_addr = data.second.GetBasicAddr();
void *data_buf_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(data_buf.data));
uint64_t data_buf_length = data_buf.length;
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]",
GELOGI("CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]",
runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length);
GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind));
}


ge/graph/load/new_model_manager/model_utils.cc (+2, -2)

@@ -61,7 +61,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) {
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i);
continue);

GELOGI("[IMAS]GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
GELOGI("GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
v_input_size.push_back(tensor_size);
}

@@ -96,7 +96,7 @@ vector<int64_t> ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) {
GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i);
continue);

GELOGI("[IMAS]GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
GELOGI("GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
v_output_size.push_back(tensor_size);
}



ge/graph/manager/graph_manager.cc (+4, -1)

@@ -2467,7 +2467,6 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
GetContext().SetSessionId(session_id);
GetThreadLocalContext() = ge_context;
graph_manager->UpdateLocalOmgContext(root_graph_id);

ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph();
const std::string &engine_name = sub_graph_info_ptr->GetEngineName();
GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu",
@@ -2475,6 +2474,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
pthread_self());
GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore");
GE_CHECK_NOTNULL(compute_graph_tmp);
if (!AttrUtils::SetInt(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_ID, root_graph_id)) {
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id);
return FAILED;
}
compute_graph_tmp->SetSessionID(session_id);
Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp,
compute_graph,


ge/graph/passes/atomic_addr_clean_pass.cc (+79, -24)

@@ -74,10 +74,87 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
return SUCCESS;
}

// only HCCL may mark atomic from the ops kernel for now, and HCCL's atomic applies to all inputs
bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) {
// 1.Check if isAtomic attrs exist for HCOM
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance();
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
GELOGW("GELib not initialized, atomic from ops kernel judge false, node_name: %s", node->GetName().c_str());
return false;
}

OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj();
vector<OpInfo> op_info_vec = ops_kernel_manager.GetOpsKernelInfo(node->GetType());
for (const auto &op_info : op_info_vec) {
if (op_info.isAtomic) {
// check peer input is DATA
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
if (in_data_anchor->GetPeerOutAnchor() != nullptr &&
in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) {
auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode();
if (peer_in_node->GetType() == DATA) {
GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str());
return false;
}
}
}
GELOGI("Recognized atomic op %s from %s engine.", node->GetName().c_str(), op_info.engine.c_str());
hcom_node_vec_.push_back(node);
return true;
}
}
return false;
}

bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index) {
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
if (out_data_anchor == nullptr) {
return false;
}

for (auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) {
auto output_node = input_anchor->GetOwnerNode();
// only HCCL may mark atomic from the ops kernel now, and HCCL's atomic applies to all inputs
// HCCL's attr ATOMIC_ATTR_INPUT_INDEX is marked in CalcOpRunningParam and can't be read here
if (CheckAtomicFromOpsKernel(output_node)) {
return true;
}
}
return false;
}

bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) {
OpDescPtr op_desc = node->GetOpDesc();
std::map<string, std::map<int, int>> node_workspace_offset;
bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) {
std::vector<int64_t> atomic_output_index;
(void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
bool is_all_output_peer_also_atomic = true;
for (const auto &output_index : atomic_output_index) {
if (!IsOutputIndexPeerInputAtomic(node, output_index)) {
is_all_output_peer_also_atomic = false;
break;
}
}
if (is_all_output_peer_also_atomic) {
GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str());
return true;
}
}
return false;
}

Status AtomicAddrCleanPass::HandleLoopGraph(ComputeGraphPtr &graph, const vector<NodePtr> &atomic_node_vec) {
// Loop graph: insert a clean node after each atomic node
int index = 0;
for (const auto &node : atomic_node_vec) {
if (CheckSkipInsertInLoopGraph(node)) {
continue;
}

// Insert atomic clean op
NodePtr clean_addr_node = InsertAtomicAddrCleanNode(graph);
if (clean_addr_node == nullptr) {
@@ -249,32 +326,10 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
return false;
}
// 1.Check if isAtomic attrs exist for HCOM
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance();
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
GELOGW("GELib not initialized");
return false;
if (CheckAtomicFromOpsKernel(node)) {
return true;
}

OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj();
vector<OpInfo> op_info_vec = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType());
for (const auto &op_info : op_info_vec) {
if (op_info.isAtomic) {
GELOGI("Recognized atomic op %s from DNN_HCCL engine.", op_desc->GetName().c_str());
// check peer input is DATA
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
if (in_data_anchor->GetPeerOutAnchor() != nullptr &&
in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) {
auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode();
if (peer_in_node->GetType() == DATA) {
GELOGI("Recognized atomic op %s from DNN_HCCL engine and input is DATA.", op_desc->GetName().c_str());
return false;
}
}
}
hcom_node_vec_.push_back(node);
return true;
}
}
// 2.Check atomic attr in node
std::map<string, std::map<int, int>> node_workspace_offset;
bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
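Net effect of CheckSkipInsertInLoopGraph: in a loop graph, a node whose only atomic memory is its outputs can skip its own clean node when each atomic output feeds an ops-kernel atomic consumer (HCCL-style), because such consumers clean all of their inputs themselves. A toy decision sketch with simplified types (a sketch only; ToyNode and SkipCleanInsert are illustrative, not GE's API):

#include <algorithm>
#include <vector>

// One flag per atomic output records whether that output feeds a consumer
// that the ops kernel marks as atomic (and which so cleans its own inputs).
struct ToyNode {
  bool has_atomic_input = false;
  bool has_atomic_output = false;
  bool has_atomic_workspace = false;
  std::vector<bool> atomic_output_feeds_atomic_peer;
};

bool SkipCleanInsert(const ToyNode &n) {
  // only the pure atomic-output case (no atomic input or workspace) may skip
  if (n.has_atomic_input || !n.has_atomic_output || n.has_atomic_workspace) {
    return false;
  }
  // every atomic output must be cleaned by its consumer instead
  return std::all_of(n.atomic_output_feeds_atomic_peer.begin(),
                     n.atomic_output_feeds_atomic_peer.end(),
                     [](bool feeds_atomic) { return feeds_atomic; });
}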


ge/graph/passes/atomic_addr_clean_pass.h (+5, -0)

@@ -84,6 +84,11 @@ class AtomicAddrCleanPass : public GraphPass {
Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector<NodePtr> &atomic_node_vec,
std::vector<NodePtr> &common_atomic_nodes);

bool CheckAtomicFromOpsKernel(const NodePtr &node);

bool IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index);

bool CheckSkipInsertInLoopGraph(const NodePtr &node);

vector<NodePtr> hcom_node_vec_;
bool is_loop_graph_ = false;


ge/ir_build/ge_ir_build.cc (+46, -2)

@@ -36,7 +36,6 @@
#include "model/ge_model.h"
#include "graph/shape_refiner.h"
#include "graph/opsproto_manager.h"
#include "graph/utils/type_utils.h"

using std::string;
using namespace std;
@@ -50,6 +49,8 @@ const std::string IR_OPTION_LOG_LEVEL_DEFAULT = "default";
const std::string IR_OPTION_BUFFER_OPTIMIZE_DEFAULT = "l2_optimize";
const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0";
const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false";
const std::string kInputShape = "input_shape";
const std::string kInputFormat = "input_format";
} // namespace

static graphStatus CheckGlobalOptions(std::map<std::string, std::string> &global_options) {
@@ -227,6 +228,7 @@ class Impl {
graphStatus CheckOptions(const std::map<std::string, std::string> &options);
graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape);
graphStatus UpdateDataOpAttr(const Graph &graph);
graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
ModelBufferData &ge_models);
@@ -242,6 +244,40 @@ class Impl {
OmgContext omg_context_;
};

graphStatus Impl::UpdateDataOpAttr(const Graph &graph) {
GELOGD("Enter Update Data Attr Process!");
if (options_.find(kInputShape) == options_.end()) {
return GRAPH_SUCCESS;
}
unordered_map<string, vector<int64_t>> shape_map;
vector<pair<string, vector<int64_t>>> user_shape_map;
GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true),
return GRAPH_PARAM_INVALID, "parse input shape failed!");
auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
GE_CHECK_NOTNULL(compute_graph);
for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
GE_CHECK_NOTNULL(input_node);
ge::OpDescPtr op = input_node->GetOpDesc();
GE_CHECK_NOTNULL(op);
if (op->GetType() == DATA) {
auto tensor_input = op->MutableInputDesc(0);
auto tensor_output = op->MutableOutputDesc(0);
GE_CHECK_NOTNULL(tensor_input);
GE_CHECK_NOTNULL(tensor_output);
string data_op_name = op->GetName();
auto iter = shape_map.find(data_op_name);
if (iter != shape_map.end()) {
tensor_input->SetShape(ge::GeShape(iter->second));
tensor_output->SetShape(ge::GeShape(iter->second));
GELOGD("update input [%s] shape info", data_op_name.c_str());
} else {
GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str());
}
}
}
return GRAPH_SUCCESS;
}

graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options) {
for (auto &ele : options) {
auto it = ge::ir_option::ir_builder_suppported_options.find(ele.first);
@@ -277,6 +313,11 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options
return GRAPH_PARAM_INVALID;
}
}
// Check option EXEC_DISABLE_REUSED_MEMORY
it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY);
if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) {
return GRAPH_PARAM_INVALID;
}
return GRAPH_SUCCESS;
}

@@ -323,7 +364,10 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri
GELOGE(ret, "User input options are illegal! Please check!");
return ret;
}

ret = UpdateDataOpAttr(graph);
if (ret != GRAPH_SUCCESS) {
return ret;
}
std::string build_mode = (options_.find(BUILD_MODE) == options_.end() || options_[BUILD_MODE] == BUILD_MODE_NORMAL)
? "" : options_[BUILD_MODE];
options_[BUILD_MODE] = build_mode;
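With UpdateDataOpAttr wired into Impl::Init, the "input_shape" build option now rewrites the shapes of matching Data ops before the build starts. A hypothetical caller-side sketch (the option string follows ParseInputShape's ATC-style "name:d0,d1,..." format; the Data op name "data" is an assumption):

#include <map>
#include <string>

// Build options that would trigger the new Data-op shape update.
std::map<std::string, std::string> MakeBuildOptions() {
  std::map<std::string, std::string> build_options;
  build_options["input_shape"] = "data:1,3,224,224";  // "data" is hypothetical
  // During Impl::Init -> UpdateDataOpAttr, the Data op named "data" gets both
  // its input and output TensorDesc set to GeShape({1, 3, 224, 224}); Data
  // ops not named in input_shape are left untouched.
  return build_options;
}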


ge/offline/single_op_parser.cc (+20, -0)

@@ -27,6 +27,7 @@
#include "common/ge_inner_error_codes.h"
#include "framework/common/util.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/operator_factory_impl.h"

@@ -176,6 +177,7 @@ T GetValue(const map<string, T> &dict, string &key, T default_val) {
}

void from_json(const Json &j, SingleOpTensorDesc &desc) {
bool is_tensor_valid = true;
desc.dims = j.at(kKeyShape).get<vector<int64_t>>();
auto it = j.find(kKeyShapeRange);
if (it != j.end()) {
@@ -189,9 +191,12 @@ void from_json(const Json &j, SingleOpTensorDesc &desc) {
string type_str = j.at(kKeyType).get<string>();
desc.format = GetValue(kFormatDict, format_str, FORMAT_RESERVED);
desc.type = GetValue(kDataTypeDict, type_str, DT_UNDEFINED);
is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsFormatValid(format_str);
is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsDataTypeValid(type_str);
it = j.find(kKeyOriginFormat);
if (it != j.end()) {
string origin_format_str = j.at(kKeyOriginFormat).get<string>();
is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsFormatValid(origin_format_str);
desc.ori_format = GetValue(kFormatDict, origin_format_str, FORMAT_RESERVED);
}
auto tensor_name = j.find(kKeyName);
@@ -202,6 +207,9 @@ void from_json(const Json &j, SingleOpTensorDesc &desc) {
if (dynamic_input_name != j.end()) {
desc.dynamic_input_name = dynamic_input_name->get<string>();
}
if (!is_tensor_valid) {
desc.SetValidFlag(is_tensor_valid);
}
}

void from_json(const Json &j, SingleOpAttr &attr) {
@@ -305,6 +313,12 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {

int index = 0;
for (auto &tensor_desc : op_desc.input_desc) {
if (!tensor_desc.GetValidFlag()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"intput", "datatype or format", std::to_string(index)});
GELOGE(PARAM_INVALID, "Input's dataType or format is invalid when the index is %d", index);
return false;
}
if ((tensor_desc.type == DT_UNDEFINED && tensor_desc.format != FORMAT_RESERVED) ||
(tensor_desc.type != DT_UNDEFINED && tensor_desc.format == FORMAT_RESERVED)){
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
@@ -317,6 +331,12 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {

index = 0;
for (auto &tensor_desc : op_desc.output_desc) {
if (!tensor_desc.GetValidFlag()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"output", "datatype", std::to_string(index)});
GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index);
return false;
}
if (tensor_desc.type == DT_UNDEFINED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"output", "datatype", std::to_string(index)});


ge/offline/single_op_parser.h (+6, -0)

@@ -28,6 +28,10 @@

namespace ge {
struct SingleOpTensorDesc {
public:
bool GetValidFlag() const { return is_valid_; }
void SetValidFlag(bool is_valid) { is_valid_ = is_valid; }
public:
std::string name;
std::vector<int64_t> dims;
std::vector<int64_t> ori_dims;
@@ -36,6 +40,8 @@ struct SingleOpTensorDesc {
ge::Format ori_format = ge::FORMAT_RESERVED;
ge::DataType type = ge::DT_UNDEFINED;
std::string dynamic_input_name;
private:
bool is_valid_ = true;
};

struct SingleOpAttr {
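Together with the parser changes above, these accessors thread a validity bit from JSON parsing to validation: from_json clears the flag when a format or data-type string fails the TypeUtils checks, and Validate then reports E10027 instead of silently falling back to FORMAT_RESERVED or DT_UNDEFINED. A small usage sketch of the flag (a sketch only; the bad format string "NCHWX" is hypothetical):

// Mirrors the from_json / Validate handshake introduced in this commit.
bool DescUsable() {
  SingleOpTensorDesc desc;                       // is_valid_ defaults to true
  if (!ge::TypeUtils::IsFormatValid("NCHWX")) {  // hypothetical bad format
    desc.SetValidFlag(false);                    // what from_json now does
  }
  // SingleOpParser::Validate reports ATC error E10027 when this is false
  return desc.GetValidFlag();
}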


metadef (+1, -1)

@@ -1 +1 @@
Subproject commit d19c9c5c92f21a0335c18681dcceed44f3a54ddc
Subproject commit bd2cfdfa85a3d9dcbd7dc825f5759c7f8b3ffa9a
