From ad5bc1bdcce060e857b1c000bbf63460ca21e3b0 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Fri, 15 Jan 2021 14:00:24 +0800 Subject: [PATCH 01/41] iterator case, control edge move up to switch --- ge/graph/build/stream_allocator.cc | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 63112ea8..88ffda02 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { return false; } +// Iteraotor loop : +// StreamSwitch -> StreamActive +// FpBp loop: +// StreamSwitch -> AssignAdd -> StreamActive +NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { + for (auto pre_node : active_node->GetInControlNodes()) { + if (pre_node->GetType() == STREAMSWITCH) { + return pre_node; + } + for (auto pre_pre_node : pre_node->GetInControlNodes()) { + if (pre_pre_node->GetType() == STREAMSWITCH) { + return pre_pre_node; + } + } + } + return nullptr; +} + Status StreamAllocator::SetActiveStreamsForLoop() { vector loop_active_streams; for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { bool is_loop_active = false; if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { vector activated_label_list; + + NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); + if (pre_switch_node == nullptr) { + GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); + return FAILED; + } + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || activated_label_list.empty()) { GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { // it may cause some stream actived by iterator next step when this stream still alive. // If above situation happen, active message will lose, cause process block in next iteration. // In order to avoid this abnormal happen, - // add event between each last node and iterator active node in target active stream + // add event between each last node and iterator switch node GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); for (auto iter : stream_id_to_last_node) { if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { continue; } AddSendEventId(iter.second, event_num_); - AddRecvEventId(node, event_num_); + AddRecvEventId(pre_switch_node, event_num_); event_num_++; } From 7d4f981f92ddd8ae33493697799535e9e7e6b6f8 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Fri, 15 Jan 2021 16:00:12 +0800 Subject: [PATCH 02/41] Fix aclmdlGetOutputNameByIndex --- ge/graph/load/new_model_manager/davinci_model.cc | 9 +++++---- ge/graph/load/new_model_manager/davinci_model.h | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 35844b2d..cf2d9c5f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -722,7 +722,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size /// the aicpu opertor needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); - (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); string fp_ceiling_mode; if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { @@ -2068,6 +2067,8 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { GELOGD("Output node size: %zu", output_op_list.size()); + vector out_node_name; + (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); for (uint32_t index = 0; index < out_size; index++) { @@ -2081,11 +2082,11 @@ Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, "construct output_name failed."); // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name_.size()) { + if (out_size == out_node_name.size()) { // neweast plan, the index will add to name during generate model. - bool contains_colon = out_node_name_[index].find(":") != std::string::npos; + bool contains_colon = out_node_name[index].find(":") != std::string::npos; output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); + contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); } else { output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 4108f2c7..e9804dc5 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -883,7 +883,6 @@ class DavinciModel { GeModelPtr ge_model_; // release after DavinciModel::Init bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; map op_list_; // release after DavinciModel::Init From 00cc4279444a3d50d652eab94cc22ff30e7222b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Mon, 18 Jan 2021 09:30:47 +0800 Subject: [PATCH 03/41] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!9?= =?UTF-8?q?53=20:=20Continuous=20memory=20optimization,=20code=20refactori?= =?UTF-8?q?ng'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/graph/build/memory/binary_block_mem_assigner.cc | 4 +- ge/graph/build/memory/block_mem_assigner.cc | 264 +++---- ge/graph/build/memory/block_mem_assigner.h | 54 +- ge/graph/build/memory/graph_mem_assigner.cc | 855 ++++++++++++++------- ge/graph/build/memory/graph_mem_assigner.h | 24 +- ge/graph/load/new_model_manager/davinci_model.cc | 6 +- metadef | 2 +- parser | 2 +- 8 files changed, 708 insertions(+), 503 deletions(-) diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 97a0aed6..fff589f3 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGW("Vector all_memory_size is empty!"); return SUCCESS; } - if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { - GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); + if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { + GELOGE(FAILED, "dividend is 0!"); return FAILED; } // Memory size is 512 aligned, so it is not necessary to take less than 512 diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 21d6a49e..76e7efbe 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -65,7 +65,10 @@ void AlignMemOffset(size_t &mem_align_size) { } static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { - if (left.GetLifeBegin() < right.GetLifeBegin()) { + auto left_node_op_desc = left.node->GetOpDesc(); + auto right_node_op_desc = right.node->GetOpDesc(); + if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) + && (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { return true; } return false; @@ -97,14 +100,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { auto left_node_op_desc = left.node->GetOpDesc(); auto right_node_op_desc = right.node->GetOpDesc(); if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { - if (left.GetLifeBegin() < right.GetLifeBegin()) { - if (left.life_time_end >= right.GetLifeBegin()) { + if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { + if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { return true; } - } else if (left.GetLifeBegin() == right.GetLifeBegin()) { + } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { return true; } else { - if (right.life_time_end >= left.GetLifeBegin()) { + if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { return true; } } @@ -322,7 +325,12 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ size_t MemoryBlock::GetLifeBegin() { size_t life_time = 0; if (!node_type_index_list_.empty()) { - life_time = node_type_index_list_.front().GetLifeBegin(); + if (node_type_index_list_.front().node != nullptr) { + auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); + if (node_op_desc != nullptr) { + life_time = node_op_desc->GetId(); + } + } } return life_time; } @@ -409,7 +417,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ depend_stream_life_[stream_id_] = GetLifeBegin(); } -size_t MemoryBlock::GetLifeEnd() const { +size_t MemoryBlock::GetLifeEnd() { if (!node_type_index_list_.empty()) { return node_type_index_list_.back().life_time_end; } @@ -563,29 +571,32 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); - int64_t size = 0; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", - node_op_desc->GetName().c_str(), size); - return;); - batch_all_memory_size[batch_label].emplace_back(size); - if (batch_total_size.find(batch_label) == batch_total_size.end()) { - batch_total_size[batch_label] = size; - } else { - batch_total_size[batch_label] += size; - } - - if (!anchor_to_symbol_.empty()) { - auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); - if (iter1 == anchor_to_symbol_.end()) { - continue; + bool reuse_input = false; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, + GELOGI("Get reuse_input failed")); + + if (!reuse_input) { + int64_t size = 0; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; + } else { + batch_total_size[batch_label] += size; } - const std::string &symbol = iter1->second; - auto iter2 = symbol_size_.find(symbol); - if (iter2 == symbol_size_.end()) { - symbol_size_[symbol] = size; - } else if (size > static_cast(iter2->second)) { - iter2->second = size; + + if (!anchor_to_symbol_.empty()) { + auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); + if (iter1 == anchor_to_symbol_.end()) { + continue; + } + const std::string &symbol = iter1->second; + auto iter2 = symbol_size_.find(symbol); + if (iter2 == symbol_size_.end()) { + symbol_size_[symbol] = size; + } else if (size > static_cast(iter2->second)) { + iter2->second = size; + } } } } @@ -626,17 +637,35 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { return false; } -bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { +void AddReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { + string key = std::to_string(mem_block.Size()); + key += "_" + std::to_string(mem_block.stream_id_); + key += "_" + std::to_string(mem_block.memory_type_); + auto it = reusable_block_counts.find(key); + if (it != reusable_block_counts.end()) { + it->second++; + } else { + reusable_block_counts[key] = 1; + } +} + +void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { + string key = std::to_string(mem_block.Size()); + key += "_" + std::to_string(mem_block.stream_id_); + key += "_" + std::to_string(mem_block.memory_type_); + auto it = reusable_block_counts.find(key); + if (it != reusable_block_counts.end()) { + if (it->second > 0) { + it->second--; + } + } +} + +bool CanReuseBySize(const map &reusable_block_counts, const MemoryBlock &reusable_block, + size_t block_size, size_t real_size, bool continuous) { bool can_reuse = false; if (reusable_block.Size() == block_size) { - // in some continuous input case, continuous first input node's is not same as topo first node. - if (continuous_life_begin > 0) { - if (continuous_life_begin > reusable_block.GetLifeEnd()) { - can_reuse = true; - } - } else { - can_reuse = true; - } + can_reuse = true; } return can_reuse; } @@ -647,13 +676,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } - auto node_desc = n->GetOpDesc(); - GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); - return false;); - std::vector offsets_for_fusion = {}; - bool has_lx_fusion_attr = - AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); - if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { auto out_anchor = n->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_anchor == nullptr, @@ -676,17 +698,16 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false;); // If GetBool fail, is_input_continuous is false. - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous) { + bool is_input_continuous_no_padding = false; + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, + is_input_continuous_no_padding); + if (is_input_continuous_no_padding) { reset_zero_copy_flag = true; - has_lx_fusion_attr = true; - } else { - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + return false; } + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - // lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later - GE_IF_BOOL_EXEC(is_input_continuous && - (CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), + GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); no_need_assign_memory = true; return false;); @@ -700,10 +721,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // Only set attr one times. if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); - // lx fusion case assign max size for first block, so reuse as none continuous - GE_IF_BOOL_EXEC(has_lx_fusion_attr, - is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); - return false;); node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); } peer_input_index = peer_in_anchor->GetIdx(); @@ -716,95 +733,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false; } -bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { - if (n == nullptr) { - return false; - } - - int64_t max_node_life_time = 0; - int64_t continuous_input_node_life_time = 0; - if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { - auto out_anchor = n->GetOutDataAnchor(out_index); - if(out_anchor == nullptr) { - return false; - } - - // continuous input node's life time should be max - for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { - if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ - return false; - } - auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); - GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); - return false;); - - if(peer_in_node_desc->GetId() > max_node_life_time) { - max_node_life_time = peer_in_node_desc->GetId(); - } - - // If GetBool fail, is_input_continuous is false. - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); - if (!is_input_continuous) { - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - } - if (is_input_continuous) { - continuous_input_node_life_time = peer_in_node_desc->GetId(); - } - } - } - return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; -} - -/// -/// @ingroup GE -/// @brief Check continuous memory reuseable -/// @return void -/// -bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { - // n,peer_node_desc have been checked - auto node_desc = n->GetOpDesc(); - auto peer_node_desc = peer_node->GetOpDesc(); - continuous_life_begin_ = static_cast(node_desc->GetId()); - // lx fusion case check all continuous input node, firt input node's life time should be min - for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { - if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || - (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || - (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { - GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); - return false; - } - auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); - /// - /// node2 node1 node3 - /// | / / | - /// node5 node6 - /// firt input node's life time is not min - /// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse - /// - if (static_cast(peer_out_node_desc->GetId()) < continuous_life_begin_) { - continuous_life_begin_ = static_cast(peer_out_node_desc->GetId()); - GELOGI( - "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," - "min is node[%s] life[%zu]", - n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), - peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); - } - // when node3's output node5's life time is not max(node6 > node5), not reuse - if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), - in_anchor->GetPeerOutAnchor()->GetIdx())) { - GELOGI( - "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " - "max life node", - n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), - peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); - return false; - } - } - return true; -} - /// /// @ingroup GE /// @brief Check pre_reuse flag & post_reuse glag for each symbol @@ -1090,9 +1018,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams - if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { - reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, - real_size, no_align_size); + if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -1101,6 +1028,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; + ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } @@ -1113,7 +1041,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); + + block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; @@ -1213,23 +1142,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; - GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); - return nullptr); - // reduce old size - size_t align_size = block->Size(); - AlignMemOffset(align_size); - theory_memory_size_ -= align_size; - - auto block_size = GetBlockSize(size, ranges); - block->SetSize(block_size); - block->SetLifeTimeEnd(life_time_); - block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); + block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; - - // add new size - align_size = block_size; - AlignMemOffset(align_size); - theory_memory_size_ += align_size; } else { int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; @@ -1282,6 +1196,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, GELOGI("Get dst_reuse_input_index failed")); if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { + block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); out_count_reuse_input += 1; reuse_input = true; } @@ -1322,7 +1237,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i if (static_cast(index) == output_index) { if (node->GetOwnerComputeGraph() != nullptr) { string graph_name = node->GetOwnerComputeGraph()->GetName(); - GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), + GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), op_desc->GetName().c_str(), index, op_desc->GetStreamId()); } return true; @@ -1360,6 +1275,7 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vectorsame_stream_) { to_release->SetLifeTimeEnd(life_time_); reusable_memory.emplace_back(to_release); + AddReusableBlockCount(*to_release, reusable_block_counts_); } } } @@ -1459,7 +1375,6 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector } is_op_reuse_mem_ = true; - continuous_life_begin_ = 0; if (op_reuse_env_valid_ == true) { vector::iterator it_name = std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); @@ -1511,7 +1426,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } // atomic can't be reused - bool need_change = is_op_reuse_mem_ && is_atomic; + bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; if (need_change) { is_op_reuse_mem_ = false; } @@ -1904,12 +1819,11 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " - "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", - graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), - node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, - block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, - block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, + GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " + "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), + op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), + block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, + block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, block->batch_label_.c_str()); } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 78584078..58bcda75 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -39,15 +39,14 @@ using DependStreamLife = std::map>; enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) - : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} ge::NodePtr node = nullptr; OpMemoryType mem_type = kOutput; uint32_t index = 0; - bool ref_input = false; - size_t life_time_begin = 0; size_t life_time_end = kMaxLifeTime; + bool ref_input = false; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -56,34 +55,6 @@ struct NodeTypeIndex { } return "unknown"; } - - size_t GetLifeBegin() const { - if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { - return 0; - } - - if ((life_time_begin > 0) && (life_time_begin < static_cast(node->GetOpDesc()->GetId()))) { - return life_time_begin; - } else { - return node->GetOpDesc()->GetId(); - } - } - - std::string GetLifeBeginDesc() const { - if (node == nullptr) { - return ""; - } - auto node_op_desc = node->GetOpDesc(); - if (node_op_desc != nullptr) { - auto life_begin = GetLifeBegin(); - if (life_begin != static_cast(node_op_desc->GetId())) { - return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); - } else { - return std::to_string(node_op_desc->GetId()); - } - } - return ""; - } }; class MemoryBlock { @@ -115,13 +86,16 @@ class MemoryBlock { symbol_list_.clear(); } - size_t Size() const { return block_size_; } - - void SetSize(size_t size) { - if (size > block_size_) { - block_size_ = size; + void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, + int64_t stream_id) { + real_size_list_.emplace_back(real_size); + no_align_size_list_.emplace_back(no_align_size); + node_type_index_list_.emplace_back(node, type, out_index, false); + if (stream_id != stream_id_) { + same_stream_ = false; } } + size_t Size() const { return block_size_; } size_t AlignSize() const; @@ -169,7 +143,7 @@ class MemoryBlock { size_t GetLifeBegin(); - size_t GetLifeEnd() const; + size_t GetLifeEnd(); void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); @@ -432,7 +406,6 @@ class BlockMemAssigner : public MemAssigner { bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); - bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); /// /// @ingroup GE /// @|+++++++++block1++++++++| |+++++++++block1++++++++| @@ -452,6 +425,8 @@ class BlockMemAssigner : public MemAssigner { std::unordered_map>> reusable_blocks_; + std::map reusable_block_counts_; + std::unordered_map>> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; @@ -481,7 +456,6 @@ class BlockMemAssigner : public MemAssigner { std::string max_batch_label_; - size_t continuous_life_begin_ = 0; /// /// @ [stream1][nodeid] /// @[nodeid] [stream2][nodeid] diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f94eb275..98d073d4 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -35,9 +35,10 @@ namespace { const int kAllInputAddrIsAtomic = -1; const int kVirtualInputNodeMemoryReuse = 0; const int kVirtualOutputNodeMemoryReuse = 1; -// One state per bit cannot be repeated -enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 }; - +const size_t kVirtualInputNodeOutputSize = 1; +const size_t kVirtualOutputNodeInputSize = 1; +const size_t kVirtualNodeDataIndex = 0; +const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, const std::map> &symbol_to_anchors, const ge::NodePtr &node, const uint32_t i) { @@ -135,7 +136,7 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } -ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, +ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); @@ -180,6 +181,68 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out return SUCCESS; } +Status GraphMemoryAssigner::GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, + int32_t mem_reuse_model, string &max_batch_label) { + for (auto &i_map : mem_reuse_virtual_nodes_map) { + vector virtual_nodes_list = i_map.second; + vector max_shape_dims; + size_t max_batch_dim = 0; + bool max_batch_dim_find = false; + for (size_t i = 0; i < virtual_nodes_list.size(); ++i) { + GE_CHECK_NOTNULL(virtual_nodes_list[i]); + OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + ge::ConstGeTensorDescPtr input_output_desc; + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex); + } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex); + } else { + std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GE_CHECK_NOTNULL(input_output_desc); + + if (i == 0) { + // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + max_shape_dims = input_output_desc->GetShape().GetDims(); + } else { + vector current_shape_dims = input_output_desc->GetShape().GetDims(); + if (current_shape_dims.size() != max_shape_dims.size()) { + std::string error = "The shape of several nodes between multiple batches does not match."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + for (size_t j = 0; j < current_shape_dims.size(); ++j) { + if (current_shape_dims[j] == max_shape_dims[j]) { + continue; + } + if (max_batch_dim_find && max_batch_dim != j) { + std::string error = "The shape of several nodes between multiple batches does not match."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + max_batch_dim_find = true; + max_batch_dim = j; + if (current_shape_dims[j] > max_shape_dims[j]) { + max_shape_dims[j] = current_shape_dims[j]; + // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + } + // Only compare the first different dim in shape. + break; + } + } + } + // In every element of virtual_input_nodes_map, the label of the max batch node is the same. + break; + } + return SUCCESS; +} + Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); @@ -187,6 +250,13 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, mapGetGraphMemoryMaxSize())}); - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), - iter.second, iter.first); } return ge::FAILED; } @@ -245,137 +313,22 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offse return SUCCESS; } -uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { - if (op_desc == nullptr) { - return 0; - }; - - bool is_continuous = false; - uint32_t continuous_type = 0; - // If GetBool fail, is_continuous is false. - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous); - if (is_continuous) { - continuous_type |= kTypeInput; - } else { - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous); - if (is_continuous) { - bool attr_reuse = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - if (attr_reuse) { - continuous_type |= kTypeInputNoPadding; - } - } - } - - is_continuous = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous); - if (is_continuous) { - continuous_type |= kTypeOutput; - } else { - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous); - if (is_continuous) { - bool attr_reuse = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - if (attr_reuse) { - continuous_type |= kTypeOutputNoPadding; - } - } - } - - if (continuous_type != 0) { - GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type); - } - return continuous_type; -} - -Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, - int64_t &tensor_size, int64_t &nopadding_size) { - if ((op_desc == nullptr) || (output_desc == nullptr)) { - GELOGE(FAILED, "Input para is nullptr."); - return FAILED; - } - tensor_size = 0; - nopadding_size = 0; - bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); - if (is_nopadding) { - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; - } - - // Calculate tensor real size of each piece of data and out size of complete data - int64_t batch_dim_num = 1; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); - return FAILED; - } - } else { - if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { - GELOGE(FAILED, "GetSize failed."); - return FAILED; - } - } - if ((tensor_size < 0) || (nopadding_size < 0)) { - GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -void AlignMemOffset(int64_t &mem_align_size) { - if (mem_align_size <= 0) { - return; - } - mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; -} - -bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) { - bool is_peer_output_continuous = false; - // If GetBool fail, is_peer_output_continuous is false. - (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); - - // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and - // continuous output of the previous node is the same, we can support it. If size != 1, there may be - // conflict between the two, we can not support it. - auto peer_output_size = peer_op_desc->GetOutputsSize(); - GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return true;); - - bool is_peer_reference = false; - // If GetBool fail, is_peer_reference is false. - (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); - GE_IF_BOOL_EXEC(is_peer_reference, - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return true;); - return false; -} - Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { Status ret; for (auto &node : compute_graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - auto continuous_type = GetContinuousMemoryType(node->GetOpDesc()); + // Get the continuous input type of the node, default is false + bool is_input_continuous = false; + GE_CHECK_NOTNULL(node->GetOpDesc()); + // If GetBool fail, is_input_continuous is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); // Assign continuous input memory - bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); - if (continuous_input) { + if (is_input_continuous) { + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -385,6 +338,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { vector input_indexes; // If GetListInt fail, input_indexes is empty. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); + if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node if (!CheckInputIsSupportAtomic(node)) { @@ -396,10 +350,9 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { const auto &in_control_anchor = node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_control_anchor); for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { - GE_CHECK_NOTNULL(peer_out_control_anchor); auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); if (peer_out_node->GetType() == ATOMICADDRCLEAN) { - ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); + ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); if (ret != SUCCESS) { GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); return ret; @@ -409,12 +362,23 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } - // Assign continuous output memory - bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); - if (continuous_output) { - ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); + // Get the reference type of the node, default is false + bool is_ref = false; + // If GetBool fail, is_ref is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); + + // Get the continuous output type of the node, default is false + bool is_output_continuous = false; + // If GetBool fail, is_output_continuous is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); + + // If the output is ref type and refers to the ref of an input, the name of the output + // and the input are the same. Ge encounters ref type, finds matching relationship according + // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast + if (!is_ref && is_output_continuous) { // Assign continuous output memory + ret = AssignContinuousOutputMemory(node); if (ret != ge::SUCCESS) { - GELOGE(ret, "Assign continuous output memory failed!"); + GELOGE(ret, "Assign reference memory failed!"); return ret; } } @@ -427,181 +391,520 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { + int64_t &continuous_mem_size, int64_t memory_type) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); + bool continuous_input_alloc = false; + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - // The head and tail of hcom continuous input should be added 512 - iter->second.mem_offset_ += MEM_ALIGN_SIZE; continuous_mem_start = iter->second.mem_offset_; - int64_t mem_offset = iter->second.mem_offset_; - int64_t extra_memory_size = 0; - bool is_continuous_input_allocated = false; - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); + auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); - GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;); + bool is_peer_output_continuous = false; + // If GetBool fail, is_peer_output_continuous is false. + (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + + // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and + // continuous output of the previous node is the same, we can support it. If size != 1, there may be + // conflict between the two, we can not support it. + auto peer_output_size = peer_op_desc->GetOutputsSize(); + GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return PARAM_INVALID;); + + bool is_peer_reference = false; + // If GetBool fail, is_peer_reference is false. + (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + GE_IF_BOOL_EXEC(is_peer_reference, + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return PARAM_INVALID;); + + vector output_list = peer_op_desc->GetOutputOffset(); + std::vector offsets_for_fusion = {}; + bool has_offset_attr = + AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); + if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { + if (continuous_input_alloc && !has_offset_attr) { + if (in_data_anchor->GetIdx() == 0) { + continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); + } + // can not use else if, incase only one input + if (in_data_anchor->GetIdx() == static_cast(node->GetAllInDataAnchors().size()) - 1) { + int64_t tensor_desc_size = 0; + Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), + tensor_desc_size); + GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); + + tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; + continuous_mem_size = + output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; + } + GELOGI( + "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " + "real_size[%u].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), + 0, 0); + continue; + } + + output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; + } else { + std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); + return FAILED; + } + peer_op_desc->SetOutputOffset(output_list); + size_t pre_mem_offset = iter->second.mem_offset_; int64_t tensor_desc_size = 0; - int64_t nopadding_size = 0; - int64_t real_size = 0; - std::vector offsets_of_fusion = {}; - bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion); - lx_fusion = lx_fusion && !offsets_of_fusion.empty(); - if (lx_fusion) { - if (peer_out_data_anchor->GetIdx() >= static_cast(offsets_of_fusion.size())) { + if (has_offset_attr) { + if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { + auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; + iter->second.mem_offset_ += offset_for_fusion; + } else { std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()]; - tensor_desc_size = nopadding_size; } else { - if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()), - continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) { - return FAILED; - } + Status ret = + TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); + GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); + + iter->second.mem_offset_ += tensor_desc_size; } - bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; - vector output_list = peer_op_desc->GetOutputOffset(); - if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { - std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + // If set tensor_actual_size, Memory alignment is not required. + int32_t is_tensor_actual_size = 0; + ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); + if (is_tensor_actual_size == 0) { + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + } + GELOGI( + "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), + (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); + } + + iter->second.mem_offset_ += MEM_ALIGN_SIZE; + if (!continuous_input_alloc) { + continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; + } + return SUCCESS; +} + +Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) { + GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); + auto out_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); + vector output_list = out_op_desc->GetOutputOffset(); + + if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { + GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", + out_op_desc->GetOutputsSize(), output_list.size()); + return ge::FAILED; + } + + size_t mem_offset = output_list[0]; + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset; + int64_t tensor_desc_size = 0; + if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != + ge::SUCCESS) { + GELOGE(FAILED, "GetSize failed."); + return FAILED; + } + mem_offset += tensor_desc_size; + if (mem_offset <= 0) { return FAILED; } + mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; + GELOGI( + "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); + } + out_op_desc->SetOutputOffset(output_list); + return ge::SUCCESS; +} - // when continuous input has been allocated first input is beginning offset - bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); - if (is_allocated_first_input) { - mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); - continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); - } else { - // set offset for input - output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; - peer_op_desc->SetOutputOffset(output_list); +Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { + OpDescPtr op_desc = node->GetOpDesc(); + vector output_list = op_desc->GetOutputOffset(); + if (output_list.empty()) { + GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str()); + return FAILED; + } + output_list.at(0) = mem_offset_reuse; + op_desc->SetOutputOffset(output_list); + GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); + + int64_t attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; + } + + size_t extra_memory_size = 0; + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_data_anchor); + auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); + GE_CHECK_NOTNULL(peer_op_desc); + vector output_offsets = peer_op_desc->GetOutputOffset(); + if (peer_out_data_anchor->GetIdx() >= static_cast(output_offsets.size())) { + GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); + return ge::FAILED; } + output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; + peer_op_desc->SetOutputOffset(output_offsets); + size_t pre_mem_offset = mem_offset_reuse; - int64_t align_size = tensor_desc_size; - if (is_nopadding) { - mem_offset += nopadding_size; - extra_memory_size += (tensor_desc_size - nopadding_size); - real_size = nopadding_size; - } else { - ge::AlignMemOffset(align_size); - mem_offset += align_size; - // The head and tail of hcom continuous input should be added 512 - extra_memory_size = MEM_ALIGN_SIZE; - real_size = tensor_desc_size; + // Calculate tensor real size of each piece of data and out size of complete data + ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()); + GE_CHECK_NOTNULL(output_desc); + int64_t output_mem_size; + int64_t batch_dim_num = 1; + int64_t out_size; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", + peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); + return FAILED; } - GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " - "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), - node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), - output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, - is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); - } + mem_offset_reuse += output_mem_size; + extra_memory_size = extra_memory_size + out_size - output_mem_size; - mem_offset += extra_memory_size; - ge::AlignMemOffset(mem_offset); - continuous_mem_size = mem_offset - continuous_mem_start; - if (is_continuous_input_allocated) { - // not allocate memory here, so no need add 512 in header - iter->second.mem_offset_ -= MEM_ALIGN_SIZE; - } else { - iter->second.mem_offset_ = mem_offset; + GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, + output_mem_size); } + mem_offset_reuse += extra_memory_size; + size_t after_mem_offset = mem_offset_reuse; + GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); return SUCCESS; } -Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { - auto in_data_anchor_list = node->GetAllInDataAnchors(); - if (in_data_anchor_list.empty()) { - GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); +Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { + map> mem_reuse_virtual_input_nodes_map; + int64_t memory_type = RT_MEMORY_HBM; + for (const auto &n : compute_graph_->GetAllNodes()) { + OpDescPtr op_desc = n->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + bool attr_continuous = false; + bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous); + GE_IF_BOOL_EXEC(!get_continuous_flag, continue); + bool attr_reuse = false; + bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + GE_IF_BOOL_EXEC(!get_reuse_flag, continue); + if (attr_reuse && attr_continuous) { + if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { + // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. + std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) + + " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + string batch_label_string; + // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + if (batch_label_string.empty()) { + size_t node_mem_offset = iter->second.mem_offset_; + // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. + Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str()); + return FAILED; + } + + iter->second.mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + } else { + // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. + string current_node_full_name = op_desc->GetName(); + size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); + if (pos == string::npos) { + GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", + kMbatchNodeNameFlag, n->GetName().c_str()); + return FAILED; + } + string fixed_name = current_node_full_name.substr(0, pos); + vector parallel_virtual_input_nodes; + if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { + parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; + } + parallel_virtual_input_nodes.emplace_back(n); + mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes; + } + } + } + + int32_t mem_reuse_model = 0; + if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual input nodes failed."); return FAILED; } + return SUCCESS; +} + +Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { + OpDescPtr op_desc = node->GetOpDesc(); + + // 1. set memory of to be reused input tensor + auto in_data_anchor_list = node->GetAllInDataAnchors(); auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); - return ge::FAILED); + GE_CHECK_NOTNULL(peer_out_data_anchor); auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); + GE_CHECK_NOTNULL(peer_op_desc); vector in_node_output_offsets = peer_op_desc->GetOutputOffset(); if (peer_out_data_anchor->GetIdx() >= static_cast(in_node_output_offsets.size())) { GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); return FAILED; } - mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); - return SUCCESS; -} + in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; + peer_op_desc->SetOutputOffset(in_node_output_offsets); + GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); -Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, - uint32_t continuous_type) { - GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); - auto out_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); - vector output_list = out_op_desc->GetOutputOffset(); - if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { - GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", - out_op_desc->GetOutputsSize(), output_list.size()); - return ge::FAILED; + // 2. set memory of output tensor + vector output_list = op_desc->GetOutputOffset(); + if (output_list.empty()) { + GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str()); + return FAILED; + } + if (op_desc->GetOutputsSize() > output_list.size()) { + GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(), + output_list.size()); + return FAILED; + } + int64_t attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; } - int64_t mem_offset = 0; - bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0); - if (is_nopadding) { - // out tensor memory must be reused input tensor memory - if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) { - return ge::FAILED; + size_t extra_memory_size = 0; + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset_reuse; + size_t pre_mem_offset = mem_offset_reuse; + + // calculate tensor real size of each piece of data and out size of complete data + ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()); + GE_CHECK_NOTNULL(output_desc); + int64_t output_mem_size; + int64_t batch_dim_num = 1; + int64_t out_size; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", + op_desc->GetName().c_str(), out_data_anchor->GetIdx()); + return FAILED; } - } else { - // Get the reference type of the node, default is false - bool is_ref = false; - // If GetBool fail, is_ref is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - // If the output is ref type and refers to the ref of an input, the name of the output - // and the input are the same. Ge encounters ref type, finds matching relationship according - // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast - if (is_ref) { - GELOGI("Current node %s no needs assign continuous output because reference input by name.", - node->GetName().c_str()); - return SUCCESS; + mem_offset_reuse += output_mem_size; + extra_memory_size = extra_memory_size + out_size - output_mem_size; + + GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + pre_mem_offset, out_size, output_mem_size); + } + op_desc->SetOutputOffset(output_list); + mem_offset_reuse += extra_memory_size; + size_t after_mem_offset = mem_offset_reuse; + GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); + return SUCCESS; +} + +Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { + map> mem_reuse_virtual_output_nodes_map; + int64_t memory_type = RT_MEMORY_HBM; + for (const auto &n : compute_graph_->GetAllNodes()) { + OpDescPtr op_desc = n->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + bool attr_continuous = false; + bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous); + GE_IF_BOOL_EXEC(!get_continuous_flag, continue); + bool attr_reuse = false; + bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + GE_IF_BOOL_EXEC(!get_reuse_flag, continue); + + if (attr_reuse && attr_continuous) { + auto in_data_anchor_list = n->GetAllInDataAnchors(); + if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) { + // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse. + std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) + + " has " + FmtToStr(in_data_anchor_list.size()) + " inputs."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + string batch_label_string; + // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + if (batch_label_string.empty()) { + size_t node_mem_offset = iter->second.mem_offset_; + // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. + Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); + return FAILED; + } + iter->second.mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + } else { + // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. + string current_node_full_name = op_desc->GetName(); + size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); + if (pos == string::npos) { + std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) + + " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName()); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + string fixed_name = current_node_full_name.substr(0, pos); + vector parallel_virtual_output_nodes; + if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { + parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; + } + parallel_virtual_output_nodes.emplace_back(n); + mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes; + } } - mem_offset = output_list[0]; } - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset; - int64_t tensor_desc_size = 0; - int64_t nopadding_size = 0; - if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type, - tensor_desc_size, nopadding_size) != ge::SUCCESS) { + int32_t mem_reuse_model = 1; + if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual output nodes failed."); + return FAILED; + } + return SUCCESS; +} + +Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, + int32_t mem_reuse_model) { + // Find max batch label value + string max_batch_label; + GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), + "Get max batch label failed."); + PrintMemoryOffset(); + vector nodes_mem_offset_list; + for (auto &i_map : mem_reuse_nodes_map) { + vector virtual_nodes_list = i_map.second; + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), + "Get node list memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + size_t max_batch_node_mem_offset = iter->second.mem_offset_; + nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); + for (auto &i_node : virtual_nodes_list) { + // Op_desc is not nullptr, it has been checked. + OpDescPtr op_desc = i_node->GetOpDesc(); + string batch_label_string; + // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + if (batch_label_string == max_batch_label) { + Status status = SUCCESS; + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset); + } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset); + } else { + std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } - if (is_nopadding) { - mem_offset += nopadding_size; - } else { - mem_offset += tensor_desc_size; - ge::AlignMemOffset(mem_offset); + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); + return FAILED; + } + iter->second.mem_offset_ = max_batch_node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + // Only assign memory of max batch nodes. + break; + } } - GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" - " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), - node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, - is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); } - out_op_desc->SetOutputOffset(output_list); - return ge::SUCCESS; + PrintMemoryOffset(); + size_t memory_reuse_index = 0; + for (auto &i_map : mem_reuse_nodes_map) { + vector virtual_nodes_list = i_map.second; + for (auto &i_node : virtual_nodes_list) { + size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index]; + Status status = SUCCESS; + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset); + } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset); + } else { + std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); + return FAILED; + } + } + memory_reuse_index++; + } + return SUCCESS; } Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { @@ -643,7 +946,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { GE_CHECK_NOTNULL(mem_assigner_); GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), + GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}), "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } } @@ -781,7 +1084,7 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector & } // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. - if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { + if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { GELOGE(FAILED, "Failed to set atomic attr separately."); return FAILED; } @@ -928,10 +1231,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve output_list[output_index] = iter->second.mem_offset_; std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%ld] " - "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), - node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, - size, size, batch_label.c_str()); + GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld]" + " batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, + iter->second.mem_offset_, op_desc->GetStreamId(), size, size, batch_label.c_str()); iter->second.mem_offset_ += size; AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); @@ -1007,10 +1309,10 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "memtype[%ld] size[%ld] real_size[%ld] batch[%s].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, + "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "size[%ld] real_size[%ld] batch[%s].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; @@ -1048,10 +1350,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "memtype[%ld] ssize[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_, - op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); + "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, + batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); @@ -1127,7 +1429,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { return FAILED; } for (auto pair : memory_offset_) { - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } @@ -1296,7 +1598,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { } Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const vector &mem_offset_end, int64_t memory_type) { + const vector &mem_offset_end) { GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); // Parsing offset and size vectors @@ -1325,7 +1627,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { + if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { GELOGE(FAILED, "Set atomic clean attr failed."); return FAILED; } @@ -1336,7 +1638,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in } ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector &atomic_mem_start, - const vector &atomic_mem_size, int64_t memory_type) { + const vector &atomic_mem_size) { auto node_op_desc = node->GetOpDesc(); if (node_op_desc != nullptr) { GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); @@ -1375,10 +1677,9 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve } string atomic_mem_size_str = ss.str(); - GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]" - " memtype[%ld] size[%s]",node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type, - atomic_mem_size_str.c_str()); + GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]", + node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str()); } return SUCCESS; } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index a380e594..def24287 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -119,15 +119,31 @@ class GraphMemoryAssigner { /// ge::Status ReAssignContinuousMemory(bool is_loop_graph); + ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); + + ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); + + ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); + + ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); + + ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); + + ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, + int32_t mem_reuse_model, string &max_batch_label); + + ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, + int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); + ge::Status ReAssignAtomicMemory(bool is_loop_graph); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); + int64_t &continuous_mem_size, int64_t memory_type); - ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); + ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); /// /// @brief check the input of node whether support atomic attr @@ -153,10 +169,10 @@ class GraphMemoryAssigner { ge::Status AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes); ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const std::vector &mem_offset_end, int64_t memory_type); + const std::vector &mem_offset_end); ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector &atomic_mem_start, - const std::vector &atomic_mem_size, int64_t memory_type); + const std::vector &atomic_mem_size); ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 2afbdf30..35844b2d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -1809,7 +1809,7 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector &user_inp /// Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { - GELOGW("There is not AIPP related with index %u.", index); + GELOGW("there is not AIPP related with index %u.", index); return SUCCESS; } @@ -1818,7 +1818,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); - GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", + GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); AippConfigInfo aipp_info; @@ -2481,7 +2481,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r uint64_t buffer_length = buffer.length; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data)); - GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", + GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); idx++; diff --git a/metadef b/metadef index fcd0833c..dc6cceb6 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fcd0833cffcd201701f71d17db0c696c1bb01715 +Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 diff --git a/parser b/parser index 1601d66b..4e72aae4 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 1601d66b6187c83cbf38e762beb5538ce2c7c573 +Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 From 2a42c89921ce0e1de2941b433c82abc84f143670 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 18 Jan 2021 10:19:08 +0800 Subject: [PATCH 04/41] profiling task desc info --- ge/common/profiling/profiling_manager.cc | 4 +- ge/graph/load/new_model_manager/davinci_model.cc | 99 ++++++++++++++-------- ge/graph/load/new_model_manager/davinci_model.h | 3 + ge/hybrid/executor/worker/execution_engine.cc | 25 +----- .../node_executor/aicore/aicore_node_executor.cc | 12 +-- ge/hybrid/node_executor/aicore/aicore_op_task.h | 2 + .../node_executor/aicpu/aicpu_node_executor.cc | 12 +-- ge/hybrid/node_executor/task_context.cc | 38 +++++++++ ge/hybrid/node_executor/task_context.h | 6 ++ ge/single_op/single_op.cc | 1 + ge/single_op/task/op_task.cc | 6 ++ ge/single_op/task/op_task.h | 4 + inc/framework/common/ge_types.h | 13 +-- metadef | 2 +- parser | 2 +- 15 files changed, 142 insertions(+), 87 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 1fc4dba6..9ca3aced 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin uint32_t stream_id = task.stream_id; std::string shape_type = task.shape_type; int64_t cur_iter_num = task.cur_iter_num; + uint32_t task_type = task.task_type; data = model_name.append(" ") .append(op_name).append(" ") .append(std::to_string(block_dim)).append(" ") @@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(stream_id)).append(" ") .append(std::to_string(model_id)).append(" ") .append(shape_type).append(" ") - .append(std::to_string(cur_iter_num)).append("\n"); + .append(std::to_string(cur_iter_num)).append(" ") + .append(std::to_string(task_type)).append("\n"); ReporterData reporter_data{}; reporter_data.deviceId = device_id; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 35844b2d..b0c2a0a4 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -3064,6 +3064,65 @@ Status DavinciModel::MallocKnownArgs() { return SUCCESS; } +void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index) { + task_desc_info_.clear(); + bool flag = GetL1FusionEnableOption(); + char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; + INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); + int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; + if (env_flag != 0) { + flag = true; + } + + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = task_def.kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info.shape_type = "static"; + task_desc_info.cur_iter_num = 0; + // task type + task_desc_info.task_type = kTaskTypeInvalid; + auto model_task_type = static_cast(task_def.type()); + if (model_task_type == RT_MODEL_TASK_KERNEL) { + const domi::KernelDef &kernel_def = task_def.kernel(); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { + task_desc_info.task_type = kTaskTypeAicore; + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Other kernel type: %u", context.kernel_type()); + } + } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Skip task type: %d", static_cast(model_task_type)); + } + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + } + } + return; +} + Status DavinciModel::DistributeTask() { GELOGI("do Distribute."); for (auto &task : cpu_task_list_) { @@ -3074,19 +3133,11 @@ Status DavinciModel::DistributeTask() { GE_CHK_STATUS_RET(task->Distribute()); } - task_desc_info_.clear(); - bool flag = GetL1FusionEnableOption(); - char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; - INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; - if (env_flag != 0) { - flag = true; - } - const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); + GE_CHECK_NOTNULL(task); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); // for data dump auto op_index = std::max(task_def.kernel().context().op_index(), @@ -3106,33 +3157,9 @@ Status DavinciModel::DistributeTask() { GE_IF_BOOL_EXEC(no_need_profiling, continue); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); - // Load task info for profiling - TaskDescInfo task_desc_info; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = task_def.kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info.shape_type = "static"; - task_desc_info.cur_iter_num = 0; - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - } - } + + // save task info for profiling + SaveProfilingTaskDescInfo(op, task, task_def, task_index); } // launch dump kernel to aicpu GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 4108f2c7..582535cd 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -623,6 +623,9 @@ class DavinciModel { Status DistributeTask(); + void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index); + uint8_t *MallocFeatureMapMem(size_t data_size); uint8_t *MallocWeightsMem(size_t weights_size); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index b5de2a70..5e9d3607 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * } GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); - auto op_desc = node->GetOpDesc(); - std::string op_name = op_desc->GetName(); - std::string dynamic_model_name = model->GetModelName(); - uint32_t task_id = context_->GetTaskId(); - uint32_t stream_id = context_->GetStreamId(); - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = dynamic_model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = 0; - auto task_defs = model->GetTaskDefs(node); - if (task_defs != nullptr && (*task_defs).size() > 0) { - const auto &task_def = (*task_defs)[0]; - tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); - } - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - tmp_task_desc_info.shape_type = "dynamic"; - tmp_task_desc_info.cur_iter_num = graph_context_->iteration; - GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", - node->GetName().c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); + task_desc_info = context_->GetProfilingTaskDescInfo(); + context_->ClearProfilingTaskDescInfo(); + return SUCCESS; } @@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() { GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); std::vector task_desc_info; - TaskDescInfo tmp_task_desc_info; auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); if (profiling_ret != RT_ERROR_NONE) { GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 2abc5b03..a8736154 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function } RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); - uint32_t task_id = 0; - uint32_t stream_id = 0; - rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return rt_ret; - } - context.SetTaskId(task_id); - context.SetStreamId(stream_id); - GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); + // save profiling data + (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 5818f384..dd15c608 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -48,6 +48,8 @@ class AiCoreOpTask { bool GetClearAtomic() const {return clear_atomic_;} + uint32_t GetBlockDim() const {return block_dim_;} + protected: Status UpdateTilingInfo(TaskContext &context); virtual std::string GetKeyForOpParamSize() const; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 63ce65e9..2a7cbc67 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionSynchronize(GetStream()); } + +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + uint32_t task_id = 0; + uint32_t stream_id = 0; + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return rt_ret; + } + GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); + + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string op_name = op_desc->GetName(); + std::string dynamic_model_name = model->GetModelName(); + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = dynamic_model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_type = task_type; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + tmp_task_desc_info.shape_type = "dynamic"; + tmp_task_desc_info.cur_iter_num = iteration_; + task_desc_info.emplace_back(tmp_task_desc_info); + } + + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 6a4bcb8c..9a668f8c 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -22,6 +22,7 @@ #include #include "common/properties_manager.h" #include "external/ge/ge_api_error_codes.h" +#include "framework/common/ge_types.h" #include "hybrid/common/tensor_value.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/executor/rt_callback_manager.h" @@ -108,6 +109,10 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; + const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } + Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); + void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + private: TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, @@ -127,6 +132,7 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; + std::vector task_desc_info; }; } // namespace hybrid } // namespace ge diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 1f3fc5c5..081ce13b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = shape_type; tmp_task_desc_info.cur_iter_num = 0; + tmp_task_desc_info.task_type = op_task->GetTaskType(); GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index cc63e811..1772ca88 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector &input_desc, return UNSUPPORTED; } +uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } + TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); @@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } const std::string &TbeOpTask::GetStubName() const { return stub_name_; } +uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { return DoUpdateArgTable(param, false); } +uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } + void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = reinterpret_cast(io_addr_host_.data()); arg_count = io_addr_host_.size(); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 2d0740a6..78e1f6f0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -52,6 +52,7 @@ class OpTask { std::vector &output_desc, std::vector &output_buffers, rtStream_t stream); + virtual uint32_t GetTaskType() const; protected: Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); @@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); + uint32_t GetTaskType() const override; private: friend class SingleOpModel; @@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask { ~AiCpuBaseTask() override; UnknowShapeOpType GetUnknownType() const { return unknown_type_; } Status UpdateArgTable(const SingleOpModelParam ¶m) override; + uint32_t GetTaskType() const override; + protected: Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetInputConst(); diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index f7e6d679..9ca77f1c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -41,12 +41,7 @@ enum FrameworkType { }; const std::map kFwkTypeToStr = { - {"0", "Caffe"}, - {"1", "MindSpore"}, - {"3", "TensorFlow"}, - {"4", "Android_NN"}, - {"5", "Onnx"} -}; + {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; enum OpEngineType { ENGINE_SYS = 0, // default engine @@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +// profiling data +const uint32_t kTaskTypeAicore = 0; +const uint32_t kTaskTypeAicpu = 1; +const uint32_t kTaskTypeInvalid = 0xFFFF; + // Data cache, including data address and length struct DataBuffer { public: @@ -256,6 +256,7 @@ struct TaskDescInfo { uint32_t stream_id; std::string shape_type; int64_t cur_iter_num; + uint32_t task_type; }; // Profiling info of graph diff --git a/metadef b/metadef index dc6cceb6..b00c50c2 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 +Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 diff --git a/parser b/parser index 4e72aae4..f0109a2c 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 +Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 From 4d1f43053c477868154d72fe21d6ad8ef56de6ca Mon Sep 17 00:00:00 2001 From: wxl Date: Mon, 18 Jan 2021 15:50:59 +0800 Subject: [PATCH 05/41] infershape paralelly --- ge/hybrid/executor/worker/shape_inference_engine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 56ae3ea3..46ee6bd6 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -68,7 +68,6 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); { - std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "Invoke InferShapeAndType failed."); From 40463c84ab92312331159128c2f53f4be863afab Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 18 Jan 2021 16:39:53 +0800 Subject: [PATCH 06/41] profiling iter num start with 1 --- ge/hybrid/node_executor/task_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index a95fac13..8b7c623f 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -530,7 +530,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block tmp_task_desc_info.task_id = task_id; tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = "dynamic"; - tmp_task_desc_info.cur_iter_num = iteration_; + tmp_task_desc_info.cur_iter_num = iteration_ + 1; task_desc_info.emplace_back(tmp_task_desc_info); } From a3114f023d2384932fb1cadfc6b6a601a59dd8bf Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Mon, 18 Jan 2021 16:53:09 +0800 Subject: [PATCH 07/41] cache support --- ge/CMakeLists.txt | 1 + ge/common/types.cc | 1 + ge/executor/CMakeLists.txt | 1 + ge/executor/module.mk | 1 + ge/ge_runner.mk | 1 + ge/graph/build/memory/var_mem_assign_util.cc | 8 +- ge/graph/load/new_model_manager/model_utils.cc | 37 ++++--- ge/graph/load/new_model_manager/model_utils.h | 9 ++ ge/graph/manager/graph_var_manager.cc | 74 ++++++++++--- ge/graph/manager/graph_var_manager.h | 29 +++++- ge/graph/manager/rdma_pool_allocator.h | 4 + ge/graph/partition/dynamic_shape_partition.cc | 27 ++++- ge/graph/partition/dynamic_shape_partition.h | 3 +- ge/graph/partition/stage_partition.cc | 38 +++++-- ge/graph/passes/subgraph_pass.cc | 7 +- ge/host_cpu_engine/ops_kernel_store/op/host_op.cc | 3 + ge/hybrid/executor/hybrid_model_async_executor.cc | 7 +- ge/hybrid/model/hybrid_model_builder.cc | 7 +- ge/hybrid/node_executor/hccl/hccl_node_executor.cc | 114 ++++++++++++++++----- ge/hybrid/node_executor/hccl/hccl_node_executor.h | 2 + .../node_executor/host_cpu/kernel/assign_kernel.cc | 4 +- .../node_executor/host_cpu/kernel/data_kernel.cc | 41 ++++++++ .../node_executor/host_cpu/kernel/data_kernel.h | 42 ++++++++ .../node_executor/host_cpu/kernel/no_op_kernel.cc | 2 +- .../host_cpu/kernel/random_uniform_kernel.cc | 4 +- .../host_cpu/kernel/variable_kernel.cc | 4 +- inc/framework/common/types.h | 1 + inc/framework/omg/parser/parser_types.h | 2 + tests/ut/ge/CMakeLists.txt | 1 + tests/ut/ge/graph/load/model_utils_unittest.cc | 70 +++++++++++++ third_party/fwkacllib/inc/runtime/mem.h | 1 + 31 files changed, 459 insertions(+), 87 deletions(-) create mode 100644 ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc create mode 100644 ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h create mode 100644 tests/ut/ge/graph/load/model_utils_unittest.cc diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index a8eabf05..edbf837d 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "hybrid/node_executor/controlop/control_op_executor.cc" "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc" diff --git a/ge/common/types.cc b/ge/common/types.cc index 268e7caa..90ff9fe4 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 755bdf97..d7bca1fa 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -104,6 +104,7 @@ set(SRC_LIST "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc" diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 87abdade..7f2c1c53 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ ../hybrid/node_executor/controlop/control_op_executor.cc \ ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ ../hybrid/node_executor/rts/rts_node_executor.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 460d5068..af938686 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ hybrid/node_executor/controlop/control_op_executor.cc \ hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ hybrid/node_executor/hccl/hccl_node_executor.cc \ diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..dfc633af 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) @@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr } uint8_t *dev_ptr = nullptr; - rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index 22a657ad..efd8c619 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -15,18 +15,10 @@ */ #include "graph/load/new_model_manager/model_utils.h" - #include - #include "common/debug/log.h" #include "common/op/ge_op_utils.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" -#include "runtime/base.h" -#include "runtime/kernel.h" - -#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ @@ -342,8 +334,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); @@ -382,6 +374,27 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co /// /// @ingroup ge +/// @brief Get variable address. +/// @return Status +/// +Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr) { + if (ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset) == RT_MEMORY_RDMA_HBM) { + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + GE_CHECK_NOTNULL(var_addr); + } else { + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + } + return SUCCESS; +} + +/// +/// @ingroup ge /// @brief Get output data address. /// @return vector /// @@ -405,8 +418,8 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C } for (size_t i = 0; i < outputs_size; ++i) { GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); v_output_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/new_model_manager/model_utils.h index 4b3d7ae7..417b9b89 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/new_model_manager/model_utils.h @@ -107,6 +107,15 @@ class ModelUtils { /// @return Status /// static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); + + private: + /// + /// @ingroup ge + /// @brief Get variable address. + /// @return Status + /// + static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr); }; } // namespace ge diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 821de257..928c893f 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -16,17 +16,10 @@ #include "graph/manager/graph_var_manager.h" -#include - -#include "common/l2_cache_optimize.h" -#include "common/types.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" -#include "ge/ge_api_types.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" #include "graph/manager/trans_var_data_utils.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/type_utils.h" using std::map; @@ -37,7 +30,7 @@ namespace ge { VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {} VarResource::~VarResource() { - var_offset_set_.clear(); + var_offset_map_.clear(); var_addr_mgr_map_.clear(); cur_var_tensor_desc_map_.clear(); var_broad_cast_info_.clear(); @@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen std::string var_key = VarKey(var_name, tensor_desc); GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); if (var_addr_mgr_map_.count(var_key) == 0) { - uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + - static_cast(reinterpret_cast(address)); + uint64_t logic_address = static_cast(reinterpret_cast(address)); + if (memory_type != RT_MEMORY_RDMA_HBM) { + logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase(); + } GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); @@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen var_addr_mgr.tensor_desc = tensor_desc; var_addr_mgr.memory_type = memory_type; var_addr_mgr_map_[var_key] = var_addr_mgr; - var_offset_set_.insert(logic_address); + var_offset_map_[logic_address] = memory_type; return SUCCESS; } @@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); } -bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } +bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } + +rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { + if (var_offset_map_.count(offset) > 0) { + return var_offset_map_[offset]; + } + return RT_MEMORY_HBM; +} VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) { auto iter = var_to_trans_road_.find(var_name); @@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr MemResource::MemResource() : total_size_(0), var_mem_size_(0) {} -Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { +MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) { + switch (mem_type) { + case RT_MEMORY_HBM: + return new (std::nothrow) HbmMemResource(); + case RT_MEMORY_RDMA_HBM: + return new (std::nothrow) RdmaMemResource(); + default: + return nullptr; + } +} + +Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, + size_t &mem_offset) { size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; uint64_t real_size = size; total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize(); @@ -282,6 +296,19 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin return SUCCESS; } +Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) { + uint8_t *buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size); + if (buffer == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); + return MEMALLOC_FAILED; + } + address = reinterpret_cast(reinterpret_cast(buffer)); + var_mem_size_ += size; + GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", + session_id, var_name.c_str(), 0, buffer, size); + return SUCCESS; +} + uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; } void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; }; @@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) { MemResource *mem_resource = nullptr; auto iter = mem_resource_map_.find(memory_type); if (iter == mem_resource_map_.end()) { - mem_resource = new (std::nothrow) MemResource(); + mem_resource = MemResource::BuildMemResourceFromType(memory_type); if (mem_resource == nullptr) { GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; @@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen MemResource *mem_resource = nullptr; auto it = mem_resource_map_.find(memory_type); if (it == mem_resource_map_.end()) { - mem_resource = new (std::nothrow) MemResource(); + mem_resource = MemResource::BuildMemResourceFromType(memory_type); if (mem_resource == nullptr) { GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; @@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) { return var_resource_->IsVarAddr(offset); } +rtMemType_t VarManager::GetVarMemType(const int64_t &offset) { + std::lock_guard lock(mutex_); + if (var_resource_ == nullptr) { + GELOGW("VarManager has not been init."); + return RT_MEMORY_HBM; + } + return var_resource_->GetVarMemType(offset); +} + ge::Status VarManager::MallocVarMemory(size_t memory_size) { std::lock_guard lock(mutex_); uint8_t *var_mem_base = nullptr; @@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { std::lock_guard lock(mutex_); + if (memory_type == RT_MEMORY_RDMA_HBM) { + return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); + } string memory_key = std::to_string(session_id_); return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { std::lock_guard lock(mutex_); + if (memory_type == RT_MEMORY_RDMA_HBM) { + return logic_addr; + } string mem_key = std::to_string(session_id_); uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); if (mem_base == nullptr) { diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index 9cf0068c..924ddcb7 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -158,13 +158,15 @@ class VarResource { bool IsVarAddr(const int64_t &offset); + rtMemType_t GetVarMemType(const int64_t &offset); + std::unordered_map GetAllVarDesc() const { return cur_var_tensor_desc_map_; } private: std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); uint64_t session_id_; - std::unordered_set var_offset_set_; + std::unordered_map var_offset_map_; std::unordered_map var_addr_mgr_map_; std::unordered_map cur_var_tensor_desc_map_; std::unordered_map> var_to_trans_road_; @@ -176,19 +178,36 @@ class VarResource { class MemResource { public: MemResource(); - ~MemResource() = default; + virtual ~MemResource() = default; + static MemResource *BuildMemResourceFromType(rtMemType_t mem_type); - Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); + virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0; uint64_t GetVarMemSize() const; void UpdateVarMemSize(int64_t mem_size); - private: + protected: uint64_t total_size_; uint64_t var_mem_size_; }; +class HbmMemResource : public MemResource { + public: + HbmMemResource() = default; + ~HbmMemResource() override = default; + + Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; +}; + +class RdmaMemResource : public MemResource { + public: + RdmaMemResource() = default; + ~RdmaMemResource() override = default; + + Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; +}; + class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { public: static VarManager *Instance(uint64_t session_id); @@ -275,6 +294,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { bool IsVarAddr(const int64_t &offset); + rtMemType_t GetVarMemType(const int64_t &offset); + uint8_t *GetVarMemoryBase(rtMemType_t memory_type); uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); diff --git a/ge/graph/manager/rdma_pool_allocator.h b/ge/graph/manager/rdma_pool_allocator.h index 4d8cf71e..0a895a11 100644 --- a/ge/graph/manager/rdma_pool_allocator.h +++ b/ge/graph/manager/rdma_pool_allocator.h @@ -53,6 +53,10 @@ class RdmaPoolAllocator { Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size); + uint8_t *GetRdmaBaseAddr() { return rdma_base_addr_; } + + size_t GetRdmaMemSize() { return rdma_mem_size_; } + private: void MergeBlocks(Block *dst, Block *src); diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 6c81b21f..1c82eaf3 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -213,6 +213,7 @@ std::string DynamicShapePartitioner::DebugString() const { size_t data = 0; size_t netoutput = 0; size_t is_inputnode = 0; + size_t stage = 0; std::stringstream ss; ss << "All unknown shape nodes:" << std::endl; for (const auto &node : unknown_shape_nodes_) { @@ -229,10 +230,13 @@ std::string DynamicShapePartitioner::DebugString() const { netoutput++; } else if (cluster->IsInputNode()) { is_inputnode++; + } else if (cluster->IsIndependent()) { + stage++; } } ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known - << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl; + << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode + << ", stage:" << stage << std::endl; for (const auto &cluster : unique_clusters_) { ss << " " << cluster->DebugString() << std::endl; } @@ -272,12 +276,15 @@ Status DynamicShapePartitioner::InitClusters() { for (const auto &node : graph->GetDirectNode()) { Cluster::Type type = Cluster::DATA; bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty(); + REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null"); if (node->GetType() == DATA) { type = Cluster::DATA; } else if (is_input) { type = Cluster::INPUT_NODE; } else if (node->GetType() == NETOUTPUT) { type = Cluster::NETOUTPUT; + } else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) { + type = Cluster::STAGE; } else if (unknown_shape_nodes_.count(node) > 0) { type = Cluster::UNKNOWN_SHAPE; } else { @@ -360,6 +367,9 @@ static std::string ToString(const std::vector &clusters) { void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters for (const auto &cluster : ordered_cluster_) { + if (cluster->IsIndependent()) { + continue; + } for (const auto &in_cluster : cluster->Inputs()) { if (!in_cluster->IsUnknownShape()) { continue; @@ -379,6 +389,9 @@ void DynamicShapePartitioner::MergeClustersUnknownShape() { void DynamicShapePartitioner::MergeClustersKnownShape() { // Merge known shape clusters for (const auto &cluster : ordered_cluster_) { + if (cluster->IsIndependent()) { + continue; + } if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) { auto in_cluster = *(cluster->Inputs().begin()); in_cluster->Merge(cluster); @@ -606,6 +619,7 @@ void Cluster::UpdateRank(size_t rank) { bool Cluster::IsData() const { return type_ == DATA; }; bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; }; bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; }; +bool Cluster::IsIndependent() const { return type_ == STAGE; }; bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; }; bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; }; bool Cluster::IsRefVariable() const { @@ -641,6 +655,9 @@ void Cluster::RemoveOutput(ClusterPtr out) { out->in_clusters_.end()); }; void Cluster::Merge(ClusterPtr other) { + if (other->IsIndependent()) { + return; + } nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end()); other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()), other->in_clusters_.end()); @@ -689,7 +706,9 @@ std::vector Cluster::MergeAllPathFrom(ClusterPtr other) { std::unordered_set forward_reached_clusters; std::unordered_set backward_reached_clusters; std::vector path_clusters; - + if (other->IsIndependent()) { + return path_clusters; + } if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) == other->out_clusters_.end()) { return path_clusters; @@ -772,7 +791,7 @@ Status Cluster::BuildFrame() { } } } - if (IsData()) { + if (IsData() || IsIndependent()) { for (const auto &anchor : node->GetAllOutDataAnchors()) { AddFrameOutput(anchor); } @@ -888,7 +907,7 @@ Status Cluster::CombinePartitionFrame() { } Status Cluster::BuildPartitionSubgraph() { - if (IsData() || IsNetOutput()) { + if (IsData() || IsNetOutput() || IsIndependent()) { return SUCCESS; } int64_t parent_node_index = 0; diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index 9772615e..e8408ff9 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -32,7 +32,7 @@ class DynamicShapePartitioner { // DATA:DATA, UNKNOWN_SHAPE:unknowshape, KNOWN_SHAPE:knowshape, NETOUTPUT:NETOUTPUT. class Cluster : public std::enable_shared_from_this { public: - enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE }; + enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE }; Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner) : id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) { nodes_.push_back(node); @@ -45,6 +45,7 @@ class DynamicShapePartitioner { bool IsData() const; bool IsKnownShape() const; bool IsUnknownShape() const; + bool IsIndependent() const; bool IsNetOutput() const; std::vector> Inputs() const; std::vector> Outputs() const; diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc index 93a06afe..f6e49bbd 100644 --- a/ge/graph/partition/stage_partition.cc +++ b/ge/graph/partition/stage_partition.cc @@ -25,6 +25,10 @@ #include "common/types.h" namespace ge { +namespace { +const std::set kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA }; +} + Status StagePartitioner::Partition() { GE_CHECK_NOTNULL(root_graph_); if (root_graph_->GetParentGraph() != nullptr) { @@ -37,6 +41,10 @@ Status StagePartitioner::Partition() { if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) { continue; } + if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && node->GetInAllNodes().empty()) { + continue; + } + GELOGD("original node %s for stage %u", node->GetName().c_str(), level); stage_nodes_[level].insert(node); } if (stage_nodes_.empty()) { @@ -54,6 +62,13 @@ Status StagePartitioner::Partition() { return FAILED; } + root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool { + uint32_t a_level = UINT32_MAX; + (void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level); + uint32_t b_level = UINT32_MAX; + (void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level); + return a_level < b_level; + }); if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) { GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, " "maybe stage_level was not set correctly.", root_graph_->GetName().c_str()); @@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() { auto node = nodes.top(); nodes.pop(); GE_CHECK_NOTNULL(node->GetOpDesc()); - if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) { + uint32_t tmp_level = cur_stage_level; + (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); + if (tmp_level != cur_stage_level) { continue; } for (const auto &in_node : node->GetInAllNodes()) { if (visited_stage_nodes.count(in_node) != 0) { continue; } + if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str()); + return INTERNAL_ERROR; + } + GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level); + if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) { + GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level); + continue; + } nodes.push(in_node); } - if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { - GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level); visited_stage_nodes.emplace(node); } for (const auto &node : visited_stage_nodes) { @@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const op_desc->AddSubgraphName("f"); op_desc->SetSubgraphInstanceName(0, graph_name); + if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str()); + return nullptr; + } + NodePtr subgraph_node = root_graph_->AddNode(op_desc); if (subgraph_node == nullptr) { GELOGE(FAILED, "Add node %s failed.", graph_name.c_str()); diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index d1111d52..dc6269ac 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -142,17 +142,18 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node GE_CHECK_NOTNULL(in_node); // Need insert memcpy - // 1. Const->NetOutput in subgraph + // 1. Const->NetOutput in subgraph & parent graph is known // 2. AtomicOp->NetOutput in subgraph // 3. OutputContinuesRequiredOp->NetOutput in subgraph // 4. Data->NetOutput in subgraph but parent_node is not while // 5. While->NetOutput in known subgraph std::string op_type; - bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || + bool insert_flag = + (NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc index a6e00f4a..7f709f03 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc @@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp); REGISTER_OP_CREATOR(RandomUniform, HostOp); REGISTER_OP_CREATOR(Add, HostOp); REGISTER_OP_CREATOR(Mul, HostOp); +REGISTER_OP_CREATOR(ConcatV2, HostOp); +REGISTER_OP_CREATOR(Data, HostOp); +REGISTER_OP_CREATOR(Fill, HostOp); } // namespace host_cpu } // namespace ge diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index e9881224..3673edf0 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -59,6 +59,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis run_flag_ = true; listener_ = listener; future_ = std::async(std::launch::async, [&]() -> Status { + GetThreadLocalContext() = *executor_->GetContext()->ge_context; GetContext().SetSessionId(executor_->GetContext()->session_id); return RunInternal(); }); @@ -229,7 +230,11 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy } GE_CHECK_GE(tensor_size, 0); - auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); + AllocationAttr attr; + if (GetContext().GetHostExecFlag()) { + attr.SetMemType(HOST_DDR); + } + auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr); GE_CHECK_NOTNULL(tensor_buffer); args.inputs.emplace_back(std::shared_ptr(tensor_buffer.release())); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d1f61985..7ee0bef7 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -772,7 +772,12 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ var_name.c_str(), hybrid_model_.GetSessionId()); - uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(var_node->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } + uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type); if (dev_mem == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to copy var %s from device, cant not get " diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 94c734ca..5387a176 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -15,23 +15,25 @@ */ #include "hybrid/node_executor/hccl/hccl_node_executor.h" -#include "common/ge/ge_util.h" #include "common/ge/plugin_manager.h" #include "common/math/math_util.h" -#include "framework/common/debug/ge_log.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" #include "graph/runtime_inference_context.h" -#include "hccl/hcom.h" +#include "graph/utils/type_utils.h" +#include "hybrid/executor/hybrid_execution_context.h" +namespace ge { namespace { -const size_t kVarTableDims = 2; -const size_t kVarTableRowCnt = 3; -const size_t kVarTableIdxAddr = 1; -const size_t kVarTableIdxLen = 2; +constexpr size_t kVarTableDims = 2; +constexpr size_t kVarTableRowCnt = 3; +constexpr size_t kVarTableIdxAddr = 1; +constexpr size_t kVarTableIdxLen = 2; +const std::set kRdmaReadTypes = { HCOMREMOTEREAD, HCOMREMOTEREFREAD }; +const std::set kRdmaWriteTypes = { HCOMREMOTEWRITE, HCOMREMOTESCATTERWRITE }; +const std::set kRdmaScatterTypes = { HCOMREMOTEREFREAD, HCOMREMOTESCATTERWRITE }; } // namespace -namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HCCL, HcclNodeExecutor); @@ -142,11 +144,22 @@ Status RdmaNodeTask::Init(TaskContext &context) { GE_CHECK_NOTNULL(peer_node->GetOpDesc()); remote_index_ = {peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx()}; - if (node_item.node->GetType() == HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(node_item.node->GetType()) > 0) { local_index_ = 0; } else { local_index_ = op_desc->GetInputIndexByName("local"); } + int32_t offset_idx = node_item.op_desc->GetInputIndexByName("local_offset"); + if ((offset_idx != -1) && (node_item.op_desc->GetInputDescPtr(offset_idx) != nullptr)) { + skip_flag_ = true; + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()); + offset_index_ = { + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()->GetId(), + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetIdx() }; + } return SUCCESS; } @@ -158,8 +171,13 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vectorGetTensor(remote_index_.first, remote_index_.second, remote_tensor)); auto data = reinterpret_cast(remote_tensor.GetData()); if (data == nullptr) { - GELOGE(FAILED, "Tensor data is nullptr."); - return FAILED; + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + GELOGE(FAILED, "Tensor data is nullptr."); + return FAILED; + } } auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { @@ -183,30 +201,63 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(tensor_buffer.release())))); } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) } TensorValue *tv; - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - tv = context.MutableOutput(0); + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { + tv = context.MutableOutput(local_index_); } else { tv = context.MutableInput(local_index_); } GE_CHECK_NOTNULL(tv); - auto local_addr = reinterpret_cast(reinterpret_cast(tv->MutableData())); auto row_num = dims.front(); addr_infos.resize(row_num); - auto device_len = tv->GetSize() / row_num; - if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { - GELOGE(FAILED, "Local embedding length is out of range."); - return FAILED; - } + if (skip_flag_) { + int32_t offset_idx = context.GetNodeItem().op_desc->GetInputIndexByName("local_offset"); + GE_CHECK_NOTNULL(context.GetNodeItem().op_desc->GetInputDescPtr(offset_idx)); + auto data_type = context.GetNodeItem().op_desc->GetInputDesc(offset_idx).GetDataType(); + + Tensor offset_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(offset_index_.first, offset_index_.second, offset_tensor)) + if (static_cast(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { + GELOGE(PARAM_INVALID, "num of offset and remote addr mismatch, offset size=%zu, remote_addr size=%lld, dtype=%s", + offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); + return PARAM_INVALID; + } - for (auto idx = 0; idx < row_num; ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - addr_infos[idx] = {static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - device_len}; - local_addr += device_len; + auto addr_offset = reinterpret_cast(offset_tensor.GetData()); + GE_CHECK_NOTNULL(addr_offset); + auto base_addr = reinterpret_cast(tv->MutableData()); + GE_CHECK_NOTNULL(base_addr); + + for (auto idx = 0; idx < row_num; idx++) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast(data[line_idx]), + data[line_idx + kVarTableIdxAddr], + reinterpret_cast(reinterpret_cast(base_addr + addr_offset[idx])), + data[line_idx + kVarTableIdxLen] }; + } + } else { + auto local_addr = reinterpret_cast(reinterpret_cast(tv->MutableData())); + auto device_len = tv->GetSize() / row_num; + if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { + GELOGE(FAILED, "Local embedding length is out of range, expect %lld, but %lld exactly.", + data[kVarTableIdxLen], device_len); + return FAILED; + } + + for (auto idx = 0; idx < row_num; ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, + device_len }; + local_addr += device_len; + } } return SUCCESS; @@ -226,6 +277,10 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do } vector addr_infos; GE_CHK_STATUS_RET(ExtractTensor(context, addr_infos)); + if (addr_infos.empty()) { + done_callback(); + return SUCCESS; + } auto callback = [this](HcclResult status) { if (status != HCCL_SUCCESS) { @@ -235,6 +290,11 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do this->cond_.notify_all(); GELOGI("rdma callback success."); }; + + std::string executor_type = context.GetNodeItem().NodeType(); + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + executor_type = context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD ? HCOMREMOTEREAD : HCOMREMOTEWRITE; + } HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); @@ -262,7 +322,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const GE_CHK_STATUS_RET(task.Init(context), "hccl node load hccl so failed."); // allocate output mem, output mem or remote read will be calculated when node execute. - if (context.GetNodeItem().NodeType() != HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) == 0) { GE_CHK_STATUS_RET(context.AllocateOutputs(), "hccl node task allocate output failed."); } @@ -274,7 +334,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const Status HcclNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GELOGI("[%s] HcclNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); - if (node->GetType() == HCOMREMOTEREAD || node->GetType() == HCOMREMOTEWRITE) { + if ((kRdmaReadTypes.count(node->GetType()) > 0) || (kRdmaWriteTypes.count(node->GetType()) > 0)) { task = MakeShared(); } else { task = MakeShared(); diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 07dd848b..873f259f 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -55,9 +55,11 @@ class RdmaNodeTask : public NodeTask { private: Status ExtractTensor(TaskContext &context, vector &addr_infos); std::pair remote_index_; + std::pair offset_index_; int32_t local_index_ = 0; std::mutex hccl_mutex_; std::condition_variable cond_; + bool skip_flag_; }; class HcclNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 01fd391d..d54195d6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -29,8 +29,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status AssignKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto ref_tensor = context.MutableInput(kAssignRefInputIndex); GE_CHECK_NOTNULL(ref_tensor); const auto value_tensor = context.GetInput(kAssignValueInputIndex); @@ -50,7 +48,7 @@ Status AssignKernel::Compute(TaskContext& context) { GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc new file mode 100644 index 00000000..e34f601a --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hybrid/node_executor/host_cpu/kernel/data_kernel.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "hybrid/node_executor/host_cpu/kernel_factory.h" + +namespace { +constexpr size_t kDataInputIndex = 0; +constexpr size_t kDataOutputIndex = 0; +} + +namespace ge { +namespace hybrid { +namespace host_cpu { +Status DataKernel::Compute(TaskContext& context) { + auto input = context.MutableInput(kDataInputIndex); + GE_CHECK_NOTNULL(input); + GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input), "[%s] Failed to set output.", context.GetNodeName()) + GELOGD("[%s] compute success.", node_->GetName().c_str()); + return SUCCESS; +} + +REGISTER_KERNEL_CREATOR(Data, DataKernel); +} // namespace host_cpu +} // namespace hybrid +} // namespace ge diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h new file mode 100644 index 00000000..ca42d647 --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ +#define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ + +#include "hybrid/node_executor/host_cpu/kernel/kernel.h" + +namespace ge { +namespace hybrid { +namespace host_cpu { +class DataKernel : public Kernel { + public: + DataKernel(const NodePtr &node) : Kernel(node) {} + ~DataKernel() override = default; + DataKernel &operator=(const DataKernel &op) = delete; + DataKernel(const DataKernel &op) = delete; + + /** + * @brief compute for node_task. + * @return result + */ + Status Compute(TaskContext& context) override; +}; +} // namespace host_cpu +} // namespace hybrid +} // namespace ge + +#endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..b1b4e68c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -23,7 +23,7 @@ namespace ge { namespace hybrid { namespace host_cpu { Status NoOpKernel::Compute(TaskContext& context) { - GELOGI("[%s] no need to compute.", node_->GetName().c_str()); + GELOGD("[%s] no need to compute.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..52d48821 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -30,8 +30,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status RandomUniformKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - int64_t seed = 0; int64_t seed2 = 0; (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed", seed); @@ -66,7 +64,7 @@ Status RandomUniformKernel::Compute(TaskContext& context) { return UNSUPPORTED; } - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..16738c2a 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -23,8 +23,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status VariableKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto tensor = context.GetVariable(node_->GetName()); if (tensor == nullptr) { GELOGE(PARAM_INVALID, "tensor is NULL."); @@ -32,7 +30,7 @@ Status VariableKernel::Compute(TaskContext& context) { } // Constant & Variable Op has and only has one output GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 4d4c54d1..2dbb1753 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -437,6 +437,7 @@ REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index 62c9c750..f2bd4e28 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -370,7 +370,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESC FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMRECEIVE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREFREAD; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTESCATTERWRITE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 91a6620d..5979f5cf 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -589,6 +589,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" + "graph/load/model_utils_unittest.cc" ) set(PASS_TEST_FILES diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc new file mode 100644 index 00000000..bd86c71e --- /dev/null +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define protected public +#define private public +#include "graph/load/new_model_manager/model_utils.h" +#include "graph/manager/graph_var_manager.h" + +using namespace std; + +namespace ge { +class UtestModelUtils : public testing::Test { + protected: + void TearDown() {} +}; + +// test ModelUtils::GetVarAddr +TEST_F(UtestModelUtils, get_var_addr_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + runtime_param.var_size = 16; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(runtime_param.var_base + offset - runtime_param.logic_var_base, var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} + +TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(reinterpret_cast(offset), var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} +} // namespace ge diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 32bd9e6b..c305fb12 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -34,6 +34,7 @@ extern "C" { */ #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device +#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device From 2fc8c77a01f54d4c8f2f57d7eea7314d89541b3b Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Mon, 18 Jan 2021 16:59:24 +0800 Subject: [PATCH 08/41] cache support --- inc/framework/omg/parser/parser_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index f2bd4e28..f3b7f00a 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -238,8 +238,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char - *REQUIREDSPACETOBATCHPADDINGS; // for retinanet scope fusion +// for retinanet scope fusion +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REQUIREDSPACETOBATCHPADDINGS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; From ad0d140f6e4e2d7d018f811643aea04cda26fc6f Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Tue, 19 Jan 2021 11:17:35 +0800 Subject: [PATCH 09/41] dts: profiling task desc info save data error --- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 063c5b4c..75a5f6af 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -3067,7 +3067,6 @@ Status DavinciModel::MallocKnownArgs() { void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, const domi::TaskDef &task_def, size_t task_index) { - task_desc_info_.clear(); bool flag = GetL1FusionEnableOption(); char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); @@ -3134,6 +3133,7 @@ Status DavinciModel::DistributeTask() { GE_CHK_STATUS_RET(task->Distribute()); } + task_desc_info_.clear(); const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); From a892b2bf901e9939e49d8125014dbaa599519902 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 19 Jan 2021 12:35:38 +0800 Subject: [PATCH 10/41] cache support --- ge/graph/load/new_model_manager/model_utils.cc | 25 ++++++++++++++++--------- ge/graph/manager/graph_var_manager.cc | 4 ++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index efd8c619..d9a9f3ca 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -379,17 +379,24 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co /// Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, uint8_t *&var_addr) { - if (ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset) == RT_MEMORY_RDMA_HBM) { - if (offset < 0) { - GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); + switch (mem_type) { + case RT_MEMORY_RDMA_HBM: + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + break; + case RT_MEMORY_HBM: + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + break; + default: + GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); return PARAM_INVALID; - } - var_addr = reinterpret_cast(offset); - GE_CHECK_NOTNULL(var_addr); - } else { - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); - var_addr = model_param.var_base + offset - model_param.logic_var_base; } + GE_CHECK_NOTNULL(var_addr); return SUCCESS; } diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 928c893f..8a829d47 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -212,7 +212,7 @@ rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { if (var_offset_map_.count(offset) > 0) { return var_offset_map_[offset]; } - return RT_MEMORY_HBM; + return RT_MEMORY_RESERVED; } VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) { @@ -660,7 +660,7 @@ rtMemType_t VarManager::GetVarMemType(const int64_t &offset) { std::lock_guard lock(mutex_); if (var_resource_ == nullptr) { GELOGW("VarManager has not been init."); - return RT_MEMORY_HBM; + return RT_MEMORY_RESERVED; } return var_resource_->GetVarMemType(offset); } From bac7bcfc09933b1a5ca41bd837138025023b129e Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Fri, 15 Jan 2021 10:29:25 +0800 Subject: [PATCH 11/41] Optional output does not allocate memory --- ge/graph/build/memory/block_mem_assigner.cc | 7 +++++++ ge/graph/load/new_model_manager/model_utils.cc | 20 ++++++++++++------ ge/hybrid/node_executor/aicore/aicore_op_task.cc | 26 +++++++++++++++++++++++- ge/hybrid/node_executor/aicore/aicore_op_task.h | 1 + ge/hybrid/node_executor/task_context.cc | 8 ++++++++ metadef | 2 +- parser | 2 +- 7 files changed, 57 insertions(+), 9 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 76e7efbe..a523ce3f 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -24,6 +24,7 @@ #include "graph/buffer.h" #include "graph/ge_attr_value.h" #include "graph/ge_context.h" +#include "graph/types.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" @@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } + // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { @@ -1408,6 +1410,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + GE_IF_BOOL_EXEC((ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); + std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index d9a9f3ca..3c141f06 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -20,6 +20,7 @@ #include "common/op/ge_op_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/manager/graph_var_manager.h" +#include "graph/types.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -340,7 +341,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps @@ -424,6 +425,18 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C return v_output_data_addr; } for (size_t i = 0; i < outputs_size; ++i) { + const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); + if (tensor_desc == nullptr) { + GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); + continue; + } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str()); + continue; + } GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), uint8_t *variable_addr = nullptr; GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); @@ -431,11 +444,6 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); - if (tensor_desc == nullptr) { - GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); - continue; - } int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 80ea579b..f61caf19 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -20,6 +20,7 @@ #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" #include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/types.h" using optiling::OpRunInfo; @@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); + + GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast(INT_MAX)); + int outputs_size = static_cast(op_desc.GetOutputsSize()); + + for (int i = 0; i < outputs_size; ++i) { + const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i); + if (tensor_desc == nullptr) { + GELOGW("Op: %s, Index: %d, Tensor Desc is null", op_desc.GetName().c_str(), i); + continue; + } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + output_indices_to_skip_.push_back(i); + } + } return SUCCESS; } @@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) } Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { - size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces(); + size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces() + - output_indices_to_skip_.size(); if (tiling_buffer_ != nullptr) { ++expected_arg_count; } @@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { for (int i = 0; i < task_context.NumOutputs(); ++i) { const auto output = task_context.GetOutput(i); GE_CHECK_NOTNULL(output); + if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) { + GELOGD("Node:%s output[%d] is an optional, the address don't need to be saved.", + task_context.GetNodeName(), i); + continue; + } arg_base_[index++] = reinterpret_cast(output->GetData()); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index dd15c608..3f350531 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -72,6 +72,7 @@ class AiCoreOpTask { uint32_t args_size_ = 0; uint32_t block_dim_ = 1; bool clear_atomic_ = true; + std::vector output_indices_to_skip_; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 8b7c623f..e89ad874 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -18,6 +18,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/debug/log.h" #include "graph/utils/tensor_utils.h" +#include "graph/types.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/subgraph_executor.h" @@ -213,6 +214,13 @@ Status TaskContext::AllocateOutput(int index, return SUCCESS; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + outputs_start_[index] = TensorValue(); + return SUCCESS; + } + auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; diff --git a/metadef b/metadef index b00c50c2..88d053a5 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 +Subproject commit 88d053a5f94c40ff21620cef50b87075d5054292 diff --git a/parser b/parser index f0109a2c..6904ba94 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 +Subproject commit 6904ba9488658afc30076d299183fc8875045f49 From 22f83073fee7d983aea14d827c0de5bda485f4b6 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 19 Jan 2021 17:00:15 +0800 Subject: [PATCH 12/41] Delete useless vector &subgraph_ptr_list --- ge/graph/build/graph_builder.cc | 22 +++++++++------------- ge/graph/build/graph_builder.h | 9 ++++----- ge/graph/manager/graph_manager.cc | 5 ++--- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ed77a7f1..7b09cbc6 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph return SUCCESS; } -Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, - GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { +Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { if (comp_graph == nullptr) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); return GE_GRAPH_PARAM_NULLPTR; @@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( - BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), + BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); return SUCCESS; } - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); return SUCCESS; } -Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_list, +Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { if (ge::GetContext().GetHostExecFlag()) { GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); @@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v } GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); - Status ret = SecondPartition(comp_graph, subgraph_list); + Status ret = SecondPartition(comp_graph); GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); auto subgraph_map = graph_partitioner_.GetSubGraphMap(); @@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { } Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, - std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Start to build BuildForDynamicShape for dynamic shape."); @@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } // known shape build flow - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); } ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); @@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) return SUCCESS; } -Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list) { +Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { GE_TIMESTAMP_START(GraphPartition2); auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); if (ret != SUCCESS) { @@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list, - GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); void SetOptions(const GraphManagerOptions &options); private: @@ -59,12 +58,12 @@ class GraphBuilder { Status UpdateDataInputSize(const ge::NodePtr &node_ptr); Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); - Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list); + Status SecondPartition(ge::ComputeGraphPtr &comp_graph); Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); - Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, + Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); - Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_list, + Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index b0d412dc..d5ee690c 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -3121,9 +3121,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp graph_name.append(std::to_string(graph_node->GetGraphId())); compute_graph->SetName(graph_name); } - std::vector sub_graph_list; - auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model, - session_id); + + auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id); if (ret != SUCCESS) { GELOGE(ret, "SubGraph build Failed."); return ret; From 06272b2340a4952f1cf51ccbedead70e4f9d7303 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 19 Jan 2021 19:06:20 +0800 Subject: [PATCH 13/41] modify cast --- ge/graph/manager/graph_var_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 8a829d47..2469094c 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -302,7 +302,7 @@ Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); return MEMALLOC_FAILED; } - address = reinterpret_cast(reinterpret_cast(buffer)); + address = reinterpret_cast(reinterpret_cast(reinterpret_cast(buffer))); var_mem_size_ += size; GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", session_id, var_name.c_str(), 0, buffer, size); From 74424181814c0a6251bacf7b6cb22aabd1be318e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 19 Jan 2021 19:16:50 +0800 Subject: [PATCH 14/41] modify cast --- ge/graph/manager/graph_var_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 2469094c..e7dce824 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -302,7 +302,7 @@ Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); return MEMALLOC_FAILED; } - address = reinterpret_cast(reinterpret_cast(reinterpret_cast(buffer))); + address = static_cast(reinterpret_cast(buffer)); var_mem_size_ += size; GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", session_id, var_name.c_str(), 0, buffer, size); From bc1f6ca510bc8129481891fe00e44149231cb626 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 19:28:25 +0800 Subject: [PATCH 15/41] UpdateTiling pre-place --- ge/hybrid/executor/node_state.cc | 8 ++++++++ ge/hybrid/executor/node_state.h | 5 +++++ ge/hybrid/executor/subgraph_executor.cc | 29 +++++++++++++++++++++++++---- ge/hybrid/executor/subgraph_executor.h | 2 +- ge/hybrid/node_executor/node_executor.cc | 1 - 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 171ddaf3..00921705 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -188,6 +188,14 @@ Status NodeState::WaitForPrepareDone() { return SUCCESS; } +void NodeState::SetTaskContext(std::shared_ptr &task_context) { + task_context_ = task_context; +} + +std::shared_ptr NodeState::GetTaskContext() { + return task_context_; +} + Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled"); diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 02a362b4..c68a19ac 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -29,6 +29,7 @@ namespace hybrid { class NodeTask; struct GraphExecutionContext; class SubgraphContext; +class TaskContext; class ShapeFuture { public: @@ -103,6 +104,9 @@ struct NodeState { Status AwaitInputTensors(GraphExecutionContext &context) const; + void SetTaskContext(std::shared_ptr &task_context); + std::shared_ptr GetTaskContext(); + private: const NodeItem *node_item_ = nullptr; std::shared_ptr kernel_task_ = nullptr; @@ -110,6 +114,7 @@ struct NodeState { OpDescPtr op_desc_; ShapeInferenceState shape_inference_state_; SubgraphContext *subgraph_context_; + std::shared_ptr task_context_ = nullptr; std::mutex mu_; }; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index f7b063c7..8f7334de 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -232,6 +232,15 @@ Status SubgraphExecutor::PrepareNodes() { node_state->SetKernelTask(node_item.kernel_task); } } + auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state->GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContex(shared_task_context); } if (!ready_queue_.Push(p_node_state)) { @@ -267,6 +276,19 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } + auto unique_task_context = TaskContext::Create(*node_state.GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state.GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state.SetTaskContex(shared_task_context); + GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); + RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] start"); + GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws + RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] end"); return SUCCESS; } @@ -295,10 +317,9 @@ Status SubgraphExecutor::LaunchTasks() { GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone()); GELOGD("[%s] Start to execute.", node_state->GetName().c_str()); - auto task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(task_context); - task_context->SetForceInferShape(force_infer_shape_); - auto shared_task_context = std::shared_ptr(task_context.release()); + auto shared_task_context = node_state->GetTaskContext(); + GE_CHECK_NOTNULL(shared_task_context); + shared_task_context->SetForceInferShape(force_infer_shape_); HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), "[%s] Execute node failed.", node_state->GetName().c_str()); diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index d1949947..4523e2c4 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -75,7 +75,7 @@ class SubgraphExecutor { Status GetOutputs(std::vector &outputs, std::vector &output_desc); private: - static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); + Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); Status Init(const std::vector &inputs, const std::vector &input_desc); diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 02427b91..12e98160 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -38,7 +38,6 @@ const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; From c22fe4378608c493fdee9c48ffbdcdf59c78bc93 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 19:38:37 +0800 Subject: [PATCH 16/41] UpdateTiling pre-place --- ge/hybrid/executor/subgraph_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 8f7334de..6103e6e8 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -240,7 +240,7 @@ Status SubgraphExecutor::PrepareNodes() { return INTERNAL_ERROR; } auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContex(shared_task_context); + node_state->SetTaskContext(shared_task_context); } if (!ready_queue_.Push(p_node_state)) { @@ -284,7 +284,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta return INTERNAL_ERROR; } auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state.SetTaskContex(shared_task_context); + node_state.SetTaskContext(shared_task_context); GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] start"); GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws From f0d77cbb217f767743dfc00d262d31b5d7a0035f Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 20:31:16 +0800 Subject: [PATCH 17/41] UpdateTiling pre-place --- ge/hybrid/executor/subgraph_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 6103e6e8..c4d866a9 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -286,9 +286,9 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta auto shared_task_context = std::shared_ptr(unique_task_context.release()); node_state.SetTaskContext(shared_task_context); GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); - RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] start"); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start"); GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws - RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] end"); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end"); return SUCCESS; } From c193588e2ff401a2dfea143c02813a2ac565eb26 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 19 Jan 2021 21:02:07 +0800 Subject: [PATCH 18/41] Rename new_model_manager to model_manager. --- ge/CMakeLists.txt | 126 ++++++++++----------- ge/common/helper/model_cache_helper.cc | 2 +- ge/common/helper/model_helper.cc | 2 +- ge/common/profiling/profiling_manager.cc | 2 +- ge/executor/CMakeLists.txt | 62 +++++----- ge/executor/ge_executor.cc | 6 +- ge/executor/module.mk | 62 +++++----- ge/ge_inference.mk | 64 +++++------ ge/ge_runner.mk | 64 +++++------ ge/graph/execute/graph_execute.cc | 2 +- ge/graph/load/graph_loader.cc | 4 +- .../aipp_utils.cc | 2 +- .../aipp_utils.h | 0 .../cpu_queue_schedule.cc | 2 +- .../cpu_queue_schedule.h | 4 +- .../data_dumper.cc | 4 +- .../data_dumper.h | 0 .../data_inputer.cc | 2 +- .../data_inputer.h | 0 .../davinci_model.cc | 8 +- .../davinci_model.h | 12 +- .../davinci_model_parser.cc | 2 +- .../davinci_model_parser.h | 0 .../model_manager.cc | 6 +- .../model_manager.h | 0 .../model_utils.cc | 2 +- .../model_utils.h | 2 +- .../task_info/end_graph_task_info.cc | 4 +- .../task_info/end_graph_task_info.h | 2 +- .../task_info/event_record_task_info.cc | 4 +- .../task_info/event_record_task_info.h | 2 +- .../task_info/event_wait_task_info.cc | 4 +- .../task_info/event_wait_task_info.h | 2 +- .../task_info/fusion_start_task_info.cc | 4 +- .../task_info/fusion_start_task_info.h | 2 +- .../task_info/fusion_stop_task_info.cc | 4 +- .../task_info/fusion_stop_task_info.h | 2 +- .../task_info/hccl_task_info.cc | 6 +- .../task_info/hccl_task_info.h | 2 +- .../task_info/kernel_ex_task_info.cc | 6 +- .../task_info/kernel_ex_task_info.h | 2 +- .../task_info/kernel_task_info.cc | 8 +- .../task_info/kernel_task_info.h | 2 +- .../task_info/label_goto_ex_task_info.cc | 4 +- .../task_info/label_goto_ex_task_info.h | 2 +- .../task_info/label_set_task_info.cc | 4 +- .../task_info/label_set_task_info.h | 2 +- .../task_info/label_switch_by_index_task_info.cc | 4 +- .../task_info/label_switch_by_index_task_info.h | 2 +- .../task_info/memcpy_addr_async_task_info.cc | 4 +- .../task_info/memcpy_addr_async_task_info.h | 2 +- .../task_info/memcpy_async_task_info.cc | 4 +- .../task_info/memcpy_async_task_info.h | 2 +- .../task_info/model_exit_task_info.cc | 4 +- .../task_info/model_exit_task_info.h | 2 +- .../task_info/profiler_trace_task_info.cc | 4 +- .../task_info/profiler_trace_task_info.h | 2 +- .../task_info/stream_active_task_info.cc | 4 +- .../task_info/stream_active_task_info.h | 2 +- .../task_info/stream_switch_task_info.cc | 6 +- .../task_info/stream_switch_task_info.h | 2 +- .../task_info/stream_switchn_task_info.cc | 6 +- .../task_info/stream_switchn_task_info.h | 2 +- .../task_info/super_kernel/super_kernel.cc | 0 .../task_info/super_kernel/super_kernel.h | 0 .../task_info/super_kernel/super_kernel_factory.cc | 0 .../task_info/super_kernel/super_kernel_factory.h | 0 .../task_info/task_info.cc | 2 +- .../task_info/task_info.h | 4 +- .../task_info/task_info_factory.h | 0 .../tbe_handle_store.cc | 0 .../tbe_handle_store.h | 0 .../ts_mem_mall.h | 0 .../zero_copy_offset.cc | 6 +- .../zero_copy_offset.h | 2 +- .../zero_copy_task.cc | 4 +- .../zero_copy_task.h | 0 ge/hybrid/executor/hybrid_model_async_executor.cc | 2 +- ge/hybrid/executor/hybrid_model_async_executor.h | 2 +- ge/hybrid/executor/hybrid_model_executor.h | 2 +- ge/hybrid/hybrid_davinci_model.h | 2 +- ge/hybrid/model/hybrid_model.cc | 2 +- ge/hybrid/model/hybrid_model.h | 4 +- ge/hybrid/model/hybrid_model_builder.cc | 4 +- ge/hybrid/model/hybrid_model_builder.h | 2 +- ge/hybrid/node_executor/aicore/aicore_op_task.cc | 2 +- .../node_executor/aicpu/aicpu_node_executor.cc | 2 +- .../compiledsubgraph/known_node_executor.cc | 4 +- .../compiledsubgraph/known_node_executor.h | 2 +- ge/init/gelib.cc | 2 +- ge/session/inner_session.cc | 2 +- ge/session/session_manager.cc | 2 +- ge/single_op/single_op.cc | 4 +- ge/single_op/single_op_model.cc | 2 +- ge/single_op/single_op_model.h | 2 +- ge/single_op/task/aicpu_kernel_task_builder.cc | 2 +- ge/single_op/task/aicpu_task_builder.cc | 4 +- ge/single_op/task/build_task_utils.cc | 2 +- ge/single_op/task/tbe_task_builder.cc | 2 +- tests/ut/ge/CMakeLists.txt | 72 ++++++------ tests/ut/ge/graph/ge_executor_unittest.cc | 10 +- tests/ut/ge/graph/graph_load_unittest.cc | 4 +- tests/ut/ge/graph/load/data_dumper_unittest.cc | 4 +- tests/ut/ge/graph/load/davinci_model_unittest.cc | 2 +- tests/ut/ge/graph/load/end_graph_task_unittest.cc | 4 +- tests/ut/ge/graph/load/hccl_task_info_unittest.cc | 4 +- .../ge/graph/load/kernel_ex_task_info_unittest.cc | 4 +- .../ut/ge/graph/load/kernel_task_info_unittest.cc | 6 +- .../load/memcpy_addr_async_task_info_unittest.cc | 4 +- .../graph/load/memcpy_async_task_info_unittest.cc | 4 +- tests/ut/ge/graph/load/model_utils_unittest.cc | 2 +- .../new_model_manager_data_inputer_unittest.cc | 2 +- .../new_model_manager_davinci_model_unittest.cc | 32 +++--- ...w_model_manager_model_manager_aicpu_unittest.cc | 6 +- .../new_model_manager_model_manager_unittest.cc | 6 +- .../load/new_model_manager_task_build_unittest.cc | 2 +- tests/ut/ge/graph/load/new_op_test_utils.h | 2 +- .../ut/ge/graph/load/output_net_output_unittest.cc | 4 +- .../ut/ge/graph/load/tbe_handle_store_unittest.cc | 2 +- tests/ut/ge/single_op/single_op_model_unittest.cc | 2 +- 120 files changed, 406 insertions(+), 406 deletions(-) rename ge/graph/load/{new_model_manager => model_manager}/aipp_utils.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/aipp_utils.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/cpu_queue_schedule.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/cpu_queue_schedule.h (97%) rename ge/graph/load/{new_model_manager => model_manager}/data_dumper.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/data_dumper.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/data_inputer.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/data_inputer.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model_parser.cc (92%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model_parser.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/model_manager.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/model_manager.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/model_utils.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/model_utils.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/end_graph_task_info.cc (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/end_graph_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_record_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_record_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_wait_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_wait_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_start_task_info.cc (92%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_start_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_stop_task_info.cc (92%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_stop_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/hccl_task_info.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/hccl_task_info.h (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_ex_task_info.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_ex_task_info.h (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_task_info.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_task_info.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_goto_ex_task_info.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_goto_ex_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_set_task_info.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_set_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_switch_by_index_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_switch_by_index_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_addr_async_task_info.cc (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_addr_async_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_async_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_async_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/model_exit_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/model_exit_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/profiler_trace_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/profiler_trace_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_active_task_info.cc (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_active_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switch_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switch_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switchn_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switchn_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel.cc (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel_factory.cc (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel_factory.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/task_info.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/task_info_factory.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/tbe_handle_store.cc (100%) rename ge/graph/load/{new_model_manager => model_manager}/tbe_handle_store.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/ts_mem_mall.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_offset.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_offset.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_task.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_task.h (100%) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index edbf837d..888f565c 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -129,38 +129,38 @@ set(TRAIN_SRC_LIST "graph/label/partitioned_call_label_maker.cc" "graph/label/while_label_maker.cc" "graph/load/graph_loader.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/hccl_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/hccl_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" "graph/manager/graph_context.cc" "graph/manager/graph_manager.cc" "graph/manager/graph_manager_utils.cc" @@ -606,37 +606,37 @@ set(INFER_SRC_LIST "graph/manager/util/rt_context_util.cc" "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/util/debug.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "single_op/task/op_task.cc" "single_op/task/build_task_utils.cc" "single_op/task/tbe_task_builder.cc" diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index 0b592e11..7ec8cc0f 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -28,7 +28,7 @@ #include "framework/common/util.h" #include "graph/detail/attributes_holder.h" #include "graph/detail/model_serialize_imp.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 1d5a4a9b..92f279be 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "framework/omg/version.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 9ca3aced..32f0ee40 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -21,7 +21,7 @@ #include "framework/common/string_util.h" #include "graph/ge_context.h" #include "runtime/base.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const char *const kTrainingTrace = "training_trace"; diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index d7bca1fa..26e53c7b 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -32,37 +32,37 @@ set(SRC_LIST "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" - "../graph/load/new_model_manager/davinci_model.cc" - "../graph/load/new_model_manager/davinci_model_parser.cc" - "../graph/load/new_model_manager/model_manager.cc" - "../graph/load/new_model_manager/tbe_handle_store.cc" - "../graph/load/new_model_manager/cpu_queue_schedule.cc" - "../graph/load/new_model_manager/model_utils.cc" - "../graph/load/new_model_manager/aipp_utils.cc" - "../graph/load/new_model_manager/data_inputer.cc" - "../graph/load/new_model_manager/data_dumper.cc" - "../graph/load/new_model_manager/zero_copy_task.cc" - "../graph/load/new_model_manager/zero_copy_offset.cc" - "../graph/load/new_model_manager/task_info/task_info.cc" - "../graph/load/new_model_manager/task_info/event_record_task_info.cc" - "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_task_info.cc" - "../graph/load/new_model_manager/task_info/label_set_task_info.cc" - "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "../graph/load/model_manager/davinci_model.cc" + "../graph/load/model_manager/davinci_model_parser.cc" + "../graph/load/model_manager/model_manager.cc" + "../graph/load/model_manager/tbe_handle_store.cc" + "../graph/load/model_manager/cpu_queue_schedule.cc" + "../graph/load/model_manager/model_utils.cc" + "../graph/load/model_manager/aipp_utils.cc" + "../graph/load/model_manager/data_inputer.cc" + "../graph/load/model_manager/data_dumper.cc" + "../graph/load/model_manager/zero_copy_task.cc" + "../graph/load/model_manager/zero_copy_offset.cc" + "../graph/load/model_manager/task_info/task_info.cc" + "../graph/load/model_manager/task_info/event_record_task_info.cc" + "../graph/load/model_manager/task_info/event_wait_task_info.cc" + "../graph/load/model_manager/task_info/fusion_start_task_info.cc" + "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "../graph/load/model_manager/task_info/kernel_task_info.cc" + "../graph/load/model_manager/task_info/label_set_task_info.cc" + "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "../graph/load/model_manager/task_info/stream_active_task_info.cc" + "../graph/load/model_manager/task_info/stream_switch_task_info.cc" + "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "../graph/load/model_manager/task_info/end_graph_task_info.cc" + "../graph/load/model_manager/task_info/model_exit_task_info.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "../graph/common/local_context.cc" "../opskernel_manager/ops_kernel_builder_manager.cc" "../single_op/single_op_manager.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 0ea0e66d..b71a8be4 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -29,15 +29,15 @@ #include "framework/common/util.h" #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" using std::string; diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 7f2c1c53..4966eeb5 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ ../graph/manager/util/debug.cc \ ../model/ge_model.cc \ ../model/ge_root_model.cc \ - ../graph/load/new_model_manager/davinci_model.cc \ - ../graph/load/new_model_manager/davinci_model_parser.cc \ - ../graph/load/new_model_manager/model_manager.cc \ - ../graph/load/new_model_manager/tbe_handle_store.cc \ - ../graph/load/new_model_manager/cpu_queue_schedule.cc \ - ../graph/load/new_model_manager/model_utils.cc \ - ../graph/load/new_model_manager/aipp_utils.cc \ - ../graph/load/new_model_manager/data_inputer.cc \ - ../graph/load/new_model_manager/data_dumper.cc \ - ../graph/load/new_model_manager/zero_copy_task.cc \ - ../graph/load/new_model_manager/zero_copy_offset.cc \ - ../graph/load/new_model_manager/task_info/task_info.cc \ - ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ - ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../graph/load/model_manager/davinci_model.cc \ + ../graph/load/model_manager/davinci_model_parser.cc \ + ../graph/load/model_manager/model_manager.cc \ + ../graph/load/model_manager/tbe_handle_store.cc \ + ../graph/load/model_manager/cpu_queue_schedule.cc \ + ../graph/load/model_manager/model_utils.cc \ + ../graph/load/model_manager/aipp_utils.cc \ + ../graph/load/model_manager/data_inputer.cc \ + ../graph/load/model_manager/data_dumper.cc \ + ../graph/load/model_manager/zero_copy_task.cc \ + ../graph/load/model_manager/zero_copy_offset.cc \ + ../graph/load/model_manager/task_info/task_info.cc \ + ../graph/load/model_manager/task_info/event_record_task_info.cc \ + ../graph/load/model_manager/task_info/event_wait_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_task_info.cc \ + ../graph/load/model_manager/task_info/label_set_task_info.cc \ + ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + ../graph/load/model_manager/task_info/stream_active_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switch_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + ../graph/load/model_manager/task_info/end_graph_task_info.cc \ + ../graph/load/model_manager/task_info/model_exit_task_info.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 6f9e60db..a20ff437 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \ graph/manager/util/rt_context_util.cc \ graph/manager/util/variable_accelerate_ctrl.cc \ graph/manager/util/debug.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ single_op/task/op_task.cc \ single_op/task/build_task_utils.cc \ single_op/task/tbe_task_builder.cc \ @@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \ single_op/single_op_manager.cc \ hybrid/hybrid_davinci_model_stub.cc \ hybrid/node_executor/aicpu/aicpu_ext_info.cc \ - # graph/load/new_model_manager/task_info/hccl_task_info.cc + # graph/load/model_manager/task_info/hccl_task_info.cc OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index af938686..4434dc2b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ graph/label/partitioned_call_label_maker.cc \ graph/label/while_label_maker.cc \ graph/load/graph_loader.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/hccl_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/hccl_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ graph/manager/graph_context.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 3c5618e8..79c22a29 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/model_parser/base.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" #include "runtime/mem.h" diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 6272e581..29afc939 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -22,8 +22,8 @@ #include "common/helper/model_helper.h" #include "common/util.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/model_manager/aipp_utils.cc similarity index 98% rename from ge/graph/load/new_model_manager/aipp_utils.cc rename to ge/graph/load/model_manager/aipp_utils.cc index e0e60d2b..8a18c421 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/model_manager/aipp_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/aipp_utils.h" +#include "graph/load/model_manager/aipp_utils.h" #include diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/model_manager/aipp_utils.h similarity index 100% rename from ge/graph/load/new_model_manager/aipp_utils.h rename to ge/graph/load/model_manager/aipp_utils.h diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc similarity index 99% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.cc rename to ge/graph/load/model_manager/cpu_queue_schedule.cc index 430321bd..d9b716ea 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" #include "common/debug/ge_log.h" #include "common/debug/log.h" diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h similarity index 97% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.h rename to ge/graph/load/model_manager/cpu_queue_schedule.h index 8999e975..de4c5327 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -20,8 +20,8 @@ #include #include "common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "runtime/kernel.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc similarity index 99% rename from ge/graph/load/new_model_manager/data_dumper.cc rename to ge/graph/load/model_manager/data_dumper.cc index a12a2b2a..947aac1d 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_dumper.h" +#include "graph/load/model_manager/data_dumper.h" #include #include @@ -29,7 +29,7 @@ #include "framework/common/util.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/util/debug.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h similarity index 100% rename from ge/graph/load/new_model_manager/data_dumper.h rename to ge/graph/load/model_manager/data_dumper.h diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/model_manager/data_inputer.cc similarity index 94% rename from ge/graph/load/new_model_manager/data_inputer.cc rename to ge/graph/load/model_manager/data_inputer.cc index 5efc710e..0fe75465 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/model_manager/data_inputer.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/model_manager/data_inputer.h similarity index 100% rename from ge/graph/load/new_model_manager/data_inputer.h rename to ge/graph/load/model_manager/data_inputer.h diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc similarity index 99% rename from ge/graph/load/new_model_manager/davinci_model.cc rename to ge/graph/load/model_manager/davinci_model.cc index 75a5f6af..2430ae3d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include #include @@ -36,9 +36,9 @@ #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/graph.h" -#include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h similarity index 98% rename from ge/graph/load/new_model_manager/davinci_model.h rename to ge/graph/load/model_manager/davinci_model.h index f02015a8..53db77a7 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -32,12 +32,12 @@ #include "common/types.h" #include "framework/common/util.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/aipp_utils.h" -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/aipp_utils.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/model.h" #include "graph/node.h" #include "graph/op_desc.h" diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/model_manager/davinci_model_parser.cc similarity index 92% rename from ge/graph/load/new_model_manager/davinci_model_parser.cc rename to ge/graph/load/model_manager/davinci_model_parser.cc index 76526de2..c6f48b84 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/model_manager/davinci_model_parser.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" namespace ge { DavinciModelParser::DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/model_manager/davinci_model_parser.h similarity index 100% rename from ge/graph/load/new_model_manager/davinci_model_parser.h rename to ge/graph/load/model_manager/davinci_model_parser.h diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc similarity index 99% rename from ge/graph/load/new_model_manager/model_manager.cc rename to ge/graph/load/model_manager/model_manager.cc index edc60e50..7cf869ac 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include @@ -28,8 +28,8 @@ #include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "graph/utils/attr_utils.h" diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h similarity index 100% rename from ge/graph/load/new_model_manager/model_manager.h rename to ge/graph/load/model_manager/model_manager.h diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc similarity index 99% rename from ge/graph/load/new_model_manager/model_utils.cc rename to ge/graph/load/model_manager/model_utils.cc index 3c141f06..410e9364 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include #include "common/debug/log.h" #include "common/op/ge_op_utils.h" diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/model_manager/model_utils.h similarity index 98% rename from ge/graph/load/new_model_manager/model_utils.h rename to ge/graph/load/model_manager/model_utils.h index 417b9b89..26f8d700 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/model_manager/model_utils.h @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/types.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" #include "graph/utils/tensor_adapter.h" diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc rename to ge/graph/load/model_manager/task_info/end_graph_task_info.cc index b8b02f59..c306c650 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kDumpFlag = 2; diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/model_manager/task_info/end_graph_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.h rename to ge/graph/load/model_manager/task_info/end_graph_task_info.h index 614544f9..efce19b2 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EndGraphTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/model_manager/task_info/event_record_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.cc rename to ge/graph/load/model_manager/task_info/event_record_task_info.cc index 11589258..f736c386 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/model_manager/task_info/event_record_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.h rename to ge/graph/load/model_manager/task_info/event_record_task_info.h index d3f5961e..a79f1d3b 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventRecordTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc rename to ge/graph/load/model_manager/task_info/event_wait_task_info.cc index 5701179b..34058502 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/model_manager/task_info/event_wait_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.h rename to ge/graph/load/model_manager/task_info/event_wait_task_info.h index a92252d7..bd8acab1 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventWaitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.cc index 32c79647..6feea9e4 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.h index b1897533..284a5e0f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStartTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..22d1589c 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.h index 880ca487..994498d5 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStopTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/model_manager/task_info/hccl_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.cc rename to ge/graph/load/model_manager/task_info/hccl_task_info.cc index 7b18a9a3..2d0ad560 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" #include #include "common/opskernel/ops_kernel_info_store.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace ge { std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/model_manager/task_info/hccl_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.h rename to ge/graph/load/model_manager/task_info/hccl_task_info.h index 777f5bbf..3df155ad 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.h @@ -23,7 +23,7 @@ #include #include "common/opskernel/ge_task_info.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/manager/util/hcom_util.h" namespace ge { class HcclTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 98d9cb78..c34a4e9a 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include @@ -24,8 +24,8 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" #include "graph/attr_value.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index f6873c6c..265316ce 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc similarity index 99% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc index 83bf2779..27fe8eb0 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" #include #include #include @@ -25,9 +25,9 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/l2_cache_optimize.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/kernel.h" #include "super_kernel/super_kernel.h" #include "super_kernel/super_kernel_factory.h" diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_task_info.h index cea25320..7cabf259 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -22,7 +22,7 @@ #include #include -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { class KernelTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc index 393c0b31..1921c85d 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h" +#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h index f83cd1d9..25310368 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelGotoExTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/model_manager/task_info/label_set_task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.cc rename to ge/graph/load/model_manager/task_info/label_set_task_info.cc index 5fa96a96..45cb586a 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/model_manager/task_info/label_set_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.h rename to ge/graph/load/model_manager/task_info/label_set_task_info.h index bb02ccf0..36e41f1b 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelSetTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc index ae7865a4..c2997678 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" +#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { constexpr uint8_t kLabelSwitchIndexNum = 1; diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h index 538b2d68..00ca0844 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelSwitchByIndexTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc index b95705f0..a1f58e42 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kAlignBytes = 64; diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h index c7645b9f..4631c67c 100644 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class MemcpyAddrAsyncTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc index fa320d81..22f9267d 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.h index 43b5ba13..728305ff 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc rename to ge/graph/load/model_manager/task_info/model_exit_task_info.cc index ff8057aa..eb200e3f 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/model_exit_task_info.h" +#include "graph/load/model_manager/task_info/model_exit_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h b/ge/graph/load/model_manager/task_info/model_exit_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.h rename to ge/graph/load/model_manager/task_info/model_exit_task_info.h index c219fcc8..1e4a3923 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class ModelExitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc index 533c459a..b8fd1828 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.h index 8989096d..b57ebfae 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class ProfilerTraceTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_active_task_info.cc index 33ebea3b..ec807777 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" #include #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/model_manager/task_info/stream_active_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.h rename to ge/graph/load/model_manager/task_info/stream_active_task_info.h index c6b263b4..dfbf48d1 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class StreamActiveTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.cc index 616ba85f..f129950a 100644 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" #include #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.h index a72d7de2..0e75e183 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class StreamSwitchTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc index 27adbbe4..35eb23e3 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" +#include "graph/load/model_manager/task_info/stream_switchn_task_info.h" #include #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace { const uint8_t kStreamSwitchnInputNum = 1; diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.h index 3d65a086..6e6ca190 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/task_info.cc rename to ge/graph/load/model_manager/task_info/task_info.cc index 674d477f..e521f95c 100755 --- a/ge/graph/load/new_model_manager/task_info/task_info.cc +++ b/ge/graph/load/model_manager/task_info/task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/task_info.h rename to ge/graph/load/model_manager/task_info/task_info.h index 26f22564..99ec3c4e 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -22,8 +22,8 @@ #include "cce/customize.h" #include "framework/common/taskdown_common.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/ts_mem_mall.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/ts_mem_mall.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" #include "proto/task.pb.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/model_manager/task_info/task_info_factory.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/task_info_factory.h rename to ge/graph/load/model_manager/task_info/task_info_factory.h diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/model_manager/tbe_handle_store.cc similarity index 100% rename from ge/graph/load/new_model_manager/tbe_handle_store.cc rename to ge/graph/load/model_manager/tbe_handle_store.cc diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/model_manager/tbe_handle_store.h similarity index 100% rename from ge/graph/load/new_model_manager/tbe_handle_store.h rename to ge/graph/load/model_manager/tbe_handle_store.h diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h similarity index 100% rename from ge/graph/load/new_model_manager/ts_mem_mall.h rename to ge/graph/load/model_manager/ts_mem_mall.h diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc similarity index 98% rename from ge/graph/load/new_model_manager/zero_copy_offset.cc rename to ge/graph/load/model_manager/zero_copy_offset.cc index f27d862d..3f8555bb 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/model_manager/zero_copy_offset.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_task.h" namespace ge { namespace { diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h similarity index 98% rename from ge/graph/load/new_model_manager/zero_copy_offset.h rename to ge/graph/load/model_manager/zero_copy_offset.h index 66fcd887..fc63fced 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/model_manager/zero_copy_offset.h @@ -25,7 +25,7 @@ #include "external/ge/ge_api_error_codes.h" #include "framework/common/ge_types.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" #include "runtime/mem.h" diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/model_manager/zero_copy_task.cc similarity index 97% rename from ge/graph/load/new_model_manager/zero_copy_task.cc rename to ge/graph/load/model_manager/zero_copy_task.cc index b938f14b..367de87a 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/model_manager/zero_copy_task.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "common/ge_compiler_options.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/model_manager/zero_copy_task.h similarity index 100% rename from ge/graph/load/new_model_manager/zero_copy_task.h rename to ge/graph/load/model_manager/zero_copy_task.h diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 3673edf0..b7c6c33d 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -15,7 +15,7 @@ */ #include "hybrid/executor/hybrid_model_async_executor.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "graph/ge_context.h" diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 21d2d033..a69cc45f 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -21,7 +21,7 @@ #include #include "external/ge/ge_api_error_codes.h" #include "external/ge/ge_api_types.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "hybrid/executor/hybrid_model_executor.h" #include "runtime/stream.h" diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 6299d4ff..6b2e52b4 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -17,7 +17,7 @@ #ifndef GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ #define GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ #include "common/thread_pool.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/rt_callback_manager.h" #include "hybrid/executor/subgraph_executor.h" diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 5349390c..369c732a 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -19,7 +19,7 @@ #include #include "external/ge/ge_api_error_codes.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "model/ge_root_model.h" namespace ge { diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 91b6a549..7e5d8fe5 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -17,7 +17,7 @@ #include "hybrid_model.h" #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index e521b776..72495cad 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -21,8 +21,8 @@ #include #include #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/node.h" #include "hybrid/common/tensor_value.h" #include "hybrid/model/node_item.h" diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 7ee0bef7..861cd30a 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -20,8 +20,8 @@ #include "graph/ge_context.h" #include "graph/build/memory/var_mem_assign_util.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index 55a19b6c..045bf3ef 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -21,7 +21,7 @@ #include #include #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/node.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/model/node_item.h" diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index f61caf19..f1bd6466 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -19,7 +19,7 @@ #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/types.h" using optiling::OpRunInfo; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 2a7cbc67..109939d9 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -18,7 +18,7 @@ #include "framework/common/taskdown_common.h" #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/utils/node_utils.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 7f2c6288..2bca3e06 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -21,8 +21,8 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" namespace ge { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 2dde993b..6e9740ad 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -19,7 +19,7 @@ #include "hybrid/node_executor/node_executor.h" #include "hybrid/model/hybrid_model.h" #include "graph/op_desc.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { namespace hybrid { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index b81632bd..1a97b6f8 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -37,7 +37,7 @@ #include "graph/common/ge_call_wrapper.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index c4f8a53b..5a67f7cd 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -29,7 +29,7 @@ #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" #include "graph/common/local_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 5d5a299a..3c531747 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -20,7 +20,7 @@ #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/util/rt_context_util.h" using std::map; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 081ce13b..2fa7182b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -22,11 +22,11 @@ #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" #include "single_op/task/build_task_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { namespace { diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 2a1a14e6..220adde8 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6d0109fe..6637271c 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -24,7 +24,7 @@ #include #include "common/helper/model_helper.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "single_op/single_op.h" #include "single_op/stream_resource.h" diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 2a5f968f..6580ea31 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -16,7 +16,7 @@ #include "single_op/task/aicpu_kernel_task_builder.h" #include "framework/common/taskdown_common.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "build_task_utils.h" namespace ge { diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 1bfbcb3c..90ddc696 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -19,8 +19,8 @@ #include "single_op/task/build_task_utils.h" #include "runtime/mem.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 071e514b..9e4d55e1 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -17,7 +17,7 @@ #include "single_op/task/build_task_utils.h" #include "runtime/rt.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/type_utils.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 594352aa..9ba30b8e 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -20,7 +20,7 @@ #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 5979f5cf..dafb97e0 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -132,7 +132,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/session/session_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" "${GE_CODE_DIR}/ge/session/inner_session.cc" @@ -140,15 +140,15 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/base.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" @@ -254,13 +254,13 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_offset.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_task.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/aipp_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" "${GE_CODE_DIR}/ge/omm/csa_interact.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/common/kernel_store.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" @@ -386,32 +386,32 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/common/model_parser/base.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/util.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model_parser.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_record_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_wait_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/hccl_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_active_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3d04fd0c..3ef8a750 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -33,11 +33,11 @@ #include "common/properties_manager.h" #include "common/types.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "ge/common/dump/dump_properties.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" diff --git a/tests/ut/ge/graph/graph_load_unittest.cc b/tests/ut/ge/graph/graph_load_unittest.cc index af9d5a37..54972af7 100644 --- a/tests/ut/ge/graph/graph_load_unittest.cc +++ b/tests/ut/ge/graph/graph_load_unittest.cc @@ -24,7 +24,7 @@ #include "common/helper/model_helper.h" #include "common/op/ge_op_utils.h" #include "common/types.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/op_desc.h" #include "graph/types.h" #include "graph/utils/attr_utils.h" @@ -35,7 +35,7 @@ #include "graph/load/graph_loader.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_manager_utils.h" #include "model/ge_model.h" #undef private diff --git a/tests/ut/ge/graph/load/data_dumper_unittest.cc b/tests/ut/ge/graph/load/data_dumper_unittest.cc index e53b76f4..1866f4eb 100644 --- a/tests/ut/ge/graph/load/data_dumper_unittest.cc +++ b/tests/ut/ge/graph/load/data_dumper_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 0c03c934..35413a6b 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -20,7 +20,7 @@ #define protected public #include "graph/utils/graph_utils.h" #include "common/profiling/profiling_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" using namespace std; diff --git a/tests/ut/ge/graph/load/end_graph_task_unittest.cc b/tests/ut/ge/graph/load/end_graph_task_unittest.cc index 29e7a53a..a66aaaff 100644 --- a/tests/ut/ge/graph/load/end_graph_task_unittest.cc +++ b/tests/ut/ge/graph/load/end_graph_task_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc index 5c056007..6a2468ee 100644 --- a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { class UtestHcclTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 443d2975..53436820 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "cce/aicpu_engine_struct.h" namespace ge { diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index fe886b49..a3a27a7b 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); diff --git a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc index 9348d49e..1652841d 100644 --- a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" namespace ge { class UtestMemcpyAddrAsyncTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc index 8769ec39..afc04130 100644 --- a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" namespace ge { diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc index bd86c71e..ac886cea 100644 --- a/tests/ut/ge/graph/load/model_utils_unittest.cc +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -17,7 +17,7 @@ #include #define protected public #define private public -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" using namespace std; diff --git a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc index 56e673f7..43c2ad15 100644 --- a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc @@ -17,7 +17,7 @@ #include -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc index 00069930..38a250ad 100644 --- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc @@ -24,29 +24,29 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include #include "runtime/dev.h" #include "runtime/kernel.h" #include "cce/fwk_adpt_struct.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc index 43e094b5..a68fb307 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -30,9 +30,9 @@ #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" //#include "new_op_test_utils.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 1c6e5a10..8750610a 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -25,13 +25,13 @@ #define private public #define protected public -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "new_op_test_utils.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc index 620fac09..f10ccd7f 100644 --- a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc @@ -30,7 +30,7 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 4cbc78ac..984cbfb4 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -40,7 +40,7 @@ #define private public #include "graph/compute_graph.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/node.h" #include "graph/op_desc.h" #include "graph/utils/attr_utils.h" diff --git a/tests/ut/ge/graph/load/output_net_output_unittest.cc b/tests/ut/ge/graph/load/output_net_output_unittest.cc index ecd28fe3..97246dad 100644 --- a/tests/ut/ge/graph/load/output_net_output_unittest.cc +++ b/tests/ut/ge/graph/load/output_net_output_unittest.cc @@ -23,8 +23,8 @@ #define private public #include "common/debug/memory_dumper.h" #include "common/op/ge_op_utils.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "new_op_test_utils.h" #include "proto/om.pb.h" diff --git a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc index a98e14c6..82ffb388 100644 --- a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc +++ b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc @@ -18,7 +18,7 @@ #define protected public #define private public -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "runtime/kernel.h" #undef protected #undef private diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index b6b97d89..ab909e11 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -18,7 +18,7 @@ #include //#include "cce/taskdown_common.hpp" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "runtime/rt.h" From 53a1717ba1c13731f5e46e6ab8684a7b8051ba61 Mon Sep 17 00:00:00 2001 From: zhangxiaokun9 Date: Tue, 19 Jan 2021 21:38:32 +0800 Subject: [PATCH 19/41] Ignore model manager for UT --- build.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build.sh b/build.sh index 5222ab5c..561a7efc 100644 --- a/build.sh +++ b/build.sh @@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then # fi # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then - echo "Generating coverage statistics, please wait..." - cd ${BASEPATH} - rm -rf ${BASEPATH}/cov - mkdir ${BASEPATH}/cov - lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info - cd ${BASEPATH}/cov - genhtml coverage.info + echo "Generating coverage statistics, please wait..." + cd ${BASEPATH} + rm -rf ${BASEPATH}/cov + mkdir ${BASEPATH}/cov + lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info + lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '*/model_manager/*' -o cov/coverage.info + cd ${BASEPATH}/cov + genhtml coverage.info fi # generate output package in tar form, including ut/st libraries/executables From 912338363e99a846b121f6e2bbd4d04d81e46a32 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 22:10:05 +0800 Subject: [PATCH 20/41] UpdateTiling pre-place --- ge/hybrid/executor/subgraph_executor.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index c4d866a9..f8f122b1 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -231,16 +231,16 @@ Status SubgraphExecutor::PrepareNodes() { } else { node_state->SetKernelTask(node_item.kernel_task); } + auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state->GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); } - auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(unique_task_context); - const auto &task = node_state->GetKernelTask(); - if (task == nullptr) { - GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); - return INTERNAL_ERROR; - } - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); } if (!ready_queue_.Push(p_node_state)) { From fe3fc12aed85e11d692006d1e7e6d46bb7c05306 Mon Sep 17 00:00:00 2001 From: zhangxiaokun9 Date: Wed, 20 Jan 2021 09:15:42 +0800 Subject: [PATCH 21/41] Recover 'Remove files matching' for UT lcov --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 561a7efc..f2fafd48 100644 --- a/build.sh +++ b/build.sh @@ -240,7 +240,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then rm -rf ${BASEPATH}/cov mkdir ${BASEPATH}/cov lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '*/model_manager/*' -o cov/coverage.info + lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info cd ${BASEPATH}/cov genhtml coverage.info fi From da46f912ab25a56c54019651f6ead2675085918a Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Tue, 19 Jan 2021 19:15:38 +0800 Subject: [PATCH 22/41] profiling graph desc modify --- ge/graph/load/new_model_manager/davinci_model.cc | 16 ++++-- ge/hybrid/executor/worker/execution_engine.cc | 38 +----------- .../node_executor/aicore/aicore_node_executor.cc | 11 +++- .../node_executor/aicpu/aicpu_node_executor.cc | 12 +++- ge/hybrid/node_executor/task_context.cc | 67 ++++++++++++++++++---- ge/hybrid/node_executor/task_context.h | 7 ++- 6 files changed, 94 insertions(+), 57 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 75a5f6af..7f2ec132 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -4021,14 +4021,18 @@ Status DavinciModel::GetComputeGraphInfo(vector &graph_des } else { compute_graph_info.model_name = name_; } + + std::vector format = { FORMAT_NULL }; + std::vector> shape = { {0} }; + std::vector data_type = { DT_UNDEFINED }; compute_graph_info.op_name = op_desc.op_name; compute_graph_info.op_type = op_desc.op_type; - compute_graph_info.input_format = op_desc.input_format; - compute_graph_info.input_shape = op_desc.input_shape; - compute_graph_info.input_data_type = op_desc.input_data_type; - compute_graph_info.output_format = op_desc.output_format; - compute_graph_info.output_shape = op_desc.output_shape; - compute_graph_info.output_data_type = op_desc.output_data_type; + compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; + compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; + compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; + compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; + compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; + compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; uint32_t task_id = 0; uint32_t stream_id = 0; auto iter = profiler_report_op_info_.find(op_desc.op_name); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 5e9d3607..44f7d87f 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -171,43 +171,9 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel GE_CHECK_NOTNULL(model); GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); + compute_graph_info = context_->GetProfilingGraphDescInfo(); + context_->ClearProfilingGraphDescInfo(); - std::string dynamic_model_name = model->GetModelName(); - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is nullptr."); - return PARAM_INVALID; - } - - auto op_mode = static_cast(domi::ImplyType::INVALID); - if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && - op_mode == static_cast(domi::ImplyType::TVM)) { - ComputeGraphDescInfo tmp_compute_graph_info; - tmp_compute_graph_info.model_name = dynamic_model_name; - tmp_compute_graph_info.op_name = op_desc->GetName(); - tmp_compute_graph_info.op_type = op_desc->GetType(); - - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); - } - - for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op_desc->GetOutputDesc(j); - tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); - tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); - } - tmp_compute_graph_info.task_id = context_->GetTaskId(); - tmp_compute_graph_info.stream_id = context_->GetStreamId(); - compute_graph_info.emplace_back(tmp_compute_graph_info); - GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); - } return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index a8736154..cb5a7d4c 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -183,7 +183,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); // save profiling data - (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); + uint32_t task_id = 0; + uint32_t stream_id = 0; + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return FAILED; + } + GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); + (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); + (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 2a7cbc67..21bfed8e 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -191,8 +191,16 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionSynchronize(GetStream()); } -Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, + uint32_t task_type, uint32_t block_dim) { if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - - uint32_t task_id = 0; - uint32_t stream_id = 0; - rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return rt_ret; - } - GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); - const GraphExecutionContext * graph_context = GetExecutionContext(); GE_CHECK_NOTNULL(graph_context); const HybridModel *model = graph_context->model; @@ -536,5 +527,59 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block return SUCCESS; } + +Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string dynamic_model_name = model->GetModelName(); + auto op_mode = static_cast(domi::ImplyType::INVALID); + if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && + op_mode == static_cast(domi::ImplyType::TVM)) { + ComputeGraphDescInfo tmp_compute_graph_info; + tmp_compute_graph_info.model_name = dynamic_model_name; + tmp_compute_graph_info.op_name = op_desc->GetName(); + tmp_compute_graph_info.op_type = op_desc->GetType(); + // default + if (op_desc->GetAllInputsSize() == 0) { + tmp_compute_graph_info.input_format = { FORMAT_NULL }; + tmp_compute_graph_info.input_shape = { {0} }; + tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; + } + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); + if (input_desc == nullptr) { + continue; + } + tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); + tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); + tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); + } + + if (op_desc->GetOutputsSize() == 0) { + tmp_compute_graph_info.output_format = { FORMAT_NULL }; + tmp_compute_graph_info.output_shape = { {0} }; + tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; + } + for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { + GeTensorDesc output_desc = op_desc->GetOutputDesc(j); + tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); + tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); + tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); + } + tmp_compute_graph_info.task_id = task_id; + tmp_compute_graph_info.stream_id = stream_id; + compute_graph_info.emplace_back(tmp_compute_graph_info); + } + } + return SUCCESS; +} + } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 9a668f8c..e7ee4fc8 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -110,9 +110,13 @@ class TaskContext { void *handle_ = nullptr; const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } - Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); + Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + const std::vector& GetProfilingGraphDescInfo() const { return compute_graph_info; } + Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); + void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } + private: TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, @@ -133,6 +137,7 @@ class TaskContext { uint32_t task_id_ = 0; uint32_t stream_id_ = 0; std::vector task_desc_info; + std::vector compute_graph_info; }; } // namespace hybrid } // namespace ge From c08216f2969e2ea91f843d2588ab88796fa20729 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 20 Jan 2021 11:48:54 +0800 Subject: [PATCH 23/41] modified: ge/graph/load/model_manager/model_manager.cc modified: ge/graph/preprocess/graph_preprocess.cc modified: tests/ut/ge/CMakeLists.txt modified: tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc new file: tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc --- ge/graph/load/model_manager/model_manager.cc | 1 + ge/graph/preprocess/graph_preprocess.cc | 43 ++++++------ tests/ut/ge/CMakeLists.txt | 3 +- .../new_model_manager_model_manager_unittest.cc | 40 ++++++----- .../graph/preprocess/graph_preprocess_unittest.cc | 77 ++++++++++++++++++++++ 5 files changed, 124 insertions(+), 40 deletions(-) create mode 100644 tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index 7cf869ac..8be8b60f 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -527,6 +527,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector right_range)) { + GELOGE(PARAM_INVALID, "Given shape range is [%ld~%ld], current dim shape is %ld, out of range.Pleace Check.", + left_range, right_range, curr_dim); + return PARAM_INVALID; + } + } + origin_shape.SetDim(i, UNKNOWN_DIM); } } desc.SetShape(origin_shape); desc.SetShapeRange(current_shape_range_vec); - int64_t dynamic_shape_size = 1; - for (const auto range_pair : range_vec.at(index)) { - FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); - dynamic_shape_size *= range_pair.second; - } - auto data_type_size = GetSizeByDataType(desc.GetDataType()); - if (data_type_size < 0) { - GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", - TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); - return PARAM_INVALID; - } - FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); - dynamic_shape_size *= data_type_size; - GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size); graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index dafb97e0..abff433c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -573,7 +573,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/data_dumper_unittest.cc" #"graph/load/new_model_manager_data_inputer_unittest.cc" #"graph/load/new_model_manager_davinci_model_unittest.cc" - #"graph/load/new_model_manager_model_manager_unittest.cc" + "graph/load/new_model_manager_model_manager_unittest.cc" #"graph/load/new_model_manager_task_build_unittest.cc" "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" "graph/load/end_graph_task_unittest.cc" @@ -697,6 +697,7 @@ set(MULTI_PARTS_TEST_FILES "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" "graph/build/mem_assigner_unittest.cc" + "graph/preprocess/graph_preprocess_unittest.cc" "session/omg_omg_unittest.cc" ) diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 8750610a..3cffd2ed 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -15,24 +15,18 @@ */ #include - -#include +#include #include "common/debug/log.h" -#include "common/model_parser/base.h" -#include "common/properties_manager.h" #include "common/types.h" -#include "common/l2_cache_optimize.h" - +#include "graph/utils/graph_utils.h" #define private public #define protected public #include "graph/load/model_manager/model_manager.h" - #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/model_manager/davinci_model.h" -#include "graph/load/model_manager/davinci_model_parser.h" -#include "new_op_test_utils.h" +#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/new_model_manager/davinci_model_parser.h" #undef private #undef protected @@ -87,7 +81,6 @@ class UtestModelManagerModelManager : public testing::Test { data.model_data = new uint8_t[data.model_len]; uint8_t data_ori[model_len]; memset(data_ori, 10, model_len); - uint32_t out_len; ModelFileHeader *header = (ModelFileHeader *)data.model_data; header->magic = MODEL_FILE_MAGIC_NUM; header->version = MODEL_VERSION; @@ -97,7 +90,7 @@ class UtestModelManagerModelManager : public testing::Test { void LoadStandardModelData(ge::ModelData &data) { static const std::string STANDARD_MODEL_DATA_PATH = - "llt/framework/domi/ut/ome/test/data/standard_partition_model.txt"; + "llt/framework/domi/ut/ome/test/data/standard_partition_model.txt"; ge::proto::ModelDef model_def; ReadProtoFromText(STANDARD_MODEL_DATA_PATH.c_str(), &model_def); @@ -113,9 +106,8 @@ class DModelListener : public ge::ModelListener { uint32_t OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t resultCode) { return 0; } }; -shared_ptr UTEST_CALL_BACK_FUN(new DModelListener()); -TEST_F(UtestModelManagerModelManager, case_load_incorrect_param) { +/*TEST_F(UtestModelManagerModelManager, case_load_incorrect_param) { ModelManager mm; uint32_t model_id = 0; ge::ModelData model; @@ -307,7 +299,7 @@ TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_fail) { } -/* +*//* // test GetInputOutputDescInfo fail TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_zero_copy_fail) { ModelManager manager; @@ -316,7 +308,7 @@ TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_zero_copy_fail) vector output_shape; EXPECT_EQ(ge::PARAM_INVALID, manager.GetInputOutputDescInfoForZeroCopy(2, input_shape, output_shape)); } -*/ +*//* // test Stop TEST_F(UtestModelManagerModelManager, stop_fail) { @@ -347,6 +339,20 @@ TEST_F(UtestModelManagerModelManager, destroy_aicpu_session) { manager.sess_ids_.insert(0); manager.DestroyAicpuSession(0); +}*/ +// test DataInputTensor +TEST_F(UtestModelManagerModelManager, test_data_input_tensor) { + shared_ptr g_label_call_back(nullptr); + auto model = std::make_shared(0, g_label_call_back); + ModelManager mm; + uint32_t model_id = 1; + mm.model_map_[1] = model; + mm.hybrid_model_map_[1] = std::make_shared(); + + auto input_tensor = InputTensorInfo(); + vector inputs; + inputs.emplace_back(input_tensor); + auto ret = mm.DataInputTensor(model_id,inputs); + EXPECT_EQ(ge::UNSUPPORTED, ret); } - } // namespace ge diff --git a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc new file mode 100644 index 00000000..2f149761 --- /dev/null +++ b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc @@ -0,0 +1,77 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "common/ge_inner_error_codes.h" +#include "common/types.h" +#include "common/util.h" +#include "graph/passes/graph_builder_utils.h" +#include "graph/utils/attr_utils.h" +#include "graph/debug/ge_attr_define.h" + +#define private public +#define protected public +#include "graph/preprocess/graph_preprocess.h" +#include "ge/ge_api.h" +#undef private +#undef protected + +using namespace std; +namespace ge { +class UtestGraphPreproces : public testing::Test { + protected: + void SetUp() { + } + void TearDown() { + } +}; + +ComputeGraphPtr BuildGraph1(){ + auto builder = ut::GraphBuilder("g1"); + auto data1 = builder.AddNode("data1",DATA,1,1); + auto data_opdesc = data1->GetOpDesc(); + AttrUtils::SetInt(data_opdesc, ATTR_NAME_INDEX, 0); + data1->UpdateOpDesc(data_opdesc); + return builder.GetGraph(); +} + +TEST_F(UtestGraphPreproces, test_dynamic_input_shape_parse) { + ge::GraphPrepare graph_prepare; + graph_prepare.compute_graph_ = BuildGraph1(); + // prepare user_input & graph option + ge::GeTensorDesc tensor1; + tensor1.SetFormat(ge::FORMAT_NCHW); + tensor1.SetShape(ge::GeShape({3, 12, 5, 5})); + tensor1.SetDataType(ge::DT_FLOAT); + GeTensor input1(tensor1); + std::vector user_input = {input1}; + std::map graph_option = {{"ge.exec.dynamicGraphExecuteMode","dynamic_execute"}, + {"ge.exec.dataInputsShapeRange","[3,1~20,2~10,5]"}}; + auto ret = graph_prepare.UpdateInput(user_input, graph_option); + EXPECT_EQ(ret, ge::SUCCESS); + // check data node output shape_range and shape + auto data_node = graph_prepare.compute_graph_->FindNode("data1"); + auto data_output_desc = data_node->GetOpDesc()->GetOutputDescPtr(0); + vector expect_shape = {3,-1,-1,5}; + auto result_shape = data_output_desc->GetShape(); + EXPECT_EQ(result_shape.GetDimNum(), expect_shape.size()); + for(size_t i =0; i< expect_shape.size(); ++i){ + EXPECT_EQ(result_shape.GetDim(i), expect_shape.at(i)); + } +} +} \ No newline at end of file From b411d7d7ba323f6b72caec5612de55c5862e94da Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 20 Jan 2021 14:03:32 +0800 Subject: [PATCH 24/41] modified: tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc --- tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 3cffd2ed..688e73d4 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -25,8 +25,8 @@ #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #undef private #undef protected From b77ca9049f25ff098cce124f44046904b40c0cbd Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 20 Jan 2021 15:27:04 +0800 Subject: [PATCH 25/41] mod format_trans log level error to warning --- ge/common/formats/utils/formats_trans_utils.cc | 12 ++++++------ inc/framework/common/debug/log.h | 6 ++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ge/common/formats/utils/formats_trans_utils.cc b/ge/common/formats/utils/formats_trans_utils.cc index 18f2d70f..052951ce 100755 --- a/ge/common/formats/utils/formats_trans_utils.cc +++ b/ge/common/formats/utils/formats_trans_utils.cc @@ -32,7 +32,7 @@ int64_t GetCubeSizeByDataType(DataType data_type) { if (size <= 0) { std::string error = "Failed to get cube size, the data type " + FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid"; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); return -1; } else if (size == 1) { return kCubeSize * 2; // 32 bytes cube size @@ -61,7 +61,7 @@ bool CheckShapeValid(const std::vector &shape, const int64_t expect_dim if (expect_dims <= 0 || shape.size() != static_cast(expect_dims)) { std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) + ", expect " + FmtToStr(expect_dims); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); return false; } return IsShapeValid(shape); @@ -75,12 +75,12 @@ bool IsShapeValid(const std::vector &shape) { for (auto dim : shape) { if (dim < 0) { std::string error = "Invalid negative dims in the shape " + FmtToStr(ShapeToString(shape)); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); return false; } if (dim != 0 && kShapeItemNumMAX / dim < num) { std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); return false; } num *= dim; @@ -108,7 +108,7 @@ bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector &expect_ FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", invalid relationship between src shape " + FmtToStr(ShapeToString(args.src_shape)) + " and dst " + FmtToStr(ShapeToString(args.dst_shape)); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); return false; } return true; @@ -121,7 +121,7 @@ bool IsTransShapeDstCorrect(const TransArgs &args, std::vector &expect_ FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" + FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" + FmtToStr(ShapeToString(expect_shape)); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_WARNINGLOG_AND_ERRORMSG(error.c_str()); return false; } return true; diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 72dba126..31281cd6 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -261,6 +261,12 @@ ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ } +#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ + { \ + GELOGW("%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ + } + #define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ do { \ bool b = (expr); \ From bef69ab2cf9aa7bbbbead8d4179ff8d8e75d830a Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Jan 2021 20:06:24 +0800 Subject: [PATCH 26/41] Remove gentask in DEPEND_COMPUTE task executor. --- .../node_executor/aicpu/aicpu_node_executor.cc | 128 ++++++++++----------- .../node_executor/aicpu/aicpu_node_executor.h | 13 ++- 2 files changed, 71 insertions(+), 70 deletions(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index a2e610b4..b94b89c5 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -22,7 +22,6 @@ #include "graph/utils/node_utils.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" namespace ge { namespace hybrid { @@ -356,6 +355,44 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { return SUCCESS; } +Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) { + if (node_item_->num_outputs == 0) { + GELOGD("Node[%s] type[%s] has no output, no need set mem_copy task.", + node_name_.c_str(), node_item_->node_type.c_str()); + return SUCCESS; + } + + const domi::KernelExDef &kernel_def = task_def.kernel_ex(); + if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); + return PARAM_INVALID; + } + STR_FWK_OP_KERNEL aicpu_task = {0}; + auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), + kernel_def.args().data(), kernel_def.args_size()); + if (sec_ret != EOK) { + GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + return FAILED; + } + + GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_), + "Node[%s] alloc copy task workspace buf failed, size=%zu.", + node_name_.c_str(), kernel_def.task_info_size()); + + GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(), + kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + + aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); + aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(copy_workspace_buf_->GetData()); + aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; + aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; + + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} + uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) { // get step_id_addr auto var_tensor = model.GetVariable(NODE_NAME_GLOBAL_STEP); @@ -407,32 +444,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, "Node[%s] has %d outputs but out shape is %zu.", node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); - uint64_t copy_num = 0; - GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num)); - - STR_FWK_OP_KERNEL aicpu_task = {0}; - std::string task_info; - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[GenMemCopyTask] Start"); - GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info)); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[GenMemCopyTask] End"); - - std::unique_ptr kernel_workspace_buf; - GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf), - "Node[%s] alloc copy task workspace buf failed, size=%zu.", - node_name_.c_str(), task_info.size()); - - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), - task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); - - aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); - aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData()); - aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; - aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; - - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), - &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), @@ -445,8 +457,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, } Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, - const std::vector> &out_shape_hbm, - uint64_t ©_num) { + const std::vector> &out_shape_hbm) { std::vector copy_input_release_flag; std::vector copy_input_data_size; std::vector copy_input_src; @@ -458,34 +469,23 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); - if (summary.raw_data_size > 0) { - auto output = context.GetOutput(i); - GE_CHECK_NOTNULL(output); - GE_CHECK_NOTNULL(output->GetData()); - copy_input_release_flag.emplace_back(kReleaseFlag); - copy_input_data_size.emplace_back(summary.raw_data_size); - copy_input_src.emplace_back(summary.raw_data_ptr); - copy_input_dst.emplace_back(reinterpret_cast(output->GetData())); - } - - if (summary.shape_data_size > 0) { - const auto &shape_buffer = out_shape_hbm[i]; - GE_CHECK_NOTNULL(shape_buffer); - GE_CHECK_NOTNULL(shape_buffer->GetData()); - copy_input_release_flag.emplace_back(kReleaseFlag); - copy_input_data_size.emplace_back(summary.shape_data_size); - copy_input_src.emplace_back(summary.shape_data_ptr); - copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData())); - } + auto output = context.GetOutput(i); + GE_CHECK_NOTNULL(output); + copy_input_release_flag.emplace_back(kReleaseFlag); + copy_input_data_size.emplace_back(summary.raw_data_size); + copy_input_src.emplace_back(summary.raw_data_ptr); + copy_input_dst.emplace_back(reinterpret_cast(output->GetData())); + + const auto &shape_buffer = out_shape_hbm[i]; + GE_CHECK_NOTNULL(shape_buffer); + copy_input_release_flag.emplace_back(kReleaseFlag); + copy_input_data_size.emplace_back(summary.shape_data_size); + copy_input_src.emplace_back(summary.shape_data_ptr); + copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData())); } - copy_num = copy_input_release_flag.size(); - - GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, - "Node[%s] need copy num is 0", node_name_.c_str()); - - // copy task need copy output and output shape - const size_t copy_input_buf_len = copy_num * sizeof(uint64_t); + // copy task need copy all output_data and output_shape, len is 2 * output_num + const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t); GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_->GetData(), copy_input_release_flag_dev_->GetSize(), ©_input_release_flag[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); @@ -498,15 +498,6 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, return SUCCESS; } -Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) { - static constexpr const char *const kKernelLibName = "aicpu_tf_kernel"; - auto kernel_builder = OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName); - GE_CHK_BOOL_RET_STATUS(kernel_builder != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName); - auto ret = kernel_builder->GenMemCopyTask(copy_num, task, task_info); - GE_CHK_STATUS_RET(ret, "Call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret); - return SUCCESS; -} - Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm) { GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), @@ -813,9 +804,9 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size()); } else { - // The number of tasks of the fourth type operator may be 2 - GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID, - "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", + // The number of tasks of the fourth type operator must be 2 + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 2, PARAM_INVALID, + "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 2", node->GetName().c_str(), (*task_defs).size()); } const auto &task_def = (*task_defs)[0]; @@ -836,6 +827,9 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, "Load task for node %s failed.", node->GetName().c_str()); GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str()); + if (node_item->shape_inference_type == DEPEND_COMPUTE) { + GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask((*task_defs)[1])); + } task = std::move(aicpu_task); GELOGD("Node[%s] load task end.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 8f0b1d0a..c6e63ee0 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -21,6 +21,7 @@ #include "cce/aicpu_engine_struct.h" #include "hybrid/node_executor/node_executor.h" #include "aicpu_ext_info.h" +#include "common/ge_inner_error_codes.h" namespace ge { namespace hybrid { @@ -41,6 +42,10 @@ class AicpuNodeTaskBase : public NodeTask { virtual Status Init(const HybridModel &model) = 0; + virtual Status SetMemCopyTask(const domi::TaskDef &task_def) { + return UNSUPPORTED; + } + Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; @@ -89,6 +94,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { Status Init(const HybridModel &model) override; + Status SetMemCopyTask(const domi::TaskDef &task_def) override; + protected: Status LaunchTask(TaskContext &context) override; @@ -117,11 +124,9 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { const std::vector> &out_shape_hbm); Status PrepareCopyInputs(const TaskContext &context, - const std::vector> &out_shape_hbm, - uint64_t ©_num); + const std::vector> &out_shape_hbm); static Status EnsureSessionCreated(uint64_t session_id); - static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); static uint64_t GetStepIdAddr(const HybridModel &model); private: // kernel buf, device mem @@ -145,6 +150,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { std::unique_ptr copy_input_src_dev_; std::unique_ptr copy_input_dst_dev_; bool need_sync_ = false; + + std::unique_ptr copy_workspace_buf_; }; class AicpuNodeTask : public AicpuNodeTaskBase { From 829e43c4e3bf773af4b1afb9afe6d8a80f235a8f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Jan 2021 20:07:47 +0800 Subject: [PATCH 27/41] Add log. --- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index b94b89c5..c6fb76ed 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -362,6 +362,7 @@ Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) { return SUCCESS; } + GELOGD("Start to set memcpy task for node[%s].", node_name_.c_str()); const domi::KernelExDef &kernel_def = task_def.kernel_ex(); if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", @@ -390,6 +391,7 @@ Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) { GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + GELOGD("Set memcpy task for node[%s] successfully.", node_name_.c_str()); return SUCCESS; } From b86236fe3094bd7e653e070618e684a7acff449c Mon Sep 17 00:00:00 2001 From: wuweikang Date: Wed, 20 Jan 2021 21:19:52 +0800 Subject: [PATCH 28/41] add KernelDefWithHandle --- ge/client/proto/task.proto | 14 ++++++++++++++ ge/common/proto/task.proto | 14 ++++++++++++++ ge/executor/proto/task.proto | 14 ++++++++++++++ ge/ge_local_engine/proto/task.proto | 14 ++++++++++++++ ge/host_cpu_engine/proto/task.proto | 14 ++++++++++++++ ge/offline/proto/task.proto | 14 ++++++++++++++ ge/proto/task.proto | 14 ++++++++++++++ metadef | 2 +- parser | 2 +- 9 files changed, 100 insertions(+), 2 deletions(-) diff --git a/ge/client/proto/task.proto b/ge/client/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/client/proto/task.proto +++ b/ge/client/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/common/proto/task.proto b/ge/common/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/common/proto/task.proto +++ b/ge/common/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/executor/proto/task.proto b/ge/executor/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/executor/proto/task.proto +++ b/ge/executor/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/ge_local_engine/proto/task.proto b/ge/ge_local_engine/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/ge_local_engine/proto/task.proto +++ b/ge/ge_local_engine/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/host_cpu_engine/proto/task.proto b/ge/host_cpu_engine/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/host_cpu_engine/proto/task.proto +++ b/ge/host_cpu_engine/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/offline/proto/task.proto b/ge/offline/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/offline/proto/task.proto +++ b/ge/offline/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/proto/task.proto b/ge/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/proto/task.proto +++ b/ge/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/metadef b/metadef index 88d053a5..848cf412 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 88d053a5f94c40ff21620cef50b87075d5054292 +Subproject commit 848cf412caa9b42ce4e75ab4d0a147ec97dc579b diff --git a/parser b/parser index 6904ba94..756c64c5 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 6904ba9488658afc30076d299183fc8875045f49 +Subproject commit 756c64c59e451a955e81b59d957ad55f96c27d89 From b598ea75cd1d0add7235fe9a878f3024158f9485 Mon Sep 17 00:00:00 2001 From: lianghao Date: Thu, 21 Jan 2021 16:09:48 +0800 Subject: [PATCH 29/41] CondRemovePass --- ge/graph/passes/cond_remove_pass.cc | 15 +++++++++++++-- ge/graph/passes/cond_remove_pass.h | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index bf2e1170..ce5ff7c0 100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -234,7 +234,7 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c const auto &output_desc_size = node->GetOpDesc()->GetOutputsSize(); // Create subgraph opdesc & node auto partitioncall_opdesc = - CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size); + CreateSubgraphOpDesc(node, save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size); auto partitioncall_node = node->GetOwnerComputeGraph()->AddNode(partitioncall_opdesc); // Link node's peerout anchors to new node's inanchors for (const auto &input_anchor : node->GetAllInAnchors()) { @@ -289,7 +289,8 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c /// @param [in] output_num /// @return OpDescPtr /// -OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const std::string &name, size_t input_num, size_t output_num) { +OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const NodePtr &node, const std::string &name, size_t input_num, + size_t output_num) { OpDescBuilder op_desc_builder(name, PARTITIONEDCALL); op_desc_builder.AddDynamicInput("args", input_num).AddDynamicOutput("output", output_num); @@ -299,6 +300,16 @@ OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const std::string &name, size_t i size_t index = op_desc->GetSubgraphInstanceNames().size(); op_desc->AddSubgraphName("f"); op_desc->SetSubgraphInstanceName(static_cast(index), name); + + auto node_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL_EXEC(node_desc, return nullptr); + for (size_t i = 0; i < input_num; ++i) { + (void)op_desc->UpdateInputDesc(i, node_desc->GetInputDesc(i + 1)); + } + for (size_t i = 0; i < output_num; ++i) { + (void)op_desc->UpdateOutputDesc(i, node_desc->GetOutputDesc(i)); + } + return op_desc; } diff --git a/ge/graph/passes/cond_remove_pass.h b/ge/graph/passes/cond_remove_pass.h index 72ca64b8..e466d684 100644 --- a/ge/graph/passes/cond_remove_pass.h +++ b/ge/graph/passes/cond_remove_pass.h @@ -70,7 +70,7 @@ class CondRemovePass : public BaseNodePass { /// Status ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, const ComputeGraphPtr &save_branch); - OpDescPtr CreateSubgraphOpDesc(const std::string &name, size_t input_num, size_t output_num); + OpDescPtr CreateSubgraphOpDesc(const NodePtr &node, const std::string &name, size_t input_num, size_t output_num); int32_t GetCondIndex(const ConstGeTensorPtr &tensor); }; From ee7e56a261215ac468d61b96fa0e3f98d8e964e9 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 21 Jan 2021 20:08:03 +0800 Subject: [PATCH 30/41] broadcast in train graph related --- ge/graph/build/memory/block_mem_assigner.cc | 116 ++++++++-- ge/graph/build/memory/block_mem_assigner.h | 6 +- ge/graph/load/model_manager/davinci_model.cc | 11 - ge/graph/manager/graph_manager.cc | 3 + ge/graph/passes/hccl_memcpy_pass.cc | 334 ++++++++++++++++++++++++--- ge/graph/passes/hccl_memcpy_pass.h | 17 ++ ge/graph/preprocess/graph_preprocess.cc | 3 - 7 files changed, 429 insertions(+), 61 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index a523ce3f..a7564e01 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -551,11 +551,31 @@ void GetMaxBatchAllMemorySize(std::map> &batch_all_ } } +void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { + auto node_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); + // if input size just one and from variable, no need to reassign continuous memory + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { + auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); + auto in_node = peer_out_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(in_node == nullptr, return); + if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { + GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str()); + (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + } + } +} + void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; std::map> batch_all_memory_size; std::map batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { + MarkContinuousAllocedForOneInputFromVariable(n); + auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1061,18 +1081,73 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } -MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, - const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); +bool IsOutputIndexRef(const OpDescPtr &op_desc, uint32_t index) { + auto output_tensor = op_desc->GetOutputDescPtr(index); + bool dst_reuse_input = false; + (void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input); + if (dst_reuse_input) { + return true; + } + + bool is_ref = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); + if (is_ref) { + string output_name = op_desc->GetOutputNameByIndex(index); + for (const auto &input_name : op_desc->GetAllInputNames()) { + if (output_name == input_name) { + return true;; + } + } + } + return false; +} + +void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, + const NodePtr &n) { + const auto node_op_desc = n->GetOpDesc(); + for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { + if (!IsOutputIndexRef(node_op_desc, index)) { + isAllOutputRef = false; + break; + } else { + zero_memory_list_.emplace_back(n, kOutput, index); + isOutputHasRef = true; + } + } +} + + +Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, + const bool is_op_reuse_mem) { + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); + + // continuous output support ref only when all output ref input + bool isAllOutputRef = true; + bool isOutputHasRef = false; + + ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); + + if (isAllOutputRef) { + GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); + return SUCCESS; + } + + if (!isAllOutputRef && isOutputHasRef) { + GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", + n->GetName().c_str()); + return INTERNAL_ERROR; + } + MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - return nullptr; + GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return INTERNAL_ERROR; } if (CheckIsZeroMemNodeType(n->GetType())) { @@ -1082,8 +1157,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { - GELOGI("Get size failed"); - return nullptr; + GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return INTERNAL_ERROR; } size_t align_size = static_cast(size); AlignMemOffset(align_size); @@ -1106,7 +1181,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec } if (total_size == 0) { - return nullptr; + return SUCCESS; } auto block_size = GetBlockSize(total_size, ranges); @@ -1120,8 +1195,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec // hccl task need align header and tail block->first_continuous_block_ = true; block->last_continuous_block_ = true; + } else { + GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); + return INTERNAL_ERROR; } - return block; + return SUCCESS; } MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, @@ -1133,9 +1211,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - if (output_op_desc != nullptr) { - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); - } + GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, "Get no align size failed"); @@ -1146,6 +1223,13 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; } else { + // if ref input is variable, can not find symbol, must judge alone + if (IsOutputIndexRef(node_op_desc, index)) { + zero_memory_list_.emplace_back(n, kOutput, index, false); + GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index); + return nullptr; + } + int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); @@ -1393,8 +1477,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { - (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); - return SUCCESS; + return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); } for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { int64_t size = 0; @@ -1894,9 +1977,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || - (node_type == HVDCALLBACKBROADCAST); + (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || + (node_type == ASSIGN) || (node_type == HVDWAIT); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 58bcda75..e1db6cad 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -421,7 +421,11 @@ class BlockMemAssigner : public MemAssigner { bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); + void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); + + Status ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); + + void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); std::unordered_map>> reusable_blocks_; diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 0a92447b..3f73b0e1 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -2148,11 +2148,6 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } - for (const auto &item : broadcast_variable_) { - ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - item.first.c_str()); - } return ret; } @@ -2636,12 +2631,6 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b /// Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); - for (const auto item : broadcast_variable_) { - Status ret = VarManager::Instance(session_id_) - ->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - item.first.c_str()); - } GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); std::vector outputs; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index d5ee690c..0d58e9c2 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -92,6 +92,7 @@ #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" #include "graph/passes/memcpy_addr_async_pass.h" +#include "graph/passes/hccl_memcpy_pass.h" #include "graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" #include "inc/pass_manager.h" @@ -2150,6 +2151,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) TransOpWithoutReshapeFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)) + GE_CHK_STATUS_RET( + after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 21747f42..3f607f84 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -28,6 +28,8 @@ namespace { const int32_t kAnchorSize = 1; const int kAnchorNum = 0; +const int32_t kAnchorAssignRefIndex = 0; +const int32_t kAnchorAssignValueIndex = 1; const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { @@ -35,43 +37,147 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + Status ret = ContinuousInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = MutableInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = P2pmemInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + } + return SUCCESS; +} + +// If node has _input_mutable attr, means input mem may be modified when op execute. +// In order to avoid to affect another op execute with same input when data modified, +// need to inset memcpy node between. +// also works on situation that input is variable or const. +Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + return SUCCESS; + } + + if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); + return FAILED; + } + if (!node_input_mutable) { + return SUCCESS; + } - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); + for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { + if (hccl_in_anchor == nullptr) { continue; } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(src_out_anchor); - GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); - if (!node_input_mutable) { + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } continue; } - GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } + return SUCCESS; +} + +// If broadcast input size is bigger than 1, and input from variable, +// cause by broadcast input memory should be continuous, +// another featuremap mem will be allocated for broadcast input. +// In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. +// In order to avoid move action out of model, use memcpy node instead of move action code. +Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + if (is_input_continuous && op_desc->GetInputsSize() > 1) { + GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); + // if input size bigger than one, insert memcpy between var data for support continous mem alloc for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; } auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. - NodePtr src_node = src_out_anchor->GetOwnerNode(); - std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); - if (check_src_type) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; } - continue; } + } + } + return SUCCESS; +} + +// if input is var type, and node input need p2p mem, then memcpy should be insert between the two +Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + vector input_memory_types; + (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); + + if (input_memory_types.empty()) { + return SUCCESS; + } + + for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { + if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { + continue; + } + + GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); + auto hccl_in_anchor = node->GetInDataAnchor(index); + if (hccl_in_anchor == nullptr) { + continue; + } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -82,8 +188,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { return SUCCESS; } +bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { + return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); +} + /// -/// @brief Add MemcpyAsync Node +/// @brief Add Identity Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr @@ -101,20 +211,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O node_name = CheckDuplicateName(node_name); OpDescPtr op_desc = MakeShared(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); return nullptr; } @@ -155,7 +265,38 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); + GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); + GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); + + Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", + src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return ret; + } + + ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", + src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return ret; + } + return SUCCESS; +} + +/// +/// @brief Insert Identity node Between Hccl node and variable +/// @param [in] ComputeGraphPtr graph +/// @param [in] OutDataAnchorPtr src_out_anchor +/// @param [in] InDataAnchorPtr hccl_in_anchor +/// @return status +/// +Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { + GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); @@ -182,6 +323,141 @@ Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const } return SUCCESS; } + +/// +/// @brief Insert assign node after broadcast node and variable to refresh variable data +/// @param [in] ComputeGraphPtr graph +/// @param [in] OutDataAnchorPtr var_out_anchor +/// @param [in] InDataAnchorPtr hccl_in_anchor +/// @return status +/// +Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &var_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { + if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { + GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + + if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { + GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + + GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + + for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { + if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { + GELOGD("variable %s out assign node is exist.", var_out_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + } + + NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); + GE_CHECK_NOTNULL(assign_node); + + OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); + GE_CHECK_NOTNULL(hccl_out_anchor); + + Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), + assign_node->GetName().c_str()); + return FAILED; + } + + ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", var_out_anchor->GetOwnerNode()->GetName().c_str(), + assign_node->GetName().c_str()); + return FAILED; + } + + // add control edge between assign node and node after broadcast node + OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(assign_out_control_anchor); + + for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { + if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { + continue; + } + ret = assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", + assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str()); + return FAILED; + } + } + + for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { + if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { + continue; + } + ret = assign_out_control_anchor->LinkTo(in_control_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", + assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), + in_control_anchor->GetOwnerNode()->GetName().c_str()); + return FAILED; + } + } + return SUCCESS; +} + +/// +/// @brief create assign Node, add to graph +/// @param [in] ge::ComputeGraphPtr graph +/// @param [in] ge::OutDataAnchorPtr variable node out anchor +/// @return ge::NodePtr +/// +NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { + GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); + NodePtr pre_node = out_data_anchor->GetOwnerNode(); + OpDescPtr pre_op_desc = pre_node->GetOpDesc(); + if (pre_op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid."); + return nullptr; + } + + std::string node_name = pre_node->GetName() + "_" + ASSIGN; + node_name = CheckDuplicateName(node_name); + OpDescPtr op_desc = MakeShared(node_name.c_str(), ASSIGN); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail."); + return nullptr; + } + GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); + + graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); + return nullptr; + } + + ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail."); + return nullptr; + } + + ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); + return nullptr; + } + + NodePtr assign_node = graph->AddNode(op_desc); + if (assign_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); + return nullptr; + } + + return assign_node; +} + + /// /// @brief Clear Status, used for subgraph pass /// @return SUCCESS diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index e73a5483..98e05964 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -32,11 +32,28 @@ class HcclMemcpyPass : public GraphPass { private: NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + std::string CheckDuplicateName(const std::string &node_name); Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor); + Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor); + + Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor); + + Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + bool IsDataNode(const std::string& node_type); + std::unordered_map node_num_map_; }; } // namespace ge diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 19f5ef54..787a28cf 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -49,7 +49,6 @@ #include "graph/passes/for_pass.h" #include "graph/passes/guarantee_const_pass.h" #include "graph/passes/hccl_group_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/infershape_pass.h" #include "graph/passes/merge_pass.h" @@ -1892,8 +1891,6 @@ Status GraphPrepare::PrepareOptimize() { PassManager graph_pass; try { (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass); - // todo 临时把hccl的memcpy插入放到图准备,为了防止其多插memcpy - (void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; From 9ef1c5a89f0b0f7b8f9b89d6bafb42d2dbb8d0bd Mon Sep 17 00:00:00 2001 From: wqtshg Date: Fri, 22 Jan 2021 19:12:11 +0800 Subject: [PATCH 31/41] update thirdparty and submodule --- metadef | 2 +- parser | 2 +- .../inc/aicpu/aicpu_schedule/aicpu_op_type_list.h | 40 ++--- third_party/fwkacllib/inc/cce/aicpu_engine.h | 1 + third_party/fwkacllib/inc/cce/fwk_adpt_struct.h | 1 + .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 3 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h | 166 ++++++++++----------- third_party/fwkacllib/inc/runtime/base.h | 71 ++++++--- third_party/fwkacllib/inc/runtime/config.h | 134 +++++++++-------- third_party/fwkacllib/inc/runtime/context.h | 25 ++-- third_party/fwkacllib/inc/runtime/dev.h | 53 +++---- third_party/fwkacllib/inc/runtime/kernel.h | 90 +++++------ third_party/fwkacllib/inc/runtime/mem.h | 120 +++++++-------- third_party/fwkacllib/inc/runtime/rt_model.h | 1 - third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h | 24 +-- third_party/fwkacllib/inc/tdt/tdt_host_interface.h | 21 +-- third_party/fwkacllib/inc/toolchain/slog.h | 12 +- .../fwkacllib/inc/toolchain/tuning_tool/tune_api.h | 144 +++++++++--------- 18 files changed, 462 insertions(+), 448 deletions(-) diff --git a/metadef b/metadef index 848cf412..bb864122 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 848cf412caa9b42ce4e75ab4d0a147ec97dc579b +Subproject commit bb86412204fc72fa8fe4063e6044090dfd714321 diff --git a/parser b/parser index 756c64c5..d85b5fc6 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 756c64c59e451a955e81b59d957ad55f96c27d89 +Subproject commit d85b5fc685b9e1f8dbee778c9c7b3ab6f379af79 diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 8d16467c..703225e8 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -18,43 +18,43 @@ #define AICPU_OP_TYPE_LIST_H_ enum OpKernelType { - TF_KERNEL, - CPU_KERNEL + TF_KERNEL, + CPU_KERNEL }; enum ReturnCode { - OP_TYPE_NOT_SUPPORT, - FORMAT_NOT_SUPPORT, - DTYPE_NOT_SUPPORT + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT }; #pragma pack(push, 1) //One byte alignment struct SysOpInfo { - uint64_t opLen; - uint64_t opType; - OpKernelType kernelsType; + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; }; struct OpParamInfo { - uint64_t num; - uint64_t dtypeList; - uint64_t formatList; + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; }; struct SysOpCheckInfo { - uint64_t opListNum; - uint64_t offSetLen; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; }; struct SysOpCheckResp { - uint64_t opListNum; - bool isWithoutJson; - uint64_t returnCodeList; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; }; #pragma pack(pop) #endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index b83731a8..042d952b 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -31,6 +31,7 @@ typedef enum { AE_STATUS_KERNEL_API_INNER_ERROR = 5, AE_STATUS_END_OF_SEQUENCE = 6, AE_STATUS_DUMP_FAILED = 7, + AE_STATUS_TASK_WAIT = 101, AE_STATUS_RESERVED } aeStatus_t; diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 50b39d91..7a2cbc50 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -60,6 +60,7 @@ enum FWKTaskExtInfoType { FWK_ADPT_EXT_UPDATE_ADDR, FWK_ADPT_EXT_OP_NAME, FWK_ADPT_EXT_SESSION_INFO, + FWK_ADPT_EXT_BITMAP, FWK_ADPT_EXT_INVALID }; diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 005014ed..993f36ba 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -50,7 +50,7 @@ typedef int (*mmFilter)(const mmDirent *entry); typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); -typedef size_t mmSize_t; +typedef size_t mmSize_t; //lint !e410 !e1051 typedef off_t mmOfft_t; typedef pid_t mmPid_t; typedef long MM_LONG; @@ -283,6 +283,7 @@ typedef struct { #define M_W_OK W_OK #define M_R_OK R_OK + #define MM_DT_DIR DT_DIR #define MM_DT_REG DT_REG diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h index 8200bea6..58ebb1a0 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h @@ -1,83 +1,83 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MMPA_TYPEDEF_WIN_H -#define MMPA_TYPEDEF_WIN_H - -#ifdef __cplusplus -#if __cplusplus -extern "C" { -#endif // __cpluscplus -#endif // __cpluscplus - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#define EN_OK 0 -#define EN_ERR 1 -#define EN_ERROR (-1) -#define EN_INVALID_PARAM (-2) -#define EN_TIMEOUT (-3) - -#define HANDLE_INVALID_VALUE (-1) -#define INVALID_SOCKET_HANDLE INVALID_SOCKET -#define MMPA_MEM_MAX_LEN (0x7fffffff) -#define MMPA_PROCESS_ERROR (0x7fffffff) - -#define MMPA_ONE_THOUSAND 1000 -#define MMPA_COMPUTER_BEGIN_YEAR 1900 -#define SUMMER_TIME_OR_NOT (-1) -#define MMPA_ZERO 0 -#define MMPA_VALUE_ONE 1 -#define MMPA_SOCKET_MAIN_EDITION 2 -#define MMPA_SOCKET_SECOND_EDITION 0 -#define MMPA_PIPE_BUF_SIZE 1024 -#define MMPA_MAX_SCANDIR_COUNT 1024 -#define MAX_IOVEC_SIZE 32 -#define MMPA_PIPE_COUNT 2 -#define MMPA_THREADNAME_SIZE 16 -#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) -#define MMPA_MIN_OS_VERSION_SIZE 64 - -#define MMPA_MAX_NI 19 -#define MMPA_MIDDLE_NI 5 -#define MMPA_LOW_NI (-5) -#define MMPA_MIN_NI (-20) -#define MMPA_MAX_FILE 128 - -#define MMPA_MAX_THREAD_PIO 99 -#define MMPA_MIDDLE_THREAD_PIO 66 -#define MMPA_LOW_THREAD_PIO 33 -#define MMPA_MIN_THREAD_PIO 1 - -#define MMPA_THREAD_SCHED_RR 0 -#define MMPA_THREAD_SCHED_FIFO 0 -#define MMPA_THREAD_SCHED_OTHER 0 -#define MMPA_THREAD_MIN_STACK_SIZE 0 - -#define MM_MUTEX_INITIALIZER NULL - -#ifdef __cplusplus -#if __cplusplus -} -#endif // __cpluscplus -#endif // __cpluscplus -#endif // _MMPA_TYPEDEF_WIN_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MMPA_TYPEDEF_WIN_H +#define MMPA_TYPEDEF_WIN_H + +#ifdef __cplusplus +#if __cplusplus +extern "C" { +#endif // __cpluscplus +#endif // __cpluscplus + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#define EN_OK 0 +#define EN_ERR 1 +#define EN_ERROR (-1) +#define EN_INVALID_PARAM (-2) +#define EN_TIMEOUT (-3) + +#define HANDLE_INVALID_VALUE (-1) +#define INVALID_SOCKET_HANDLE INVALID_SOCKET +#define MMPA_MEM_MAX_LEN (0x7fffffff) +#define MMPA_PROCESS_ERROR (0x7fffffff) + +#define MMPA_ONE_THOUSAND 1000 +#define MMPA_COMPUTER_BEGIN_YEAR 1900 +#define SUMMER_TIME_OR_NOT (-1) +#define MMPA_ZERO 0 +#define MMPA_VALUE_ONE 1 +#define MMPA_SOCKET_MAIN_EDITION 2 +#define MMPA_SOCKET_SECOND_EDITION 0 +#define MMPA_PIPE_BUF_SIZE 1024 +#define MMPA_MAX_SCANDIR_COUNT 1024 +#define MAX_IOVEC_SIZE 32 +#define MMPA_PIPE_COUNT 2 +#define MMPA_THREADNAME_SIZE 16 +#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) +#define MMPA_MIN_OS_VERSION_SIZE 64 + +#define MMPA_MAX_NI 19 +#define MMPA_MIDDLE_NI 5 +#define MMPA_LOW_NI (-5) +#define MMPA_MIN_NI (-20) +#define MMPA_MAX_FILE 128 + +#define MMPA_MAX_THREAD_PIO 99 +#define MMPA_MIDDLE_THREAD_PIO 66 +#define MMPA_LOW_THREAD_PIO 33 +#define MMPA_MIN_THREAD_PIO 1 + +#define MMPA_THREAD_SCHED_RR 0 +#define MMPA_THREAD_SCHED_FIFO 0 +#define MMPA_THREAD_SCHED_OTHER 0 +#define MMPA_THREAD_MIN_STACK_SIZE 0 + +#define MM_MUTEX_INITIALIZER NULL + +#ifdef __cplusplus +#if __cplusplus +} +#endif // __cpluscplus +#endif // __cpluscplus +#endif // _MMPA_TYPEDEF_WIN_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index ebfc09f3..5b246eed 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -41,12 +41,12 @@ static const int32_t RT_ERROR_NONE = 0; // success * @brief runtime exception numbers. */ typedef enum tagRtExceptionType { - RT_EXCEPTION_NONE = 0, - RT_EXCEPTION_TS_DOWN = 1, - RT_EXCEPTION_TASK_TIMEOUT = 2, - RT_EXCEPTION_TASK_FAILURE = 3, - RT_EXCEPTION_DEV_RUNNING_DOWN = 4, - RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5 + RT_EXCEPTION_NONE = 0, + RT_EXCEPTION_TS_DOWN = 1, + RT_EXCEPTION_TASK_TIMEOUT = 2, + RT_EXCEPTION_TASK_FAILURE = 3, + RT_EXCEPTION_DEV_RUNNING_DOWN = 4, + RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5 } rtExceptionType; /** @@ -54,12 +54,12 @@ typedef enum tagRtExceptionType { * @brief Switch type. */ typedef enum tagRtCondition { - RT_EQUAL = 0, - RT_NOT_EQUAL, - RT_GREATER, - RT_GREATER_OR_EQUAL, - RT_LESS, - RT_LESS_OR_EQUAL + RT_EQUAL = 0, + RT_NOT_EQUAL, + RT_GREATER, + RT_GREATER_OR_EQUAL, + RT_LESS, + RT_LESS_OR_EQUAL } rtCondition_t; /** @@ -67,25 +67,25 @@ typedef enum tagRtCondition { * @brief Data Type of Extensible Switch Task. */ typedef enum tagRtSwitchDataType { - RT_SWITCH_INT32 = 0, - RT_SWITCH_INT64 = 1, + RT_SWITCH_INT32 = 0, + RT_SWITCH_INT64 = 1, } rtSwitchDataType_t; typedef enum tagRtStreamFlagType { - RT_HEAD_STREAM = 0, // first stream - RT_INVALID_FLAG = 0xFFFFFFFF, + RT_HEAD_STREAM = 0, // first stream + RT_INVALID_FLAG = 0xFFFFFFFF, } rtStreamFlagType_t; typedef enum tagRtLimitType { - RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms + RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms } rtLimitType_t; typedef struct rtExceptionInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; - uint32_t retcode; + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; } rtExceptionInfo; typedef void (*rtErrorCallback)(rtExceptionType); @@ -113,6 +113,12 @@ typedef void *rtEvent_t; typedef void *rtLabel_t; /** + * @ingroup dvrt_base + * @brief model handle. + */ +typedef void *rtModel_t; + +/** * @ingroup profiling_base * @brief runtime handle. */ @@ -219,6 +225,16 @@ RTS_API rtError_t rtLabelCreate(rtLabel_t *label); /** * @ingroup dvrt_base + * @brief create label instance + * @param [out] label created label + * @param [in] model label set model + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model); + +/** + * @ingroup dvrt_base * @brief set label and stream instance * @param [in] label set label * @param [in] stream set stream @@ -316,6 +332,17 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); /** * @ingroup dvrt_base + * @brief labels to dev info + * @param [out] label created label handle + * @param [in] model label bind model + * @param [in] stream label bind stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); + +/** + * @ingroup dvrt_base * @brief get current thread last stream id and task id * @param [out] stream id and task id * @param [in] null diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 8bfc9893..ee104693 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -24,105 +24,106 @@ extern "C" { #endif #define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) -#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) -#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) -#define PLAT_GET_VER(type) (type & 0xff) +#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) +#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) +#define PLAT_GET_VER(type) (type & 0xff) typedef enum tagRtArchType { - ARCH_BEGIN = 0, - ARCH_V100 = ARCH_BEGIN, - ARCH_V200, - ARCH_END, + ARCH_BEGIN = 0, + ARCH_V100 = ARCH_BEGIN, + ARCH_V200, + ARCH_END, } rtArchType_t; typedef enum tagRtChipType { - CHIP_BEGIN = 0, - CHIP_MINI = CHIP_BEGIN, - CHIP_CLOUD, - CHIP_MDC, - CHIP_LHISI, - CHIP_DC, - CHIP_CLOUD_V2, - CHIP_END, + CHIP_BEGIN = 0, + CHIP_MINI = CHIP_BEGIN, + CHIP_CLOUD, + CHIP_MDC, + CHIP_LHISI, + CHIP_DC, + CHIP_CLOUD_V2, + CHIP_END, } rtChipType_t; typedef enum tagRtVersion { - VER_BEGIN = 0, - VER_NA = VER_BEGIN, - VER_ES, - VER_CS, - VER_END, + VER_BEGIN = 0, + VER_NA = VER_BEGIN, + VER_ES, + VER_CS, + VER_SD3403, + VER_END, } rtVersion_t; /* match rtChipType_t */ typedef enum tagRtPlatformType { - PLATFORM_BEGIN = 0, - PLATFORM_MINI_V1 = PLATFORM_BEGIN, - PLATFORM_CLOUD_V1, - PLATFORM_MINI_V2, - PLATFORM_LHISI_ES, - PLATFORM_LHISI_CS, - PLATFORM_DC, - PLATFORM_CLOUD_V2, - PLATFORM_END, + PLATFORM_BEGIN = 0, + PLATFORM_MINI_V1 = PLATFORM_BEGIN, + PLATFORM_CLOUD_V1, + PLATFORM_MINI_V2, + PLATFORM_LHISI_ES, + PLATFORM_LHISI_CS, + PLATFORM_DC, + PLATFORM_CLOUD_V2, + PLATFORM_END, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { - RT_CUBE_MKN_FP16_2_16_16 = 0, - RT_CUBE_MKN_FP16_4_16_16, - RT_CUBE_MKN_FP16_16_16_16, - RT_CUBE_MKN_FP16_Default, + RT_CUBE_MKN_FP16_2_16_16 = 0, + RT_CUBE_MKN_FP16_4_16_16, + RT_CUBE_MKN_FP16_16_16_16, + RT_CUBE_MKN_FP16_Default, } rtCubeFracMKNFp16_t; typedef enum tagRtCubeFracMKNInt8 { - RT_CUBE_MKN_INT8_2_32_16 = 0, - RT_CUBE_MKN_INT8_4_32_4, - RT_CUBE_MKN_INT8_4_32_16, - RT_CUBE_MKN_INT8_16_32_16, - RT_CUBE_MKN_INT8_Default, + RT_CUBE_MKN_INT8_2_32_16 = 0, + RT_CUBE_MKN_INT8_4_32_4, + RT_CUBE_MKN_INT8_4_32_16, + RT_CUBE_MKN_INT8_16_32_16, + RT_CUBE_MKN_INT8_Default, } rtCubeFracMKNInt8_t; typedef enum tagRtVecFracVmulMKNFp16 { - RT_VEC_VMUL_MKN_FP16_1_16_16 = 0, - RT_VEC_VMUL_MKN_FP16_Default, + RT_VEC_VMUL_MKN_FP16_1_16_16 = 0, + RT_VEC_VMUL_MKN_FP16_Default, } rtVecFracVmulMKNFp16_t; typedef enum tagRtVecFracVmulMKNInt8 { - RT_VEC_VMUL_MKN_INT8_1_32_16 = 0, - RT_VEC_VMUL_MKN_INT8_Default, + RT_VEC_VMUL_MKN_INT8_1_32_16 = 0, + RT_VEC_VMUL_MKN_INT8_Default, } rtVecFracVmulMKNInt8_t; typedef struct tagRtAiCoreSpec { - uint32_t cubeFreq; - uint32_t cubeMSize; - uint32_t cubeKSize; - uint32_t cubeNSize; - rtCubeFracMKNFp16_t cubeFracMKNFp16; - rtCubeFracMKNInt8_t cubeFracMKNInt8; - rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16; - rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8; + uint32_t cubeFreq; + uint32_t cubeMSize; + uint32_t cubeKSize; + uint32_t cubeNSize; + rtCubeFracMKNFp16_t cubeFracMKNFp16; + rtCubeFracMKNInt8_t cubeFracMKNInt8; + rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16; + rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8; } rtAiCoreSpec_t; typedef struct tagRtAiCoreRatesPara { - uint32_t ddrRate; - uint32_t l2Rate; - uint32_t l2ReadRate; - uint32_t l2WriteRate; - uint32_t l1ToL0ARate; - uint32_t l1ToL0BRate; - uint32_t l0CToUBRate; - uint32_t ubToL2; - uint32_t ubToDDR; - uint32_t ubToL1; + uint32_t ddrRate; + uint32_t l2Rate; + uint32_t l2ReadRate; + uint32_t l2WriteRate; + uint32_t l1ToL0ARate; + uint32_t l1ToL0BRate; + uint32_t l0CToUBRate; + uint32_t ubToL2; + uint32_t ubToDDR; + uint32_t ubToL1; } rtAiCoreMemoryRates_t; typedef struct tagRtMemoryConfig { - uint32_t flowtableSize; - uint32_t compilerSize; + uint32_t flowtableSize; + uint32_t compilerSize; } rtMemoryConfig_t; typedef struct tagRtPlatformConfig { - uint32_t platformConfig; + uint32_t platformConfig; } rtPlatformConfig_t; /** @@ -165,7 +166,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); - /** * @ingroup * @brief get l2 buffer Info,virtual baseaddr,Size @@ -176,14 +176,16 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); /** * @ingroup - * @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be represented by 9020. + * @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be + * represented by 9020. * @param [out] runtimeVersion * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // __CCE_RUNTIME_STREAM_H__ diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index ee0d8f0a..e95d4c89 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -30,24 +30,24 @@ extern "C" { typedef void *rtContext_t; typedef enum tagDryRunFlag { - RT_DRYRUN_FLAG_FALSE = 0, - RT_DRYRUN_FLAG_TRUE = 1, + RT_DRYRUN_FLAG_FALSE = 0, + RT_DRYRUN_FLAG_TRUE = 1, } rtDryRunFlag_t; typedef enum tagCtxMode { - RT_CTX_NORMAL_MODE = 0, - RT_CTX_GEN_MODE = 1, + RT_CTX_NORMAL_MODE = 0, + RT_CTX_GEN_MODE = 1, } rtCtxMode_t; typedef struct tagRtGroupInfo { - int32_t groupId; - uint32_t flag; - uint32_t aicoreNum; - uint32_t aicpuNum; - uint32_t aivectorNum; - uint32_t sdmaNum; - uint32_t activeStreamNum; - void *extrPtr; + int32_t groupId; + uint32_t flag; + uint32_t aicoreNum; + uint32_t aicpuNum; + uint32_t aivectorNum; + uint32_t sdmaNum; + uint32_t activeStreamNum; + void *extrPtr; } rtGroupInfo_t; /** @@ -156,6 +156,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count); * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtSetCtxINFMode(bool mode); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index d6ffbc9a..49f6a3f6 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -27,44 +27,44 @@ extern "C" { #define RT_CAPABILITY_NOT_SUPPORT (0x0) typedef struct tagRTDeviceInfo { - uint8_t env_type; // 0: FPGA 1: EMU 2: ESL - uint32_t ctrl_cpu_ip; - uint32_t ctrl_cpu_id; - uint32_t ctrl_cpu_core_num; - uint32_t ctrl_cpu_endian_little; - uint32_t ts_cpu_core_num; - uint32_t ai_cpu_core_num; - uint32_t ai_core_num; - uint32_t ai_core_freq; - uint32_t ai_cpu_core_id; - uint32_t ai_core_id; - uint32_t aicpu_occupy_bitmap; - uint32_t hardware_version; - uint32_t ts_num; + uint8_t env_type; // 0: FPGA 1: EMU 2: ESL + uint32_t ctrl_cpu_ip; + uint32_t ctrl_cpu_id; + uint32_t ctrl_cpu_core_num; + uint32_t ctrl_cpu_endian_little; + uint32_t ts_cpu_core_num; + uint32_t ai_cpu_core_num; + uint32_t ai_core_num; + uint32_t ai_core_freq; + uint32_t ai_cpu_core_id; + uint32_t ai_core_id; + uint32_t aicpu_occupy_bitmap; + uint32_t hardware_version; + uint32_t ts_num; } rtDeviceInfo_t; typedef enum tagRtRunMode { - RT_RUN_MODE_OFFLINE = 0, - RT_RUN_MODE_ONLINE = 1, - RT_RUN_MODE_AICPU_SCHED = 2, - RT_RUN_MODE_RESERVED + RT_RUN_MODE_OFFLINE = 0, + RT_RUN_MODE_ONLINE = 1, + RT_RUN_MODE_AICPU_SCHED = 2, + RT_RUN_MODE_RESERVED } rtRunMode; typedef enum tagRtAicpuDeployType { - AICPU_DEPLOY_CROSS_OS = 0x0, - AICPU_DEPLOY_CROSS_PROCESS = 0x1, - AICPU_DEPLOY_CROSS_THREAD = 0x2, - AICPU_DEPLOY_RESERVED + AICPU_DEPLOY_CROSS_OS = 0x0, + AICPU_DEPLOY_CROSS_PROCESS = 0x1, + AICPU_DEPLOY_CROSS_THREAD = 0x2, + AICPU_DEPLOY_RESERVED } rtAicpuDeployType_t; typedef enum tagRtFeatureType { - FEATURE_TYPE_MEMCPY = 0, - FEATURE_TYPE_RSV + FEATURE_TYPE_MEMCPY = 0, + FEATURE_TYPE_RSV } rtFeatureType_t; typedef enum tagMemcpyInfo { - MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, - MEMCPY_INFO_RSV + MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, + MEMCPY_INFO_RSV } rtMemcpyInfo_t; /** @@ -356,6 +356,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index f44b181c..dc16ca58 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -29,15 +29,15 @@ extern "C" { * @brief shared memory data control */ typedef struct tagRtSmData { - uint64_t L2_mirror_addr; // preload or swap source address - uint32_t L2_data_section_size; // every data size - uint8_t L2_preload; // 1 - preload from mirrorAddr, 0 - no preload - uint8_t modified; // 1 - data will be modified by kernel, 0 - no modified - uint8_t priority; // data priority - int8_t prev_L2_page_offset_base; // remap source section offset - uint8_t L2_page_offset_base; // remap destination section offset - uint8_t L2_load_to_ddr; // 1 - need load out, 0 - no need - uint8_t reserved[2]; // reserved + uint64_t L2_mirror_addr; // preload or swap source address + uint32_t L2_data_section_size; // every data size + uint8_t L2_preload; // 1 - preload from mirrorAddr, 0 - no preload + uint8_t modified; // 1 - data will be modified by kernel, 0 - no modified + uint8_t priority; // data priority + int8_t prev_L2_page_offset_base; // remap source section offset + uint8_t L2_page_offset_base; // remap destination section offset + uint8_t L2_load_to_ddr; // 1 - need load out, 0 - no need + uint8_t reserved[2]; // reserved } rtSmData_t; /** @@ -45,12 +45,12 @@ typedef struct tagRtSmData { * @brief shared memory description */ typedef struct tagRtSmCtrl { - rtSmData_t data[8]; // data description - uint64_t size; // max page Num - uint8_t remap[64]; /* just using for static remap mode, default:0xFF + rtSmData_t data[8]; // data description + uint64_t size; // max page Num + uint8_t remap[64]; /* just using for static remap mode, default:0xFF array index: virtual l2 page id, array value: physic l2 page id */ - uint8_t l2_in_main; // 0-DDR, 1-L2, default:0xFF - uint8_t reserved[3]; + uint8_t l2_in_main; // 0-DDR, 1-L2, default:0xFF + uint8_t reserved[3]; } rtSmDesc_t; typedef rtSmDesc_t rtL2Ctrl_t; @@ -60,10 +60,10 @@ typedef rtSmDesc_t rtL2Ctrl_t; * @brief device binary type */ typedef struct tagRtDevBinary { - uint32_t magic; // magic number - uint32_t version; // version of binary - const void *data; // binary data - uint64_t length; // binary length + uint32_t magic; // magic number + uint32_t version; // version of binary + const void *data; // binary data + uint64_t length; // binary length } rtDevBinary_t; /** @@ -73,15 +73,15 @@ typedef struct tagRtDevBinary { #define ONLINE_PROF_MAX_PMU_NUM (8) typedef struct ProfilefDataInfo { - const void *stubFunc; - uint32_t blockDim; - const void *args; - uint32_t argsSize; - rtSmDesc_t *smDesc; - rtStream_t stream; - uint64_t totalcycle; - uint64_t ovcycle; - uint64_t pmu_cnt[ONLINE_PROF_MAX_PMU_NUM]; + const void *stubFunc; + uint32_t blockDim; + const void *args; + uint32_t argsSize; + rtSmDesc_t *smDesc; + rtStream_t stream; + uint64_t totalcycle; + uint64_t ovcycle; + uint64_t pmu_cnt[ONLINE_PROF_MAX_PMU_NUM]; } rtProfDataInfo_t; /** @@ -89,12 +89,12 @@ typedef struct ProfilefDataInfo { * @brief function mode type */ typedef enum { - FUNC_MODE_NORMAL = 0, - FUNC_MODE_PCTRACE_USERPROFILE_RECORDLOOP, - FUNC_MODE_PCTRACE_USERPROFILE_SKIPLOOP, - FUNC_MODE_PCTRACE_CYCLECNT_RECORDLOOP, - FUNC_MODE_PCTRACE_CYCLECNT_SKIPLOOP, - FUNC_MODE_BUTT + FUNC_MODE_NORMAL = 0, + FUNC_MODE_PCTRACE_USERPROFILE_RECORDLOOP, + FUNC_MODE_PCTRACE_USERPROFILE_SKIPLOOP, + FUNC_MODE_PCTRACE_CYCLECNT_RECORDLOOP, + FUNC_MODE_PCTRACE_CYCLECNT_SKIPLOOP, + FUNC_MODE_BUTT } rtFuncModeType_t; /** @@ -102,23 +102,23 @@ typedef enum { * @brief kernel info */ typedef struct rtKernelInfo { - uint64_t task_offset; // kernel offset in module - /* flowtable */ - void *arg; // launch kernel arg - uint32_t arg_size; - /* module */ - void *module_addr; // module::baseaddr_ - uint32_t module_size; -} * rtKernelInfo_t; + uint64_t task_offset; // kernel offset in module + /* flowtable */ + void *arg; // launch kernel arg + uint32_t arg_size; + /* module */ + void *module_addr; // module::baseaddr_ + uint32_t module_size; +} *rtKernelInfo_t; /** * @ingroup rt_KernelConfigDump * @brief device dump type */ typedef enum tagRtDumpKind { - RT_DATA_DUMP_KIND_INVALID = -1, - RT_DATA_DUMP_KIND_DUMP = 0, - RT_DATA_DUMP_KIND_RESERVED + RT_DATA_DUMP_KIND_INVALID = -1, + RT_DATA_DUMP_KIND_DUMP = 0, + RT_DATA_DUMP_KIND_RESERVED } rtDumpKind_t; /** @@ -376,7 +376,6 @@ RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kern const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); -typedef void *rtModel_t; /** * @ingroup rt_kernel * @brief L1 fusion dump addr transfered to device @@ -414,6 +413,7 @@ RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr); #else RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream); + #endif #endif // __CLANG_CCE_RUNTIME_H__ diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index c305fb12..30af85d9 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -90,40 +90,40 @@ typedef uint32_t rtMemType_t; * @brief memory copy type */ typedef enum tagRtMemcpyKind { - RT_MEMCPY_HOST_TO_HOST = 0, // host to host - RT_MEMCPY_HOST_TO_DEVICE, // host to device - RT_MEMCPY_DEVICE_TO_HOST, // device to host - RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P - RT_MEMCPY_MANAGED, // managed memory - RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, - RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes) - RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex - RT_MEMCPY_RESERVED, + RT_MEMCPY_HOST_TO_HOST = 0, // host to host + RT_MEMCPY_HOST_TO_DEVICE, // host to device + RT_MEMCPY_DEVICE_TO_HOST, // device to host + RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P + RT_MEMCPY_MANAGED, // managed memory + RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, + RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes) + RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex + RT_MEMCPY_RESERVED, } rtMemcpyKind_t; typedef enum tagRtMemInfoType { - RT_MEMORYINFO_DDR, - RT_MEMORYINFO_HBM, - RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR - RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR - RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM - RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM - RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR - RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR - RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM - RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM + RT_MEMORYINFO_DDR, + RT_MEMORYINFO_HBM, + RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR + RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR + RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM + RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM + RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR + RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR + RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM + RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM } rtMemInfoType_t; typedef enum tagRtRecudeKind { - RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P - RT_RECUDE_KIND_END + RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P + RT_RECUDE_KIND_END } rtRecudeKind_t; typedef enum tagRtDataType { - RT_DATA_TYPE_FP32 = 0, // fp32 - RT_DATA_TYPE_FP16 = 1, // fp16 - RT_DATA_TYPE_INT16 = 2, // int16 - RT_DATA_TYPE_END + RT_DATA_TYPE_FP32 = 0, // fp32 + RT_DATA_TYPE_FP16 = 1, // fp16 + RT_DATA_TYPE_INT16 = 2, // int16 + RT_DATA_TYPE_END } rtDataType_t; /** @@ -131,10 +131,10 @@ typedef enum tagRtDataType { * @brief memory copy channel type */ typedef enum tagRtMemcpyChannelType { - RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P - RT_MEMCPY_CHANNEL_TYPE_PCIe, - RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now - RT_MEMCPY_CHANNEL_TYPE_RESERVED, + RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P + RT_MEMCPY_CHANNEL_TYPE_PCIe, + RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now + RT_MEMCPY_CHANNEL_TYPE_RESERVED, } rtMemcpyChannelType_t; /** @@ -142,18 +142,18 @@ typedef enum tagRtMemcpyChannelType { * @brief ai core memory size */ typedef struct rtAiCoreMemorySize { - uint32_t l0ASize; - uint32_t l0BSize; - uint32_t l0CSize; - uint32_t l1Size; - uint32_t ubSize; - uint32_t l2Size; - uint32_t l2PageNum; - uint32_t blockSize; - uint64_t bankSize; - uint64_t bankNum; - uint64_t burstInOneBlock; - uint64_t bankGroupNum; + uint32_t l0ASize; + uint32_t l0BSize; + uint32_t l0CSize; + uint32_t l1Size; + uint32_t ubSize; + uint32_t l2Size; + uint32_t l2PageNum; + uint32_t blockSize; + uint64_t bankSize; + uint64_t bankNum; + uint64_t burstInOneBlock; + uint64_t bankGroupNum; } rtAiCoreMemorySize_t; /** @@ -161,10 +161,10 @@ typedef struct rtAiCoreMemorySize { * @brief memory type */ typedef enum tagRtMemoryType { - RT_MEMORY_TYPE_HOST = 1, - RT_MEMORY_TYPE_DEVICE = 2, - RT_MEMORY_TYPE_SVM = 3, - RT_MEMORY_TYPE_DVPP = 4 + RT_MEMORY_TYPE_HOST = 1, + RT_MEMORY_TYPE_DEVICE = 2, + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 } rtMemoryType_t; /** @@ -172,31 +172,31 @@ typedef enum tagRtMemoryType { * @brief memory attribute */ typedef struct tagRtPointerAttributes { - rtMemoryType_t memoryType; // host memory or device memory - rtMemoryType_t locationType; - uint32_t deviceID; // device ID - uint32_t pageSize; + rtMemoryType_t memoryType; // host memory or device memory + rtMemoryType_t locationType; + uint32_t deviceID; // device ID + uint32_t pageSize; } rtPointerAttributes_t; typedef struct rtMallocHostSharedMemoryIn { - const char *name; - const uint64_t size; - uint32_t flag; + const char *name; + const uint64_t size; + uint32_t flag; } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; - void *ptr; - void *devPtr; + int fd; + void *ptr; + void *devPtr; } rtMallocHostSharedMemoryOut; typedef struct rtFreeHostSharedMemoryIn { - const char *name; - const uint64_t size; - int fd; - void *ptr; - void *devPtr; + const char *name; + const uint64_t size; + int fd; + void *ptr; + void *devPtr; } rtFreeHostSharedMemoryIn; diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index b72b142d..482486a8 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -278,7 +278,6 @@ typedef struct tagLabelDevInfo_t { uint16_t labelId; }rtLabelDevInfo; -typedef void *rtModel_t; typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); /** diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h index b642cbc8..bef5c05d 100644 --- a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h +++ b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h @@ -15,23 +15,23 @@ extern "C" { struct SoftDpProcsessInfo { - uint8_t* inputBuffer; - uint32_t inputBufferSize; + uint8_t* inputBuffer; + uint32_t inputBufferSize; - uint8_t* outputBuffer; - uint32_t outputBufferSize; + uint8_t* outputBuffer; + uint32_t outputBufferSize; - uint32_t outputWidth; - uint32_t outputHeight; + uint32_t outputWidth; + uint32_t outputHeight; - uint32_t reserved; + uint32_t reserved; }; struct DpCropInfo { - uint32_t left; - uint32_t right; - uint32_t up; - uint32_t down; + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; }; /* @@ -49,4 +49,4 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); */ uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); } -#endif // EXTERNALSOFTDP_H +#endif // EXTERNALSOFTDP_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h index 1cab6fd1..3e7d11ee 100644 --- a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h +++ b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h @@ -61,7 +61,7 @@ int32_t TdtHostInit(uint32_t deviceId); * @li tdt_host_interface.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'DataItem' defined */ -int32_t TdtHostPushData(const std::string &channelName, const std::vector &item); +int32_t TdtHostPushData(const std::string &channelName, const std::vector &item, uint32_t deviceId = 0); /** * @ingroup TdtHostDestroy @@ -203,25 +203,6 @@ int32_t TdtInFeedDestroy(uint32_t deviceId); * @li tdt_host_interface.h: Header file where the interface declaration is located. */ int32_t TdtOutFeedDestroy(); - -/** -* @ingroup TdtInFeedData -* @brief Blocking queue. When the queue is full, the Push interface will block. -* -* @par Function -* Blocking queue. When the queue is full, the Push interface will block. -* -* @param channelName [IN] type #String. queue channel name -* @param items [IN] type #vector DataItem is defined in data_common.h. input data -* @retval 0 Success -* @retval OtherValues 0 Fail -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tdt_host_interface.h: Header file where the interface declaration is located. -* @li data_common.h: Header file where 'DataItem' defined -*/ -int32_t TdtInFeedData(const std::string &channelName, const std::vector &item, uint32_t deviceId); } // namespace tdt #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index 2ebce7d9..7c4f7be2 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -120,15 +120,15 @@ typedef struct tagKV { } KeyValue; typedef enum { - APPLICATION = 0, - SYSTEM + APPLICATION = 0, + SYSTEM } ProcessType; typedef struct { - ProcessType type; - unsigned int pid; - unsigned int deviceId; - char reserved[RESERVERD_LENGTH]; + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; } LogAttr; /** diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h index 12b6aa1e..6208f462 100644 --- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h +++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h @@ -1,72 +1,72 @@ -/** - * @file tune_api.h - * - * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n - * 描述:mstune调优接口头文件 - */ -/** @defgroup mstune mstune调优接口 */ -#ifndef TUNE_API_H -#define TUNE_API_H -#include -#include -#include -#include "graph/graph.h" -#include "ge/ge_api.h" - -/** - * @ingroup mstune - * - * mstune status - */ -enum MsTuneStatus { - MSTUNE_SUCCESS, /** tune success */ - MSTUNE_FAILED, /** tune failed */ -}; - -// Option key: for train options sets -const std::string MSTUNE_SELF_KEY = "mstune"; -const std::string MSTUNE_GEINIT_KEY = "initialize"; -const std::string MSTUNE_GESESS_KEY = "session"; - -/** - * @ingroup mstune - * @par 描述: 命令行调优 - * - * @attention 无 - * @param option [IN] 调优参数 - * @param msg [OUT] 调优异常下返回信息 - * @retval #MSTUNE_SUCCESS 执行成功 - * @retval #MSTUNE_FAILED 执行失败 - * @par 依赖: - * @li tune_api.cpp:该接口所属的开发包。 - * @li tune_api.h:该接口声明所在的头文件。 - * @see 无 - * @since - */ -MsTuneStatus MsTuning(const std::map &option, std::string &msg); - -/** - * @ingroup mstune - * @par 描述: 梯度调优 - * - * @attention 无 - * @param tuningGraph [IN] 调优图 - * @param dependGraph [IN] 调优依赖图 - * @param session [IN] ge连接会话 - * @param option [IN] 参数集. 包含调优参数及ge参数 - * @retval #MSTUNE_SUCCESS 执行成功 - * @retval #MSTUNE_FAILED 执行失败 - * @par 依赖: - * @li tune_api.cpp:该接口所属的开发包。 - * @li tune_api.h:该接口声明所在的头文件。 - * @see 无 - * @since - */ -extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector &dependGraph, - ge::Session *session, const std::map> &option); - -#endif +/** + * @file tune_api.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n + * 描述:mstune调优接口头文件 + */ +/** @defgroup mstune mstune调优接口 */ +#ifndef TUNE_API_H +#define TUNE_API_H +#include +#include +#include +#include "graph/graph.h" +#include "ge/ge_api.h" + +/** + * @ingroup mstune + * + * mstune status + */ +enum MsTuneStatus { + MSTUNE_SUCCESS, /** tune success */ + MSTUNE_FAILED, /** tune failed */ +}; + +// Option key: for train options sets +const std::string MSTUNE_SELF_KEY = "mstune"; +const std::string MSTUNE_GEINIT_KEY = "initialize"; +const std::string MSTUNE_GESESS_KEY = "session"; + +/** + * @ingroup mstune + * @par 描述: 命令行调优 + * + * @attention 无 + * @param option [IN] 调优参数 + * @param msg [OUT] 调优异常下返回信息 + * @retval #MSTUNE_SUCCESS 执行成功 + * @retval #MSTUNE_FAILED 执行失败 + * @par 依赖: + * @li tune_api.cpp:该接口所属的开发包。 + * @li tune_api.h:该接口声明所在的头文件。 + * @see 无 + * @since + */ +MsTuneStatus MsTuning(const std::map &option, std::string &msg); + +/** + * @ingroup mstune + * @par 描述: 梯度调优 + * + * @attention 无 + * @param tuningGraph [IN] 调优图 + * @param dependGraph [IN] 调优依赖图 + * @param session [IN] ge连接会话 + * @param option [IN] 参数集. 包含调优参数及ge参数 + * @retval #MSTUNE_SUCCESS 执行成功 + * @retval #MSTUNE_FAILED 执行失败 + * @par 依赖: + * @li tune_api.cpp:该接口所属的开发包。 + * @li tune_api.h:该接口声明所在的头文件。 + * @see 无 + * @since + */ +extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector &dependGraph, + ge::Session *session, const std::map> &option); + +#endif From 517259fd7947b04fc92756a53171141596cc2650 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Fri, 22 Jan 2021 19:14:03 +0800 Subject: [PATCH 32/41] Optimize InitInputDescInfo & InitOutputDescInfo & CheckHasHcomOp & MarkNodeAndSetIndex --- ge/graph/build/task_generator.cc | 7 -- ge/graph/load/model_manager/davinci_model.cc | 125 +++++++++++++-------------- ge/graph/load/model_manager/davinci_model.h | 6 +- ge/graph/manager/graph_manager.cc | 4 +- 4 files changed, 63 insertions(+), 79 deletions(-) diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index bb72fa8a..04c1a36f 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -527,13 +527,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { return GE_GRAPH_GRAPH_NODE_NULL; } - int64_t node_index = 0; - for (auto &node : all_nodes) { - OpDescPtr op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - op_desc->SetId(node_index++); - } - map> all_stream_ops; for (auto &node : all_nodes) { OpDescPtr op_desc = node->GetOpDesc(); diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 0a92447b..b8a39e21 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -446,23 +446,20 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); } -void DavinciModel::CheckHasHcomOp() { - Graph graph = ge_model_->GetGraph(); - auto compute_graph = GraphUtils::GetComputeGraph(graph); - if (compute_graph == nullptr) { - return; - } +void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { + const set hcom_opp_types({ + HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, + HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE + }); + for (const auto &node : compute_graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); - GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || - (op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || - (op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || - (op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || - (op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || - (op_desc->GetType() == HCOMREDUCE)), - uint32_t stream_id = static_cast(op_desc->GetStreamId()); - (void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); + if (hcom_opp_types.count(op_desc->GetType()) > 0) { + uint32_t stream_id = static_cast(op_desc->GetStreamId()); + hcom_streams_.emplace(stream_id); + GELOGD("hcom stream: %u.", stream_id); + } } } @@ -641,7 +638,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size name_ = ge_model_->GetName(); (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); - CheckHasHcomOp(); + CheckHasHcomOp(compute_graph); vector huge_stream_list; (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); @@ -1027,7 +1024,7 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ const vector &output_op_list) { GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); for (auto &item : data_by_index) { - auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); + const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); input_addrs_list_.emplace_back(output_addrs); @@ -1035,14 +1032,18 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); + GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); if (item.second->GetType() == AIPP_DATA_TYPE) { GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); is_dynamic_aipp_ = true; } } + vector out_node_name; + (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); + GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); for (const auto &op_desc : output_op_list) { - auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); output_addrs_list_.emplace_back(input_addrs); @@ -1060,10 +1061,11 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ if (InitOutputTensorInfo(op_desc) != SUCCESS) { return INTERNAL_ERROR; } + + GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); } - GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); - return InitOutputDescInfo(output_op_list); + return SUCCESS; } bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { @@ -1979,27 +1981,24 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, } } -Status DavinciModel::InitInputDescInfo(const map &data_by_index) { - for (const auto &item : data_by_index) { - const auto op_desc = item.second; - GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); +Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - InputOutputDescInfo input; - ShapeDescription dims_info; - Format format = op_desc->GetInputDescPtr(0)->GetFormat(); - CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); + InputOutputDescInfo input; + ShapeDescription dims_info; + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); - input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); - input.name = op_desc->GetName(); - int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); - input.size = input_size; - input_formats_.push_back(format); - input_descs_.push_back(input); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); + int64_t input_size = 0; + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + input.size = input_size; + input_formats_.push_back(format); + input_descs_.push_back(input); - input.shape_info = dims_info; - input_descs_dims_.push_back(input); - } + input.shape_info = dims_info; + input_descs_dims_.push_back(input); return SUCCESS; } @@ -2065,37 +2064,31 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { - GELOGD("Output node size: %zu", output_op_list.size()); - vector out_node_name; - (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); - for (const auto &op_desc : output_op_list) { - uint32_t out_size = static_cast(op_desc->GetInputsSize()); - for (uint32_t index = 0; index < out_size; index++) { - string output_name; - InputOutputDescInfo output; - uint32_t format_result; - CreateOutput(index, op_desc, output, format_result); - - std::vector src_name = op_desc->GetSrcName(); - std::vector src_index = op_desc->GetSrcIndex(); - GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, - "construct output_name failed."); - // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name.size()) { - // neweast plan, the index will add to name during generate model. - bool contains_colon = out_node_name[index].find(":") != std::string::npos; - output_name = - contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); - } else { - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + - std::to_string(src_index[index]); - } - output.name = output_name; - output_descs_.push_back(output); - output_formats_.push_back(format_result); +Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name) { + uint32_t out_size = static_cast(op_desc->GetInputsSize()); + for (uint32_t i = 0; i < out_size; ++i) { + string output_name; + InputOutputDescInfo output; + uint32_t format_result; + CreateOutput(i, op_desc, output, format_result); + + std::vector src_name = op_desc->GetSrcName(); + std::vector src_index = op_desc->GetSrcIndex(); + GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, + "construct output_name failed."); + // forward compatbility, if old om has no out_node_name, need to return output follow origin way + if (out_size == out_node_name.size()) { + // neweast plan, the index will add to name during generate model. + bool contains_colon = out_node_name[i].find(":") != std::string::npos; + output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); + } else { + output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); } + output.name = output_name; + output_descs_.push_back(output); + output_formats_.push_back(format_result); } + return SUCCESS; } diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 53db77a7..22f3bcab 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -827,7 +827,7 @@ class DavinciModel { void OpDebugUnRegister(); - void CheckHasHcomOp(); + void CheckHasHcomOp(const ComputeGraphPtr &graph); Status DoTaskSink(); @@ -850,8 +850,8 @@ class DavinciModel { Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - Status InitInputDescInfo(const map &data_by_index); - Status InitOutputDescInfo(const vector &output_op_list); + Status InitInputDescInfo(const OpDescPtr &op_desc); + Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name); Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index d5ee690c..f3a09f13 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -729,9 +729,7 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); - GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", - GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, - compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeBeforeBuildForRts", stages.optimizer.OptimizeGraphBeforeBuildForRts, compute_graph); Status ret = compute_graph->TopologicalSorting(); if (ret != SUCCESS) { From 33945b054b35f03e5e8e4976b3825c9a039b0293 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Jan 2021 19:26:41 +0800 Subject: [PATCH 33/41] Remove gentask in DEPEND_COMPUTE task executor. --- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 7 ++++--- ge/hybrid/node_executor/aicpu/aicpu_node_executor.h | 8 ++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index c6fb76ed..fb0f2d69 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -352,6 +352,10 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { need_sync_ = true; } GELOGI("Node[%s] init end.", node_name_.c_str()); + auto task_defs = model.GetTaskDefs(node_item_->node); + if (unknown_type_ == DEPEND_COMPUTE) { + GE_CHK_STATUS_RET_NOLOG(SetMemCopyTask((*task_defs)[1])); + } return SUCCESS; } @@ -829,9 +833,6 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, "Load task for node %s failed.", node->GetName().c_str()); GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str()); - if (node_item->shape_inference_type == DEPEND_COMPUTE) { - GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask((*task_defs)[1])); - } task = std::move(aicpu_task); GELOGD("Node[%s] load task end.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index c6e63ee0..0a21c6ef 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -42,10 +42,6 @@ class AicpuNodeTaskBase : public NodeTask { virtual Status Init(const HybridModel &model) = 0; - virtual Status SetMemCopyTask(const domi::TaskDef &task_def) { - return UNSUPPORTED; - } - Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; @@ -94,8 +90,6 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { Status Init(const HybridModel &model) override; - Status SetMemCopyTask(const domi::TaskDef &task_def) override; - protected: Status LaunchTask(TaskContext &context) override; @@ -105,6 +99,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { Status UpdateIoAddr(TaskContext &context) override; private: + Status SetMemCopyTask(const domi::TaskDef &task_def); + Status InitForDependComputeTask(); Status UpdateShapeAndDataByResultSummary(TaskContext &context); From bd9cd6d508240697d64e261032a0eaefb40a0e5b Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Jan 2021 19:28:57 +0800 Subject: [PATCH 34/41] Remove gentask in DEPEND_COMPUTE task executor. --- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index fb0f2d69..16a42f9c 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -351,11 +351,11 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { GELOGD("[%s] Is GetNext, set need sync to true, node type = %s", node_name_.c_str(), node_type.c_str()); need_sync_ = true; } - GELOGI("Node[%s] init end.", node_name_.c_str()); auto task_defs = model.GetTaskDefs(node_item_->node); if (unknown_type_ == DEPEND_COMPUTE) { GE_CHK_STATUS_RET_NOLOG(SetMemCopyTask((*task_defs)[1])); } + GELOGI("Node[%s] init end.", node_name_.c_str()); return SUCCESS; } From dadd49c1f44e1534c5979b49a125904f8afdb45f Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Jan 2021 19:29:45 +0800 Subject: [PATCH 35/41] Remove gentask in DEPEND_COMPUTE task executor. --- ge/hybrid/node_executor/aicpu/aicpu_node_executor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 0a21c6ef..b9cc8256 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -21,7 +21,6 @@ #include "cce/aicpu_engine_struct.h" #include "hybrid/node_executor/node_executor.h" #include "aicpu_ext_info.h" -#include "common/ge_inner_error_codes.h" namespace ge { namespace hybrid { From b9d632bbd94e972e2e2037c6c2feadb4251b474f Mon Sep 17 00:00:00 2001 From: lianghao Date: Fri, 22 Jan 2021 20:21:43 +0800 Subject: [PATCH 36/41] VerifyConstOp --- ge/graph/preprocess/graph_preprocess.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 19f5ef54..2864b759 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1665,6 +1665,9 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { auto ge_tensor_desc = ge_tensor_ptr->GetTensorDesc(); int64_t shape_size = ge_tensor_desc.GetShape().GetShapeSize(); auto data_type = ge_tensor_desc.GetDataType(); + if (data_type == DT_STRING) { + return SUCCESS; + } uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, length); if (!type_ret) { From 9e324f4d892def7bdffe42b2537deb2c25fbebac Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Sat, 23 Jan 2021 09:57:19 +0800 Subject: [PATCH 37/41] mod ut --- tests/ut/ge/graph/load/davinci_model_unittest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 35413a6b..8eea6856 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -333,7 +333,7 @@ TEST_F(UtestDavinciModel, Init_variable_op) { EXPECT_EQ(model.InitNodes(graph), SUCCESS); - EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); + //EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); EXPECT_NE(model.SyncVarData(), SUCCESS); } From 862cae86def88b15d1020d643c8baae9bb1deebd Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 23 Jan 2021 10:01:47 +0800 Subject: [PATCH 38/41] Fix bug of log. --- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 16a42f9c..1c160eea 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -382,7 +382,7 @@ Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) { } GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_), - "Node[%s] alloc copy task workspace buf failed, size=%zu.", + "Node[%s] alloc copy task workspace buf failed, size=%u.", node_name_.c_str(), kernel_def.task_info_size()); GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(), From 048f335d99313e1a426c48d7ea495e83b4c08f09 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Sat, 23 Jan 2021 10:14:36 +0800 Subject: [PATCH 39/41] fix ut --- tests/ut/ge/graph/load/davinci_model_unittest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 8eea6856..b803b624 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -333,8 +333,8 @@ TEST_F(UtestDavinciModel, Init_variable_op) { EXPECT_EQ(model.InitNodes(graph), SUCCESS); - //EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); - EXPECT_NE(model.SyncVarData(), SUCCESS); + EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); + EXPECT_EQ(model.SyncVarData(), SUCCESS); } TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { From f18bb480879d5324e151bcfe8450a5078b580412 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 23 Jan 2021 16:14:02 +0800 Subject: [PATCH 40/41] Fix printf like format --- ge/common/auth/file_saver.cc | 8 ++++---- ge/common/helper/model_cache_helper.cc | 4 ++-- ge/common/helper/model_helper.cc | 4 ++-- ge/common/helper/om_file_helper.cc | 10 +++++----- ge/common/kernel_store.cc | 2 +- ge/common/math/math_util.h | 10 +++++----- ge/executor/ge_executor.cc | 2 +- ge/ge_local_engine/engine/host_cpu_engine.cc | 4 ++-- ge/graph/build/graph_builder.cc | 4 ++-- ge/graph/build/memory/graph_mem_assigner.cc | 4 ++-- ge/graph/build/task_generator.cc | 9 ++++----- ge/graph/load/model_manager/davinci_model.cc | 2 +- ge/graph/load/model_manager/model_manager.cc | 2 +- ge/graph/manager/graph_manager.cc | 2 +- ge/graph/manager/graph_var_manager.cc | 4 ++-- ge/graph/passes/cond_remove_pass.cc | 8 ++++---- ge/graph/passes/for_pass.cc | 4 ++-- ge/graph/passes/multi_batch_clone_pass.cc | 2 +- ge/graph/passes/remove_same_const_pass.cc | 2 +- ge/graph/passes/subgraph_pass.cc | 2 +- ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 2 +- ge/graph/preprocess/multi_batch_copy_graph.cc | 10 +++++----- ge/graph/preprocess/multi_batch_options.cc | 4 ++-- ge/host_kernels/dynamic_stitch_kernel.cc | 8 ++++---- ge/host_kernels/pack_kernel.cc | 2 +- ge/host_kernels/rank_kernel.cc | 2 +- ge/host_kernels/strided_slice_kernel.cc | 8 ++++---- ge/session/omg.cc | 2 +- ge/single_op/task/op_task.cc | 4 ++-- tests/depends/omg/src/omg_stub.cc | 2 +- tests/ut/ge/graph/passes/variable_op_pass_unittest.cc | 4 ++-- 31 files changed, 68 insertions(+), 69 deletions(-) diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index e708653a..12999e54 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -62,7 +62,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { while (size > size_1g) { write_count = mmWrite(fd, reinterpret_cast(seek), size_1g); if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { - GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno)); + GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno)); return FAILED; } size -= size_1g; @@ -75,7 +75,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { // -1: Failed to write to file; - 2: Illegal parameter if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { - GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno)); + GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno)); return FAILED; } @@ -133,7 +133,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); // Write partition data for (const auto &partitionData : partition_datas) { - GELOGI("GC:size[%zu]", partitionData.size); + GELOGI("GC:size[%u]", partitionData.size); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; break); @@ -305,7 +305,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi // Write partition data auto &cur_partition_datas = all_partition_datas[index]; for (const auto &partition_data : cur_partition_datas) { - GELOGI("GC:size[%zu]", partition_data.size); + GELOGI("GC:size[%u]", partition_data.size); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; break); diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index 7ec8cc0f..bf8c3ce0 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -1000,8 +1000,8 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { auto offset = (tensor_addr_mgr.offset); // Check logic address and offset if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) { - GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address, - offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase()); + GELOGW("Check logic_address[%lu] and offset [%lu] of %s failed, var mem logic base is %lu, abandon", + logic_address, offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase()); return PARAM_INVALID; } // Offset is needed by SaveVarVddr instead of logic address diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 92f279be..05914b22 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -537,7 +537,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod //model verison 1.0 file header does not have model_num member is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION && file_header_->model_num > kStatiOmFileModelNum; - GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version); + GELOGD("cur om model is ge root model or no %d, model version %u", is_unknown_shape_model_, file_header_->version); OmFileLoadHelper om_load_helper; if (is_unknown_shape_model_) { @@ -746,7 +746,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); return INTERNAL_ERROR; } - GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num()); + GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num()); } cur_model->SetModelTaskDef(task); return SUCCESS; diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index d1c52b13..b42aa759 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -203,7 +203,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m auto partition_table = reinterpret_cast(model_data + cur_offset); size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); cur_offset += partition_table_size; - GELOGD("Cur model index %zu: ModelPartitionTable num :%u, " + GELOGD("Cur model index %u: ModelPartitionTable num :%u, " "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); if (model_data_size <= cur_offset) { @@ -219,7 +219,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m partition.type = partition_table->partition[i].type; if (index >= model_contexts_.size()) { if (index != model_contexts_.size()) { - GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index); + GELOGE(FAILED, "cur index is %u make model_contexts_ overflow", index); return FAILED; } @@ -231,16 +231,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m } if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) { - GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %u is greater than the model data size %u.", partition.size + cur_offset, model_data_size); return GE_EXEC_MODEL_DATA_SIZE_INVALID; } cur_offset += partition.size; - GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast(partition.type), partition.size, index); + GELOGD("Partition, type:%d, size:%u, model_index:%u", static_cast(partition.type), partition.size, index); } } if (cur_offset != model_data_size) { - GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size); + GELOGE(FAILED, "do not get the complete model, read end offset:%u, all size:%u", cur_offset, model_data_size); return FAILED; } return SUCCESS; diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc index 0fad096a..d746fd10 100755 --- a/ge/common/kernel_store.cc +++ b/ge/common/kernel_store.cc @@ -51,7 +51,7 @@ bool KernelStore::Build() { kernel_head.name_len = static_cast(kernel->GetName().length()); kernel_head.bin_len = static_cast(kernel->GetBinDataSize()); - GELOGD("get kernel bin name %s, addr %p, size %u", + GELOGD("get kernel bin name %s, addr %p, size %zu", kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); diff --git a/ge/common/math/math_util.h b/ge/common/math/math_util.h index 3255e3c1..e077f4b5 100755 --- a/ge/common/math/math_util.h +++ b/ge/common/math/math_util.h @@ -878,11 +878,11 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) { return INTERNAL_ERROR; \ } -#define FMK_INT64_UINT32_MULCHECK(a, b) \ - if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT64_UINT32_MULCHECK(a, b) \ + if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int64 %ld and Uint32 %u multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } #define FMK_FP16_ZEROCHECK(a) \ diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index b71a8be4..63cff228 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -454,7 +454,7 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { - GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %d should be %d", + GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", i, dynamic_dims[i], all_data_dims[i]); return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; } diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 06dc2b96..4aebffb4 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -33,7 +33,7 @@ namespace { uint64_t size = data_num * sizeof(TYPE); \ ge_tensor = MakeShared(out_desc, size); \ GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \ + GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size); \ ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ } else { \ @@ -72,7 +72,7 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { num_size = max_range_size; } if (num_size < 0) { - GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size); + GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size); return INTERNAL_ERROR; } data_num = static_cast(num_size); diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 7b09cbc6..b13781f8 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -741,7 +741,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) { return SUCCESS; } - GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type); + GELOGD("[%s] has attr input_memory_type %u", op_desc->GetName().c_str(), mem_type); for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); @@ -751,7 +751,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { while (true) { const auto &src_desc = src_node->GetOpDesc(); GE_IF_BOOL_EXEC(src_desc == nullptr, continue); - GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), + GELOGD("[%s:%u] set attr output_memory_type %d", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), mem_type); if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, mem_type)) { diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 98d073d4..fe4c59d2 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -1535,8 +1535,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); } - GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", - has_mem_type_attr == true ? "Fusion" : "", + GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]", + has_mem_type_attr ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 04c1a36f..2edc830d 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -466,11 +466,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast(ops_kernel_info_store_ptr)); } - GELOGI( - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" - " task finished, generate %u task(s).", - op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, - task_list_size_after - task_list_size_before); + GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" + " task finished, generate %zu task(s).", + op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, + task_list_size_after - task_list_size_before); // record nodes which have call generate task successfully fusion_nodes_seen.insert(fusion_node.get()); diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 58fb791e..136a041c 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -1876,7 +1876,7 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); for (const auto item : data_list) { if (item.second->GetName() == releated_name) { - GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index); + GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index); aipp_index = item.first; } } diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index 8be8b60f..c424a60b 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -1704,7 +1704,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); - GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, + GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%lu, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; op_name.clear(); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 159f14a4..55e26cf9 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2777,7 +2777,7 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector } GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor.at(index).dims); - GELOGI("Shape dims of %d data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str()); + GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str()); } return SUCCESS; } diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index e7dce824..d0292885 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -299,12 +299,12 @@ Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) { uint8_t *buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size); if (buffer == nullptr) { - GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); + GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %lu", var_name.c_str(), size); return MEMALLOC_FAILED; } address = static_cast(reinterpret_cast(buffer)); var_mem_size_ += size; - GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", + GELOGI("[IMAS]AssignVarMem Set session_%lu name[%s] output[%d] addr to [%p] size[%lu].", session_id, var_name.c_str(), 0, buffer, size); return SUCCESS; } diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index ce5ff7c0..9ecc79a6 100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -203,7 +203,7 @@ bool CondRemovePass::CheckIfCondConstInput(const OutDataAnchorPtr &cond_out_anch // Get weights from peer node auto weights = OpDescUtils::GetWeights(out_node); if (weights.size() <= static_cast(cond_out_anchor->GetIdx())) { - GELOGI("Get weights of node %s out index %d, weight size %u is not fit for data index %d.", + GELOGI("Get weights of node %s out index %d, weight size %zu is not fit for data index %d.", out_node->GetName().c_str(), cond_out_anchor->GetIdx(), weights.size(), cond_out_anchor->GetIdx()); return false; } @@ -241,7 +241,7 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c for (const auto &peerout_anchor : input_anchor->GetPeerAnchors()) { if (GraphUtils::AddEdge(peerout_anchor, partitioncall_node->GetInAnchor( input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) { - GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d", + GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu", peerout_anchor->GetOwnerNode()->GetName().c_str(), peerout_anchor->GetIdx(), partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size, output_desc_size); @@ -254,14 +254,14 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c for (const auto &output_anchor : node->GetAllOutAnchors()) { for (const auto &peerin_anchor : output_anchor->GetPeerAnchors()) { if (GraphUtils::RemoveEdge(node->GetOutAnchor(output_anchor->GetIdx()), peerin_anchor) != ge::GRAPH_SUCCESS) { - GELOGE(FAILED, "Remove edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d", + GELOGE(FAILED, "Remove edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu", node->GetName().c_str(), output_anchor->GetIdx(), peerin_anchor->GetOwnerNode()->GetName().c_str(), peerin_anchor->GetIdx(), input_desc_size, output_desc_size); return FAILED; } if (GraphUtils::AddEdge(partitioncall_node->GetOutAnchor(output_anchor->GetIdx()), peerin_anchor) != ge::GRAPH_SUCCESS) { - GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d", + GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu", partitioncall_node->GetName().c_str(), output_anchor->GetIdx(), peerin_anchor->GetOwnerNode()->GetName().c_str(), peerin_anchor->GetIdx(), input_desc_size, output_desc_size); diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index 31dee390..3b7a0886 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -469,7 +469,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), - "Add data-edge %s:%d->%s:%d failed.", + "Add data-edge %s:%d->%s:%zu failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), peer_out_anchor->GetIdx(), while_node->GetName().c_str(), i); } @@ -480,7 +480,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { GE_CHECK_NOTNULL(out_data_anchor); for (auto &peer_in_anchor : while_info.data_outputs[i]) { GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_data_anchor, peer_in_anchor), - "Add data-edge %s:%d->%s:%d failed.", + "Add data-edge %s:%zu->%s:%d failed.", while_node->GetName().c_str(), i + kWhileOutputIndex, peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); } diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index b7efa070..17a1e3bb 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -928,7 +928,7 @@ Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) { auto out_data_anchor = node->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue); NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index); - GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %zu data node failed.", + GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %d data node failed.", out_data_anchor->GetIdx()); return INTERNAL_ERROR); for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { GE_IF_BOOL_EXEC(in_anchor == nullptr, continue); diff --git a/ge/graph/passes/remove_same_const_pass.cc b/ge/graph/passes/remove_same_const_pass.cc index e75a4553..3d18a92d 100644 --- a/ge/graph/passes/remove_same_const_pass.cc +++ b/ge/graph/passes/remove_same_const_pass.cc @@ -85,7 +85,7 @@ Status RemoveSameConstPass::Run(ComputeGraphPtr graph) { ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s", node->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s, ret=%u", node->GetName().c_str(), iter->second->GetName().c_str(), ret); return INTERNAL_ERROR; } diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index dc6269ac..3d83c301 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -311,7 +311,7 @@ Status SubgraphPass::InsertInputMemcpy(const ComputeGraphPtr &graph, const std:: Status SubgraphPass::InsertOutputMemcpy(const ComputeGraphPtr &graph, const NodePtr &output_node, const std::set &bypass_index) { if (output_node->GetAllInDataAnchorsSize() == bypass_index.size()) { - GELOGD("No need to insert output memcpy node in while_body %s, output_size=%zu, bypass_num=%zu.", + GELOGD("No need to insert output memcpy node in while_body %s, output_size=%u, bypass_num=%zu.", graph->GetName().c_str(), output_node->GetAllInDataAnchorsSize(), bypass_index.size()); return SUCCESS; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 3b37003f..7b0ffc02 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -644,7 +644,7 @@ Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) { std::vector aipps; GE_RETURN_IF_ERROR(GetAllAipps(data_node, *aipps_or_switchs_or_case.begin(), aipps)); - GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%u]", data_node->GetName().c_str(), + GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%zu]", data_node->GetName().c_str(), data_node->GetType().c_str(), aipps.size()); for (auto aipp_it : aipps) { diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index 5506435e..e43c5dd2 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -371,7 +371,7 @@ Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(mapGetName().c_str()); + GELOGE(FAILED, "Get attr frame_name of enter[%s] failed.", node->GetName().c_str()); return FAILED; } frame_enter[frame_name].emplace_back(node); @@ -850,19 +850,19 @@ NodePtr MultiBatchGraphCopyer::FindSwitchnNodeForDataEdge(const OutDataAnchorPtr if (is_getnext_sink_data) { auto output_idx = data_out_anchor->GetIdx(); size_t referenced_index = 0; - GELOGI("The output idx %zu has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size()); + GELOGI("The output idx %d has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size()); for (const auto &peer_in_anchor : data_out_anchor->GetPeerInDataAnchors()) { if (peer_in_anchor->GetOwnerNode()->GetOpDesc() == nullptr) { GELOGE(INTERNAL_ERROR, "Op desc should not be nullptr."); return nullptr; } if (getnext_nodes_to_switchn_.at(output_idx).empty()) { - GELOGI("Output idx %zu of %s is static output.", output_idx, data_node->GetName().c_str()); + GELOGI("Output idx %d of %s is static output.", output_idx, data_node->GetName().c_str()); return nullptr; } if (output_idx >= static_cast(getnext_nodes_to_switchn_.size()) || referenced_index >= getnext_nodes_to_switchn_.at(output_idx).size()) { - GELOGE(INTERNAL_ERROR, "Output idx is %zu, referenced index is %zu", output_idx, referenced_index); + GELOGE(INTERNAL_ERROR, "Output idx is %d, referenced index is %zu", output_idx, referenced_index); return nullptr; } if (peer_in_anchor->GetOwnerNode()->GetOpDesc()->GetName() == origin_node->GetName()) { @@ -1203,7 +1203,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNAndUpdateMaxShape(const NodePtr &node for (size_t i = 0; i < getnext_sink_dynamic_out_mapping_.size(); ++i) { if(UpdateMaxShapeToData(node, i) != SUCCESS) { - GELOGE(PARAM_INVALID, "Failed to update max shape of %zu out anchor", node->GetName().c_str(), i); + GELOGE(PARAM_INVALID, "Failed to update %s max shape of %zu out anchor", node->GetName().c_str(), i); return PARAM_INVALID; } } diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 8aab0981..84f38fa6 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -435,7 +435,7 @@ Status CheckDynamicParams(const vector> &shapes) { "E10035", {"shapesize", "minshapesize"}, {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)}); GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " - "value size [%zu] must be greater than [%zu].", + "value size [%zu] must be greater than [%d].", shapes.size(), kMinShapesCount - 1); return PARAM_INVALID; } @@ -444,7 +444,7 @@ Status CheckDynamicParams(const vector> &shapes) { "E10036", {"shapesize", "maxshapesize"}, {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)}); GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " - "value size [%zu] must be less than [%zu].", + "value size [%zu] must be less than [%d].", shapes.size(), kMaxShapesCount + 1); return PARAM_INVALID; } diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc index 32611b03..3037934e 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.cc +++ b/ge/host_kernels/dynamic_stitch_kernel.cc @@ -126,10 +126,10 @@ void DynamicStitchKernel::ComputeMergedShape(const vector &inp vector merged_dim_vec = {merged_first_dim + 1}; if (step > 0) { merged_dim_vec.emplace_back(step); - GELOGD("merged_shape is [ %ld, %ld].", merged_first_dim, step); + GELOGD("merged_shape is [ %d, %ld].", merged_first_dim, step); } merged_shape = GeShape(merged_dim_vec); - GELOGD("merged_shape is [ %ld ].", merged_first_dim); + GELOGD("merged_shape is [ %d ].", merged_first_dim); } Status DynamicStitchKernel::GenData(const vector &input, GeTensorPtr &output_ptr) { @@ -196,14 +196,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec // if index repeated, need new data replace old data , so give more allowance if (indices_set.find(input_indices[j]) != indices_set.end()) { if (ge::CheckInt64AddOverflow(input_indices[j], data_unit) != SUCCESS) { - GELOGW("Check int64 mul overflow failed. Indices is %ld, data_unit is %ld.", input_indices[j], data_unit); + GELOGW("Check int64 mul overflow failed. Indices is %d, data_unit is %ld.", input_indices[j], data_unit); return NOT_CHANGED; } allowance += data_unit; } indices_set.insert(input_indices[j]); if (!CheckInt64MulOverflow(input_indices[j], data_unit)) { - GELOGW("Check int64 mul overflow failed. Indices is %ld, data_unit is %ld.", input_indices[j], data_unit); + GELOGW("Check int64 mul overflow failed. Indices is %d, data_unit is %ld.", input_indices[j], data_unit); return NOT_CHANGED; } dst_offset = input_indices[j] * data_unit; diff --git a/ge/host_kernels/pack_kernel.cc b/ge/host_kernels/pack_kernel.cc index 476005ef..bf7a2a1f 100644 --- a/ge/host_kernels/pack_kernel.cc +++ b/ge/host_kernels/pack_kernel.cc @@ -124,7 +124,7 @@ Status PackKernel::ValidateInputs(const ge::OpDescPtr &op_desc_ptr, const std::v int64_t num = 1; for (auto dim : dst_shape.GetDims()) { if (dim < 0) { - GELOGW("Invalid dim ld% in the shape %s", dim, formats::ShapeToString(shape).c_str()); + GELOGW("Invalid dim %ld in the shape %s", dim, formats::ShapeToString(shape).c_str()); return NOT_CHANGED; } num *= dim; diff --git a/ge/host_kernels/rank_kernel.cc b/ge/host_kernels/rank_kernel.cc index 1de9478c..b246b976 100755 --- a/ge/host_kernels/rank_kernel.cc +++ b/ge/host_kernels/rank_kernel.cc @@ -42,7 +42,7 @@ Status RankKernel::Compute(const NodePtr &node, std::vector &v_outp GE_CHECK_NOTNULL(op_desc); size_t input_node_size = op_desc->GetInputsSize(); if (input_node_size != kRankInputSize) { - GELOGW("input node size must be %d", kRankInputSize); + GELOGW("input node size must be %zu", kRankInputSize); return NOT_CHANGED; } diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index b1bfb10a..c7e4b2c8 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -250,16 +250,16 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vectorsecond.emplace_back(index); } else { diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 1772ca88..82303894 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -507,7 +507,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto if (input_index < input_is_const_.size() && input_is_const_[input_index]) { // const input no need update addr GE_CHECK_NOTNULL(arg_base); - GELOGD("AICpuTask input[%zu] addr = %u", input_index, *arg_base); + GELOGD("AICpuTask input[%zu] addr = %lu", input_index, *arg_base); arg_base++; continue; } @@ -710,7 +710,7 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output Status AiCpuTask::InitForSummaryAndCopy() { if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { - GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); + GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_); return SUCCESS; } diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc index a6221570..13ddf8bb 100644 --- a/tests/depends/omg/src/omg_stub.cc +++ b/tests/depends/omg/src/omg_stub.cc @@ -315,7 +315,7 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1, "open file failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %ld is out of limit: %d.", + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.", file_length, MAX_FILE_SIZE_LIMIT); return file_length; } diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index b51908e2..d6af6de9 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -849,7 +849,7 @@ class VariableOpPassSimulator { if (variable_ref_node_format != FORMAT_NC1HWC0 || variable_ref_node_data_type != DT_FLOAT || variable_ref_node_shape.size() != 5) { GELOGI("wanted data format is (%d,%d,%u)", FORMAT_NC1HWC0, DT_FLOAT, 5); - GELOGI("variable_ref_node_format is (%d,%d,%u)", variable_ref_node_format, variable_ref_node_data_type, + GELOGI("variable_ref_node_format is (%d,%d,%zu)", variable_ref_node_format, variable_ref_node_data_type, variable_ref_node_shape.size()); std::cout << "var ref format not changed !" << std::endl; @@ -918,7 +918,7 @@ class VariableOpPassSimulator { if (variable_ref_node_format != FORMAT_NCHW || variable_ref_node_data_type != DT_INT32 || variable_ref_node_shape.size() != 4) { GELOGI("wanted data format is (%d,%d,%u)", FORMAT_NCHW, DT_INT32, 4); - GELOGI("variable_ref_node_format is (%d,%d,%u)", variable_ref_node_format, variable_ref_node_data_type, + GELOGI("variable_ref_node_format is (%d,%d,%zu)", variable_ref_node_format, variable_ref_node_data_type, variable_ref_node_shape.size()); std::cout << "var ref format not changed !" << std::endl; From 031c423636e5ee99397176f4c547ea042ec00444 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 23 Jan 2021 16:40:52 +0800 Subject: [PATCH 41/41] Fix printf like format --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index f2fafd48..a111f387 100644 --- a/build.sh +++ b/build.sh @@ -240,7 +240,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then rm -rf ${BASEPATH}/cov mkdir ${BASEPATH}/cov lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info + lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '*/ge/common/*' '*/ge/executor/*' '*/ge/graph/*' '*/ge/host_kernels/*' '/usr/local/*' -o cov/coverage.info cd ${BASEPATH}/cov genhtml coverage.info fi