diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc
index 2d30c57e..a406d384 100755
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -425,6 +425,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
       atomic_addr_clean_id_ = node_op_desc->GetId();
     }
 
+    // if input size is just one, there is no need to reassign continuous memory
+    bool is_input_continuous = false;
+    (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
+    if (is_input_continuous && (node_op_desc->GetInputSize() <= 1)) {
+      (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
+    }
+
     for (auto &out_anchor : n->GetAllOutDataAnchors()) {
       GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
       bool reuse_input = false;
@@ -928,6 +935,13 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
   auto node_op_desc = n->GetOpDesc();
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
+
+  // if output size is just one, there is no need to reassign continuous memory
+  if (node_op_desc->GetOutputsSize() == 1) {
+    zero_memory_list_.emplace_back(n, kOutput, 0);
+    return nullptr;
+  }
+
   MemoryBlock *block = nullptr;
   int64_t total_size = 0;
   int64_t memory_type = RT_MEMORY_HBM;
@@ -1746,9 +1760,8 @@ Status BlockMemAssigner::Assign() {
 
 bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
   return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
-         (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) ||
-         (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
-         (node_type == HVDCALLBACKBROADCAST);
+         (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) ||
+         (node_type == ASSIGN) || (node_type == HVDWAIT);
 }
 
 bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index cb37182c..05b4cd10 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -1993,12 +1993,6 @@ Status DavinciModel::SyncVarData() {
                            RT_MEMCPY_HOST_TO_DEVICE));
   }
 
-  for (auto op_desc : variable_op_list_) {
-    ret =
-        VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_);
-    GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
-                     op_desc->GetName().c_str());
-  }
   return ret;
 }
 
diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc
index 21747f42..2c34c38e 100755
--- a/ge/graph/passes/hccl_memcpy_pass.cc
+++ b/ge/graph/passes/hccl_memcpy_pass.cc
@@ -37,6 +37,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
     auto op_desc = node->GetOpDesc();
     GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
+    Status ret = ProcessBroadcastMemcpy(graph, node);
+    if (ret != SUCCESS) {
+      GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy.");
+      return ret;
+    }
+
     bool node_input_mutable = false;
     if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
       continue;
     }
@@ -61,7 +67,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
       // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
       NodePtr src_node = src_out_anchor->GetOwnerNode();
      std::string src_type = src_node->GetType();
-      bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT);
+      bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT);
       if (check_src_type) {
         Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
         if (ret != SUCCESS) {
@@ -82,6 +88,44 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
   return SUCCESS;
 }
 
+// If the broadcast has more than one input and an input comes from a variable,
+// the broadcast inputs must be laid out in continuous memory,
+// so a separate feature-map buffer is allocated for the broadcast inputs.
+// In that case, data would be moved from variable memory to the broadcast feature-map memory on every step.
+// To keep this move inside the model, a memcpy node is inserted instead of the out-of-model move code.
+Status HcclMemcpyPass::ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node) {
+  auto op_desc = node->GetOpDesc();
+  if (op_desc == nullptr) {
+    GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+
+  if ((node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) && op_desc->GetInputSize() > 1) {
+    for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
+      if (hccl_in_anchor == nullptr) {
+        continue;
+      }
+      auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
+      if (src_out_anchor == nullptr) {
+        GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
+        return INTERNAL_ERROR;
+      }
+
+      NodePtr src_node = src_out_anchor->GetOwnerNode();
+      std::string src_type = src_node->GetType();
+      bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT);
+      if (check_src_type) {
+        Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
+        if (ret != SUCCESS) {
+          GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
+          return ret;
+        }
+      }
+    }
+  }
+  return SUCCESS;
+}
+
 ///
 /// @brief Add MemcpyAsync Node
 /// @param [in] ge::ComputeGraphPtr graph
diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h
index e73a5483..aaf00779 100755
--- a/ge/graph/passes/hccl_memcpy_pass.h
+++ b/ge/graph/passes/hccl_memcpy_pass.h
@@ -37,6 +37,8 @@ class HcclMemcpyPass : public GraphPass {
   Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
                              const InDataAnchorPtr &hccl_in_anchor);
 
+  Status ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node);
+
  std::unordered_map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
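
For review context, the decision the new ProcessBroadcastMemcpy makes for each broadcast input can be read as a small per-input predicate. The sketch below is a standalone illustration, not the GE API: the type-name strings ("HcomBroadcast", "Variable", and so on) stand in for the GE constants HCOMBROADCAST, HVDCALLBACKBROADCAST, CONSTANTOP, VARIABLE, DATA, and CONSTANT, and the real pass works on NodePtr/anchor objects and performs the rewrite through ModifyEdgeConnection.

// Minimal standalone sketch of the predicate applied to each broadcast input.
// Names are illustrative stand-ins, not the GE constants or API.
#include <iostream>
#include <string>
#include <vector>

// Stand-in for "the type of the node that produces this broadcast input".
struct BroadcastInput {
  std::string producer_type;
};

// Only multi-input broadcast nodes are touched, and only inputs fed by
// constants, variables, or data nodes get a memcpy inserted in front of them.
bool NeedsMemcpyBeforeBroadcast(const std::string &node_type,
                                const std::vector<BroadcastInput> &inputs,
                                size_t index) {
  const bool is_broadcast = (node_type == "HcomBroadcast") || (node_type == "HorovodBroadcast");
  if (!is_broadcast || inputs.size() <= 1) {
    return false;  // single-input broadcast keeps its original memory
  }
  const std::string &src = inputs[index].producer_type;
  return (src == "Const") || (src == "Constant") || (src == "Variable") || (src == "Data");
}

int main() {
  const std::vector<BroadcastInput> inputs = {{"Variable"}, {"Conv2D"}, {"Const"}};
  for (size_t i = 0; i < inputs.size(); ++i) {
    std::cout << "input " << i << " needs memcpy: " << std::boolalpha
              << NeedsMemcpyBeforeBroadcast("HcomBroadcast", inputs, i) << "\n";
  }
  return 0;
}

Only multi-input broadcasts are rewritten; a single-input broadcast is left as-is, which matches the new early-outs in block_mem_assigner.cc that skip continuous-memory reassignment when a node has only one input or output.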