From 082f9195d4f85e0194c00440322d6e13678374fb Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 5 Nov 2020 17:01:53 +0800 Subject: [PATCH 01/13] 1.if broadcast only one input or output, skip continuous mem reassign 2.if broadcast input more than one, and from variable, add memcpy node between them. delete move variable to broadcast input in davinci model run --- ge/graph/build/memory/block_mem_assigner.cc | 19 ++++++++-- ge/graph/load/new_model_manager/davinci_model.cc | 6 ---- ge/graph/passes/hccl_memcpy_pass.cc | 46 +++++++++++++++++++++++- ge/graph/passes/hccl_memcpy_pass.h | 2 ++ 4 files changed, 63 insertions(+), 10 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 2d30c57e..a406d384 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -425,6 +425,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { atomic_addr_clean_id_ = node_op_desc->GetId(); } + // if input size just one, no need to reassign continuous memory + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous && (node_op_desc->GetInputSize() <= 1)) { + (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + } + for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); bool reuse_input = false; @@ -928,6 +935,13 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); auto node_op_desc = n->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); + + // if output size just one, no need to reassign continuous memory + if (node_op_desc->GetOutputsSize() == 1) { + zero_memory_list_.emplace_back(n, kOutput, 0); + return nullptr; + } + MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; @@ -1746,9 +1760,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || - (node_type == HVDCALLBACKBROADCAST); + (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || + (node_type == ASSIGN) || (node_type == HVDWAIT); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index cb37182c..05b4cd10 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -1993,12 +1993,6 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } - for (auto op_desc : variable_op_list_) { - ret = - VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - op_desc->GetName().c_str()); - } return ret; } diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 21747f42..2c34c38e 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -37,6 +37,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + Status ret = ProcessBroadcastMemcpy(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy."); + return ret; + } + bool node_input_mutable = false; if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { continue; @@ -61,7 +67,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. NodePtr src_node = src_out_anchor->GetOwnerNode(); std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); + bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT); if (check_src_type) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { @@ -82,6 +88,44 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { return SUCCESS; } +// If broadcast input size is bigger than 1, and input from variable, +// cause by broadcast input memory should be continuous, +// another featuremap mem will be allocated for broadcast input. +// In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. +// In order to avoid move action out of model, use memcpy node instead of move action code. +Status HcclMemcpyPass::ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if ((node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) && op_desc->GetInputSize() > 1) { + for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { + if (hccl_in_anchor == nullptr) { + continue; + } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + NodePtr src_node = src_out_anchor->GetOwnerNode(); + std::string src_type = src_node->GetType(); + bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT); + if (check_src_type) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } + } + } + return SUCCESS; +} + /// /// @brief Add MemcpyAsync Node /// @param [in] ge::ComputeGraphPtr graph diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index e73a5483..aaf00779 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -37,6 +37,8 @@ class HcclMemcpyPass : public GraphPass { Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor); + Status ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node); + std::unordered_map node_num_map_; }; } // namespace ge From 94f2185b9b72fb0fab84d08c477e60927775aa3e Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 5 Nov 2020 20:11:13 +0800 Subject: [PATCH 02/13] fix --- ge/graph/build/memory/block_mem_assigner.cc | 2 +- ge/graph/passes/hccl_memcpy_pass.cc | 154 ++++++++++++++++++++-------- ge/graph/passes/hccl_memcpy_pass.h | 8 +- 3 files changed, 117 insertions(+), 47 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index a406d384..d59023f8 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -428,7 +428,7 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { // if input size just one, no need to reassign continuous memory bool is_input_continuous = false; (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous && (node_op_desc->GetInputSize() <= 1)) { + if (is_input_continuous && (node_op_desc->GetInputsSize() <= 1)) { (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); } diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 2c34c38e..8471b1d8 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -32,75 +32,101 @@ const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { + Status ret = SUCCESS; GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); + return INTERNAL_ERROR; + } - Status ret = ProcessBroadcastMemcpy(graph, node); + ret = ContinuousInputProcess(graph, node); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy."); + GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str()); return ret; } - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { - continue; + ret = MutableInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = P2pmemInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; } + + } + return ret; +} + +// If node has _input_mutable attr, means input mem may be modified when op execute. +// In order to avoid to affect another op execute with same input when data modified, +// need to inset memcpy node between. +// also works on situation that input is variable or const. +Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + return SUCCESS; + } + + if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); + return FAILED; + } + if (!node_input_mutable) { + return SUCCESS; + } - GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); - if (!node_input_mutable) { + GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); + for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { + if (hccl_in_anchor == nullptr) { continue; } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(src_out_anchor); - GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); - for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { - if (hccl_in_anchor == nullptr) { - continue; - } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. - NodePtr src_node = src_out_anchor->GetOwnerNode(); - std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT); - if (check_src_type) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; } - continue; } + continue; + } - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; } } return SUCCESS; } + // If broadcast input size is bigger than 1, and input from variable, // cause by broadcast input memory should be continuous, // another featuremap mem will be allocated for broadcast input. // In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. // In order to avoid move action out of model, use memcpy node instead of move action code. -Status HcclMemcpyPass::ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node) { +Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - if ((node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) && op_desc->GetInputSize() > 1) { + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + if (is_input_continuous && op_desc->GetInputsSize() > 1) { + // if input size bigger than one, insert memcpy between var data for support continous mem alloc for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; @@ -111,10 +137,7 @@ Status HcclMemcpyPass::ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, cons return INTERNAL_ERROR; } - NodePtr src_node = src_out_anchor->GetOwnerNode(); - std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT); - if (check_src_type) { + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -126,6 +149,47 @@ Status HcclMemcpyPass::ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, cons return SUCCESS; } +// if input is var type, and node input need p2p mem, then memcpy should be insert between the two +Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + vector input_memory_types; + (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); + + if (input_memory_types.empty()) { + return SUCCESS; + } + + for (int index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { + if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { + continue; + } + + auto hccl_in_anchor = GetInDataAnchor(index); + if (hccl_in_anchor == nullptr) { + continue; + } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } + } + return SUCCESS; +} + +bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { + return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); +} + /// /// @brief Add MemcpyAsync Node /// @param [in] ge::ComputeGraphPtr graph diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index aaf00779..81de2e80 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -37,7 +37,13 @@ class HcclMemcpyPass : public GraphPass { Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor); - Status ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node); + Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + bool HcclMemcpyPass::IsDataNode(const std::string& node_type); std::unordered_map node_num_map_; }; From 7f076840f906b768f386dd2af3293f2428c8b95c Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 5 Nov 2020 20:44:17 +0800 Subject: [PATCH 03/13] fix --- ge/graph/passes/hccl_memcpy_pass.cc | 4 ++-- ge/graph/passes/hccl_memcpy_pass.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 8471b1d8..666611c8 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -160,12 +160,12 @@ Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const No return SUCCESS; } - for (int index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { + for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { continue; } - auto hccl_in_anchor = GetInDataAnchor(index); + auto hccl_in_anchor = node->GetInDataAnchor(index); if (hccl_in_anchor == nullptr) { continue; } diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index 81de2e80..1e946fa7 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -43,7 +43,7 @@ class HcclMemcpyPass : public GraphPass { Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); - bool HcclMemcpyPass::IsDataNode(const std::string& node_type); + bool IsDataNode(const std::string& node_type); std::unordered_map node_num_map_; }; From 95af966294c57fb7ac2223901ea83b29b9e1d26a Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 5 Nov 2020 21:08:49 +0800 Subject: [PATCH 04/13] fix cmetric --- ge/graph/build/memory/block_mem_assigner.cc | 16 ++++++++++------ ge/graph/build/memory/block_mem_assigner.h | 2 ++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index d59023f8..c00163f8 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -415,6 +415,15 @@ BlockMemAssigner::~BlockMemAssigner() { } } +void BlockMemAssigner::MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc) { + // if input size just one, no need to reassign continuous memory + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous && (node_op_desc->GetInputsSize() <= 1)) { + (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + } +} + void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; for (const NodePtr &n : compute_graph_->GetAllNodes()) { @@ -425,12 +434,7 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { atomic_addr_clean_id_ = node_op_desc->GetId(); } - // if input size just one, no need to reassign continuous memory - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous && (node_op_desc->GetInputsSize() <= 1)) { - (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); - } + MarkContinuousAllocedForOneInput(node_op_desc); for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index f3d26c1d..c79e695b 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -409,6 +409,8 @@ class BlockMemAssigner : public MemAssigner { MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); + void MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc); + std::unordered_map>> reusable_blocks_; std::map reusable_block_counts_; From 634f87da5dc02efca559eaff49ac834a21632e3b Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 9 Nov 2020 11:21:56 +0800 Subject: [PATCH 05/13] fix --- ge/graph/build/memory/block_mem_assigner.cc | 12 ++++++------ ge/graph/passes/hccl_memcpy_pass.cc | 13 ++++++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index c00163f8..39718901 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -826,6 +826,12 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { return false; } + // if output size just one, no need to reassign continuous memory + if (node_op_desc->GetOutputsSize() == 1) { + GELOGI("op %s output size is one, no need to continuous process.", n->GetName().c_str()); + return false; + } + // Get the continuous output type of the node, default is false bool is_output_continuous = false; auto node_desc = n->GetOpDesc(); @@ -939,12 +945,6 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); auto node_op_desc = n->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); - - // if output size just one, no need to reassign continuous memory - if (node_op_desc->GetOutputsSize() == 1) { - zero_memory_list_.emplace_back(n, kOutput, 0); - return nullptr; - } MemoryBlock *block = nullptr; int64_t total_size = 0; diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 666611c8..0635a1a3 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -58,7 +58,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); return ret; } - + } return ret; } @@ -66,7 +66,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { // If node has _input_mutable attr, means input mem may be modified when op execute. // In order to avoid to affect another op execute with same input when data modified, // need to inset memcpy node between. -// also works on situation that input is variable or const. +// also works on situation that input is variable or const. Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { auto op_desc = node->GetOpDesc(); @@ -77,7 +77,7 @@ Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const N if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); - return FAILED; + return FAILED; } if (!node_input_mutable) { return SUCCESS; @@ -126,6 +126,7 @@ Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, cons (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); if (is_input_continuous && op_desc->GetInputsSize() > 1) { + GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); // if input size bigger than one, insert memcpy between var data for support continous mem alloc for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { @@ -136,7 +137,7 @@ Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, cons GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); return INTERNAL_ERROR; } - + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { @@ -165,6 +166,7 @@ Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const No continue; } + GELOGI("p2p input op is:%s.", op_desc->GetName().c_str()); auto hccl_in_anchor = node->GetInDataAnchor(index); if (hccl_in_anchor == nullptr) { continue; @@ -263,7 +265,8 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); From af7109c8ab589bb5770fd009d2859c74c588deec Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 9 Nov 2020 11:45:23 +0800 Subject: [PATCH 06/13] fix --- ge/graph/build/memory/block_mem_assigner.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 39718901..8f3199de 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -826,19 +826,20 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { return false; } + auto node_desc = n->GetOpDesc(); + if (node_desc == nullptr) { + GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); + return false; + } + // if output size just one, no need to reassign continuous memory - if (node_op_desc->GetOutputsSize() == 1) { + if (node_desc->GetOutputsSize() == 1) { GELOGI("op %s output size is one, no need to continuous process.", n->GetName().c_str()); return false; } // Get the continuous output type of the node, default is false bool is_output_continuous = false; - auto node_desc = n->GetOpDesc(); - if (node_desc == nullptr) { - GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); - return false; - } // If GetBool fail, is_output_continuous is false. (void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); From 508b39279245f6b208b0c580bf58509338f4c77b Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 9 Nov 2020 16:49:54 +0800 Subject: [PATCH 07/13] add stream block reuse related --- ge/graph/build/memory/block_mem_assigner.cc | 29 +++++++++++++++++++---------- ge/graph/build/memory/block_mem_assigner.h | 4 +++- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 8f3199de..efb04b68 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -318,7 +318,11 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life); } } - depend_stream_life_[stream_id_] = GetLifeBegin(); + + // not same stream can't be reused by life time directly, should be reused by dependence + if (same_stream_) { + depend_stream_life_[stream_id_] = GetLifeBegin(); + } } size_t MemoryBlock::GetLifeEnd() { @@ -1130,15 +1134,21 @@ bool IsKnownSubgraphData(const NodePtr &node) { return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); } -void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory) { +void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, + bool same_stream) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory"); --to_release->ref_count_; + if (!same_stream) { + to_release->same_stream_ = false; + } if (to_release->ref_count_ == 0) { to_release->SetLifeTimeEnd(life_time_); - reusable_memory.emplace_back(to_release); - AddReusableBlockCount(*to_release, reusable_block_counts_); + if (to_release->same_stream_) { + reusable_memory.emplace_back(to_release); + AddReusableBlockCount(*to_release, reusable_block_counts_); + } } } @@ -1178,10 +1188,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_mapGetName().c_str()); if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && - (node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx())) && - (node->GetOpDesc()->GetStreamId() == block->stream_id_)) { - ReleaseMemory(block, reusable_memory); - if (block->ref_count_ == 0) { + (node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx()))) { + ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); + if (block->ref_count_ == 0 && block->same_stream_) { SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); } } @@ -1701,10 +1710,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, op_desc->SetWorkspace(workspace_list); } GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" - " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(), + " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d].", graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_, - block->continuous_block_, block->deleted_block_, node_type.ref_input); + block->continuous_block_, block->deleted_block_, block->same_stream_, node_type.ref_input); } void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index c79e695b..3fc1a902 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -65,6 +65,7 @@ class MemoryBlock { stream_id_(stream_id), deleted_block_(false), reuse_mem_(reuse_mem), + same_stream_(true) input_index_(0), continuous_block_(false), first_continuous_block_(false), @@ -142,6 +143,7 @@ class MemoryBlock { int64_t stream_id_; bool deleted_block_; bool reuse_mem_; + bool same_stream_; uint32_t input_index_; bool continuous_block_; bool first_continuous_block_; @@ -353,7 +355,7 @@ class BlockMemAssigner : public MemAssigner { /// @return void /// @author /// - void ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory); + void ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, bool same_stream = true); /// /// @ingroup GE From 1852d7fefcd9b7d9789a5cefb5eb0e4cf5957ac7 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 9 Nov 2020 16:52:57 +0800 Subject: [PATCH 08/13] add multi mode config --- inc/external/ge/ge_api_types.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 113b8bc6..1a843b03 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -245,6 +245,11 @@ const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; // 0: close debug; 1: open TBE compiler; 2: open ccec compiler const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel"; +// Configure for fix hcombroadcast format. +// when config model multi, broadcast format should be fixed +// 0: data multi; 1: model multi; +const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; From 2c022e4205311304fbfdb3cf4e055ce25088c2fd Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 9 Nov 2020 19:04:19 +0800 Subject: [PATCH 09/13] fix --- ge/graph/build/memory/block_mem_assigner.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 3fc1a902..d8a9573f 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -65,7 +65,7 @@ class MemoryBlock { stream_id_(stream_id), deleted_block_(false), reuse_mem_(reuse_mem), - same_stream_(true) + same_stream_(true), input_index_(0), continuous_block_(false), first_continuous_block_(false), From 9dbf39f12b1eb15134e2e15591ca725e173343c6 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Tue, 10 Nov 2020 16:33:31 +0800 Subject: [PATCH 10/13] move hccl_memcpy_pass to optimize1 --- ge/graph/manager/graph_manager.cc | 2 ++ ge/graph/preprocess/graph_preprocess.cc | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 4737955d..ce0ddcc3 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -1962,6 +1962,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { } PassManager after_merge_passes; GE_CHK_STATUS_RET( + after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); + GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::MergeInputMemcpyPass", new (std::nothrow) MergeInputMemcpyPass)); GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index f90c0d80..17d8362a 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -60,7 +60,6 @@ #include "graph/passes/get_original_format_pass.h" #include "graph/passes/guarantee_const_pass.h" #include "graph/passes/hccl_group_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/infershape_pass.h" #include "graph/passes/iterator_op_pass.h" @@ -1691,8 +1690,6 @@ Status GraphPrepare::PrepareOptimize() { PassManager graph_pass; try { (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass); - // todo 临时把hccl的memcpy插入放到图准备,为了防止其多插memcpy - (void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; From 7610599a3a1d2ca828468e8323bb09b38f6b840d Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 11 Nov 2020 10:22:58 +0800 Subject: [PATCH 11/13] move hccl memcpy after trans op fusion pass --- ge/graph/manager/graph_manager.cc | 4 ++-- ge/graph/passes/hccl_memcpy_pass.cc | 18 +++++++++--------- ge/graph/passes/hccl_memcpy_pass.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index ce0ddcc3..3057e8ad 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -1962,8 +1962,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { } PassManager after_merge_passes; GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); - GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::MergeInputMemcpyPass", new (std::nothrow) MergeInputMemcpyPass)); GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); @@ -1996,6 +1994,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) TransOpWithoutReshapeFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)) + GE_CHK_STATUS_RET( + after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 0635a1a3..553dbf20 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -198,7 +198,7 @@ bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr /// -NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { +NodePtr HcclMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); NodePtr pre_node = out_data_anchor->GetOwnerNode(); OpDescPtr pre_op_desc = pre_node->GetOpDesc(); @@ -207,24 +207,24 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O return nullptr; } - std::string node_name = pre_node->GetName() + "_" + IDENTITY; + std::string node_name = pre_node->GetName() + "_" + MEMCPYASYNC; node_name = CheckDuplicateName(node_name); - OpDescPtr op_desc = MakeShared(node_name.c_str(), IDENTITY); + OpDescPtr op_desc = MakeShared(node_name.c_str(), MEMCPYASYNC); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create MemcpyAsync op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create MemcpyAsync op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create MemcpyAsync op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create MemcpyAsync op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -232,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert MemcpyAsync node fail."); return nullptr; } @@ -267,7 +267,7 @@ Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const const InDataAnchorPtr &hccl_in_anchor) { GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); + NodePtr memcpy_node = CreateMemcpyAsyncNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); Status ret1 = src_out_anchor->Unlink(hccl_in_anchor); diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index 1e946fa7..26df2de0 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -30,7 +30,7 @@ class HcclMemcpyPass : public GraphPass { Status ClearStatus() override; private: - NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + NodePtr CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); std::string CheckDuplicateName(const std::string &node_name); From a0537a6a907d5c33e15b59d05e7377f2617713f3 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 12 Nov 2020 09:41:12 +0800 Subject: [PATCH 12/13] mod memcpy back to identity --- ge/graph/passes/hccl_memcpy_pass.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 553dbf20..ada5397b 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -93,7 +93,7 @@ Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const N int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); if (src_out_anchor_size == kAnchorSize) { - // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { @@ -193,12 +193,12 @@ bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { } /// -/// @brief Add MemcpyAsync Node +/// @brief Add Identity Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr /// -NodePtr HcclMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { +NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); NodePtr pre_node = out_data_anchor->GetOwnerNode(); OpDescPtr pre_op_desc = pre_node->GetOpDesc(); @@ -207,24 +207,24 @@ NodePtr HcclMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, cons return nullptr; } - std::string node_name = pre_node->GetName() + "_" + MEMCPYASYNC; + std::string node_name = pre_node->GetName() + "_" + IDENTITY; node_name = CheckDuplicateName(node_name); - OpDescPtr op_desc = MakeShared(node_name.c_str(), MEMCPYASYNC); + OpDescPtr op_desc = MakeShared(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create MemcpyAsync op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create MemcpyAsync op:%s.", op_desc->GetName().c_str()); + GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create MemcpyAsync op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create MemcpyAsync op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -232,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, cons NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert MemcpyAsync node fail."); + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); return nullptr; } @@ -267,7 +267,7 @@ Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const const InDataAnchorPtr &hccl_in_anchor) { GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - NodePtr memcpy_node = CreateMemcpyAsyncNode(graph, src_out_anchor); + NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); Status ret1 = src_out_anchor->Unlink(hccl_in_anchor); From 404c6063430c278df15aa6aa92bb1e1895702f1b Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 12 Nov 2020 09:46:36 +0800 Subject: [PATCH 13/13] fix --- ge/graph/passes/hccl_memcpy_pass.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index 26df2de0..1e946fa7 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -30,7 +30,7 @@ class HcclMemcpyPass : public GraphPass { Status ClearStatus() override; private: - NodePtr CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); std::string CheckDuplicateName(const std::string &node_name);