From 12b0168265a07a13831e3b2a2d952554bbb56654 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 19 Nov 2020 10:32:35 +0800 Subject: [PATCH 1/5] fix atomic process bug in loop condition. atomic out node link to atomic input node(hccl), will build error --- ge/graph/passes/atomic_addr_clean_pass.cc | 71 +++++++++++++++++++++++++------ ge/graph/passes/atomic_addr_clean_pass.h | 3 ++ 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 690dee27..e1b0e4b5 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -248,10 +248,50 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { if (op_desc == nullptr) { return false; } + + if (CheckAtomicFromOpsKernel(node)) { + return true; + } + + // 2.Check atomic attr in node + std::map> node_workspace_offset; + bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); + bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); + node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); + if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) { + return false; + } + + if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { + std::vector atomic_output_index; + (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + bool is_all_output_peer_also_atomic = true; + for (auto &output_index : atomic_output_index) { + if (!IsOutputIndexPeerInputAtomic(node, output_index)) { + is_all_output_peer_also_atomic = false; + break; + } + } + if (is_all_output_peer_also_atomic) { + GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str()); + return false; + } + } + + graphStatus ret = op_desc->SetAttr(ATOMIC_ATTR_IS_ATOMIC_NODE, GeAttrValue::CreateFrom(true)); + if (ret != GRAPH_SUCCESS) { + GELOGW("set attr ATOMIC_ATTR_IS_ATOMIC_NODE fail."); + } + GELOGD("Recognized atomic op %s from attr.", op_desc->GetName().c_str()); + return true; +} + +// just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input +bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { // 1.Check if isAtomic attrs exist for HCOM std::shared_ptr instance_ptr = GELib::GetInstance(); if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGW("GELib not initialized"); + GELOGW("GELib not initialized, atomic from ops kernel judge false, node_name: %s", node->GetName().c_str()); return false; } @@ -259,38 +299,41 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType()); for (const auto &op_info : op_info_vec) { if (op_info.isAtomic) { - GELOGI("Recognized atomic op %s from DNN_HCCL engine.", op_desc->GetName().c_str()); // check peer input is DATA for (auto &in_data_anchor : node->GetAllInDataAnchors()) { if (in_data_anchor->GetPeerOutAnchor() != nullptr && in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); if (peer_in_node->GetType() == DATA) { - GELOGI("Recognized atomic op %s from DNN_HCCL engine and input is DATA.", op_desc->GetName().c_str()); + GELOGI("Recognized atomic op %s from %s engine and input is DATA.", op_desc->GetName().c_str(), op_info.engine.c_str()); return false; } } } + GELOGI("Recognized atomic op %s from %s engine.", op_desc->GetName().c_str(), op_info.engine.c_str()); hcom_node_vec_.push_back(node); return true; } } - // 2.Check atomic attr in node - std::map> node_workspace_offset; - bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); - bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); - node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); - if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) { +} + +bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const ge::NodePtr &node, int64_t output_index) { + auto out_data_anchor = node->GetAllInDataAnchors().at(output_index); + if (out_data_anchor == nullptr) { return false; } - graphStatus ret = op_desc->SetAttr(ATOMIC_ATTR_IS_ATOMIC_NODE, GeAttrValue::CreateFrom(true)); - if (ret != GRAPH_SUCCESS) { - GELOGW("set attr ATOMIC_ATTR_IS_ATOMIC_NODE fail."); + for (auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) { + auto output_node = input_anchor->GetOwnerNode(); + // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input + // hccl's attr ATOMIC_ATTR_INPUT_INDEX mark on CalcOpRunningParam, can't be get here + if (CheckAtomicFromOpsKernel(output_node)) { + return true; + } } - GELOGD("Recognized atomic op %s from FE engine.", op_desc->GetName().c_str()); - return true; + return false; } + /// /// @brief Clear Status, used for subgraph pass /// @return SUCCESS diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index ad60b7b5..9adeb611 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -84,6 +84,9 @@ class AtomicAddrCleanPass : public GraphPass { Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, std::vector &common_atomic_nodes); + bool CheckAtomicFromOpsKernel(const NodePtr &node); + + bool IsOutputIndexPeerInputAtomic(const ge::NodePtr &node, int64_t output_index); vector hcom_node_vec_; bool is_loop_graph_ = false; From 079abb175b200f3bd8e9ffca42def6463061a6c3 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 19 Nov 2020 10:54:19 +0800 Subject: [PATCH 2/5] fix --- ge/graph/passes/atomic_addr_clean_pass.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index e1b0e4b5..88d6b3fe 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -296,7 +296,7 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { } OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); - vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType()); + vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(node->GetType()); for (const auto &op_info : op_info_vec) { if (op_info.isAtomic) { // check peer input is DATA @@ -305,12 +305,12 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); if (peer_in_node->GetType() == DATA) { - GELOGI("Recognized atomic op %s from %s engine and input is DATA.", op_desc->GetName().c_str(), op_info.engine.c_str()); + GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str()); return false; } } } - GELOGI("Recognized atomic op %s from %s engine.", op_desc->GetName().c_str(), op_info.engine.c_str()); + GELOGI("Recognized atomic op %s from %s engine.", node->GetName().c_str(), op_info.engine.c_str()); hcom_node_vec_.push_back(node); return true; } @@ -318,7 +318,7 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { } bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const ge::NodePtr &node, int64_t output_index) { - auto out_data_anchor = node->GetAllInDataAnchors().at(output_index); + auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); if (out_data_anchor == nullptr) { return false; } From c5abd20f91e636ee9ecc64271e82fda6d32a74d1 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 19 Nov 2020 14:22:00 +0800 Subject: [PATCH 3/5] fix --- ge/graph/passes/atomic_addr_clean_pass.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 88d6b3fe..8932bef3 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -315,6 +315,7 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { return true; } } + return false; } bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const ge::NodePtr &node, int64_t output_index) { From c0dc27f58073e35c5349e05cadcb2a65ad2db971 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 19 Nov 2020 21:11:20 +0800 Subject: [PATCH 4/5] fix comment --- ge/graph/passes/atomic_addr_clean_pass.cc | 6 +++--- ge/graph/passes/atomic_addr_clean_pass.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 8932bef3..18cac856 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -266,7 +266,7 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { std::vector atomic_output_index; (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); bool is_all_output_peer_also_atomic = true; - for (auto &output_index : atomic_output_index) { + for (const auto &output_index : atomic_output_index) { if (!IsOutputIndexPeerInputAtomic(node, output_index)) { is_all_output_peer_also_atomic = false; break; @@ -318,13 +318,13 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { return false; } -bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const ge::NodePtr &node, int64_t output_index) { +bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index) { auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); if (out_data_anchor == nullptr) { return false; } - for (auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) { + for (const auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) { auto output_node = input_anchor->GetOwnerNode(); // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input // hccl's attr ATOMIC_ATTR_INPUT_INDEX mark on CalcOpRunningParam, can't be get here diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index 9adeb611..420ddd01 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -86,7 +86,7 @@ class AtomicAddrCleanPass : public GraphPass { bool CheckAtomicFromOpsKernel(const NodePtr &node); - bool IsOutputIndexPeerInputAtomic(const ge::NodePtr &node, int64_t output_index); + bool IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index); vector hcom_node_vec_; bool is_loop_graph_ = false; From 62591e6cf91b056df020219cf24376814fe47901 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 23 Nov 2020 14:55:03 +0800 Subject: [PATCH 5/5] atomic fix for unknownshape graph --- ge/graph/passes/atomic_addr_clean_pass.cc | 34 +++++++++++++++++-------------- ge/graph/passes/atomic_addr_clean_pass.h | 2 +- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 18cac856..b62f86c7 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -33,10 +33,12 @@ namespace ge { Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); GELOGD("AtomicAddrCleanPass begin."); + bool is_unknown_graph = graph->GetGraphUnknownFlag(); + // 1.Recoginze atomic and loop mark vector atomic_node_vec; for (NodePtr &node : graph->GetDirectNode()) { - if (IsAtomicOp(node)) { + if (IsAtomicOp(node, is_unknown_graph)) { atomic_node_vec.push_back(node); } if (!is_loop_graph_ && node->GetType() == LOOPCOND) { @@ -50,7 +52,6 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { return SUCCESS; } - bool is_unknown_graph = graph->GetGraphUnknownFlag(); if (is_unknown_graph) { GELOGD("Graph[%s] is unknown graph. It will call fe interface to compile op.", graph->GetName().c_str()); GE_CHK_STATUS_RET(CompileUnknownGraphOp(atomic_node_vec)); @@ -242,7 +243,7 @@ Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr return SUCCESS; } -bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { +bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node, bool is_unknown_graph) { GE_IF_BOOL_EXEC(node == nullptr, GELOGE(FAILED, "node is null."); return false); OpDescPtr op_desc = node->GetOpDesc(); if (op_desc == nullptr) { @@ -262,19 +263,21 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { return false; } - if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { - std::vector atomic_output_index; - (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); - bool is_all_output_peer_also_atomic = true; - for (const auto &output_index : atomic_output_index) { - if (!IsOutputIndexPeerInputAtomic(node, output_index)) { - is_all_output_peer_also_atomic = false; - break; + if (!is_unknown_graph) { + if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { + std::vector atomic_output_index; + (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + bool is_all_output_peer_also_atomic = true; + for (const auto &output_index : atomic_output_index) { + if (!IsOutputIndexPeerInputAtomic(node, output_index)) { + is_all_output_peer_also_atomic = false; + break; + } + } + if (is_all_output_peer_also_atomic) { + GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str()); + return false; } - } - if (is_all_output_peer_also_atomic) { - GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str()); - return false; } } @@ -342,6 +345,7 @@ bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int6 Status AtomicAddrCleanPass::ClearStatus() { hcom_node_vec_.clear(); return SUCCESS; + } Status AtomicAddrCleanPass::CompileUnknownGraphOp(const vector &atomic_node_vec) { diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index 420ddd01..64bc604b 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -72,7 +72,7 @@ class AtomicAddrCleanPass : public GraphPass { * @param node * @return */ - bool IsAtomicOp(const NodePtr &node); + bool IsAtomicOp(const NodePtr &node, bool is_unknown_graph); /** * Handle atomic node in unknown graph