diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 9b53403a..62c779a7 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -562,7 +562,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); if (is_allocated_first_input) { std::map out2ins; - GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node: %s", node->GetName().c_str()); + GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str()); // output is beginning offset, set offset for input; only support this case now if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); @@ -1263,10 +1263,46 @@ Status GraphMemoryAssigner::CheckOffset() { return FAILED; } } + // check reuse input and output + GE_CHK_STATUS_RET(CheckRefNodeOffset(node), "[Check][Offset]fail for node: %s", node->GetName().c_str()); } + return SUCCESS; } +/* For every (output index, input index) pair filled in by TryGetNodeRefIndexes, verifies that both indexes fall inside the output and input offset lists of the op desc and that the two offsets are equal. Logs an error and returns ge::FAILED on the first violation, ge::SUCCESS when every pair passes. */ ge::Status GraphMemoryAssigner::CheckRefNodeOffset(const NodePtr &node) { + std::map out2ins; + GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str()); + auto opdesc = node->GetOpDesc(); + GE_CHECK_NOTNULL(opdesc); + auto output_list = opdesc->GetOutputOffset(); + auto input_list = opdesc->GetInputOffset(); + for (const auto &out2in : out2ins) { + auto out_i = out2in.first; + /* bounds check before output_list is indexed below */ if (static_cast(out_i) >= output_list.size()) { + std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" + + FmtToStr(output_list.size()) + "should bigger than ref out index" + FmtToStr(out_i); + GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); + return ge::FAILED; + } + auto in_i = out2in.second; + if (static_cast(in_i) >= 
input_list.size()) { + std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset size" + + FmtToStr(input_list.size()) + "should bigger than ref input index" + FmtToStr(in_i); + GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); + return ge::FAILED; + } + /* the paired output and input offsets must match exactly */ if (output_list[out_i] != input_list[in_i]) { + std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset " + FmtToStr(input_list[in_i]) + + "should equal to output offset" + FmtToStr(output_list[out_i]) + "with ref in" + + FmtToStr(in_i) + "to output" + FmtToStr(out_i); + GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); + return ge::FAILED; + } + } + return ge::SUCCESS; +} + ge::Status GraphMemoryAssigner::SetInputOffset() { if (memory_offset_.empty()) { REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when SetInputOffset, " @@ -1343,6 +1379,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< origin_input_list = tmp_op_desc->GetInputOffset(); int64_t valid_input_index = 0; bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); + /* output-to-input index pairs for this node, filled once before the input anchor loop */ std::map out2ins; + GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str()); for (const auto &anchor : node->GetAllInDataAnchors()) { vector output_list; auto peer_out_anchor = anchor->GetPeerOutAnchor(); @@ -1363,17 +1401,25 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< auto ori_input_offset_list_size = origin_input_list.size(); auto mem_type_size = memory_type.size(); if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) { - std::string error = "fusion: node" + FmtToStr(tmp_op_desc->GetName()) + + std::string error = "Node" + FmtToStr(tmp_op_desc->GetName()) + + " input_size" + FmtToStr(input_size) + " diff from memory_type_size" + FmtToStr(mem_type_size) + " from ori_input_offset_list_size" + 
FmtToStr(ori_input_offset_list_size); GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); return ge::FAILED; } - // not hbm keep orignal inputoffest - // hbm inputoffset = original inputoffset + outputoffset - input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index] - : origin_input_list[valid_input_index] + output_list.at(out_index)); + GELOGD("Node[%s] input[%d] has origin offset[%ld]", tmp_op_desc->GetName().c_str(), anchor->GetIdx(), + origin_input_list[valid_input_index]); + // L1 keep original input_offset + if (memory_type[valid_input_index] == RT_MEMORY_L1) { + input_offset = origin_input_list[valid_input_index]; + } else { + // hbm input_offset = original input_offset + output_offset + input_offset = origin_input_list[valid_input_index] + output_list.at(out_index); + // update ref output_offset when input change + /* NOTE(review): the CONSTANT branch that follows passes input_offset to TensorUtils::GetDataOffset, which appears to overwrite it after the ref output was already set to the value computed here - confirm the ref output and the input cannot end up with different offsets */ GE_CHK_STATUS_RET(UpdateRefOpOutputOffset(node, out2ins, anchor->GetIdx(), input_offset), + "[Update][RefOffset]fail for node: %s", node->GetName().c_str()); + } } const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); if (in_node->GetType() == CONSTANT) { @@ -1381,12 +1427,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); } - GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]", - has_mem_type_attr ? 
"Fusion" : "", - tmp_op_desc->GetName().c_str(), - valid_input_index, - peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), - out_index, + GELOGD("Node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", tmp_op_desc->GetName().c_str(), + anchor->GetIdx(), peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset); input_list.emplace_back(input_offset); valid_input_index++; @@ -1395,6 +1437,30 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< return ge::SUCCESS; } +/* For each pair in out2ins whose input index equals ref_in, overwrites the paired output offset with input_offset and stores the list back through SetOutputOffset. Returns ge::FAILED if that output index is not inside the current output offset list, ge::SUCCESS otherwise (including when no pair matches ref_in). */ ge::Status GraphMemoryAssigner::UpdateRefOpOutputOffset(const NodePtr &node, const std::map &out2ins, + const int ref_in, const int64_t input_offset) const { + auto opdesc = node->GetOpDesc(); + GE_CHECK_NOTNULL(opdesc); + for (const auto &out2in : out2ins) { + auto out_i = out2in.first; + auto in_i = out2in.second; + if (in_i == ref_in) { + /* local copy of the offsets; the change only takes effect through SetOutputOffset below */ auto origin_output_list = opdesc->GetOutputOffset(); + if (static_cast(out_i) >= origin_output_list.size()) { + std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" + + FmtToStr(origin_output_list.size()) + "should bigger than ref out index" + FmtToStr(out_i); + GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str()); + return ge::FAILED; + } + origin_output_list[out_i] = input_offset; + opdesc->SetOutputOffset(origin_output_list); + GELOGI("Node[%s] output[%d] is updated from reuse input index[%d] to offset[%ld]", opdesc->GetName().c_str(), + out_i, ref_in, input_offset); + } + } + return ge::SUCCESS; +} + ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { GE_CHECK_NOTNULL(node->GetOpDesc()); vector input_list; @@ -1639,7 +1705,7 @@ void GraphMemoryAssigner::PrintMemoryOffset() { } } -ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map &out2ins) { +ge::Status GraphMemoryAssigner::TryGetNodeRefIndexes(const NodePtr &node, map &out2ins) const{ for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { 
int32_t reuse_in_index = -1; bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index); diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 92e599b8..773df4e6 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -110,8 +110,11 @@ class GraphMemoryAssigner { ge::Status SetInputOffset(); ge::Status UpdateOpInputOffset(const NodePtr &node) const; + /* Sets the offset of every output paired with input index ref_in in out2ins to input_offset. */ ge::Status UpdateRefOpOutputOffset(const NodePtr &node, const std::map &out2ins, const int ref_in, + const int64_t input_offset) const; ge::Status CheckOffset(); + /* Fails when a paired output and input of node do not share the same offset. */ ge::Status CheckRefNodeOffset(const NodePtr &node); ge::Status AssignReferenceMemory(); @@ -125,7 +128,7 @@ class GraphMemoryAssigner { ge::Status ReAssignAtomicMemory(bool is_loop_graph); - ge::Status GetAllRef(const NodePtr &node, std::map &out2ins); + ge::Status TryGetNodeRefIndexes(const NodePtr &node, std::map &out2ins) const; bool AssignContinuousInputMemoryWithAtomicProcessDirectly(const NodePtr &input_continuous_node, std::map &node_2_continuous_type); diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index ba5cdcd4..92f9b5b4 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -191,6 +191,30 @@ class UtestMemoryAssignerTest : public testing::Test { return builder.GetGraph(); } + /* Fixture graph: a Variable node feeds input 0 and a Const node feeds input 1 of an Assign node; input 0 is tagged RT_MEMORY_HBM, input 1 RT_MEMORY_L1, and output 0 of the Assign node is flagged as reusing input 0. */ ComputeGraphPtr MakeRefNodeGraph() { + ge::ut::GraphBuilder builder("graph"); + auto var_input = builder.AddNode("var", "Variable", 1, 1); + auto const_input = builder.AddNode("const", "Const", 1, 1); + auto assign = builder.AddNode("assgin", "Assign", 2, 1); + // add link + builder.AddDataEdge(var_input, 0, assign, 0); + builder.AddDataEdge(const_input, 0, assign, 1); + // set offset + assign->GetOpDesc()->SetInputOffset({100, 0}); + assign->GetOpDesc()->SetOutputOffset({10000}); + 
var_input->GetOpDesc()->SetOutputOffset({10000}); + const_input->GetOpDesc()->SetOutputOffset({1000}); + // set mem type + ge::AttrUtils::SetListInt(assign->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, {RT_MEMORY_HBM, RT_MEMORY_L1}); + // set ref + auto output_tensordesc = assign->GetOpDesc()->MutableOutputDesc(0); + ge::TensorUtils::SetReuseInput(*output_tensordesc, true); + uint32_t reuse_input_index = 0; + ge::TensorUtils::SetReuseInputIndex(*output_tensordesc, reuse_input_index); + + return builder.GetGraph(); + } + protected: void SetUp() {} @@ -298,4 +322,20 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); -} \ No newline at end of file +} + +/* Expects SetInputOffset to turn the HBM input offset into origin 100 plus the Variable output offset 10000 = 10100, to copy that value to the output that reuses input 0, to leave the L1 input at 0, and expects CheckOffset to accept the result. */ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_set_input_offset) { + ge::ComputeGraphPtr graph = MakeRefNodeGraph(); + auto assgin = graph->FindNode("assgin"); + EXPECT_EQ(assgin->GetOpDesc()->GetOutputOffset()[0], 10000); + EXPECT_EQ(assgin->GetOpDesc()->GetInputOffset()[0], 100); + EXPECT_EQ(assgin->GetOpDesc()->GetInputOffset()[1], 0); + GraphMemoryAssigner memoryAssigner(graph); + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + EXPECT_EQ(memoryAssigner.SetInputOffset(), GRAPH_SUCCESS); + EXPECT_EQ(assgin->GetOpDesc()->GetOutputOffset()[0], 10100); + EXPECT_EQ(assgin->GetOpDesc()->GetInputOffset()[0], 10100); + EXPECT_EQ(assgin->GetOpDesc()->GetInputOffset()[1], 0); + EXPECT_EQ(memoryAssigner.CheckOffset(), GRAPH_SUCCESS); +}