From d983cef480287c108c678ec4a2185949e6203e07 Mon Sep 17 00:00:00 2001 From: TangQunzhang Date: Thu, 25 Mar 2021 11:31:22 +0800 Subject: [PATCH] Performance optimization --- ge/graph/build/memory/block_mem_assigner.cc | 11 +- ge/graph/build/model_builder.cc | 3 +- ge/graph/load/model_manager/davinci_model.cc | 15 +-- ge/graph/load/model_manager/zero_copy_offset.h | 6 +- tests/ut/ge/CMakeLists.txt | 1 + tests/ut/ge/graph/build/mem_assigner_unittest.cc | 14 +++ tests/ut/ge/graph/build/model_builder_unittest.cc | 146 ++++++++++++++++++++++ tests/ut/ge/graph/load/davinci_model_unittest.cc | 48 +++++++ 8 files changed, 225 insertions(+), 19 deletions(-) create mode 100644 tests/ut/ge/graph/build/model_builder_unittest.cc diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index ae0c6e0d..1a4b62e4 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -430,17 +430,14 @@ void SetLastUsedInputMemAttr(NodePtr &node, int input_index) { } auto node_op_desc = node->GetOpDesc(); if (node_op_desc != nullptr) { - auto input_desc = node_op_desc->GetInputDesc(input_index); - if (!ge::AttrUtils::SetInt(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) { + auto input_desc = node_op_desc->MutableInputDesc(input_index); + if (!ge::AttrUtils::SetInt(*input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) { GELOGW("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true failed.", node_op_desc->GetName().c_str(), input_index); return; } GELOGD("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true success.", node_op_desc->GetName().c_str(), input_index); - if (node_op_desc->UpdateInputDesc(input_index, input_desc) != GRAPH_SUCCESS) { - GELOGW("Update %s input[%d] desc failed.", node_op_desc->GetName().c_str(), input_index); - } } } @@ -593,9 +590,9 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { } for (auto &out_anchor : 
n->GetAllOutDataAnchors()) { - GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); + auto output_desc = node_op_desc->GetOutputDescPtr(out_anchor->GetIdx()); int64_t size = 0; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS, GELOGI("Get size failed")); GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, " "maybe it is unknown shape node, Node_name:%s", diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 1a14374d..03057286 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -197,8 +197,7 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { } } - std::string input_const_info = ToString(is_input_const); - GELOGD("update opdesc:%s InputConst:%s", node_op_desc->GetName().c_str(), input_const_info.c_str()); + GELOGD("update opdesc:%s InputConst:%s", node_op_desc->GetName().c_str(), ToString(is_input_const).c_str()); node_op_desc->SetIsInputConst(is_input_const); } diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index c053ad63..52642086 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -3683,33 +3683,34 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); is_dynamic_ = input_data.is_dynamic_batch; - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); + bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn(); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START)); Status ret = CopyModelData(input_data, output_data, is_dynamic_); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != 
SUCCESS, return ret, "Copy input data to model failed. model id: %u", model_id_); GELOGD("current_data.index=%u", input_data.index); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END)); if (!task_list_.empty()) { GELOGD("rtModelExecute do"); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START)); rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END)); GELOGD("rtModelExecute end"); } if (!is_async_mode_) { - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, "Copy Output data to user failed."); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END)); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); } // report model time data - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); + GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); GELOGD("Model run end, model id:%u", model_id_); return SUCCESS; } diff --git a/ge/graph/load/model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h index 82e1bb6d..2dea5666 100644 --- 
a/ge/graph/load/model_manager/zero_copy_offset.h +++ b/ge/graph/load/model_manager/zero_copy_offset.h @@ -58,15 +58,15 @@ class ZeroCopyOffset { uint32_t GetDataCount() const { return data_count_; } uint32_t GetAddrCount() const { return addr_count_; } // value of *data_info_ from davinci_model - std::vector> GetDataInfo() const { return data_info_; } + const std::vector> &GetDataInfo() const { return data_info_; } // relative_offset from zero_copy_relative_offset_ - std::vector GetRelativeOffset() const { return relative_offset_; } + const std::vector &GetRelativeOffset() const { return relative_offset_; } // data_size of Data/Netoutput int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model const std::vector>> &GetOutsideAddrs() const { return outside_addrs_; } // name of op - std::string GetOpName() const { return op_name_; } + const std::string &GetOpName() const { return op_name_; } const bool IsRelativeOffsetValid() const { return valid_relative_offset_; } private: diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 3caba788..eb721a72 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -764,6 +764,7 @@ set(MULTI_PARTS_TEST_FILES "common/ge_format_util_unittest.cc" "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" + "graph/build/model_builder_unittest.cc" "graph/build/mem_assigner_unittest.cc" "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index 0024185b..5cd16399 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -249,3 +249,17 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500); EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 
600); } + +TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeGraph(graph); + auto node_f = graph->FindNode("F"); + MemoryAssigner memory_assigner(graph); + map mem_offset; + size_t zero_memory_size = 0; + EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); + + int32_t flag = 0; + (void) ge::AttrUtils::GetInt(node_f->GetOpDesc()->GetInputDesc(0), ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, flag); + EXPECT_EQ(flag, 1); +} diff --git a/tests/ut/ge/graph/build/model_builder_unittest.cc b/tests/ut/ge/graph/build/model_builder_unittest.cc new file mode 100644 index 00000000..496c1f3e --- /dev/null +++ b/tests/ut/ge/graph/build/model_builder_unittest.cc @@ -0,0 +1,146 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" +#include "../passes/graph_builder_utils.h" + +#define protected public +#define private public +#include "graph/build/model_builder.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestModelBuilderTest : public testing::Test { + public: + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr op_def = make_shared(name, type); + auto desc_temp_ptr = make_shared(); + auto desc_temp = *desc_temp_ptr; + + TensorUtils::SetSize(desc_temp, 1024); + op_def->AddInputDesc(desc_temp); + op_def->AddOutputDesc(desc_temp); + + std::vector workspace_bytes; + workspace_bytes.push_back(wsByte); + op_def->SetWorkspaceBytes(workspace_bytes); + return op_def; + } + ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr op_def = make_shared(name, type); + auto desc_temp_ptr = make_shared(); + auto desc_temp = *desc_temp_ptr; + + TensorUtils::SetSize(desc_temp, 1024); + op_def->AddInputDesc(desc_temp); + + auto desc_output_ptr = make_shared(); + auto desc_output = *desc_output_ptr; + TensorUtils::SetSize(desc_output, 6500); + ge::TensorUtils::SetReuseInput(desc_output, true); + ge::TensorUtils::SetReuseInputIndex(desc_output, 0); + op_def->AddOutputDesc(desc_output); + + std::vector workspace_bytes; + workspace_bytes.push_back(wsByte); + op_def->SetWorkspaceBytes(workspace_bytes); + return op_def; + } + void MakeGraph(ge::ComputeGraphPtr &graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); + op_def_a->SetStreamId(0); + ge::OpDescPtr op_def_b = 
CreateOpWithWsSize("B", 120000); + op_def_b->SetStreamId(0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000); + op_def_c->SetStreamId(1); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000); + op_def_d->SetStreamId(2); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000); + op_def_e->SetStreamId(3); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000); + op_def_f->SetStreamId(2); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000); + op_def_g->SetStreamId(3); + ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000); + op_def_h->SetStreamId(2); + ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000); + op_def_i->SetStreamId(2); + ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT); + op_def_j->SetStreamId(3); + + // add node + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + ge::NodePtr node_h = graph->AddNode(op_def_h); + ge::NodePtr node_i = graph->AddNode(op_def_i); + ge::NodePtr node_j = graph->AddNode(op_def_j); + + // add edge + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_g->GetInDataAnchor(1)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_h->GetInDataAnchor(0)); + 
ge::GraphUtils::AddEdge(node_g->GetOutDataAnchor(0), node_j->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_h->GetOutDataAnchor(0), node_i->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_i->GetOutDataAnchor(0), node_j->GetInDataAnchor(1)); + + GetContext().out_nodes_map["H"] = {0}; + GetContext().out_nodes_map["I"] = {0}; + GetContext().out_nodes_map["J"] = {0}; + graph->TopologicalSorting(); + } + + + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +// PreBuildModel on the graph built by MakeGraph is expected to return SUCCESS +TEST_F(UtestModelBuilderTest, SetInputIsConst) { + Graph2SubGraphInfoList subgraphs; + std::map stream_max_parallel_num; + ge::ComputeGraphPtr graph = make_shared(""); + MakeGraph(graph); + graph->TopologicalSorting(); + ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); + EXPECT_EQ(builder.PreBuildModel(), SUCCESS); +} diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 18cc622b..3487f8ed 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -942,4 +942,52 @@ TEST_F(UtestDavinciModel, simple_test_gmock) { EXPECT_EQ(mock_stub.func2(2, 5), 1023); EXPECT_EQ(mock_stub.func2(3, 5), 1023); } + +TEST_F(UtestDavinciModel, NnExecute) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = make_shared("default"); + ProfilingManager::Instance().is_load_profiling_ = true; + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 10240); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape({1,4,128,128}), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + { + OpDescPtr op_desc = CreateOpDesc("data", DATA); 
+ op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index = 0 + } + + { + OpDescPtr op_desc = CreateOpDesc("output", NETOUTPUT); + op_desc->AddInputDesc(tensor); + op_desc->SetInputOffset({5120}); + op_desc->SetSrcName( { "memcpy" } ); + op_desc->SetSrcIndex( { 0 } ); + NodePtr node = graph->AddNode(op_desc); // op_index = 3 + } + + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + + rtStream_t stream = nullptr; + InputData input_data; + OutputData output_data; + vector outputs; + EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); + EXPECT_EQ(output_data.blobs.size(), 1); + EXPECT_EQ(outputs.size(), 1); + input_data.blobs = output_data.blobs; + EXPECT_EQ(input_data.blobs.size(), 1); + EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); +} } // namespace ge