From d983cef480287c108c678ec4a2185949e6203e07 Mon Sep 17 00:00:00 2001
From: TangQunzhang <tangqunzhang@huawei.com>
Date: Thu, 25 Mar 2021 11:31:22 +0800
Subject: [PATCH] Performance optimization

---
 ge/graph/build/memory/block_mem_assigner.cc       |  11 +-
 ge/graph/build/model_builder.cc                   |   3 +-
 ge/graph/load/model_manager/davinci_model.cc      |  15 +--
 ge/graph/load/model_manager/zero_copy_offset.h    |   6 +-
 tests/ut/ge/CMakeLists.txt                        |   1 +
 tests/ut/ge/graph/build/mem_assigner_unittest.cc  |  14 +++
 tests/ut/ge/graph/build/model_builder_unittest.cc | 146 ++++++++++++++++++++++
 tests/ut/ge/graph/load/davinci_model_unittest.cc  |  48 +++++++
 8 files changed, 225 insertions(+), 19 deletions(-)
 create mode 100644 tests/ut/ge/graph/build/model_builder_unittest.cc

diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc
index ae0c6e0d..1a4b62e4 100755
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -430,17 +430,14 @@ void SetLastUsedInputMemAttr(NodePtr &node, int input_index) {
   }
   auto node_op_desc = node->GetOpDesc();
   if (node_op_desc != nullptr) {
-    auto input_desc = node_op_desc->GetInputDesc(input_index);
-    if (!ge::AttrUtils::SetInt(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) {
+    auto input_desc = node_op_desc->MutableInputDesc(input_index);  // edited in place; may be null
+    if (input_desc == nullptr || !ge::AttrUtils::SetInt(*input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) {
       GELOGW("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true failed.", node_op_desc->GetName().c_str(),
              input_index);
       return;
     }
     GELOGD("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true success.", node_op_desc->GetName().c_str(),
            input_index);
-    if (node_op_desc->UpdateInputDesc(input_index, input_desc) != GRAPH_SUCCESS) {
-      GELOGW("Update %s input[%d] desc failed.", node_op_desc->GetName().c_str(), input_index);
-    }
   }
 }
 
@@ -593,9 +590,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
     }
 
     for (auto &out_anchor : n->GetAllOutDataAnchors()) {
-      GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
+      auto output_desc = node_op_desc->GetOutputDescPtr(out_anchor->GetIdx());
       int64_t size = 0;
-      GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
+      // GetOutputDescPtr may yield null; treat that like a failed size query (log it, size stays 0).
+      GE_IF_BOOL_EXEC(output_desc == nullptr || ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS,
+                      GELOGI("Get size failed"));
       GE_IF_BOOL_EXEC(size < 0,
                       GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, "
                              "maybe it is unknown shape node, Node_name:%s",
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 1a14374d..03057286 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -197,8 +197,7 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) {
     }
   }
 
-  std::string input_const_info = ToString(is_input_const);
-  GELOGD("update opdesc:%s InputConst:%s", node_op_desc->GetName().c_str(), input_const_info.c_str());
+  GELOGD("update opdesc:%s InputConst:%s", node_op_desc->GetName().c_str(), ToString(is_input_const).c_str());
   node_op_desc->SetIsInputConst(is_input_const);
 }
 
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index c053ad63..52642086 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -3683,33 +3683,34 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
   GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed.");
   is_dynamic_ = input_data.is_dynamic_batch;
 
-  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START));
+  bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn();
+  GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START));
   Status ret = CopyModelData(input_data, output_data, is_dynamic_);
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u",
                                  model_id_);
 
   GELOGD("current_data.index=%u", input_data.index);
-  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));
+  GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END));
 
   if (!task_list_.empty()) {
     GELOGD("rtModelExecute do");
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START));
     rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
     GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END));
     GELOGD("rtModelExecute end");
   }
 
   if (!is_async_mode_) {
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START));
     ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE);
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR,
         "Copy Output data to user failed.");
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END));
   }
 
   // report model time data
-  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data));
+  GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data));
   GELOGD("Model run end, model id:%u", model_id_);
   return SUCCESS;
 }
diff --git a/ge/graph/load/model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h
index 82e1bb6d..2dea5666 100644
--- a/ge/graph/load/model_manager/zero_copy_offset.h
+++ b/ge/graph/load/model_manager/zero_copy_offset.h
@@ -58,15 +58,15 @@ class ZeroCopyOffset {
   uint32_t GetDataCount() const { return data_count_; }
   uint32_t GetAddrCount() const { return addr_count_; }
   // value of *data_info_ from davinci_model
-  std::vector<std::pair<int64_t, void *>> GetDataInfo() const { return data_info_; }
+  const std::vector<std::pair<int64_t, void *>> &GetDataInfo() const { return data_info_; }
   // relative_offset from zero_copy_relative_offset_
-  std::vector<int64_t> GetRelativeOffset() const { return relative_offset_; }
+  const std::vector<int64_t> &GetRelativeOffset() const { return relative_offset_; }
   // data_size of Data/Netoutput
   int64_t GetDataSize() const { return data_size_; }
   // value of *outside_addrs_ from davinci_model
   const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
   // name of op
-  std::string GetOpName() const { return op_name_; }
+  const std::string &GetOpName() const { return op_name_; }
   const bool IsRelativeOffsetValid() const { return valid_relative_offset_; }
 
  private:
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 3caba788..eb721a72 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -764,6 +764,7 @@ set(MULTI_PARTS_TEST_FILES
     "common/ge_format_util_unittest.cc"
     "graph/variable_accelerate_ctrl_unittest.cc"
     "graph/build/logical_stream_allocator_unittest.cc"
+    "graph/build/model_builder_unittest.cc"
     "graph/build/mem_assigner_unittest.cc"
     "graph/preprocess/graph_preprocess_unittest.cc"
     "graph/manager/hcom_util_unittest.cc"
diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc
index 0024185b..5cd16399 100644
--- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc
+++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc
@@ -249,3 +249,17 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) {
   EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500);
   EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600);
 }
+
+TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) {  // F's input 0 must be tagged after AssignMemory
+  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
+  MakeGraph(graph);
+  auto node_f = graph->FindNode("F");
+  ASSERT_TRUE(node_f != nullptr);  // fail the test cleanly instead of dereferencing a missing node below
+  MemoryAssigner memory_assigner(graph);
+  map<int64_t, size_t> mem_offset;
+  size_t zero_memory_size = 0;
+  EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS);
+  int32_t flag = 0;
+  (void) ge::AttrUtils::GetInt(node_f->GetOpDesc()->GetInputDesc(0), ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, flag);
+  EXPECT_EQ(flag, 1);
+}
diff --git a/tests/ut/ge/graph/build/model_builder_unittest.cc b/tests/ut/ge/graph/build/model_builder_unittest.cc
new file mode 100644
index 00000000..496c1f3e
--- /dev/null
+++ b/tests/ut/ge/graph/build/model_builder_unittest.cc
@@ -0,0 +1,146 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "graph/anchor.h"
+#include "graph/attr_value.h"
+#include "graph/debug/ge_attr_define.h"
+#include "graph/utils/graph_utils.h"
+#include "graph/utils/node_utils.h"
+#include "graph/utils/op_desc_utils.h"
+#include "graph/utils/tensor_utils.h"
+#include "omg/omg_inner_types.h"
+#include "../passes/graph_builder_utils.h"
+
+#define protected public
+#define private public
+#include "graph/build/model_builder.h"
+#undef protected
+#undef private
+
+using namespace std;
+using namespace testing;
+using namespace ge;
+using domi::GetContext;
+
+// Test fixture: builds a ten-op graph (A..J on stream ids 0-3) that ModelBuilder tests can consume.
+class UtestModelBuilderTest : public testing::Test {
+ public:
+  // Creates an op `name` of type `type` with one input desc and one output desc (tensor size 1024
+  // each) and a single workspace entry of `wsByte`.
+  ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
+    ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
+    ge::GeTensorDesc desc_temp;
+
+    TensorUtils::SetSize(desc_temp, 1024);
+    op_def->AddInputDesc(desc_temp);
+    op_def->AddOutputDesc(desc_temp);
+
+    op_def->SetWorkspaceBytes({wsByte});
+    return op_def;
+  }
+  // Like CreateOpWithWsSize, but the output desc has size 6500 and is marked to reuse input 0
+  // (SetReuseInput / SetReuseInputIndex).
+  ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
+    ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
+    ge::GeTensorDesc desc_temp;
+
+    TensorUtils::SetSize(desc_temp, 1024);
+    op_def->AddInputDesc(desc_temp);
+
+    ge::GeTensorDesc desc_output;
+    TensorUtils::SetSize(desc_output, 6500);
+    ge::TensorUtils::SetReuseInput(desc_output, true);
+    ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
+    op_def->AddOutputDesc(desc_output);
+
+    op_def->SetWorkspaceBytes({wsByte});
+    return op_def;
+  }
+  // Adds ops A..J to `graph`, wires the data edges listed below, registers H, I and J in the
+  // context's out_nodes_map, and finally sorts the graph topologically.
+  void MakeGraph(ge::ComputeGraphPtr &graph) {
+    ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
+    op_def_a->SetStreamId(0);
+    ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000);
+    op_def_b->SetStreamId(0);
+    ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000);
+    op_def_c->SetStreamId(1);
+    ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000);
+    op_def_d->SetStreamId(2);
+    ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000);
+    op_def_e->SetStreamId(3);
+    ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000);
+    op_def_f->SetStreamId(2);
+    ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000);
+    op_def_g->SetStreamId(3);
+    ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000);
+    op_def_h->SetStreamId(2);
+    ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000);
+    op_def_i->SetStreamId(2);
+    ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT);
+    op_def_j->SetStreamId(3);
+
+    // add node
+    ge::NodePtr node_a = graph->AddNode(op_def_a);
+    ge::NodePtr node_b = graph->AddNode(op_def_b);
+    ge::NodePtr node_c = graph->AddNode(op_def_c);
+    ge::NodePtr node_d = graph->AddNode(op_def_d);
+    ge::NodePtr node_e = graph->AddNode(op_def_e);
+    ge::NodePtr node_f = graph->AddNode(op_def_f);
+    ge::NodePtr node_g = graph->AddNode(op_def_g);
+    ge::NodePtr node_h = graph->AddNode(op_def_h);
+    ge::NodePtr node_i = graph->AddNode(op_def_i);
+    ge::NodePtr node_j = graph->AddNode(op_def_j);
+
+    // add edge
+    // NOTE(review): ops have one input desc, yet G and J are wired on input anchor 1 - confirm AddEdge succeeds.
+    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_c->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_e->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_g->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_g->GetInDataAnchor(1));
+    ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_h->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_g->GetOutDataAnchor(0), node_j->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_h->GetOutDataAnchor(0), node_i->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_i->GetOutDataAnchor(0), node_j->GetInDataAnchor(1));
+
+    GetContext().out_nodes_map["H"] = {0};
+    GetContext().out_nodes_map["I"] = {0};
+    GetContext().out_nodes_map["J"] = {0};
+    graph->TopologicalSorting();
+  }
+
+ protected:
+  void SetUp() override {}
+
+  void TearDown() override { GetContext().out_nodes_map.clear(); }  // undo MakeGraph's registrations
+};
+
+// PreBuildModel should succeed on the ten-op, multi-stream graph produced by MakeGraph().
+// NOTE(review): the test name suggests PreBuildModel reaches SetInputIsConst - confirm.
+TEST_F(UtestModelBuilderTest, SetInputIsConst) {
+  Graph2SubGraphInfoList subgraphs;
+  std::map<std::string, int> stream_max_parallel_num;
+  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
+  MakeGraph(graph);  // already ends with graph->TopologicalSorting(), so no second sort here
+  ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false);
+  EXPECT_EQ(builder.PreBuildModel(), SUCCESS);
+}
diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc
index 18cc622b..3487f8ed 100644
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -942,4 +942,52 @@ TEST_F(UtestDavinciModel, simple_test_gmock) {
     EXPECT_EQ(mock_stub.func2(2, 5), 1023);
     EXPECT_EQ(mock_stub.func2(3, 5), 1023);
 }
+
+TEST_F(UtestDavinciModel, NnExecute) {  // synchronous NnExecute on a two-op (DATA + NETOUTPUT) model
+  DavinciModel model(0, nullptr);
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+  const bool saved_load_profiling = ProfilingManager::Instance().is_load_profiling_;
+  ProfilingManager::Instance().is_load_profiling_ = true;
+
+  GeModelPtr ge_model = make_shared<GeModel>();
+  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
+  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 10240);
+  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);
+  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
+  ge_model->SetModelTaskDef(model_task_def);
+
+  GeTensorDesc tensor(GeShape({1,4,128,128}), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+  {
+    OpDescPtr op_desc = CreateOpDesc("data", DATA);
+    op_desc->AddInputDesc(tensor);
+    op_desc->AddOutputDesc(tensor);
+    op_desc->SetInputOffset({1024});
+    op_desc->SetOutputOffset({1024});
+    NodePtr node = graph->AddNode(op_desc);    // op_index = 0
+  }
+  {
+    OpDescPtr op_desc = CreateOpDesc("output", NETOUTPUT);
+    op_desc->AddInputDesc(tensor);
+    op_desc->SetInputOffset({5120});
+    op_desc->SetSrcName( { "memcpy" } );
+    op_desc->SetSrcIndex( { 0 } );
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 1
+  }
+
+  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
+  EXPECT_EQ(model.Init(), SUCCESS);
+
+  rtStream_t stream = nullptr;
+  InputData input_data;
+  OutputData output_data;
+  vector<OutputTensorInfo> outputs;
+  EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS);
+  EXPECT_EQ(output_data.blobs.size(), 1);
+  EXPECT_EQ(outputs.size(), 1);
+  input_data.blobs = output_data.blobs;
+  EXPECT_EQ(input_data.blobs.size(), 1);
+  EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS);
+  ProfilingManager::Instance().is_load_profiling_ = saved_load_profiling;  // restore shared singleton state
+}
 }  // namespace ge