Browse Source

!1919 Add global step info for known subgraphs in an unknown model, and still generate the om for the remaining cases when some single-op cases fail to run atc

Merge pull request !1919 from lichun/master
tags/v1.5.1
i-robot Gitee 4 years ago
parent
commit
4a6ecb9eed
6 changed files with 92 additions and 28 deletions
  1. +16
    -9
      ge/graph/load/model_manager/davinci_model.cc
  2. +1
    -7
      ge/graph/load/model_manager/davinci_model.h
  3. +16
    -9
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
  4. +2
    -0
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
  5. +2
    -2
      ge/offline/main.cc
  6. +55
    -1
      tests/ut/ge/hybrid/known_node_executor_unittest.cc

+ 16
- 9
ge/graph/load/model_manager/davinci_model.cc View File

@@ -1480,6 +1480,11 @@ Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type
return SUCCESS;
}

/// @ingroup ge
/// @brief Record the global step buffer handed in by the caller.
/// @param [in] global_step: address saved into global_step_addr_.
/// @param [in] global_step_size: size saved into global_step_size_.
void DavinciModel::SetGlobalStep(void *global_step, uint64_t global_step_size) {
  // The two members are independent; both are simply overwritten with the caller's values.
  global_step_size_ = global_step_size;
  global_step_addr_ = global_step;
}

/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
@@ -1542,14 +1547,16 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) {
}

Status DavinciModel::InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name) {
if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) {
const auto output_sizes = ModelUtils::GetOutputSize(op_desc);
if (!output_sizes.empty()) {
global_step_size_ = output_sizes[0];
}
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc);
if (!output_addrs.empty()) {
global_step_addr_ = output_addrs[0];
if (!known_node_) {
if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) {
const auto output_sizes = ModelUtils::GetOutputSize(op_desc);
if (!output_sizes.empty()) {
global_step_size_ = output_sizes[0];
}
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc);
if (!output_addrs.empty()) {
global_step_addr_ = output_addrs[0];
}
}
}

@@ -4365,7 +4372,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str
data_dumper_.SetDeviceId(device_id);

if (known_node_) {
data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr);
data_dumper_.SetLoopAddr(global_step_addr_, nullptr, nullptr);
} else {
// set loop count addr
auto get_var_addr = [&](const string &name) -> void *{


+ 1
- 7
ge/graph/load/model_manager/davinci_model.h View File

@@ -300,6 +300,7 @@ class DavinciModel {
return op_list_.at(index);
}

// set the global step address and its size for this model
void SetGlobalStep(void *global_step, uint64_t global_step_size);
// get the global step address currently held in global_step_addr_
void *GetGlobalStep() const { return global_step_addr_; }

// get task info for profiling
@@ -498,10 +499,6 @@ class DavinciModel {
return exception_dumper_.DumpExceptionInfo(exception_infos);
}

// keep the caller's global step address in known_shape_global_step_
void SetKnownShapeGlobalStep(void *global_step) { known_shape_global_step_ = global_step; }

// forward to the data dumper's DumpShrink()
void DumperShrink() { data_dumper_.DumpShrink(); }
@@ -1108,9 +1105,6 @@ class DavinciModel {
vector<InputOutputDescInfo> output_descs_;
vector<uint32_t> output_formats_;

// known shape node for dump
void *known_shape_global_step_;

// op name to attrs mapping
std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_;
};


+ 16
- 9
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -145,8 +145,6 @@ Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *w
auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId());
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = model.GetGlobalStep();
davinci_model_->SetKnownShapeGlobalStep(global_step);
}

void *weight = nullptr;
@@ -182,6 +180,21 @@ Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons
return SUCCESS;
}

/// @brief Configure a DavinciModel that backs a known node of a hybrid model.
/// @param [in] model: hybrid model providing the model id, model name, om name and global step address.
/// @param [in] node: node whose OpDesc id becomes the sub model id.
/// @param [in/out] davinci_model: model to configure; must not be null.
/// @return SUCCESS once every setting is applied; if any input is null, GE_CHECK_NOTNULL
///         returns early with its error status and davinci_model is left untouched.
Status KnownNodeExecutor::SetDaviciModel(const HybridModel &model, const NodePtr &node,
                                         std::shared_ptr<DavinciModel> &davinci_model) const {
  // Validate everything before mutating, so a failure never leaves the model half configured.
  GE_CHECK_NOTNULL(davinci_model);
  GE_CHECK_NOTNULL(node);
  const auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  void *global_step = model.GetGlobalStep();
  GE_CHECK_NOTNULL(global_step);

  // set known node flag as true
  davinci_model->SetKnownNode(true);
  davinci_model->SetId(model.GetModelId());
  davinci_model->SetDumpModelName(model.GetModelName());
  davinci_model->SetOmName(model.GetOmName());
  // NOTE(review): the size is fixed to one int64_t; confirm this matches how the hybrid model
  // allocates its global step buffer.
  davinci_model->SetGlobalStep(global_step, sizeof(int64_t));
  // set model id as root node's node id
  davinci_model->SetSubModelId(op_desc->GetId());
  return SUCCESS;
}

Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node,
shared_ptr<NodeTask> &task) const {
GELOGI("[%s] KnownNodeExecutor::LoadTask in.", node->GetName().c_str());
@@ -199,13 +212,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
GE_CHECK_NOTNULL(davinci_model);

// set known node flag as true
davinci_model->SetKnownNode(true);
davinci_model->SetId(model.GetModelId());
davinci_model->SetDumpModelName(model.GetModelName());
davinci_model->SetOmName(model.GetOmName());
// set model id as root node's node id
davinci_model->SetSubModelId(node->GetOpDesc()->GetId());
GE_CHK_STATUS_RET_NOLOG(SetDaviciModel(model, node, davinci_model));
GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId());

GE_CHK_STATUS_RET(davinci_model->Assign(ge_model),


+ 2
- 0
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h View File

@@ -59,6 +59,8 @@ class KnownNodeExecutor : public NodeExecutor {
const NodePtr &node,
GeModelPtr &ge_model,
ComputeGraphPtr &graph);
// Apply settings taken from the hybrid model and the node to davinci_model; returns a Status.
Status SetDaviciModel(const HybridModel &model, const NodePtr &node,
std::shared_ptr<DavinciModel> &davinci_model) const;
};
} // namespace hybrid
} // namespace ge


+ 2
- 2
ge/offline/main.cc View File

@@ -1150,9 +1150,9 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
if (ret != SUCCESS) {
DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index);
ret = domi::FAILED;
break;
} else {
GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str());
}
GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str());
index += 1;
}



+ 55
- 1
tests/ut/ge/hybrid/known_node_executor_unittest.cc View File

@@ -27,6 +27,7 @@
#undef protected
#include "graph/manager/graph_mem_allocator.h"
#include "../graph/passes/graph_builder_utils.h"
#include "../inc/graph/utils/graph_utils.h"

using namespace std;
using namespace testing;
@@ -48,6 +49,34 @@ class KnownNodeTaskMock : public KnownNodeTask {
};
}

// Test helper: build an OpDesc with the given name and type, stream id 0, id 0,
// empty workspace / workspace-bytes / input-offset / output-offset lists and the
// TVM magic attribute set to "RT_DEV_BINARY_MAGIC_ELF_AIVEC".
static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
  auto desc = std::make_shared<ge::OpDesc>(name, type);
  desc->SetStreamId(0);
  desc->SetId(0);

  // No workspace and no input/output offsets are configured.
  desc->SetWorkspace({});
  desc->SetWorkspaceBytes({});
  desc->SetInputOffset({});
  desc->SetOutputOffset({});

  ge::AttrUtils::SetStr(desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");
  // NOTE(review): this only reads "support_dynamicshape" into a local that is never used
  // afterwards; a SetBool may have been intended - confirm before changing.
  bool dynamic_shape_flag = true;
  ge::AttrUtils::GetBool(desc, "support_dynamicshape", dynamic_shape_flag);
  return desc;
}

// Test helper: build a graph named "subgraph" in which a Data node feeds a NetOutput node directly.
static ComputeGraphPtr BuildDataDirectConnectGraph() {
  ge::ut::GraphBuilder graph_builder("subgraph");
  auto data_node = graph_builder.AddNode("Data", "Data", 1, 1);
  auto output_node = graph_builder.AddNode("NetOutput", "NetOutput", 1, 1);

  // Set "_parent_node_index" = 0 on NetOutput's input desc 0.
  const char *parent_index_attr = "_parent_node_index";
  (void)AttrUtils::SetInt(output_node->GetOpDesc()->MutableInputDesc(0), parent_index_attr, 0);

  graph_builder.AddDataEdge(data_node, 0, output_node, 0);
  return graph_builder.GetGraph();
}

TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) {
auto davinci_model = std::make_shared<DavinciModel>(0, nullptr);
davinci_model->SetDeviceId(0);
@@ -88,4 +117,29 @@ TEST_F(UnknownNodeExecutorTest, TestParseAttrForAllocatingOutputs) {
ASSERT_EQ(node_item.ref_outputs[1], const_node);
ASSERT_EQ(node_item.reuse_inputs.size(), 1);
ASSERT_EQ(node_item.reuse_inputs[0], 0);
}
}

// Verify that KnownNodeExecutor::SetDaviciModel hands the hybrid model's global step
// address over to the DavinciModel.
TEST_F(UnknownNodeExecutorTest, TestSetGlobalStep) {
  // Root graph holding one PartitionedCall node with a single attached subgraph.
  OpDescPtr op_desc = CreateOpDesc("PartitionedCall", "PartitionedCall");
  auto root_graph = make_shared<ComputeGraph>("root_graph");
  auto node = root_graph->AddNode(op_desc);
  ASSERT_NE(node, nullptr);
  node->SetOwnerComputeGraph(root_graph);
  auto sub_graph = BuildDataDirectConnectGraph();
  sub_graph->SetParentGraph(root_graph);
  sub_graph->SetParentNode(node);
  node->GetOpDesc()->AddSubgraphName("subgraph");
  node->GetOpDesc()->SetSubgraphInstanceName(0, "subgraph");
  root_graph->AddSubgraph("subgraph", sub_graph);

  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(root_graph);
  HybridModel hybrid_model(ge_root_model);

  // NOTE(review): step_id is never released in this test. Whether TensorBuffer takes ownership
  // of the memory passed to Create is not visible from this file - confirm before adding a delete.
  auto *step_id = new int64_t[1];
  step_id[0] = 520;
  // Call the factory through the class name instead of through a null unique_ptr.
  std::unique_ptr<TensorBuffer> tensor_buf = TensorBuffer::Create(static_cast<void *>(step_id), sizeof(int64_t));
  ASSERT_NE(tensor_buf, nullptr);
  hybrid_model.global_step_ = std::move(tensor_buf);

  KnownNodeExecutor known_node_executor;
  std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
  ASSERT_NE(davinci_model, nullptr);
  // A failing status must fail the test instead of being silently dropped.
  ASSERT_EQ(known_node_executor.SetDaviciModel(hybrid_model, node, davinci_model), SUCCESS);
  // Guard the dereference below so a missing address is reported as a failure, not a crash.
  ASSERT_NE(davinci_model->global_step_addr_, nullptr);
  EXPECT_EQ(*(static_cast<int64_t *>(davinci_model->global_step_addr_)), 520);
}

Loading…
Cancel
Save