Merge pull request !1919 from lichun/mastertags/v1.5.1
@@ -1480,6 +1480,11 @@ Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type | |||
return SUCCESS; | |||
} | |||
/// @ingroup ge
/// @brief Record the global step buffer supplied by the caller.
/// @param [in] global_step: address stored into global_step_addr_.
/// @param [in] global_step_size: size stored into global_step_size_.
void DavinciModel::SetGlobalStep(void *global_step, uint64_t global_step_size) {
  // The two members are independent; both are simply overwritten.
  global_step_size_ = global_step_size;
  global_step_addr_ = global_step;
}
/// @ingroup ge | |||
/// @brief LabelSet Op Initialize. | |||
/// @param [in] op_desc: LabelSet Op descriptor. | |||
@@ -1542,14 +1547,16 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||
} | |||
Status DavinciModel::InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name) { | |||
if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) { | |||
const auto output_sizes = ModelUtils::GetOutputSize(op_desc); | |||
if (!output_sizes.empty()) { | |||
global_step_size_ = output_sizes[0]; | |||
} | |||
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | |||
if (!output_addrs.empty()) { | |||
global_step_addr_ = output_addrs[0]; | |||
if (!known_node_) { | |||
if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) { | |||
const auto output_sizes = ModelUtils::GetOutputSize(op_desc); | |||
if (!output_sizes.empty()) { | |||
global_step_size_ = output_sizes[0]; | |||
} | |||
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | |||
if (!output_addrs.empty()) { | |||
global_step_addr_ = output_addrs[0]; | |||
} | |||
} | |||
} | |||
@@ -4365,7 +4372,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str | |||
data_dumper_.SetDeviceId(device_id); | |||
if (known_node_) { | |||
data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr); | |||
data_dumper_.SetLoopAddr(global_step_addr_, nullptr, nullptr); | |||
} else { | |||
// set loop count addr | |||
auto get_var_addr = [&](const string &name) -> void *{ | |||
@@ -300,6 +300,7 @@ class DavinciModel { | |||
return op_list_.at(index); | |||
} | |||
void SetGlobalStep(void *global_step, uint64_t global_step_size); | |||
// Returns the address currently stored in global_step_addr_.
void *GetGlobalStep() const {
  return global_step_addr_;
}
// get task info for profiling | |||
@@ -498,10 +499,6 @@ class DavinciModel { | |||
return exception_dumper_.DumpExceptionInfo(exception_infos); | |||
} | |||
// Stores the caller-provided pointer in known_shape_global_step_.
void SetKnownShapeGlobalStep(void *global_step) { known_shape_global_step_ = global_step; }
// Forwards to the data dumper's DumpShrink().
void DumperShrink() { data_dumper_.DumpShrink(); }
@@ -1108,9 +1105,6 @@ class DavinciModel { | |||
vector<InputOutputDescInfo> output_descs_; | |||
vector<uint32_t> output_formats_; | |||
// known shape node for dump | |||
void *known_shape_global_step_; | |||
// op name to attrs mapping | |||
std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_; | |||
}; | |||
@@ -145,8 +145,6 @@ Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *w | |||
auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId()); | |||
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||
davinci_model_->SetDumpProperties(dump_properties); | |||
void *global_step = model.GetGlobalStep(); | |||
davinci_model_->SetKnownShapeGlobalStep(global_step); | |||
} | |||
void *weight = nullptr; | |||
@@ -182,6 +180,21 @@ Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons | |||
return SUCCESS; | |||
} | |||
/// @brief Configure a freshly created DavinciModel for execution as a known-shape node.
/// @param [in] model: hybrid model supplying the model id, names and the global step buffer.
/// @param [in] node: node whose OpDesc id becomes the sub model id.
/// @param [in,out] davinci_model: model to configure.
/// @return SUCCESS once every property has been applied; the failure status produced by
///         GE_CHECK_NOTNULL when any required pointer is null.
Status KnownNodeExecutor::SetDaviciModel(const HybridModel &model, const NodePtr &node,
                                         std::shared_ptr<DavinciModel> &davinci_model) const {
  // Validate every pointer up front so a failure never leaves the model half-configured.
  GE_CHECK_NOTNULL(davinci_model);
  GE_CHECK_NOTNULL(node);
  const auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  void *global_step = model.GetGlobalStep();
  GE_CHECK_NOTNULL(global_step);

  // set known node flag as true
  davinci_model->SetKnownNode(true);
  davinci_model->SetId(model.GetModelId());
  davinci_model->SetDumpModelName(model.GetModelName());
  davinci_model->SetOmName(model.GetOmName());
  // NOTE(review): the step buffer is registered as one int64_t - confirm against the
  // size the hybrid model actually allocates for its global step.
  davinci_model->SetGlobalStep(global_step, sizeof(int64_t));
  // set model id as root node's node id
  davinci_model->SetSubModelId(op_desc->GetId());
  return SUCCESS;
}
Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, | |||
shared_ptr<NodeTask> &task) const { | |||
GELOGI("[%s] KnownNodeExecutor::LoadTask in.", node->GetName().c_str()); | |||
@@ -199,13 +212,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node | |||
std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr); | |||
GE_CHECK_NOTNULL(davinci_model); | |||
// set known node flag as true | |||
davinci_model->SetKnownNode(true); | |||
davinci_model->SetId(model.GetModelId()); | |||
davinci_model->SetDumpModelName(model.GetModelName()); | |||
davinci_model->SetOmName(model.GetOmName()); | |||
// set model id as root node's node id | |||
davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); | |||
GE_CHK_STATUS_RET_NOLOG(SetDaviciModel(model, node, davinci_model)); | |||
GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId()); | |||
GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), | |||
@@ -59,6 +59,8 @@ class KnownNodeExecutor : public NodeExecutor { | |||
const NodePtr &node, | |||
GeModelPtr &ge_model, | |||
ComputeGraphPtr &graph); | |||
Status SetDaviciModel(const HybridModel &model, const NodePtr &node, | |||
std::shared_ptr<DavinciModel> &davinci_model) const; | |||
}; | |||
} // namespace hybrid | |||
} // namespace ge | |||
@@ -1150,9 +1150,9 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||
if (ret != SUCCESS) { | |||
DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | |||
ret = domi::FAILED; | |||
break; | |||
} else { | |||
GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str()); | |||
} | |||
GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str()); | |||
index += 1; | |||
} | |||
@@ -27,6 +27,7 @@ | |||
#undef protected | |||
#include "graph/manager/graph_mem_allocator.h" | |||
#include "../graph/passes/graph_builder_utils.h" | |||
#include "../inc/graph/utils/graph_utils.h" | |||
using namespace std; | |||
using namespace testing; | |||
@@ -48,6 +49,34 @@ class KnownNodeTaskMock : public KnownNodeTask { | |||
}; | |||
} | |||
// Test helper: creates an OpDesc with the given name and type, zeroed stream/op ids,
// empty workspace and offset lists, and the TVM magic attribute set.
static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
  ge::OpDescPtr desc = std::make_shared<ge::OpDesc>(name, type);

  desc->SetStreamId(0);
  desc->SetId(0);
  desc->SetWorkspace({});
  desc->SetWorkspaceBytes({});
  desc->SetInputOffset({});
  desc->SetOutputOffset({});

  ge::AttrUtils::SetStr(desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");

  // NOTE(review): the value fetched here is never used afterwards; presumably SetBool
  // was intended - confirm before changing. The call is kept as in the original.
  bool dynamic_shape_flag = true;
  ge::AttrUtils::GetBool(desc, "support_dynamicshape", dynamic_shape_flag);

  return desc;
}
// Test helper: builds a graph named "subgraph" in which a Data node's output 0 feeds
// input 0 of a NetOutput node, with "_parent_node_index" = 0 on that input desc.
static ComputeGraphPtr BuildDataDirectConnectGraph() {
  ge::ut::GraphBuilder graph_builder("subgraph");

  auto data_node = graph_builder.AddNode("Data", "Data", 1, 1);
  auto output_node = graph_builder.AddNode("NetOutput", "NetOutput", 1, 1);

  const char *parent_index_attr = "_parent_node_index";
  auto output_input_desc = output_node->GetOpDesc()->MutableInputDesc(0);
  (void)AttrUtils::SetInt(output_input_desc, parent_index_attr, 0);

  graph_builder.AddDataEdge(data_node, 0, output_node, 0);
  return graph_builder.GetGraph();
}
TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) { | |||
auto davinci_model = std::make_shared<DavinciModel>(0, nullptr); | |||
davinci_model->SetDeviceId(0); | |||
@@ -88,4 +117,29 @@ TEST_F(UnknownNodeExecutorTest, TestParseAttrForAllocatingOutputs) { | |||
ASSERT_EQ(node_item.ref_outputs[1], const_node); | |||
ASSERT_EQ(node_item.reuse_inputs.size(), 1); | |||
ASSERT_EQ(node_item.reuse_inputs[0], 0); | |||
} | |||
} | |||
// Verifies that KnownNodeExecutor::SetDaviciModel hands the hybrid model's global step
// buffer to the DavinciModel: after the call, global_step_addr_ must point at the
// value that was placed in the hybrid model.
TEST_F(UnknownNodeExecutorTest, TestSetGlobalStep) {
  // Backing storage for the global step. It is declared first so that it outlives the
  // hybrid model and the DavinciModel that point at it, and nothing is leaked.
  // NOTE(review): relies on TensorBuffer::Create(void *, size_t) wrapping the buffer
  // without taking ownership - confirm against the TensorBuffer implementation.
  int64_t step_id = 520;

  // Root graph holding a single PartitionedCall node that owns one subgraph.
  OpDescPtr op_desc = CreateOpDesc("PartitionedCall", "PartitionedCall");
  auto root_graph = make_shared<ComputeGraph>("root_graph");
  auto node = root_graph->AddNode(op_desc);
  ASSERT_NE(node, nullptr);
  node->SetOwnerComputeGraph(root_graph);

  auto sub_graph = BuildDataDirectConnectGraph();
  ASSERT_NE(sub_graph, nullptr);
  sub_graph->SetParentGraph(root_graph);
  sub_graph->SetParentNode(node);
  node->GetOpDesc()->AddSubgraphName("subgraph");
  node->GetOpDesc()->SetSubgraphInstanceName(0, "subgraph");
  root_graph->AddSubgraph("subgraph", sub_graph);

  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(root_graph);
  HybridModel hybrid_model(ge_root_model);

  // Call the factory through the class name; the original invoked it through a null
  // unique_ptr, which dereferences a null pointer.
  hybrid_model.global_step_ = TensorBuffer::Create(static_cast<void *>(&step_id), sizeof(step_id));
  ASSERT_NE(hybrid_model.global_step_, nullptr);

  KnownNodeExecutor known_node_executor;
  std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
  ASSERT_NE(davinci_model, nullptr);

  // The returned status must be checked; a failure here would otherwise surface only
  // as a null dereference in the expectation below.
  ASSERT_EQ(known_node_executor.SetDaviciModel(hybrid_model, node, davinci_model), SUCCESS);

  ASSERT_NE(davinci_model->global_step_addr_, nullptr);
  EXPECT_EQ(*(static_cast<int64_t *>(davinci_model->global_step_addr_)), 520);
}