Browse Source

Pre Merge pull request !1693 from 梁昊/master

pull/1693/MERGE
梁昊 Gitee 4 years ago
parent
commit
73e27e6f08
5 changed files with 29 additions and 9 deletions
  1. +7
    -1
      ge/graph/build/task_generator.cc
  2. +7
    -7
      ge/graph/load/model_manager/davinci_model.cc
  3. +5
    -0
      ge/graph/passes/memcpy_addr_async_pass.cc
  4. +1
    -1
      ge/hybrid/executor/worker/execution_engine.cc
  5. +9
    -0
      tests/ut/ge/graph/build/task_generator_unittest.cc

+ 7
- 1
ge/graph/build/task_generator.cc View File

@@ -743,6 +743,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP
GELOGI("Start AutoFindBpOpIndex"); GELOGI("Start AutoFindBpOpIndex");
NodePtr bp_node = nullptr; NodePtr bp_node = nullptr;
uint32_t current_idx = 0; uint32_t current_idx = 0;
uint32_t netoutput_idx = 0;
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
OpDescPtr op_desc = node->GetOpDesc(); OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
@@ -760,6 +761,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) {
if (bp_node == nullptr) { if (bp_node == nullptr) {
bp_node = node; bp_node = node;
netoutput_idx = current_idx - 1;
} }
} }
if (graph->GetNeedIteration()) { if (graph->GetNeedIteration()) {
@@ -784,9 +786,13 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP
if (bp_node == nullptr) { if (bp_node == nullptr) {
GELOGW("not find bp_node."); GELOGW("not find bp_node.");
return SUCCESS; return SUCCESS;
} else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) {
profiling_point.bp_index = netoutput_idx;
GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx);
} else {
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node);
} }


profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node);
return SUCCESS; return SUCCESS;
} }




+ 7
- 7
ge/graph/load/model_manager/davinci_model.cc View File

@@ -3727,6 +3727,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) {
binary.magic = RT_DEV_BINARY_MAGIC_ELF; binary.magic = RT_DEV_BINARY_MAGIC_ELF;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE;
} else { } else {
REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid",
TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(),
@@ -4007,13 +4009,11 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
iterator_count_++; iterator_count_++;
} }


if (!is_async_mode_) {
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START));
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR,
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_);
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END));
}
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START));
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR,
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_);
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END));


// report model time data // report model time data
GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data));


+ 5
- 0
ge/graph/passes/memcpy_addr_async_pass.cc View File

@@ -47,6 +47,11 @@ Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) {
return RT_FAILED; return RT_FAILED;
} }


if (value == RT_CAPABILITY_NOT_SUPPORT) {
GELOGW("Not support zero copy, skip it.");
return SUCCESS;
}

for (auto &node : graph->GetAllNodes()) { for (auto &node : graph->GetAllNodes()) {
auto op_desc = node->GetOpDesc(); auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); GE_IF_BOOL_EXEC(op_desc == nullptr, continue);


+ 1
- 1
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -428,7 +428,7 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const
} }


int64_t expected_size; int64_t expected_size;
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size));
(void)TensorUtils::GetSize(*tensor_desc, expected_size);
GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size);
auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize()); auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize());
if (size_diff > 0) { if (size_diff > 0) {


+ 9
- 0
tests/ut/ge/graph/build/task_generator_unittest.cc View File

@@ -86,3 +86,12 @@ TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) {
// netoutput has no data input, return default value 0 // netoutput has no data input, return default value 0
EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0);
} }

// Smoke test for TaskGenerator::AutoFindBpOpIndex: runs it on the graph
// returned by BuildGraphBpProfiling() and checks that it reports SUCCESS.
// Only the returned status is verified; the bp_index written into
// profiling_point is not asserted here.
TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) {
  auto graph = BuildGraphBpProfiling();
  TaskGenerator task_generator(nullptr, 0);
  ProfilingPoint profiling_point;
  vector<uint32_t> all_reduce_nodes;
  EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS);
}

Loading…
Cancel
Save