@@ -743,6 +743,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
GELOGI("Start AutoFindBpOpIndex"); | GELOGI("Start AutoFindBpOpIndex"); | ||||
NodePtr bp_node = nullptr; | NodePtr bp_node = nullptr; | ||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
uint32_t netoutput_idx = 0; | |||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
@@ -760,6 +761,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | ||||
if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
bp_node = node; | bp_node = node; | ||||
netoutput_idx = current_idx - 1; | |||||
} | } | ||||
} | } | ||||
if (graph->GetNeedIteration()) { | if (graph->GetNeedIteration()) { | ||||
@@ -784,9 +786,13 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
GELOGW("not find bp_node."); | GELOGW("not find bp_node."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { | |||||
profiling_point.bp_index = netoutput_idx; | |||||
GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); | |||||
} else { | |||||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||||
} | } | ||||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -3725,6 +3725,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||||
binary.magic = RT_DEV_BINARY_MAGIC_ELF; | binary.magic = RT_DEV_BINARY_MAGIC_ELF; | ||||
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | ||||
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | ||||
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") { | |||||
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE; | |||||
} else { | } else { | ||||
REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", | REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", | ||||
TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), | TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), | ||||
@@ -4010,13 +4012,11 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
iterator_count_++; | iterator_count_++; | ||||
} | } | ||||
if (!is_async_mode_) { | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
} | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
// report model time data | // report model time data | ||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); | ||||
@@ -47,6 +47,11 @@ Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { | |||||
return RT_FAILED; | return RT_FAILED; | ||||
} | } | ||||
if (value == RT_CAPABILITY_NOT_SUPPORT) { | |||||
GELOGW("Not support zero copy, skip it."); | |||||
return SUCCESS; | |||||
} | |||||
for (auto &node : graph->GetAllNodes()) { | for (auto &node : graph->GetAllNodes()) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | ||||
@@ -428,7 +428,7 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const | |||||
} | } | ||||
int64_t expected_size; | int64_t expected_size; | ||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size)); | |||||
(void)TensorUtils::GetSize(*tensor_desc, expected_size); | |||||
GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); | GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); | ||||
auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize()); | auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize()); | ||||
if (size_diff > 0) { | if (size_diff > 0) { | ||||
@@ -86,3 +86,12 @@ TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) { | |||||
// netoutput has no data input, return default value 0 | // netoutput has no data input, return default value 0 | ||||
EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); | EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); | ||||
} | } | ||||
// Smoke test for TaskGenerator::AutoFindBpOpIndex: runs it on the graph
// produced by BuildGraphBpProfiling() and checks only the returned status.
TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) { | |||||
auto graph = BuildGraphBpProfiling(); | |||||
TaskGenerator task_generator(nullptr, 0); | |||||
// NOTE(review): net_output is looked up but never used below; consider
// asserting on it or dropping the lookup.
auto net_output = graph->FindNode("netoutput"); | |||||
ProfilingPoint profiling_point; | |||||
vector<uint32_t> all_reduce_nodes; | |||||
// Only the status code is verified; the resulting profiling_point.bp_index
// and the contents of all_reduce_nodes are not checked by this test.
EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); | |||||
} | |||||