From a8cecada1da2d30fadedc26ce9f1859234861b7d Mon Sep 17 00:00:00 2001 From: l00444296 Date: Thu, 19 Nov 2020 20:52:20 +0800 Subject: [PATCH 1/4] Feature: handle model_exit bug --- ge/graph/load/new_model_manager/davinci_model.cc | 18 ++++++++++++++---- ge/graph/passes/flow_ctrl_pass.cc | 6 +++--- third_party/fwkacllib/inc/runtime/base.h | 1 + 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 13ffbaf0..2083c278 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2804,9 +2804,14 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("rtStreamSynchronize start."); rt_ret = rtStreamSynchronize(model->rt_model_stream_); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); - continue); // [No need to check value] + if (rt_ret == RT_ERROR_MODEL_ABORT_NORMAL) { + GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort normal normal"); + } else { + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; + (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); + continue); // [No need to check value] + } + GELOGI("rtStreamSynchronize end."); (void)ProfilingManager::Instance().StopProfiling(); // just profiling, no need to check value } @@ -2827,12 +2832,17 @@ void *DavinciModel::Run(DavinciModel *model) { if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { seq_end_flag = true; } - GE_IF_BOOL_EXEC( + if (ret == RT_ERROR_MODEL_ABORT_NORMAL) { + GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort normal normal"); + } else { + GE_IF_BOOL_EXEC( rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); // [No need to check value] CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); continue); + } + GELOGI("rtStreamSynchronize end."); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index ce114d86..5a294aa2 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -357,9 +357,9 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c return FAILED; } GE_CHK_STATUS_RET(SetStreamLabel(active_node, switch_node->GetName()), "set stream label failed"); - GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); - + GE_CHK_STATUS_RET(SetSwitchBranchNodeLabel(active_node, switch_node->GetName()), + "set switch branch node label failed"); + string model_exit_name = switch_node->GetName() + "_ModelExit"; GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { model_exit_name }), "set active label list failed"); diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index a8341e32..fcb7a627 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -103,6 +103,7 @@ typedef enum tagRtError { RT_ERROR_MODEL_EXIT, RT_ERROR_MODEL_EXIT_STREAM_UNBIND, RT_ERROR_MODEL_EXIT_ID, + RT_ERROR_MODEL_ABORT_NORMAL, RT_ERROR_EVENT_BASE = 0x07050000, RT_ERROR_EVENT_NULL, From 68b8e53e5d91572a0df9d3317a657c3ce0ed8e5b Mon Sep 17 00:00:00 2001 From: l00444296 Date: Thu, 19 Nov 2020 21:31:42 +0800 Subject: [PATCH 2/4] Feature: handle model_exit bug --- ge/graph/load/new_model_manager/davinci_model.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 2083c278..d3c37b48 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2805,7 +2805,7 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("rtStreamSynchronize start."); rt_ret = rtStreamSynchronize(model->rt_model_stream_); if (rt_ret == RT_ERROR_MODEL_ABORT_NORMAL) { - GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort normal normal"); + GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort model normal"); } else { GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); @@ -2833,7 +2833,7 @@ void *DavinciModel::Run(DavinciModel *model) { seq_end_flag = true; } if (ret == RT_ERROR_MODEL_ABORT_NORMAL) { - GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort normal normal"); + GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort model normal"); } else { GE_IF_BOOL_EXEC( rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); From bec1cf49b1731e101c9bb34b2d64ae90927a92f0 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Thu, 19 Nov 2020 21:55:51 +0800 Subject: [PATCH 3/4] Feature: handle model_exit bug --- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index d3c37b48..86bfaef1 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2832,7 +2832,7 @@ void *DavinciModel::Run(DavinciModel *model) { if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { seq_end_flag = true; } - if (ret == RT_ERROR_MODEL_ABORT_NORMAL) { + if (rt_ret == RT_ERROR_MODEL_ABORT_NORMAL) { GELOGW("rtStreamSynchronize get result : RT_ERROR_MODEL_ABORT_NORMAL, abort model normal"); } else { GE_IF_BOOL_EXEC( From 43a7bbc795566071c9fef7ecb10d0e75fe0a52f9 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Fri, 20 Nov 2020 14:14:30 +0800 Subject: [PATCH 4/4] Feature: Cancel default set aicpu engine --- ge/graph/passes/compile_nodes_pass.cc | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/ge/graph/passes/compile_nodes_pass.cc b/ge/graph/passes/compile_nodes_pass.cc index eff5ed78..990c214b 100755 --- a/ge/graph/passes/compile_nodes_pass.cc +++ b/ge/graph/passes/compile_nodes_pass.cc @@ -35,7 +35,7 @@ const char *const kAICPUKernelLibName = "aicpu_tf_kernel"; namespace ge { graphStatus CompileNodesPass::Run(ComputeGraphPtr graph) { GE_TIMESTAMP_START(CompileNodesPass); - GELOGI("[CompileNodesPass]: optimize begin."); + GELOGD("[CompileNodesPass]: optimize begin."); if (graph == nullptr) { return GRAPH_SUCCESS; } @@ -81,7 +81,7 @@ graphStatus CompileNodesPass::Run(ComputeGraphPtr graph) { GELOGE(result, "Compile op failed."); return result; } - GELOGI("[CompileNodesPass]: Optimize success."); + GELOGD("[CompileNodesPass]: Optimize success."); GE_TIMESTAMP_EVENT_END(CompileNodesPass, "OptimizeStage2::ControlAttrOptimize::CompileNodesPass"); return GRAPH_SUCCESS; } @@ -111,20 +111,24 @@ graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std: } // begin accuracy supported check if (!CheckAccuracySupport(kernel_info, instance, op_desc)) { - // if check accuracy support failed , try to go to aicpu engine - string aicpu_kernel_lib_name = kAICPUKernelLibName; - OpsKernelInfoStorePtr aicpu_kernel_info = - instance->OpsKernelManagerObj().GetOpsKernelInfoStore(aicpu_kernel_lib_name); - if (aicpu_kernel_info == nullptr) { - GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get aicpu kernel info store failed."); - return ge::GE_GRAPH_PARAM_NULLPTR; - } - if (!CheckAccuracySupport(aicpu_kernel_info, instance, op_desc)) { - GELOGE(GRAPH_FAILED, "AICPU engine does not support node:%s, type:%s , get kernel lib failed.", - node->GetName().c_str(), op_desc->GetType().c_str()); - return GRAPH_FAILED; + // if check accuracy support failed , try to go to other engine. + string kernel_name_origin = kernel_lib_name; + OpsKernelManager &ops_kernel_manager = instance->OpsKernelManagerObj(); + auto kernel_map = ops_kernel_manager.GetAllOpsKernelInfoStores(); + for (auto it = kernel_map.begin(); it != kernel_map.end(); ++it) { + string tmp_kernel_name = it->first; + if (tmp_kernel_name == kernel_name_origin) { + continue; + } + OpsKernelInfoStorePtr tmp_kernel_info = it->second; + if (CheckAccuracySupport(tmp_kernel_info, instance, op_desc)) { + kernel_lib_name = tmp_kernel_name; + return GRAPH_SUCCESS; + } } - kernel_lib_name = kAICPUKernelLibName; + GELOGE(GRAPH_FAILED, "Cannot find engine support node:%s, type:%s , get kernel lib failed.", + node->GetName().c_str(), op_desc->GetType().c_str()); + return GRAPH_FAILED; } return GRAPH_SUCCESS; }