From 628162c7b0fc1bda9e6fa14ba574d23705c54740 Mon Sep 17 00:00:00 2001 From: weiyang Date: Tue, 29 Dec 2020 09:47:03 +0800 Subject: [PATCH] dump --- ge/graph/load/new_model_manager/model_manager.cc | 6 +- .../task_info/kernel_ex_task_info.cc | 22 +++--- .../task_info/kernel_ex_task_info.h | 2 + .../task_info/kernel_task_info.cc | 35 +++++---- .../new_model_manager/task_info/kernel_task_info.h | 4 +- .../compiledsubgraph/known_node_executor.cc | 4 ++ ge/omm/csa_interact.cc | 4 +- tests/ut/ge/graph/ge_executor_unittest.cc | 82 +++++++++++++++++++++- 8 files changed, 125 insertions(+), 34 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 80c6191b..2d3116b5 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -218,8 +218,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { auto it = model_map_.find(model_id); if (it == model_map_.end()) { - GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); - return GE_EXEC_MODEL_ID_INVALID; + GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); + return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); DestroyAicpuSession(session_id); @@ -905,7 +905,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); davinci_model->SetModelDescVersion(new_model_desc); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index e5574e47..fcea441d 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -102,9 +102,10 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return FAILED;) // 2.3 Create session GE_CHECK_NOTNULL(ModelManager::GetInstance()); - GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, - GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); - return FAILED;) + ret = ModelManager::GetInstance()->CreateAicpuSession(session_id); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(ret, "CreateAicpuSession error. session id: %lu", session_id); + return ret;) kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL); if (davinci_model_->IsKnownNode()) { @@ -132,6 +133,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) + InitDumpTask(input_output_addr, op_desc); GELOGI("KernelExTaskInfo knonw node Init Success."); return SUCCESS; } @@ -166,11 +168,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) - if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), - op_desc->GetName())) { - dump_flag_ = RT_KERNEL_DUMPFLAG; - dump_args_ = input_output_addr_; - } + InitDumpTask(input_output_addr_, op_desc); if (davinci_model_->GetOpDugReg()) { GELOGI("Op debug is open in kernel ex task info"); dump_args_ = input_output_addr_; @@ -200,6 +198,14 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return SUCCESS; } +void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { + if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), + op_desc->GetName())) { + dump_flag_ = RT_KERNEL_DUMPFLAG; + dump_args_ = input_output_addr_; + } +} + Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { auto kernel_ex_def = task_def.kernel_ex(); uint32_t op_index = kernel_ex_def.op_index(); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h index e4d3e6fd..1c7d7772 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h @@ -60,6 +60,8 @@ class KernelExTaskInfo : public TaskInfo { private: Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); + void InitDumpTask(void *addr, const OpDescPtr &op_desc); + uint32_t task_id_; uint32_t stream_id_; uint32_t dump_flag_; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 74faeb24..01d21e85 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -571,6 +571,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); GE_CHECK_NOTNULL(op_desc); if (davinci_model_->IsKnownNode()) { + args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); + InitDumpTask(offset); return SUCCESS; } @@ -635,15 +637,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne return FAILED; } skt_dump_args_ = static_cast(args_) + offset; - if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), - op_desc->GetName())) { - if (IsL1FusionOp(op_desc)) { - dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; - } else { - dump_flag_ = RT_KERNEL_DUMPFLAG; - } - dump_args_ = static_cast(args_) + offset; - } + InitDumpTask(offset); GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast(args_) + offset, "Op debug is open in TVM task info"); @@ -941,16 +935,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - - if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), - op_desc->GetName())) { - if (IsL1FusionOp(op_desc)) { - dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; - } else { - dump_flag_ = RT_KERNEL_DUMPFLAG; - } - dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); - } + InitDumpTask(sizeof(aicpu::AicpuParamHead)); if (davinci_model_->GetOpDugReg()) { GELOGI("Op debug is open in aicpu task info"); dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); @@ -964,6 +949,18 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return SUCCESS; } +void KernelTaskInfo::InitDumpTask(uint32_t offset) { + if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), + op_desc_->GetName())) { + if (IsL1FusionOp(op_desc_)) { + dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; + } else { + dump_flag_ = RT_KERNEL_DUMPFLAG; + } + dump_args_ = static_cast(args_) + offset; + } +} + Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { if (ext_info.empty()) { return SUCCESS; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 1f90ede1..2cf95ecc 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -129,7 +129,9 @@ class KernelTaskInfo : public TaskInfo { Status SuperKernelDistribute(); bool IsL1FusionOp(const OpDescPtr &op_desc); - // For super kernel + void InitDumpTask(uint32_t offset); + + // For super kernel Status SaveSKTDumpInfo(); void UpdateTaskId(); void UpdateSKTTaskId(); diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index f985a3d0..c914ac1b 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -123,6 +123,10 @@ Status KnownNodeTask::Init(TaskContext &context) { davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); } if (!load_flag_) { + auto dump_properties = context.GetDumpProperties(); + if (dump_properties.IsDumpOpen()) { + davinci_model_->SetDumpProperties(dump_properties); + } GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel failed."); load_flag_ = true; } else { diff --git a/ge/omm/csa_interact.cc b/ge/omm/csa_interact.cc index 1b33ddbd..f1d59f41 100644 --- a/ge/omm/csa_interact.cc +++ b/ge/omm/csa_interact.cc @@ -78,8 +78,8 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) { Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode, ErrorModule error_module) { if (!is_init_) { - GELOGE(INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState"); - return INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState"); + return ACL_ERROR_GE_INTERNAL_ERROR; } if ((curr_state_ == JOBSTATE_FAILED) || (curr_state_ == JOBSTATE_KILLED)) { return SUCCESS; diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 5ce619d0..2b883ff8 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -36,6 +36,9 @@ #include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/davinci_model_parser.h" #include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/new_model_manager/task_info/kernel_task_info.h" +#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "ge/common/dump/dump_properties.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" #include "proto/ge_ir.pb.h" @@ -43,8 +46,8 @@ #undef protected using namespace std; -using namespace ge; +namespace ge{ class UtestGeExecutor : public testing::Test { protected: static void InitModelDefault(ge::Model &model) { @@ -67,6 +70,45 @@ class UtestGeExecutor : public testing::Test { } }; +class DModelListener : public ge::ModelListener { + public: + DModelListener() { + }; + Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t resultCode, + std::vector &outputs) { + GELOGI("In Call back. OnComputeDone"); + return SUCCESS; + } +}; + +shared_ptr g_label_call_back(new DModelListener()); + +static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { + auto op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + op_desc->SetId(0); + + ge::AttrUtils::SetFloat(op_desc, ge::ATTR_NAME_ALPHA, 0); + ge::AttrUtils::SetFloat(op_desc, ge::ATTR_NAME_BETA, 0); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetInputOffset({}); + op_desc->SetOutputOffset({}); + + ge::AttrUtils::SetListStr(op_desc, ge::ATTR_NAME_WEIGHT_NAME, {}); + ge::AttrUtils::SetInt(op_desc, ge::POOLING_ATTR_MODE, 0); + ge::AttrUtils::SetInt(op_desc, ge::POOLING_ATTR_PAD_MODE, 0); + ge::AttrUtils::SetInt(op_desc, ge::POOLING_ATTR_DATA_MODE, 0); + ge::AttrUtils::SetInt(op_desc, ge::POOLING_ATTR_CEIL_MODE, 0); + ge::AttrUtils::SetInt(op_desc, ge::POOLING_ATTR_NAN_OPT, 0); + ge::AttrUtils::SetListInt(op_desc, ge::POOLING_ATTR_WINDOW, {}); + ge::AttrUtils::SetListInt(op_desc, ge::POOLING_ATTR_PAD, {}); + ge::AttrUtils::SetListInt(op_desc, ge::POOLING_ATTR_STRIDE, {}); + ge::AttrUtils::SetListInt(op_desc, ge::ATTR_NAME_ACTIVE_STREAM_LIST, {1, 1}); + ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0); + return op_desc; +} /* TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) { uint32_t model_id = 1; @@ -87,3 +129,41 @@ TEST_F(UtestGeExecutor, fail_CommandHandle_model_manager_HandleCommand_error) { EXPECT_EQ(ge::PARAM_INVALID, ret); } */ + +TEST_F(UtestGeExecutor, kernel_InitDumpTask) { + DavinciModel model(0, g_label_call_back); + model.om_name_ = "testom"; + model.name_ = "test"; + OpDescPtr op_desc = CreateOpDesc("test", "test"); + + std::map> model_dump_properties_map; + std::set s; + model_dump_properties_map[DUMP_ALL_MODEL] = s; + DumpProperties dp; + dp.model_dump_properties_map_ = model_dump_properties_map; + model.SetDumpProperties(dp); + + KernelTaskInfo kernel_task_info; + kernel_task_info.davinci_model_ = &model; + kernel_task_info.op_desc_ = op_desc; + kernel_task_info.InitDumpTask(0); +} + +TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { + DavinciModel model(0, g_label_call_back); + model.om_name_ = "testom"; + model.name_ = "test"; + OpDescPtr op_desc = CreateOpDesc("test", "test"); + + std::map> model_dump_properties_map; + std::set s; + model_dump_properties_map[DUMP_ALL_MODEL] = s; + DumpProperties dp; + dp.model_dump_properties_map_ = model_dump_properties_map; + model.SetDumpProperties(dp); + + KernelExTaskInfo kernel_ex_task_info; + kernel_ex_task_info.davinci_model_ = &model; + kernel_ex_task_info.InitDumpTask(nullptr, op_desc); +} +}