From 4931c4fa1e7bb0477a2baf6b5835064518a54096 Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Fri, 2 Apr 2021 17:52:03 +0800 Subject: [PATCH 01/18] modify dump content --- ge/common/dump/dump_op.cc | 39 +++++++++++--- ge/common/dump/dump_op.h | 4 +- ge/common/dump/dump_properties.cc | 15 +++--- ge/graph/load/model_manager/davinci_model.cc | 23 +++++--- ge/graph/load/model_manager/davinci_model.h | 13 ++++- ge/graph/load/model_manager/model_manager.cc | 8 +-- .../model_manager/task_info/end_graph_task_info.cc | 5 +- .../model_manager/task_info/kernel_ex_task_info.cc | 3 +- .../model_manager/task_info/kernel_task_info.cc | 8 +-- ge/hybrid/executor/hybrid_model_async_executor.cc | 4 -- ge/hybrid/executor/hybrid_model_async_executor.h | 3 -- ge/hybrid/executor/worker/execution_engine.cc | 36 +++++++------ ge/hybrid/hybrid_davinci_model.cc | 9 ++-- ge/hybrid/hybrid_davinci_model.h | 2 +- ge/hybrid/hybrid_davinci_model_stub.cc | 2 +- ge/hybrid/model/hybrid_model.h | 4 +- .../compiledsubgraph/known_node_executor.cc | 1 + tests/ut/ge/CMakeLists.txt | 2 + tests/ut/ge/common/dump_op_unittest.cc | 61 ++++++++++++++++++++++ 19 files changed, 169 insertions(+), 73 deletions(-) create mode 100644 tests/ut/ge/common/dump_op_unittest.cc diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index 0becbdc8..4456383c 100755 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -20,6 +20,7 @@ #include "common/ge/datatype_util.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" +#include "framework/common/types.h" #include "graph/anchor.h" #include "graph/ge_tensor.h" #include "graph/op_desc.h" @@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond loop_cond_ = reinterpret_cast(loop_cond); } -void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) { +void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, + uint32_t dynamic_model_id) { dynamic_model_name_ = dynamic_model_name; + dynamic_om_name_ = dynamic_om_name; dynamic_model_id_ = dynamic_model_id; } @@ -200,6 +203,28 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { return SUCCESS; } +Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) { + std::set model_list = dump_properties_.GetAllDumpModel(); + bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end(); + bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end(); + std::string dump_model_name = not_find_by_omname ? 
dynamic_model_name_ : dynamic_om_name_; + if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { + if (not_find_by_omname && not_find_by_modelname) { + std::string model_list_str; + for (auto &model : model_list) { + model_list_str += "[" + model + "]."; + } + GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str()); + return FAILED; + } + } + if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) { + GELOGD("Dump model name is %s", dump_model_name.c_str()); + op_mapping_info.set_model_name(dump_model_name); + } + return SUCCESS; +} + Status DumpOp::LaunchDumpOp() { GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); int32_t device_id = 0; @@ -209,8 +234,7 @@ Status DumpOp::LaunchDumpOp() { return RT_ERROR_TO_GE_STATUS(rt_ret); } if (device_id < 0) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, - "Check device_id failed, device_id = %d, which should be not less than 0.", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.", device_id); return ACL_ERROR_GE_INTERNAL_ERROR; } @@ -220,11 +244,12 @@ Status DumpOp::LaunchDumpOp() { op_mapping_info.set_flag(kAicpuLoadFlag); op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); op_mapping_info.set_model_id(dynamic_model_id_); - if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) { - op_mapping_info.set_model_name(dynamic_model_name_); + + if (SetDumpModelName(op_mapping_info) != SUCCESS) { + return SUCCESS; } SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); - GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), + GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(), dump_path.c_str()); uint32_t task_id = 0; uint32_t stream_id = 0; @@ -273,4 +298,4 @@ Status DumpOp::LaunchDumpOp() { } return SUCCESS; } -} // namesapce ge +} // namespace ge diff --git a/ge/common/dump/dump_op.h b/ge/common/dump/dump_op.h index d59962e6..4d322bee 100755 --- a/ge/common/dump/dump_op.h +++ b/ge/common/dump/dump_op.h @@ -34,12 +34,13 @@ class DumpOp { vector output_addrs, rtStream_t stream); Status LaunchDumpOp(); void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); - void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id); + void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id); private: Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info); Status DumpOutput(aicpu::dump::Task &task); Status DumpInput(aicpu::dump::Task &task); + Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info); DumpProperties dump_properties_; OpDescPtr op_desc_; @@ -54,6 +55,7 @@ class DumpOp { uintptr_t loop_cond_; std::string dynamic_model_name_; + std::string dynamic_om_name_; std::uint32_t dynamic_model_id_; }; } // namespace ge diff --git a/ge/common/dump/dump_properties.cc b/ge/common/dump/dump_properties.cc index 3fbfd16b..65b1e89a 100644 --- a/ge/common/dump/dump_properties.cc +++ b/ge/common/dump/dump_properties.cc @@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on"; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); -} +} // namespace namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) 
{ CopyFrom(other); } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( - const DumpProperties &other) { + const DumpProperties &other) { CopyFrom(other); return *this; } @@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti // The following is the new dump scenario of the fusion operator FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( - const std::string &model, const std::set &layers) { + const std::string &model, const std::set &layers) { for (const std::string &layer : layers) { GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); } @@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpPrope } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpProperties::GetPropertyValue( - const std::string &model) const { + const std::string &model) const { auto iter = model_dump_properties_map_.find(model); if (iter != model_dump_properties_map_.end()) { return iter->second; @@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpPrope } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( - const std::string &model, const std::string &om_name, const std::string &op_name) const { + const std::string &model, const std::string &om_name, const std::string &op_name) const { // if dump all + GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str()); if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { return true; } @@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( - const std::string &dump_op_switch) { + const std::string &dump_op_switch) { dump_op_switch_ = dump_op_switch; } @@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() { GELOGI("ge.exec.enableDumpDebug is false or is not set."); } } -} // namespace +} // namespace ge diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index fc861a24..08ff3cc3 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -3067,9 +3067,8 @@ Status DavinciModel::DistributeTask() { task_def.kernel_ex().op_index()); OpDescPtr op = GetOpByIndex(op_index); GE_CHECK_NOTNULL(op); - if (reinterpret_cast(task->GetDumpArgs()) != nullptr) { - bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); + bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo(); if (call_dump || is_op_debug_reg_) { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } @@ -3089,11 +3088,16 @@ Status DavinciModel::DistributeTask() { return SUCCESS; } -void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { +bool DavinciModel::ModelNeedDump() { auto all_dump_model = GetDumpProperties().GetAllDumpModel(); - bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); - bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { + bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || + all_dump_model.find(dump_model_name_) != 
all_dump_model.end() || + all_dump_model.find(om_name_) != all_dump_model.end(); + return ret; +} + +void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { + if (ModelNeedDump()) { GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); data_dumper_.SaveEndGraphId(task_id, stream_id); } @@ -3893,7 +3897,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) } void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map &variable_by_name) { - data_dumper_.SetModelName(name_); + if(dump_model_name_.empty()) { + dump_model_name_ = name_; + } + data_dumper_.SetModelName(dump_model_name_); data_dumper_.SetModelId(model_id_); data_dumper_.SetOmName(om_name_); data_dumper_.SetComputeGraph(graph); @@ -4082,7 +4089,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { Status DavinciModel::InitL1DataDumperArgs() { auto all_dump_model = GetDumpProperties().GetAllDumpModel(); bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end(); - bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end(); + bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end(); bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name; if (dump_l1fusion_op) { diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index e4b73d7e..00baab9e 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -248,7 +248,10 @@ class DavinciModel { string Name() const { return name_; } // om_name - string OmName() const { return om_name_; } + const string &OmName() const { return om_name_; } + + // dump_model_name + const string &DumpModelName() const { return dump_model_name_; } // version uint32_t Version() const { return version_; } @@ -483,6 +486,12 @@ class DavinciModel { data_dumper_.DumpShrink(); } + bool OpNeedDump(const string &op_name) { + return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name); + } + + bool ModelNeedDump(); + void SetEndGraphId(uint32_t task_id, uint32_t stream_id); DavinciModel &operator=(const DavinciModel &model) = delete; @@ -542,6 +551,7 @@ class DavinciModel { // om file name void SetOmName(const string &om_name) { om_name_ = om_name; } + void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; } void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } @@ -888,6 +898,7 @@ class DavinciModel { // used for inference data dump string om_name_; + string dump_model_name_; uint32_t version_; GeModelPtr ge_model_; // release after DavinciModel::Init diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index 27cbd526..794d8dea 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector &ge_root_model, const shared_ptr &listener) { auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); @@ -279,7 +279,7 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string hybrid_model->SetListener(listener); 
hybrid_model->SetModelId(model_id); hybrid_model->SetDeviceId(GetContext().DeviceId()); - hybrid_model->SetModelName(model_name); + hybrid_model->SetOmName(om_name); GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id); auto shared_model = std::shared_ptr(hybrid_model.release()); InsertModel(model_id, shared_model); @@ -309,9 +309,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrGetSubgraphInstanceNameToModel(); - string model_name = ""; + string om_name; if (IsNeedHybridLoad(*ge_root_model)) { - return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener); + return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener); } mmTimespec timespec = mmGetTickCount(); diff --git a/ge/graph/load/model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc index c306c650..5bf436e7 100644 --- a/ge/graph/load/model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin Status EndGraphTaskInfo::Distribute() { GELOGI("EndGraphTaskInfo Distribute Start."); GE_CHECK_NOTNULL(davinci_model_); - auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || - all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || - all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { + if (davinci_model_->ModelNeedDump()) { GELOGI("Start to call rtEndGraphEx"); rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 2317f961..108fe78e 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin } void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { - if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), - op_desc->GetName())) { + if (davinci_model_->OpNeedDump(op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = addr; } diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index e7dca2a1..31ed7889 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -409,10 +409,7 @@ Status KernelTaskInfo::Distribute() { call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); // l1 fusion enable and env flag open (kCloseSkt for skt debug) bool open_dump = false; - auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || - all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || - all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { + if (davinci_model_->ModelNeedDump()) { open_dump = true; } if (call_skt && (env_flag != kCloseSkt) && !open_dump) { @@ -980,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const 
domi::KernelDef &k } void KernelTaskInfo::InitDumpTask(uint32_t offset) { - if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), - op_desc_->GetName())) { + if (davinci_model_->OpNeedDump(op_desc_->GetName())) { if (IsL1FusionOp(op_desc_)) { dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; } else { diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 454bb942..249b15ec 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -46,10 +46,6 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) { model_id_ = model_id; } -void HybridModelAsyncExecutor::SetModelName(const string &model_name) { - om_name_ = model_name; -} - Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr &data) { GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL, "Data queue is full, please call again later, model_id %u ", model_id_); diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 69d8a3f4..b6942b10 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -51,8 +51,6 @@ class HybridModelAsyncExecutor { void SetModelId(uint32_t model_id); - void SetModelName(const string &model_name); - Status Stop(); Status EnqueueData(const std::shared_ptr &data); @@ -97,7 +95,6 @@ class HybridModelAsyncExecutor { std::map input_tensor_desc_; std::vector is_input_dynamic_; std::shared_ptr listener_; - string om_name_; DataDumper data_dumper_; bool is_op_debug_reg_ = false; OpdebugRegister op_debug_register_; diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index de3bdc37..bb06acd8 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -206,36 +206,38 @@ Status NodeDoneCallback::DumpDynamicNode() { return PARAM_INVALID; } auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(graph_context_); + const HybridModel *model = graph_context_->model; + GE_CHECK_NOTNULL(model); + std::string dynamic_model_name = model->GetModelName(); + std::string dynamic_om_name = model->GetOmName(); + uint32_t model_id = model->GetModelId(); + if(!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) { + GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str()); + return SUCCESS; + } + dump_op_.SetDynamicModelInfo(dynamic_model_name, dynamic_om_name, model_id); + auto stream = context_->GetStream(); vector input_addrs; vector output_addrs; for (int i = 0; i < context_->NumInputs(); i++) { auto tensor_value = context_->GetInput(i); GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); - uint64_t input_addr = reinterpret_cast(tensor_value->GetData()); + uintptr_t input_addr = reinterpret_cast(tensor_value->GetData()); input_addrs.emplace_back(input_addr); } for (int j = 0; j < context_->NumOutputs(); j++) { auto tensor_value = context_->GetOutput(j); GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); - uint64_t output_addr = reinterpret_cast(tensor_value->GetData()); + uintptr_t output_addr = reinterpret_cast(tensor_value->GetData()); output_addrs.emplace_back(output_addr); } - - dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, 
output_addrs, stream); - - GE_CHECK_NOTNULL(graph_context_); - const HybridModel *model = graph_context_->model; - GE_CHECK_NOTNULL(model); - std::string dynamic_model_name = model->GetModelName(); - uint32_t model_id = model->GetModelId(); - dump_op_.SetDynamicModelInfo(dynamic_model_name, model_id); - - void *loop_per_iter = nullptr; - TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER); - if (varible_loop_per_iter != nullptr) { - loop_per_iter = const_cast(varible_loop_per_iter->GetData()); - } + void *loop_per_iter = nullptr; + TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER); + if (varible_loop_per_iter != nullptr) { + loop_per_iter = const_cast(varible_loop_per_iter->GetData()); + } void *loop_cond = nullptr; TensorValue *varible_loop_cond = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND); diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index 430dfa85..c741fe7e 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -76,9 +76,8 @@ class HybridDavinciModel::Impl { executor_.SetDeviceId(device_id); } - void SetModelName(const string &model_name) { - model_.SetModelName(model_name); - executor_.SetModelName(model_name); + void SetOmName(const string &model_name) { + model_.SetOmName(model_name); } uint64_t GetSessionId() { @@ -181,9 +180,9 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) { } } -void HybridDavinciModel::SetModelName(const string &model_name) { +void HybridDavinciModel::SetOmName(const string &om_name) { if (impl_ != nullptr) { - impl_->SetModelName(model_name); + impl_->SetOmName(om_name); } } diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 74dca9ed..3b3473ff 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -57,7 +57,7 @@ class HybridDavinciModel { void SetDeviceId(uint32_t device_id); - void SetModelName(const string &model_name); + void SetOmName(const string &om_name); uint64_t GetSessionId(); diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc index 5b10fb7a..67a7a101 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -61,7 +61,7 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) { void HybridDavinciModel::SetDeviceId(uint32_t device_id) { } -void HybridDavinciModel::SetModelName(const string &model_name) { +void HybridDavinciModel::SetOmName(const string &om_name) { } uint64_t HybridDavinciModel::GetSessionId() { diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 62095d42..484f531b 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -69,8 +69,8 @@ class HybridModel { model_id_ = model_id; } - void SetModelName(const string &model_name) { - om_name_ = model_name; + void SetOmName(const string &om_name) { + om_name_ = om_name; } const std::string &GetOmName() const { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 1c46db20..0f07cc96 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -171,6 +171,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node // set known node flag as true davinci_model->SetKnownNode(true); 
davinci_model->SetId(model.GetModelId()); + davinci_model->SetDumpModelName(model.GetModelName()); davinci_model->SetOmName(model.GetOmName()); // set model id as root node's node id davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index c30c7170..9e9a6108 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -166,6 +166,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" + "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" @@ -742,6 +743,7 @@ set(MULTI_PARTS_TEST_FILES "graph/transop_util_unittest.cc" "common/datatype_transfer_unittest.cc" "common/dump_manager_unittest.cc" + "common/dump_op_unittest.cc" "common/opdebug_register_unittest.cc" "common/format_transfer_unittest.cc" "common/format_transfer_transpose_unittest.cc" diff --git a/tests/ut/ge/common/dump_op_unittest.cc b/tests/ut/ge/common/dump_op_unittest.cc new file mode 100644 index 00000000..9007ac95 --- /dev/null +++ b/tests/ut/ge/common/dump_op_unittest.cc @@ -0,0 +1,61 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#define protected public +#define private public +#include "common/dump/dump_op.h" +#include "common/debug/log.h" +#include "common/ge_inner_error_codes.h" +#include "common/dump/dump_properties.h" +#undef private +#undef protected + +namespace ge { +class UTEST_dump_op : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UTEST_dump_op, launch_dump_op_success) { + DumpOp dump_op; + DumpProperties dump_properties; + OpDescPtr op_desc = std::make_shared("GatherV2", "GatherV2"); + std::set temp; + dump_properties.model_dump_properties_map_.emplace("model1", temp); + dump_properties.enable_dump_ = "1"; + dump_op.SetDynamicModelInfo("model1", "model2", 1); + dump_op.SetDumpInfo(dump_properties, op_desc, {}, {}, nullptr); + auto ret = dump_op.LaunchDumpOp(); + EXPECT_EQ(ret, ge::SUCCESS); +} + +TEST_F(UTEST_dump_op, launch_dump_op_success_2) { + DumpOp dump_op; + DumpProperties dump_properties; + OpDescPtr op_desc = std::make_shared("GatherV2", "GatherV2"); + std::set temp; + dump_properties.model_dump_properties_map_.emplace("model1", temp); + dump_properties.enable_dump_ = "1"; + dump_op.SetDynamicModelInfo("modle2", "model2", 1); + dump_op.SetDumpInfo(dump_properties, op_desc, {}, {}, nullptr); + auto ret = dump_op.LaunchDumpOp(); + EXPECT_EQ(ret, ge::SUCCESS); +} + +} // namespace ge \ No newline at end of file From b1822cc73c4ef8710b54fd66e117617f38eea308 Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Sat, 3 Apr 2021 11:06:38 +0800 Subject: [PATCH 02/18] modify set dump in c77 --- ge/common/dump/dump_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc index a659d9c6..61a60afd 100644 --- a/ge/common/dump/dump_manager.cc +++ b/ge/common/dump/dump_manager.cc @@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf dump_mode = dump_config.dump_mode; GELOGI("Dump mode is %s", dump_mode.c_str()); dump_properties.SetDumpMode(dump_mode); - dump_properties_map_.emplace(kInferSessionId, dump_properties); + dump_properties_map_[kInferSessionId] = dump_properties; return SUCCESS; } From 5f1e659fcd71b6d1527aef9d7bf0368ff9d16506 Mon Sep 17 00:00:00 2001 From: lianghao Date: Mon, 5 Apr 2021 17:51:57 +0800 Subject: [PATCH 03/18] LinkToPotentialPrecedenceNode --- ge/graph/passes/atomic_addr_clean_pass.cc | 33 ++++++++++++++++++++ ge/graph/passes/atomic_addr_clean_pass.h | 8 +++++ .../passes/atomic_addr_clean_pass_unittest.cc | 35 ++++++++++++++++++++-- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 16d3c129..b1247697 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect } } } + return LinkToPotentialPrecedenceNode(graph, clean_addr_node); +} + +// Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean +// node. We hope that atomic clean node can execute with the highest priority in the entire graph. Because of stream +// concurrency mechanism, only placing it at the head can not ensure that priority. Therefore, we need to add control +// edges from atomic clean node to the nodes that may be the first node on each stream. 
Generally, the first nodes on +// each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the +// successors of Data/Variable. +Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) { + GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.", + atomic_clean_node->GetName().c_str()); + auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_ctrl_anchor); + + for (const auto &node : graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node); + bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty(); + if (!need_handle) { + continue; + } + auto second_nodes = node->GetOutAllNodes(); + for (const auto &second_node : second_nodes) { + GE_CHECK_NOTNULL(second_node); + auto in_ctrl_anchor = second_node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor); + if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) { + GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor)); + GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str()); + } + } + } + return SUCCESS; } diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index 8138d511..96147fa2 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -68,6 +68,14 @@ class AtomicAddrCleanPass : public GraphPass { Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node); /** + * Link atomic clean node to all potential precedence nodes which may execute before atomic clean node + * @param graph + * @param atomic_clean_node + * @return + */ + Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node); + + /** * Check if this node is atomic op. 
* @param node * @return diff --git a/tests/ut/ge/graph/passes/atomic_addr_clean_pass_unittest.cc b/tests/ut/ge/graph/passes/atomic_addr_clean_pass_unittest.cc index 59636511..d9d663d9 100644 --- a/tests/ut/ge/graph/passes/atomic_addr_clean_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/atomic_addr_clean_pass_unittest.cc @@ -48,18 +48,49 @@ public: return node; } + int CountOfAtomicCleanNode() { + int node_num = 0; + for (NodePtr &node : graph_->GetDirectNode()) { + if (node->GetType() == ATOMICADDRCLEAN) { + ++node_num; + } + } + return node_num; + } + ComputeGraphPtr graph_; }; -// node1 -> node2 -> node3 +/* + * Data Data Atomic_clean + * | | / | + * relu relu | + * | ==> | | + * relu(atomic) relu(atomic) + * | | + * netoutput netoutput + */ TEST_F(UtestGraphPassesAtomicAddrCleanPass, pass_run_success) { auto node1 = NewNode("node1", DATA, 0, 1); + auto node2 = NewNode("node2", RELU, 1, 1); - auto node3 = NewNode("node3", NETOUTPUT, 1, 0); + auto node3 = NewNode("node3", RELU, 1, 1); + auto op_desc = node3->GetOpDesc(); + vector atomic_input_index = {123, 456}; + AttrUtils::SetListInt(op_desc, "atomic_input_index", atomic_input_index); + + auto node4 = NewNode("node4", NETOUTPUT, 1, 0); GraphUtils::AddEdge(node1->GetOutDataAnchor(0), node2->GetInDataAnchor(0)); GraphUtils::AddEdge(node2->GetOutDataAnchor(0), node3->GetInDataAnchor(0)); + GraphUtils::AddEdge(node3->GetOutDataAnchor(0), node4->GetInDataAnchor(0)); AtomicAddrCleanPass atomi_addr_clean_pass; Status ret = atomi_addr_clean_pass.Run(graph_); EXPECT_EQ(ret, SUCCESS); + EXPECT_EQ(1, CountOfAtomicCleanNode()); + + auto atomic_clean = graph_->FindNode("atomic_addr_clean"); + EXPECT_NE(atomic_clean, nullptr); + auto out_ctrl_nodes = atomic_clean->GetOutControlNodes(); + EXPECT_EQ(out_ctrl_nodes.size(), 2); } } // namespace ge From 960cc1fd641e5ce838ea1ae90eea84299516db98 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Apr 2021 10:01:44 +0800 Subject: [PATCH 04/18] Fix bug of const input index. 
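The idea of this fix, sketched below with hypothetical stand-in types (InputDesc and AssignDataIndices in place of the real GeTensorDesc/AttrUtils calls, so this is an illustration rather than the actual GeGenerator code): inputs flagged with CONST_ATTR_NAME_INPUT no longer receive an ATTR_NAME_INDEX and are filtered out by RemoveConst before BuildModel, so only the genuine data inputs get consecutive Data-node indices.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Stand-in for a GeTensorDesc that carries the CONST_ATTR_NAME_INPUT flag.
    struct InputDesc {
      bool is_const;
      int32_t data_index;  // stand-in for ATTR_NAME_INDEX set on the generated Data node
    };

    // Only non-const inputs are counted as Data inputs and receive an index.
    void AssignDataIndices(std::vector<InputDesc> &inputs) {
      int32_t data_index = 0;
      for (auto &input : inputs) {
        if (!input.is_const) {
          input.data_index = data_index++;
        }
      }
    }

    int main() {
      // data, const, data -> indices 0, (unset), 1; before the fix the last input was numbered 2.
      std::vector<InputDesc> inputs = {{false, -1}, {true, -1}, {false, -1}};
      AssignDataIndices(inputs);
      for (const auto &input : inputs) {
        std::cout << input.data_index << std::endl;
      }
      return 0;
    }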
--- ge/generator/ge_generator.cc | 27 +++++++++++++++++++++------ inc/framework/generator/ge_generator.h | 1 + 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 2a4d076b..61000519 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -154,7 +154,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty } static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, - bool attr) { + bool attr, int32_t &data_index) { GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); @@ -197,9 +197,10 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); - if (attr) { - GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, + if (attr && !is_const) { + GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED, "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); + ++data_index; } ge::NodePtr arg_node = graph->AddNode(data_op); @@ -693,6 +694,17 @@ namespace { } } +void GeGenerator::RemoveConst(const vector &inputs, vector &outputs) { + for (auto &input : inputs) { + GeTensorDesc input_desc = input.GetTensorDesc(); + bool is_const = false; + (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); + if (!is_const) { + outputs.emplace_back(input); + } + } +} + Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs) { GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); @@ -757,7 +769,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in GELOGI("ATC parser success in single op build."); GeRootModelPtr ge_root_model = nullptr; - GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); + vector data_inputs; + RemoveConst(inputs, data_inputs); + GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model)); map op_attrs = op_desc_tmp->GetAllAttrs(); GE_CHECK_NOTNULL(ge_root_model); GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); @@ -840,18 +854,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vectorGetAllInputsDescPtr()) { GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { continue; } - GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); + GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index)); arg_index++; } } else { for (const auto &in_desc : inputs) { - GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true)); + GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index)); arg_index++; } } diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 7d59def7..69bd60ef 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -98,6 +98,7 @@ class GE_FUNC_VISIBILITY GeGenerator { Status BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, 
OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline = true); + void RemoveConst(const vector &inputs, vector &outputs); Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs); using GeRootModelPtr = std::shared_ptr; From 3ef3f54d9454c2c376dc48ad92af09574b02fe00 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Apr 2021 20:28:19 +0800 Subject: [PATCH 05/18] Save atomic kernel bin to model. --- ge/common/tbe_kernel_store.cc | 11 +++++ ge/graph/build/model_builder.cc | 46 +++++++++++++++++ ge/graph/build/model_builder.h | 2 + ge/hybrid/node_executor/aicore/aicore_op_task.cc | 57 ++++++++++++++++------ ge/hybrid/node_executor/aicore/aicore_op_task.h | 8 +++ .../node_executor/aicore/aicore_task_builder.cc | 1 + 6 files changed, 111 insertions(+), 14 deletions(-) diff --git a/ge/common/tbe_kernel_store.cc b/ge/common/tbe_kernel_store.cc index 2fb9a04a..efbb46ae 100755 --- a/ge/common/tbe_kernel_store.cc +++ b/ge/common/tbe_kernel_store.cc @@ -15,6 +15,8 @@ */ #include "common/tbe_kernel_store.h" +#include "graph/utils/attr_utils.h" +#include "graph/debug/ge_attr_define.h" namespace ge { @@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); + + std::string atomic_kernel_name; + (void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name); + if (!atomic_kernel_name.empty()) { + GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str()); + auto atomic_kernel_bin = FindKernel(atomic_kernel_name); + GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin), + GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");) + } } } } diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 1a14374d..1dc2bead 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() { return SUCCESS; } +Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) { + ge::NodePtr atomic_clean_node = nullptr; + atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node); + if (atomic_clean_node == nullptr) { + return SUCCESS; + } + + ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc(); + GE_CHECK_NOTNULL(atomic_op_desc); + TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + if (tbe_kernel == nullptr) { + std::string kernel_name; + GeAttrValue::BYTES kernel_buffer; + (void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name); + (void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); + if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { + GE_CHECK_NOTNULL(kernel_buffer.GetData()); + std::vector data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); + tbe_kernel = MakeShared(kernel_name, std::move(data)); + GE_CHECK_NOTNULL(tbe_kernel); + } + } + if (tbe_kernel == nullptr) { + GELOGD("Atomic_clean_node doesn't have tbe_kernel."); + return SUCCESS; + } + tbe_kernel_store_.AddTBEKernel(tbe_kernel); + GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str()); + (void) AttrUtils::SetStr(op_desc, 
ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName()); + + std::string kernel_name; + (void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name); + (void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name); + + std::string meta_data; + (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data); + (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data); + + std::string json_string; + (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string); + (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string); + return SUCCESS; +} + Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { // Add weight ge_model.SetWeight(weight_buffer_); @@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { } tbe_name_set.insert(tbe_kernel->GetName()); tbe_kernel_store_.AddTBEKernel(tbe_kernel); + + GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!"); } SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 12420614..67def859 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -89,6 +89,8 @@ class ModelBuilder { void SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_op_types, std::set &aicpu_tf_op_types); + Status SaveAtomicTBEKernel(const OpDescPtr &op_desc); + uint64_t session_id_; map mem_type_to_mem_offset_; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 6af2fd4a..6f9a5a52 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -71,22 +71,22 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) } Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { - auto op_desc_ptr = std::make_shared(op_desc); - GE_CHECK_NOTNULL(op_desc_ptr); - auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); - if (tbe_kernel == nullptr) { - GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); - return INTERNAL_ERROR; - } - TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); if (rt_ret != RT_ERROR_NONE || is_single_op_) { + auto op_desc_ptr = MakeShared(op_desc); + GE_CHECK_NOTNULL(op_desc_ptr); + auto tbe_kernel = op_desc_ptr->TryGetExtAttr(GetKeyForTbeKernel(), TBEKernelPtr()); + if (tbe_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); + return INTERNAL_ERROR; + } + TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); void *bin_handle = nullptr; if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); rtDevBinary_t binary; std::string json_string; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMagic(), json_string), GELOGI("Get original type of session_graph_id.")); if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; @@ -104,7 +104,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { GELOGI("TBE: binary.length: %lu", 
binary.length); GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); std::string meta_data; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMetaData(), meta_data), GELOGI("Get original type of json_string")); GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); @@ -114,7 +114,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { kernel_store.ReferTBEHandle(stub_name_.c_str()); } std::string kernel_name; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name), GELOGI("Get original type of kernel_name")); GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); @@ -349,9 +349,6 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", node->GetName().c_str()); - if (is_single_op_) { - tiling_info.clear_atomic = false; - } GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); return SUCCESS; } @@ -468,6 +465,22 @@ std::string AiCoreOpTask::GetKeyForOpParamSize() const { return kAttrOpParamSize; } +std::string AiCoreOpTask::GetKeyForTbeKernel() const { + return OP_EXTATTR_NAME_TBE_KERNEL; +} + +std::string AiCoreOpTask::GetKeyForTvmMagic() const { + return TVM_ATTR_NAME_MAGIC; +} + +std::string AiCoreOpTask::GetKeyForTvmMetaData() const { + return TVM_ATTR_NAME_METADATA; +} + +std::string AiCoreOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { + return op_desc.GetName() + "_kernelname"; +} + Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); return InitAtomicAddrCleanIndices(op_desc); @@ -524,6 +537,22 @@ std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { return kAttrAtomicOpParamSize; } +std::string AtomicAddrCleanOpTask::GetKeyForTbeKernel() const { + return EXT_ATTR_ATOMIC_TBE_KERNEL; +} + +std::string AtomicAddrCleanOpTask::GetKeyForTvmMagic() const { + return ATOMIC_ATTR_TVM_MAGIC; +} + +std::string AtomicAddrCleanOpTask::GetKeyForTvmMetaData() const { + return ATOMIC_ATTR_TVM_METADATA; +} + +std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { + return op_desc.GetName() + "_atomic_kernelname"; +} + Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 97df2335..f7d0854f 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -81,6 +81,10 @@ class AiCoreOpTask { protected: Status UpdateTilingInfo(TaskContext &context); virtual std::string GetKeyForOpParamSize() const; + virtual std::string GetKeyForTbeKernel() const; + virtual std::string GetKeyForTvmMagic() const; + virtual std::string GetKeyForTvmMetaData() const; + 
virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const; virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info); std::unique_ptr tiling_buffer_ = nullptr; @@ -119,6 +123,10 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask { protected: std::string GetKeyForOpParamSize() const override; + std::string GetKeyForTbeKernel() const override; + std::string GetKeyForTvmMagic() const override; + std::string GetKeyForTvmMetaData() const override; + std::string GetKeyForKernelName(const OpDesc &op_desc) const override; Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; private: diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc index 966e0910..bb132d0a 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc @@ -70,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, auto atomic_task = std::unique_ptr(new(std::nothrow)AtomicAddrCleanOpTask()); GE_CHECK_NOTNULL(atomic_task); + atomic_task->SetSingleOp(is_single_op); GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); From 24b2437361a8d75773a7d585bcff399c6d10db93 Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 1 Apr 2021 13:07:50 +0800 Subject: [PATCH 06/18] Fix dump for known-shaped subgraph --- ge/graph/build/graph_builder.cc | 38 --------- ge/hybrid/executor/hybrid_execution_context.h | 2 +- ge/hybrid/executor/hybrid_model_executor.cc | 22 +++-- .../executor/hybrid_model_pipeline_executor.cc | 10 +++ ge/hybrid/model/hybrid_model.cc | 20 +++++ ge/hybrid/model/hybrid_model.h | 9 +- ge/hybrid/model/hybrid_model_builder.cc | 96 +++++++++++++++++++++- ge/hybrid/model/hybrid_model_builder.h | 8 ++ .../compiledsubgraph/known_node_executor.cc | 41 +++++---- .../compiledsubgraph/known_node_executor.h | 5 +- 10 files changed, 185 insertions(+), 66 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 74b884de..d1357bc6 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -399,41 +399,6 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor return SUCCESS; } -static Status GenerateTaskForConstant(const std::shared_ptr &graph) { - if (graph->GetGraphUnknownFlag()) { - GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str()); - return SUCCESS; - } - for (auto &node : graph->GetDirectNode()) { - // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - auto op_type = op_desc->GetType(); - if (op_type == NETOUTPUT) { - for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { - const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); - NodePtr in_node = peer_out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(in_node); - - std::string in_node_op_type = in_node->GetType(); - if (in_node_op_type == CONSTANT) { - GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); - std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; - if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { - 
GELOGE(FAILED, "Insert memcpy between %s and %s failed.", - in_node->GetName().c_str(), node->GetName().c_str()); - return FAILED; - } - } - } - } - } - return SUCCESS; -} - Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); com_graph->SetGraphUnknownFlag(false); @@ -516,9 +481,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { continue; } - - GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); - if (sub_graph->GetGraphUnknownFlag()) { // unknown shape build flow GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 003e8010..54840c6a 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -68,7 +68,7 @@ struct GraphExecutionContext { DumpProperties dump_properties; bool trace_enabled = false; bool dump_enabled = false; - std::atomic_bool is_eos_; + std::atomic_bool is_eos_{false}; long profiling_level = 0; long iteration = 0; void *global_step = nullptr; diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 4b589a03..ceffa203 100644 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -33,9 +33,6 @@ HybridModelExecutor::~HybridModelExecutor() { if (context_.rt_gen_context != nullptr) { (void) rtCtxDestroy(context_.rt_gen_context); } - if (context_.global_step != nullptr) { - (void) rtFree(context_.global_step); - } } Status HybridModelExecutor::Init() { @@ -49,9 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { GELOGD("Start to execute model."); auto root_graph_item = model_->GetRootGraphItem(); GE_CHECK_NOTNULL(root_graph_item); - - GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, - sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); + if (context_.global_step != nullptr) { + GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, + sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); + } SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); auto ret = ExecuteGraphInternal(executor, args); Cleanup(); @@ -102,8 +100,8 @@ Status HybridModelExecutor::InitExecutionContext() { GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); - GE_CHK_RT_RET(rtMalloc(&context_.global_step, sizeof(uint64_t), RT_MEMORY_HBM)); + context_.global_step = model_->GetGlobalStep(); context_.stream = stream_; context_.model = model_; context_.is_eos_ = false; @@ -136,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context string ctx_id = std::to_string(context.context_id); RuntimeInferenceContext::DestroyContext(ctx_id); GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); + RuntimeInferenceContext *ctx = nullptr; + GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context"); + for (auto &host_tensor : context.model->GetHostTensors()) { + auto node_id = host_tensor.first; 
+ for (const auto &output_idx_and_tensor : host_tensor.second) { + auto output_idx = output_idx_and_tensor.first; + GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx); + ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()); + } + } return SUCCESS; } } // namespace hybrid diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc index 4706fa97..97b4e1aa 100644 --- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc +++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc @@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) { string ctx_id = std::to_string(context.context_id); RuntimeInferenceContext::DestroyContext(ctx_id); GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); + RuntimeInferenceContext *ctx = nullptr; + GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context"); + for (auto &host_tensor : context.model->GetHostTensors()) { + auto node_id = host_tensor.first; + for (const auto &output_idx_and_tensor : host_tensor.second) { + auto output_idx = output_idx_and_tensor.first; + GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx); + ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()); + } + } return SUCCESS; } diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index a0217d52..6acbd6cf 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -357,5 +357,25 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const { return GetVariable(node->GetName()); } + +const map>> &HybridModel::GetHostTensors() const { + return host_tensors_; +} + +void *HybridModel::GetGlobalStep() const { + if (global_step_ == nullptr) { + return nullptr; + } + return global_step_->GetData(); +} + +TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const { + auto it = weight_buffer_map_.find(subgraph_name); + if (it == weight_buffer_map_.end()) { + GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str()); + return nullptr; + } + return it->second.get(); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index fae53679..5d772b98 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -45,6 +45,8 @@ class HybridModel { return root_runtime_param_.session_id; } + void *GetGlobalStep() const; + GeModelPtr GetGeModel(const NodePtr &node) const; NodeItem *MutableNodeItem(const NodePtr &node); @@ -91,6 +93,10 @@ class HybridModel { TensorValue* GetTensor(const NodePtr &node) const; + TensorBuffer* GetModelWeight(const std::string &subgraph_name) const; + + const std::map>> &GetHostTensors() const; + const std::vector* GetTaskDefs(const NodePtr &node) const; const GraphItem *GetRootGraphItem() const; @@ -145,6 +151,7 @@ class HybridModel { std::unique_ptr root_graph_item_; std::map> subgraph_items_; std::map> node_items_; + std::map>> host_tensors_; bool is_new_model_desc_ = false; // support aipp bool is_single_op_ = false; @@ -153,10 +160,10 @@ class HybridModel { uint32_t device_id_ = 0; uint32_t model_id_ = 0; uint8_t *var_mem_base_ = nullptr; - std::unique_ptr weight_buffer_; std::map> weight_buffer_map_; RuntimeParam root_runtime_param_; string om_name_; + std::unique_ptr global_step_; }; } 
// namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 25dabd78..b463cd1f 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -145,6 +145,9 @@ Status HybridModelBuilder::Build() { GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); + GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(), + "[%s] Failed to optimize dependencies for constant inputs", + GetGraphName()); GELOGI("[%s] Done building hybrid model successfully.", GetGraphName()); return SUCCESS; } @@ -346,6 +349,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s auto src_node_item = MutableNodeItem(src_node); src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); dependent_for_shape_inference.emplace(src_node); + host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item); GELOGD("[%s] Dependent added from output of [%s:%d]", node_item.NodeName().c_str(), src_node_item->NodeName().c_str(), @@ -1480,7 +1484,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { src_node->GetName().c_str(), src_op_type.c_str()); - if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) { + if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) { continue; } @@ -1489,6 +1493,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { GELOGD("Got parent output index = %u", parent_index); GE_CHECK_LE(parent_index, INT32_MAX); node_item.ref_outputs.emplace(static_cast(parent_index), src_node); + if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) { + known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node); + } } // Data nodes marked with REF_VAR_SRC_VAR_NAME @@ -1554,6 +1561,10 @@ Status HybridModelBuilder::InitModelMem() { } runtime_param_.var_base = hybrid_model_.var_mem_base_; + auto allocator = NpuMemoryAllocator::GetAllocator(); + GE_CHECK_NOTNULL(allocator); + hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t)); + GE_CHECK_NOTNULL(hybrid_model_.global_step_); return SUCCESS; } @@ -2113,5 +2124,88 @@ Status HybridModelBuilder::ParseDependentByParallelGroup() { } return SUCCESS; } + +Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() { + std::map> converted; + for (auto &it : host_input_value_dependencies_) { + auto node_item = it.first; + std::map ref_counts; + bool changed = false; + for (auto output_idx_and_node : it.second) { + auto output_idx = output_idx_and_node.first; + auto src_node_item = output_idx_and_node.second; + ++ref_counts[src_node_item]; + NodePtr constant_node; + if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) { + constant_node = src_node_item->node; + GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str()); + } else { + auto iter = known_subgraph_constant_output_refs_.find(src_node_item); + if (iter != known_subgraph_constant_output_refs_.end()) { + constant_node = iter->second[output_idx]; + if (constant_node != nullptr) { + GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str()); + } + } + } + + if (constant_node == nullptr) { + GELOGD("Output[%u] of [%s] is 
not a constant", output_idx, src_node_item->NodeName().c_str()); + continue; + } + + if (converted[constant_node].count(output_idx) == 0) { + GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx), + "[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str()); + converted[constant_node].emplace(output_idx); + } + + src_node_item->to_const_output_id_list.erase(output_idx); + --ref_counts[src_node_item]; + changed = true; + } + + if (changed) { + std::vector depends_to_keep; + for (auto &ref_count_it : ref_counts) { + if (ref_count_it.second == 0) { + GELOGD("[%s] no longer depends on [%s] for shape inference", + node_item->NodeName().c_str(), + ref_count_it.first->NodeName().c_str()); + } else { + depends_to_keep.emplace_back(ref_count_it.first->node); + } + } + node_item->dependents_for_shape_inference.swap(depends_to_keep); + } + } + + return SUCCESS; +} +Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) { + auto tensor_value = hybrid_model_.GetTensor(node); + GE_CHECK_NOTNULL(tensor_value); + auto tensor_desc = node->GetOpDesc()->MutableOutputDesc(0); + GE_CHECK_NOTNULL(tensor_desc); + Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc)); + int64_t tensor_size = -1; + GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size), + "[%s] Failed to get tensor size", node->GetName().c_str()); + if (tensor_size > 0) { + auto copy_size = static_cast(tensor_size); + GE_CHECK_GE(tensor_value->GetSize(), copy_size); + std::vector buffer(copy_size); + GE_CHK_RT_RET(rtMemcpy(buffer.data(), + copy_size, + tensor_value->GetData(), + copy_size, + RT_MEMCPY_DEVICE_TO_HOST)); + tensor.SetData(std::move(buffer)); + GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size); + } + + hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor)); + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index a59a282a..d383953b 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -91,6 +91,8 @@ class HybridModelBuilder { Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector &task_def_list); Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector &task_def_list); Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector &task_def_list); + Status OptimizeDependenciesForConstantInputs(); + Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx); const char* GetGraphName() const { return hybrid_model_.model_name_.c_str(); @@ -111,6 +113,12 @@ class HybridModelBuilder { RuntimeParam &runtime_param_; VarManager *var_manager_ = nullptr; + + // map> + std::map> known_subgraph_constant_output_refs_; + + // map> + std::map>> host_input_value_dependencies_; }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 1c46db20..dafa8201 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -18,6 +18,7 @@ #include "cce/aicpu_engine_struct.h" #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" +#include "common/dump/dump_manager.h" #include 
"common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" @@ -110,15 +111,6 @@ Status KnownNodeTask::Init(TaskContext &context) { GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); } - if (!load_flag_) { - auto dump_properties = context.GetDumpProperties(); - if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { - davinci_model_->SetDumpProperties(dump_properties); - void *global_step = context.GetExecutionContext()->global_step; - davinci_model_->SetKnownShapeGlobalStep(global_step); - } - load_flag_ = true; - } GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), davinci_model_->Id(), davinci_model_->SubModelId()), "KnownNodeTask::Init destroy aicpu kernel failed."); @@ -126,20 +118,35 @@ Status KnownNodeTask::Init(TaskContext &context) { return SUCCESS; } -Status KnownNodeTask::InitDavinciModel() { - GELOGD("[Init][Model] start"); +Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) { + GELOGD("[Init][DavinciModel] start"); davinci_model_->InitRuntimeParams(); GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); int32_t device_id = 0; GE_CHK_RT_RET(rtGetDevice(&device_id)); davinci_model_->SetDeviceId(static_cast(device_id)); - GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model."); + + auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId()); + if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { + davinci_model_->SetDumpProperties(dump_properties); + void *global_step = model.GetGlobalStep(); + davinci_model_->SetKnownShapeGlobalStep(global_step); + } + + void *weight = nullptr; + size_t weight_size = 0; + if (weight_buffer != nullptr) { + weight = weight_buffer->GetData(); + weight_size = weight_buffer->GetSize(); + } + GELOGD("Start to init davinci model, weight size = %zu", weight_size); + GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model."); GELOGD("[Init][Model] success"); return SUCCESS; } -Status KnownNodeTask::DoInitDavinciModel() { - return davinci_model_->Init(); +Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) { + return davinci_model_->Init(nullptr, 0, weight, weight_size); } Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { @@ -165,6 +172,10 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node const GeModelPtr ge_model = model.GetGeModel(node); GE_CHECK_NOTNULL(ge_model); + AscendString graph_name; + GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name"); + auto weight_buffer = model.GetModelWeight(graph_name.GetString()); + std::shared_ptr davinci_model = MakeShared(0, nullptr); GE_CHECK_NOTNULL(davinci_model); @@ -180,7 +191,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node auto known_node_task = MakeShared(davinci_model); GE_CHECK_NOTNULL(known_node_task); - GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel()); + GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer)); GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); task = std::move(known_node_task); return SUCCESS; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h 
b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 5eed528a..26141b5a 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -36,13 +36,12 @@ class KnownNodeTask : public NodeTask { Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; Status Init(TaskContext &context) override; - Status InitDavinciModel(); + Status InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer); protected: - virtual Status DoInitDavinciModel(); + virtual Status DoInitDavinciModel(void *weight, size_t weight_size); private: std::shared_ptr davinci_model_ = nullptr; - bool load_flag_ = false; }; class KnownNodeExecutor : public NodeExecutor { From 30743e1e591582a23e80e7c7808d64cda8492947 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 7 Apr 2021 11:51:26 +0800 Subject: [PATCH 07/18] fix aipp check --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 3 ++- ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 7c8d9073..25af98b8 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() { if (!convert_flag) { string error_msg = "Top name " + related_input_name + "convert rank failed, Please" " ensure top name in aipp config is the top name of data node."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); + GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); + REPORT_INPUT_ERROR("E19021", std::vector({"reason"}), std::vector({error_msg})); return PARAM_INVALID; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index fbe78121..41a32173 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { if (another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_name and related_input_rank!" " Please ensure param is the same with the first aipp config(related_input_name)."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); + GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); + REPORT_INPUT_ERROR("E19021", std::vector({"reason"}), std::vector({error_msg})); return PARAM_INVALID; } if (item->related_input_name() == another_item->related_input_name()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" " param is different in different aipp config."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); + GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); + REPORT_INPUT_ERROR("E19021", std::vector({"reason"}), std::vector({error_msg})); return PARAM_INVALID; } } @@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { if (!another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_rank and related_input_name!" 
" Please ensure param is the same with the first aipp config(related_input_rank)."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); + GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); + REPORT_INPUT_ERROR("E19021", std::vector({"reason"}), std::vector({error_msg})); return PARAM_INVALID; } if (item->related_input_rank() == another_item->related_input_rank()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" " param is different in different aipp config."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); + GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); + REPORT_INPUT_ERROR("E19021", std::vector({"reason"}), std::vector({error_msg})); return PARAM_INVALID; } } From 637bcc86d65ab61f27c11a16cc2818d0becd987b Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Wed, 7 Apr 2021 11:24:00 +0800 Subject: [PATCH 08/18] modify dump single op --- ge/hybrid/executor/worker/execution_engine.cc | 2 +- ge/single_op/single_op_model.cc | 6 +++--- ge/single_op/task/op_task.cc | 10 +++++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index bb06acd8..08c146ff 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -212,7 +212,7 @@ Status NodeDoneCallback::DumpDynamicNode() { std::string dynamic_model_name = model->GetModelName(); std::string dynamic_om_name = model->GetOmName(); uint32_t model_id = model->GetModelId(); - if(!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) { + if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) { GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str()); return SUCCESS; } diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index a5550deb..2fcf2de5 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -127,7 +127,7 @@ void SingleOpModel::ParseOpModelParams(ModelHelper &model_helper, SingleOpModelP ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value); param.core_type = ret ? value : 0; - GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu. 
core_type = %lu", + GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu, core_type = %lu", param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type); } @@ -454,7 +454,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::TE) { - GELOGD("Building TBE task"); + GELOGD("Building TBE task."); TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); tbe_task->SetModelArgs(model_name_, model_id_); @@ -482,7 +482,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { auto tasks = ge_model->GetModelTaskDefPtr()->task(); for (int i = 0; i < tasks.size(); ++i) { const TaskDef &task_def = tasks[i]; - GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), + GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), task_def.DebugString().c_str()); auto task_type = static_cast(task_def.type()); if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index f754af28..594cc3c0 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -121,7 +121,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id } GE_CHECK_NOTNULL(op_desc_); string op_name = op_desc_->GetName(); - GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); + GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u].", op_name.c_str(), task_id, stream_id); model_id = model_id_; task_desc_info.model_name = model_name_; task_desc_info.block_dim = block_dim_; @@ -463,6 +463,10 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, input_desc.size(), non_const_index); GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), "Input[%zu] update input shape failed.", input_index); + if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { + GE_CHK_STATUS_RET(op_desc_->UpdateInputDesc(input_index,input_desc[non_const_index]), + "AicpuTask Update [%zu]th input desc failed",input_index); + } non_const_index++; } @@ -470,6 +474,10 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, for (size_t j = 0; j < num_outputs_; ++j) { GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), "Output[%zu] UpdateOutputShapeAndType failed.", j); + if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { + GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(j,output_desc[j]), + "AicpuTask Update [%zu]th output desc failed",j); + } } } From b48ecfe3476e147028dc3bbeb64a0a56a6d4dbd3 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 7 Apr 2021 14:58:52 +0800 Subject: [PATCH 09/18] Save atomic kernel bin to model. 
---
 metadef | 2 +-
 parser  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/metadef b/metadef
index 0e96f411..54935e7d 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 0e96f4117db0bf9646ab4173c5e6487ed46615d0
+Subproject commit 54935e7d9d7d825eaef6f477ffb64e8e92b35153
diff --git a/parser b/parser
index d851e1d4..d4587c1c 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit d851e1d467768b6cefd8f5f44745be1c5312121a
+Subproject commit d4587c1c33d2d50ef157bbc0449484a196e91429

From c90cae141085992e2b08b7cdac101c02a4088cfa Mon Sep 17 00:00:00 2001
From: zhou_chao1993
Date: Wed, 7 Apr 2021 17:16:24 +0800
Subject: [PATCH 10/18] modify dynamic shape dump

---
 ge/hybrid/executor/worker/execution_engine.cc | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index bb06acd8..f28490c3 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -233,11 +233,13 @@ Status NodeDoneCallback::DumpDynamicNode() {
     uintptr_t output_addr = reinterpret_cast(tensor_value->GetData());
     output_addrs.emplace_back(output_addr);
   }
-  void *loop_per_iter = nullptr;
-  TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
-  if (varible_loop_per_iter != nullptr) {
-    loop_per_iter = const_cast(varible_loop_per_iter->GetData());
-  }
+  dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream);
+
+  void *loop_per_iter = nullptr;
+  TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
+  if (varible_loop_per_iter != nullptr) {
+    loop_per_iter = const_cast(varible_loop_per_iter->GetData());
+  }
 
   void *loop_cond = nullptr;
   TensorValue *varible_loop_cond = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);

From 19d1f804c712112dbca0705e8c9956b71d9e0acc Mon Sep 17 00:00:00 2001
From: chuxing
Date: Thu, 8 Apr 2021 11:26:59 +0800
Subject: [PATCH 11/18] Bugfix: keep hccl control dependency

---
 ge/hybrid/model/hybrid_model_builder.cc | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 25dabd78..9e42a91c 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -315,6 +315,20 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
     }
   }
 
+  if (is_hccl_op) {
+    for (const auto &src_node : ge_node->GetInControlNodes()) {
+      auto src_node_item = MutableNodeItem(src_node);
+      GE_CHECK_NOTNULL(src_node_item);
+      GELOGD("[%s](%s) Add input control dependent node [%s](%s)",
+             ge_node->GetName().c_str(),
+             ge_node->GetType().c_str(),
+             src_node->GetName().c_str(),
+             src_node->GetType().c_str());
+      src_node_item->has_observer = true;
+      dependent_for_execution.emplace(src_node);
+    }
+  }
+
   // cond or branch need to be prepared before the execution of IF or CASE
   if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) {
     auto src_node = NodeUtils::GetInDataNodeByIndex(*ge_node, 0);  // cond input
@@ -2030,8 +2044,9 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) {
   const auto &node = node_item->node;
   auto executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
   if (executor_type == NodeExecutorManager::ExecutorType::HCCL) {
-    std::string
parallel_group; - if (AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group)) { + int64_t parallel_group_val = -1; + if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) { + std::string parallel_group = std::to_string(parallel_group_val); GELOGD("[%s] Got parallel group = [%s]", node_item->NodeName().c_str(), parallel_group.c_str()); parallel_group_to_nodes_[parallel_group].emplace(node_item); std::set group{parallel_group}; @@ -2047,8 +2062,9 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) { auto subgraph = root_graph_->GetSubgraph(subgraph_name); GE_CHECK_NOTNULL(subgraph); for (const auto &sub_node : subgraph->GetAllNodes()) { - std::string parallel_group; - if (AttrUtils::GetStr(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group)) { + int64_t parallel_group_val = -1; + if (AttrUtils::GetInt(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) { + std::string parallel_group = std::to_string(parallel_group_val); GELOGD("[%s::%s] Got parallel group = %s", subgraph_name.c_str(), sub_node->GetName().c_str(), From e52c916f56b88ad4c0e9e4235b6dd58fc89eaf9c Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Apr 2021 16:06:41 +0800 Subject: [PATCH 12/18] Don't reset -2 when there is aicore op. --- ge/generator/ge_generator.cc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 61000519..4dff94c9 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -692,6 +692,22 @@ namespace { } return SUCCESS; } + + bool CheckNoAicore(const ComputeGraphPtr &graph) { + for (const auto &node : graph->GetDirectNode()) { + if (node == nullptr) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + if (op_desc->GetOpEngineName() == kAIcoreEngine) { + return false; + } + } + return true; + } } void GeGenerator::RemoveConst(const vector &inputs, vector &outputs) { @@ -787,7 +803,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in bool all_shape = false; (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); - if (all_shape) { + if (all_shape && CheckNoAicore(root_graph)) { GELOGD("Get aicpu all_shape kernel!"); vector inputs_dynamic; vector outputs_dynamic; From f971f512e3e3c194b7b6c850d7080601a1ce380c Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Thu, 8 Apr 2021 20:25:21 +0800 Subject: [PATCH 13/18] static check modify --- ge/single_op/task/op_task.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 594cc3c0..0e12b17f 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -459,13 +459,13 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, continue; } GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, - "Input_desc size is %zu, but get non_const_index is %zu", - input_desc.size(), non_const_index); + "Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(), + non_const_index); GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), "Input[%zu] update input shape failed.", input_index); if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { - GE_CHK_STATUS_RET(op_desc_->UpdateInputDesc(input_index,input_desc[non_const_index]), - 
"AicpuTask Update [%zu]th input desc failed",input_index); + GE_CHK_STATUS_RET(op_desc_->UpdateInputDesc(input_index, input_desc[non_const_index]), + "AicpuTask Update [%zu]th input desc failed", input_index); } non_const_index++; } @@ -475,8 +475,8 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), "Output[%zu] UpdateOutputShapeAndType failed.", j); if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { - GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(j,output_desc[j]), - "AicpuTask Update [%zu]th output desc failed",j); + GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(j, output_desc[j]), "AicpuTask Update [%zu]th output desc failed", + j); } } } From c73a3c7b469b6290da835630bbd28eaae3c10214 Mon Sep 17 00:00:00 2001 From: lichun Date: Fri, 9 Apr 2021 10:21:02 +0800 Subject: [PATCH 14/18] fix sc check error --- ge/graph/build/graph_builder.cc | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index d1357bc6..b13f0d30 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -382,23 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); } -static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, - const std::vector &in_anchors, const std::string &name) { - GE_CHECK_NOTNULL(out_anchor); - NodePtr in_node = out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(in_node); - OpDescBuilder op_desc_builder(name, MEMCPYASYNC); - OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) - .Build(); - (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); - if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); com_graph->SetGraphUnknownFlag(false); From d5f56ad31c304766a31565f725f8d53bb802eb90 Mon Sep 17 00:00:00 2001 From: wxl Date: Fri, 9 Apr 2021 15:50:22 +0800 Subject: [PATCH 15/18] fix optional input bug --- ge/generator/ge_generator.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 61000519..e6f050bc 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -67,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { } return false; } +bool IsOptional(const ge::GeTensorDesc &tensor_desc) { + return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED; +} } // namespace namespace ge { @@ -699,7 +702,8 @@ void GeGenerator::RemoveConst(const vector &inputs, vector & GeTensorDesc input_desc = input.GetTensorDesc(); bool is_const = false; (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); - if (!is_const) { + bool is_optional = IsOptional(input_desc); + if (!is_optional && !is_const) { outputs.emplace_back(input); } } From f49599b6c58308f64adbcc9151812db5081b7a18 Mon Sep 17 00:00:00 2001 From: 
zhou_chao1993
Date: Fri, 9 Apr 2021 16:22:47 +0800
Subject: [PATCH 16/18] modify single op dump bug

---
 ge/common/dump/dump_op.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc
index 4456383c..d0dbc8f0 100755
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -204,6 +204,10 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
 }
 
 Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) {
+  if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) {
+    GELOGI("Single op dump, no need to set model name");
+    return SUCCESS;
+  }
   std::set model_list = dump_properties_.GetAllDumpModel();
   bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end();
   bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end();

From 36f2c837bfa45b2c0de7a86647c2a42834ceca7e Mon Sep 17 00:00:00 2001
From: chuxing
Date: Fri, 9 Apr 2021 11:29:29 +0800
Subject: [PATCH 17/18] Fix hccl control dependency

---
 ge/hybrid/model/hybrid_model_builder.cc | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index afd813c1..bd4df10d 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -318,16 +318,14 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
     }
   }
 
-  if (is_hccl_op) {
-    for (const auto &src_node : ge_node->GetInControlNodes()) {
-      auto src_node_item = MutableNodeItem(src_node);
-      GE_CHECK_NOTNULL(src_node_item);
+  for (const auto &src_node : ge_node->GetInControlNodes()) {
+    auto src_node_item = MutableNodeItem(src_node);
+    if ((src_node_item != nullptr) && (is_hccl_op || src_node_item->IsHcclOp())) {
       GELOGD("[%s](%s) Add input control dependent node [%s](%s)",
             ge_node->GetName().c_str(),
            ge_node->GetType().c_str(),
            src_node->GetName().c_str(),
            src_node->GetType().c_str());
-      src_node_item->has_observer = true;
       dependent_for_execution.emplace(src_node);
     }
   }

From 4928f86819556c2d05f219a74cbd1aadff88fc1f Mon Sep 17 00:00:00 2001
From: shenwei41
Date: Tue, 13 Apr 2021 11:12:42 +0800
Subject: [PATCH 18/18] update commit id

---
 metadef | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metadef b/metadef
index 54935e7d..7aa912ab 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 54935e7d9d7d825eaef6f477ffb64e8e92b35153
+Subproject commit 7aa912ab473b780c3d2f9c907760e4cb32dc0fb6
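// ---------------------------------------------------------------------------
// Illustration only, not part of the patch series: the single-op early return
// added in PATCH 16 can be exercised in isolation. This is a minimal,
// hypothetical sketch of that check -- the function name ShouldSetDumpModelName,
// its parameters, and the plain std::set stand-in for the dump configuration
// are invented for illustration and are not part of the GE API. It assumes the
// rule visible in the hunk above: when both the dynamic model name and the
// dynamic om name are empty the call is treated as a single-op dump and no
// model name is attached; otherwise a name is attached only if one of the two
// identifiers appears in the configured dump list (the additional checks done
// by the real DumpOp::SetDumpModelName are out of scope here).
#include <iostream>
#include <set>
#include <string>

// Returns false when no model name should be attached to the dump info.
static bool ShouldSetDumpModelName(const std::set<std::string> &dump_model_list,
                                   const std::string &dynamic_model_name,
                                   const std::string &dynamic_om_name) {
  // Single-op dump: neither a dynamic model name nor an om name is available.
  if (dynamic_model_name.empty() && dynamic_om_name.empty()) {
    return false;
  }
  // Mirror of the not_find_by_omname / not_find_by_modelname lookups above.
  const bool found_by_om_name = dump_model_list.count(dynamic_om_name) != 0;
  const bool found_by_model_name = dump_model_list.count(dynamic_model_name) != 0;
  return found_by_om_name || found_by_model_name;
}

int main() {
  const std::set<std::string> dump_list = {"resnet50"};
  std::cout << ShouldSetDumpModelName(dump_list, "", "") << "\n";          // 0: single-op dump
  std::cout << ShouldSetDumpModelName(dump_list, "resnet50", "") << "\n";  // 1: found by model name
  std::cout << ShouldSetDumpModelName(dump_list, "bert", "") << "\n";      // 0: not in the dump list
  return 0;
}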