From 15763e5f3956b26c340a965528fbf58372fc4813 Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Wed, 25 Nov 2020 09:20:31 +0800 Subject: [PATCH] fix some bugs found by zhouli --- ge/graph/load/new_model_manager/davinci_model.cc | 40 ++++++++++++++---------- ge/graph/load/new_model_manager/model_manager.cc | 18 ++++++++--- ge/graph/load/new_model_manager/model_manager.h | 1 - ge/graph/manager/graph_manager.cc | 14 +++++---- ge/graph/preprocess/multi_batch_copy_graph.cc | 7 +++-- ge/graph/preprocess/multi_batch_options.cc | 6 ++++ 6 files changed, 54 insertions(+), 32 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index b6db50a7..f15c0c5f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2556,8 +2556,10 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r int64_t data_size = output.second.GetDataSize(); if (is_online_infer_dynamic_) { - auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx]; - data_size = gear_and_real_out_size_info[cur_dynamic_dims_]; + if (merge_nodes_gear_and_real_out_size_info_.find(idx) != merge_nodes_gear_and_real_out_size_info_.end()) { + auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx]; + data_size = gear_and_real_out_size_info[cur_dynamic_dims_]; + } } uint64_t buffer_length = buffer.length; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data)); @@ -2594,11 +2596,13 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data return ret); std::vector output_shape = input_desc->GetShape().GetDims(); if (is_online_infer_dynamic_) { - auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; - size = gear_and_real_out_size_info[cur_dynamic_dims_]; - auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; - output_shape = 
gear_and_real_out_shape_info[cur_dynamic_dims_]; - is_dynamic_ = true; + if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { + auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; + size = gear_and_real_out_size_info[cur_dynamic_dims_]; + auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; + output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; + is_dynamic_ = true; + } } GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); out_buffer_size_vec.push_back(size); @@ -2755,16 +2759,6 @@ void *DavinciModel::Run(DavinciModel *model) { InputData current_data = data_wrapper->GetInput(); GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id, current_data.index); - if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { - model->cur_dynamic_dims_.clear(); - GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); - auto shape_data_buffer_data = current_data.blobs.back().data; - auto shape_data_buffer_length = current_data.blobs.back().length; - model->cur_dynamic_dims_.assign(reinterpret_cast(shape_data_buffer_data), - reinterpret_cast(shape_data_buffer_data) + - shape_data_buffer_length / sizeof(int64_t)); - GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); - } GE_TIMESTAMP_START(Model_SyncVarData); ret = model->SyncVarData(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( @@ -2781,6 +2775,18 @@ void *DavinciModel::Run(DavinciModel *model) { ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); continue, "Copy input data to model failed."); // [No need to check value] + if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { + model->cur_dynamic_dims_.clear(); + 
GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); + auto shape_data_buffer_data = current_data.blobs.back().data; + auto shape_data_buffer_length = current_data.blobs.back().length; + model->cur_dynamic_dims_.assign(reinterpret_cast(shape_data_buffer_data), + reinterpret_cast(shape_data_buffer_data) + + shape_data_buffer_length / sizeof(int64_t)); + GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); + delete[] (int64_t *)current_data.blobs.back().data; + current_data.blobs.pop_back(); + } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); if (ProfilingManager::Instance().ProfilingOpTraceOn()) { diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index e4e0681d..3ebd1daf 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -407,10 +407,6 @@ Status ModelManager::Unload(uint32_t model_id) { } std::lock_guard lock(exeception_infos_mutex_); exception_infos_.clear(); - for (auto addr : shape_data_addrs_[model_id]) { - delete[] addr; - } - shape_data_addrs_.erase(model_id); return SUCCESS; } @@ -475,6 +471,19 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ } } GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str()); + bool cur_dynamic_dims_valid = false; + std::vector shape_strs = ge::StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';'); + for (auto dynamic_dim : shape_strs) { + if (dynamic_dim == formats::JoinToString(cur_dynamic_dims)) { + cur_dynamic_dims_valid = true; + break; + } + } + if (!cur_dynamic_dims_valid) { + GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.", + formats::JoinToString(cur_dynamic_dims).c_str()); + return INTERNAL_ERROR; + 
} return SUCCESS; } @@ -517,7 +526,6 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(data.data)); } } diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index ff2e3030..b2bb4564 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -363,7 +363,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::map> cust_aicpu_so_; static DumpProperties dump_properties_; - std::map> shape_data_addrs_; }; } // namespace ge diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 654bea16..0b6f6d8a 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2866,13 +2866,15 @@ void GraphManager::RunThread(GraphManager *graph_manager) { if (args.graph_node->graph_run_async_listener_ != nullptr) { args.graph_node->graph_run_async_listener_->SetCallback(args.callback); } + Status ret; // parse inputs.dims to vector> dynamic_dims - if (graph_manager->ParseInputsDims(args.input_tensor) != SUCCESS) { - GELOGE(PARAM_INVALID, "Parse input dims failed."); + ret = graph_manager->ParseInputsDims(args.input_tensor); + if (ret != SUCCESS) { + ReturnError(graph_manager, args.callback, ret, "ParseInputsDims failed, thread exit."); + args.graph_node->Unlock(); return; } - Status ret; if (!args.graph_node->GetLoadFlag()) { ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); if (ret != SUCCESS || args.ge_root_model == nullptr) { @@ -2897,12 +2899,12 @@ void GraphManager::RunThread(GraphManager *graph_manager) { ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), args.input_tensor); args.graph_node->SetRunFlag(false); - args.graph_node->Unlock(); if (ret != SUCCESS) { - GELOGE(ret, "[GraphManager] Run graph async failed, graph_id=%u.", args.graph_id); - StopQueue(graph_manager); + 
ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit."); + args.graph_node->Unlock(); return; } + args.graph_node->Unlock(); GELOGI("[GraphManager] Run graph async success, graph_id=%u.", args.graph_id); } } diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c3647b93..9ab74d70 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1571,6 +1571,10 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, // Connect NetOutput directly void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set &dynamic_output_index, vector &dynamic_output_dims) { + if (!GetLocalOmgContext().dynamic_node_type.empty()) { + GELOGD("No need to get directly shape info of %s when train.", node->GetName().c_str()); + return; + } GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &netoutput_desc = node->GetOpDesc(); const auto &inputnode_to_netoutput = node->GetInAllNodes(); @@ -1578,9 +1582,6 @@ void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, if (dynamic_output_index.count(i) > 0) { continue; } - if (inputnode_to_netoutput.at(i)->GetType() == GETDYNAMICDIMS) { - continue; - } auto tensor_desc = netoutput_desc->GetInputDesc(i); auto shape = tensor_desc.GetShape().ToString(); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index c168e077..f33c2983 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -84,8 +84,10 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector &data_n if (op_desc->GetType() == DATA && op_desc->GetName() != kShapeDataName) { if (op_desc->GetName().find(kSubstrOfGetNextNosinkName) == string::npos) { data_nodes.emplace_back(input_node); + GELOGD("Name of 
data node is %s.", op_desc->GetName().c_str()); } else { getnext_nosink_nodes.emplace_back(input_node); + GELOGD("Name of getnext nosink is %s.", op_desc->GetName().c_str()); } } if (IsGetNextType(input_node)) { @@ -111,6 +113,8 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector &data_n GE_CHECK_NOTNULL(data_node->GetOpDesc()); auto output_shape = data_node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims(); auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; + GELOGD("The %zu data node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(), + formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str()); if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { GELOGI("No need to check sequence for constant."); continue; @@ -151,6 +155,8 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector &get for (size_t i = 0; i < data_count; ++i) { auto output_shape = data_node->GetOpDesc()->GetOutputDesc(i).GetShape().GetDims(); auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; + GELOGD("The %zu getnext node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(), + formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str()); if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { GELOGI("No need to check sequence for constant."); continue;