From 016c933a532ab3e9feda27403fd3158681c715d8 Mon Sep 17 00:00:00 2001 From: wangzhengjun Date: Fri, 30 Oct 2020 16:59:07 +0800 Subject: [PATCH 01/35] V200 support NAN INF --- ge/graph/build/model_builder.cc | 8 ++++++++ ge/graph/load/new_model_manager/davinci_model.cc | 11 ++++++++++- ge/graph/manager/graph_manager.cc | 22 ++++++++++++++++++++++ ge/init/gelib.cc | 6 ++++++ 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 593be7bb..52711f71 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -418,6 +418,14 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { return FAILED); GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_); + string fp_ceiling_mode; + if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { + if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { + GELOGE(FAILED, "Failed to set attr ATTR_FP_CEILING_MODE"); + return FAILED; + } + GELOGI("Set attr ATTR_FP_CEILING_MODE to model, value is %s.", fp_ceiling_mode.c_str()); + } string ge_core_type; Status ret = ge::GetContext().GetOption(kCoreType, ge_core_type); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index cb37182c..5ac825cc 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -676,7 +676,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size auto all_dump_model = GetDumpProperties().GetAllDumpModel(); bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { + bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || + findByOmName || findByModelName; + if (dump_l1fusion_op) { // malloc 2M for dump l1fusion op GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); @@ -690,6 +692,13 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); + string fp_ceiling_mode; + if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { + GELOGI("Get attr ATTR_FP_CEILING_MODE from model, value is %s.", fp_ceiling_mode.c_str()); + // mode 0: Do not perform saturation processing. By default, IEEE754 is used. + GE_CHK_RT_RET(rtSetCtxINFMode((fp_ceiling_mode != "0"))); + } + // collect profiling for ge if (ProfilingManager::Instance().ProfilingModelLoadOn()) { std::vector compute_graph_desc_info; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 4737955d..88a001a4 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -131,6 +131,22 @@ bool IsTailingOptimization() { GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default."); return false; } + +ge::Status CheckFpCeilingMode() { + static const std::unordered_set kValidFpCeilingMode = {"0", "1", "2"}; + string mode; + auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); + if (ret == ge::GRAPH_SUCCESS) { + if (kValidFpCeilingMode.count(mode) == 0) { + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", mode.c_str()); + return ge::GE_GRAPH_OPTIONS_INVALID; + } + GELOGI("The parameter fp_ceiling_mode is set to %s.", mode.c_str()); + return ge::SUCCESS; + } + GELOGW("The parameter fp_ceiling_mode is not set."); + return ge::SUCCESS; +} } // namespace namespace ge { @@ -166,6 +182,12 @@ Status GraphManager::Initialize(const std::map &options) { return ret; } + ret = CheckFpCeilingMode(); + if (ret != SUCCESS) { + GELOGE(ret, "[Initialize] Check fp-ceiling-mode options failed."); + return ret; + } + ret = graph_context_->Initialize(options); if (ret != SUCCESS) { GELOGE(ret, "[Initialize] GraphContext initialize failed."); diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 85a742b2..8a5cb610 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -56,6 +56,7 @@ const int kDefaultDeviceIdForInfer = -1; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); +const char *const kGlobalOptionFpCeilingModeDefault = "2"; } // namespace static std::shared_ptr instancePtr_ = nullptr; @@ -79,6 +80,11 @@ Status GELib::Initialize(const map &options) { return ret; } instancePtr_->SetDefaultPrecisionMode(new_options); + + if (new_options.find("ge.fpCeilingMode") == new_options.end()) { + new_options["ge.fpCeilingMode"] = kGlobalOptionFpCeilingModeDefault; + } + GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GE_TIMESTAMP_START(Init); From fd39b8714688aa6644c511b6d507b682a994e29a Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" <89594672@qq.com> Date: Thu, 12 Nov 2020 17:06:04 +0800 Subject: [PATCH 02/35] slice tensor no originshape --- ge/host_kernels/slice_kernel.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index fc98e8a5..e926b930 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -99,8 +99,9 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetOutputDesc(0); + GeTensorDesc output_tensor_desc(attr_output_tensor_desc); + output_tensor_desc.SetShape(output_dims); GeTensorPtr output_ptr = MakeShared(output_tensor_desc); if (output_ptr == nullptr) { GELOGW("make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str()); From e34b9ce6c0915c40fc4658dc96c2818411b6b0b5 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" <89594672@qq.com> Date: Thu, 12 Nov 2020 17:24:30 +0800 Subject: [PATCH 03/35] slice tensor no originshape --- ge/host_kernels/slice_kernel.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index e926b930..d3d43355 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -99,7 +99,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetOutputDesc(0); + auto attr_output_tensor_desc = attr->GetOutputDesc(0); GeTensorDesc output_tensor_desc(attr_output_tensor_desc); output_tensor_desc.SetShape(output_dims); GeTensorPtr output_ptr = MakeShared(output_tensor_desc); From 0a259c34993b6f32aebd3eb7a172754648c42f6e Mon Sep 17 00:00:00 2001 From: wuweikang Date: Thu, 12 Nov 2020 17:35:25 +0800 Subject: [PATCH 04/35] add op_debug_level in ir_builder_suppported_options --- inc/external/ge/ge_api_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 4cee1b6f..a1c468f5 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -330,6 +330,7 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL, + OP_DEBUG_LEVEL, DEBUG_DIR, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE}; From 593ddd7c7c2dc8fe5b617e0d042ab1736a45ca96 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" <89594672@qq.com> Date: Thu, 12 Nov 2020 17:58:17 +0800 Subject: [PATCH 05/35] slice tensor no originshape --- ge/graph/common/transop_util.cc | 8 ++++++++ ge/graph/common/transop_util.h | 2 ++ ge/graph/preprocess/graph_preprocess.cc | 29 +++++++++++++++++++++++++++++ ge/host_kernels/slice_kernel.cc | 3 ++- 4 files changed, 41 insertions(+), 1 deletion(-) mode change 100644 => 100755 ge/graph/common/transop_util.cc diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc old mode 100644 new mode 100755 index 684ef3dc..f57f56a8 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -81,5 +81,13 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { return false; } return true; + + std::string TransOpUtil::TransopMapToString() { + std::string buffer; + for (auto it = transop_index_map_.begin(); it != transop_index_map_.end(); ++it) { + buffer += it->first + ","; + } + return buffer.substr(0, buffer.size() -1); + } } } // namespace ge diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h index 8b10ad5c..3332e1fb 100644 --- a/ge/graph/common/transop_util.h +++ b/ge/graph/common/transop_util.h @@ -35,6 +35,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static bool CheckPrecisionLoss(const NodePtr &src_node); + static std::string TransopMapToString(); + private: TransOpUtil(); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index f90c0d80..6125e719 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -218,6 +218,9 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c auto index = TransOpUtil::GetTransOpDataIndex(node_type); if (index < 0) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19025", {"situation", "reason"}, + {"The trans node type[" + node_type + "]", "it must be " + TransOpUtil::TransopMapToString()}); GELOGE(INTERNAL_ERROR, "The trans node type %s does not exists", node_type.c_str()); return nullptr; } @@ -386,6 +389,8 @@ Status RecoverTransRoadForVar(const NodePtr &var, const VarTransRoad &road) { auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVar(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -418,6 +423,9 @@ Status RecoverTransRoadForVarRef(const std::set &nodes, const VarTransR auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVarRef(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage( + + "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -570,6 +578,8 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node std::string related_node_name; if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) { if (related_node_name.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15002", {"opname", "value", "reason"}, {data_node->GetName(), "flag", "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node flag, but the value is empty", data_node->GetName().c_str()); return INTERNAL_ERROR; @@ -581,6 +591,9 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node } } if (switchn_node == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15002", {"opname", "value", "reason"}, + {data_node->GetName(), related_node_name, "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can not find it on the graph", data_node->GetName().c_str(), related_node_name.c_str()); return INTERNAL_ERROR; @@ -681,6 +694,10 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No ge::GeShape old_shape = input->GetShape(); bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC)); if (!support) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str()); return FAILED; } @@ -761,6 +778,9 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(), formats::JoinToString(storage_shape).c_str()); } else { + ErrorManager::GetInstance().ATCReportErrMessage( + "15003", {"opname", "format"}, + {op_desc->GetName(), TypeUtils::FormatToSerialString(storage_format)}); GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set. " "node: [%s], storage_format [%s]", op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str()); @@ -899,9 +919,14 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { // check if is_output_adjust_hw_layout is set if (NeedUpdateFormatByOutputTypeParm(op_desc, index)) { if ((old_format != FORMAT_NCHW) && (old_format != FORMAT_NHWC) && (old_format != FORMAT_NC1HWC0)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0."); return FAILED; } + GeTensorDesc old_desc(old_shape, old_format, old_dtype); if (ProcessNetoutputNodeFp16Nc1hwc0DynShape(old_desc, net_output_input_desc, src_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0."); @@ -1034,6 +1059,10 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i } bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end()); if (!is_acceptable) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.", node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(), input_op_desc->GetType().c_str()); diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index d3d43355..c3274465 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -99,9 +99,10 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetOutputDesc(0); GeTensorDesc output_tensor_desc(attr_output_tensor_desc); - output_tensor_desc.SetShape(output_dims); + output_tensor_desc.SetShape(output_shape); GeTensorPtr output_ptr = MakeShared(output_tensor_desc); if (output_ptr == nullptr) { GELOGW("make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str()); From 5f16700db619eddfc1c723af84be008259f7d94a Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" <89594672@qq.com> Date: Thu, 12 Nov 2020 18:12:40 +0800 Subject: [PATCH 06/35] slice tensor no originshape --- ge/graph/common/transop_util.cc | 8 -------- ge/graph/common/transop_util.h | 2 -- ge/graph/preprocess/graph_preprocess.cc | 29 ----------------------------- 3 files changed, 39 deletions(-) diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc index f57f56a8..684ef3dc 100755 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -81,13 +81,5 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { return false; } return true; - - std::string TransOpUtil::TransopMapToString() { - std::string buffer; - for (auto it = transop_index_map_.begin(); it != transop_index_map_.end(); ++it) { - buffer += it->first + ","; - } - return buffer.substr(0, buffer.size() -1); - } } } // namespace ge diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h index 3332e1fb..8b10ad5c 100644 --- a/ge/graph/common/transop_util.h +++ b/ge/graph/common/transop_util.h @@ -35,8 +35,6 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static bool CheckPrecisionLoss(const NodePtr &src_node); - static std::string TransopMapToString(); - private: TransOpUtil(); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 6125e719..f90c0d80 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -218,9 +218,6 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c auto index = TransOpUtil::GetTransOpDataIndex(node_type); if (index < 0) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19025", {"situation", "reason"}, - {"The trans node type[" + node_type + "]", "it must be " + TransOpUtil::TransopMapToString()}); GELOGE(INTERNAL_ERROR, "The trans node type %s does not exists", node_type.c_str()); return nullptr; } @@ -389,8 +386,6 @@ Status RecoverTransRoadForVar(const NodePtr &var, const VarTransRoad &road) { auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVar(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -423,9 +418,6 @@ Status RecoverTransRoadForVarRef(const std::set &nodes, const VarTransR auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVarRef(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage( - - "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -578,8 +570,6 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node std::string related_node_name; if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) { if (related_node_name.empty()) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E15002", {"opname", "value", "reason"}, {data_node->GetName(), "flag", "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node flag, but the value is empty", data_node->GetName().c_str()); return INTERNAL_ERROR; @@ -591,9 +581,6 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node } } if (switchn_node == nullptr) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E15002", {"opname", "value", "reason"}, - {data_node->GetName(), related_node_name, "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can not find it on the graph", data_node->GetName().c_str(), related_node_name.c_str()); return INTERNAL_ERROR; @@ -694,10 +681,6 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No ge::GeShape old_shape = input->GetShape(); bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC)); if (!support) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19014", {"opname", "value", "reason"}, - {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", - "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str()); return FAILED; } @@ -778,9 +761,6 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(), formats::JoinToString(storage_shape).c_str()); } else { - ErrorManager::GetInstance().ATCReportErrMessage( - "15003", {"opname", "format"}, - {op_desc->GetName(), TypeUtils::FormatToSerialString(storage_format)}); GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set. " "node: [%s], storage_format [%s]", op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str()); @@ -919,14 +899,9 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { // check if is_output_adjust_hw_layout is set if (NeedUpdateFormatByOutputTypeParm(op_desc, index)) { if ((old_format != FORMAT_NCHW) && (old_format != FORMAT_NHWC) && (old_format != FORMAT_NC1HWC0)) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19014", {"opname", "value", "reason"}, - {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", - "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0."); return FAILED; } - GeTensorDesc old_desc(old_shape, old_format, old_dtype); if (ProcessNetoutputNodeFp16Nc1hwc0DynShape(old_desc, net_output_input_desc, src_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0."); @@ -1059,10 +1034,6 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i } bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end()); if (!is_acceptable) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19014", {"opname", "value", "reason"}, - {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", - "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.", node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(), input_op_desc->GetType().c_str()); From aa6dcd9262efa53fd311c80eedc91e23a6fe2783 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 12 Nov 2020 19:59:08 +0800 Subject: [PATCH 07/35] Fix Label for dynamic graph. --- ge/graph/build/label_allocator.cc | 31 +++++++++++++++++++++---------- ge/graph/build/model_builder.cc | 4 ++-- ge/graph/passes/memcpy_addr_async_pass.cc | 7 ++++++- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc index fad7d0c2..51f31003 100644 --- a/ge/graph/build/label_allocator.cc +++ b/ge/graph/build/label_allocator.cc @@ -32,11 +32,6 @@ Status LabelAllocator::AssignFunctionalLabels() { return INTERNAL_ERROR; } - if (compute_graph_->GetGraphUnknownFlag()) { - GELOGD("Graph[%s] is unknown graph, skip label allocator.", compute_graph_->GetName().c_str()); - return SUCCESS; - } - // Add label task for sub graph. GELOGI("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str()); std::set functional_nodes; @@ -62,7 +57,7 @@ Status LabelAllocator::AssignFunctionalLabels() { } (void)AttrUtils::SetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_index); - GELOGI("AssignFunctionalLabels success."); + GELOGI("AssignFunctionalLabels success, Num: %u.", label_index); return SUCCESS; } @@ -72,13 +67,29 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::setGetParentNode(); - if (parent == nullptr) { - GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", graph->GetName().c_str()); + if (graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown graph, skip label allocator.", graph->GetName().c_str()); + return true; + } + + NodePtr func_node = graph->GetParentNode(); + if (func_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Parent functional node not set: %s.", graph->GetName().c_str()); return false; } - (void)functional_nodes.insert(parent); // unique functional node. + ComputeGraphPtr owner_graph = func_node->GetOwnerComputeGraph(); + if (owner_graph == nullptr) { + GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", func_node->GetName().c_str()); + return false; + } + + if (owner_graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown graph, skip label allocator.", owner_graph->GetName().c_str()); + return true; + } + + (void)functional_nodes.insert(func_node); // unique functional node. return true; } } // namespace ge diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 56a5b4dc..f382c24a 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -690,8 +690,8 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GE_TIMESTAMP_END(AssignLogicalStreams, "GraphBuilder::AssignLogicalStreams"); // Assign functional op labels. - label_num_ = 0; - (void)AttrUtils::GetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_num_); + auto root_graph = GraphUtils::FindRootGraph(compute_graph_); + (void)AttrUtils::GetInt(*root_graph, ATTR_MODEL_LABEL_NUM, label_num_); GE_TIMESTAMP_START(AssignMemory); MemoryAssigner mem_assigner(compute_graph_); diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc index a9e3f4c4..8bb16286 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/ge/graph/passes/memcpy_addr_async_pass.cc @@ -25,6 +25,10 @@ namespace ge { Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); + if (graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str()); + return SUCCESS; + } int64_t value = 0; rtError_t rt_ret = rtGetRtCapability(FEATURE_TYPE_MEMCPY, MEMCPY_INFO_SUPPORT_ZEROCOPY, &value); @@ -201,9 +205,10 @@ NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &gr const OutDataAnchorPtr &out_data_anchor, const NodePtr &out_of_user_data) { GELOGD("Start CreateMemcpyAddrAsyncNode."); + static uint32_t new_node_index = 0; OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_CHK_BOOL_EXEC(pre_op_desc != nullptr, return nullptr, "Op_desc of pre node is invalid."); - std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC; + std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC + "_" + std::to_string(new_node_index++); OpDescPtr op_desc = MakeShared(node_name, MEMCPYADDRASYNC); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); From 61703b3b52983b9d2c5030c5e06e3f87a69b9118 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Thu, 12 Nov 2020 20:20:49 +0800 Subject: [PATCH 08/35] update metadef parser --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index 0d0d2fb0..d090ec83 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 0d0d2fb016d44f9a575ad8f8c2cb8858bba3acec +Subproject commit d090ec8335c091d7481675ed99c50e83c4dae853 diff --git a/parser b/parser index 84ea76e9..0b7d8c9f 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 84ea76e94054fcfac5b80ded6e0ec4db1f37d3e0 +Subproject commit 0b7d8c9ffba6de83c4232db4f2e105ba51cd6296 From c3f4b95ffdc28db503d91ae28bb2d706e3899e1e Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 11 Nov 2020 21:39:49 +0800 Subject: [PATCH 09/35] Decoupling of aicore node executor and aicore task compiler --- .../node_executor/aicore/aicore_node_executor.cc | 42 ++++++++++++++-------- .../node_executor/aicore/aicore_node_executor.h | 39 ++++++++++++++++++-- .../node_executor/aicore/aicore_task_compiler.cc | 16 +++++++-- .../node_executor/aicore/aicore_task_compiler.h | 8 +++-- 4 files changed, 82 insertions(+), 23 deletions(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 4c32f131..dbd784c6 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -17,8 +17,6 @@ #include "aicore_node_executor.h" #include "cce/taskdown_common.hpp" #include "hybrid/executor/hybrid_execution_context.h" -#include "init/gelib.h" -#include "hybrid/executor/hybrid_execution_context.h" namespace ge { namespace hybrid { @@ -28,19 +26,10 @@ AiCoreNodeTask::AiCoreNodeTask(std::vector> &&task } Status AiCoreNodeExecutor::Initialize() { - auto ge_lib = GELib::GetInstance(); - GE_CHECK_NOTNULL(ge_lib); - if (!ge_lib->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed."); - return GE_CLI_GE_NOT_INITIALIZED; + compiler_ = TaskCompilerFactory::GetInstance().GetTaskCompiler(); + if (compiler_ != nullptr) { + GE_CHK_STATUS_RET(compiler_->Initialize(), "Failed to init aicore task compiler."); } - - auto &kernel_manager = ge_lib->OpsKernelManagerObj(); - auto aic_ops_store = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); - GE_CHECK_NOTNULL(aic_ops_store); - - compiler_.reset(new(std::nothrow)AiCoreTaskCompiler(aic_ops_store)); - GE_CHECK_NOTNULL(compiler_); return SUCCESS; } @@ -120,6 +109,12 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, GE_CHECK_NOTNULL(op_desc); GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", node->GetName().c_str()); + auto ori_node_name = node->GetName(); + if (compiler_ == nullptr) { + GELOGE(FAILED, "[%s] Can not find any valid aicore task compiler.", ori_node_name.c_str()); + return FAILED; + } + AiCoreNodeTaskRegistry ®istry = AiCoreNodeTaskRegistry::GetInstance(); std::string shape_key; GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str()); @@ -133,7 +128,6 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, } std::vector task_defs; - auto ori_node_name = node->GetName(); op_desc->SetName(ori_node_name + "_" + shape_key); GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", ori_node_name.c_str()); op_desc->SetName(ori_node_name); @@ -239,5 +233,23 @@ bool AiCoreNodeTask::IsNoOp(TaskContext &task_context) { return true; } + +TaskCompilerFactory &TaskCompilerFactory::GetInstance() { + static TaskCompilerFactory instance; + return instance; +} + +void TaskCompilerFactory::Register(CreateFn fn) { + compiler_func_ = fn; +} + +std::unique_ptr TaskCompilerFactory::GetTaskCompiler() { + auto compiler_instance = std::unique_ptr(compiler_func_()); + return compiler_instance; +} + +CompilerFunctionRegistrar::CompilerFunctionRegistrar(CreateFn fn) { + TaskCompilerFactory::GetInstance().Register(fn); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h index 374782dc..989090e9 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -18,13 +18,21 @@ #define GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "hybrid/node_executor/aicore/aicore_task_compiler.h" #include "hybrid/node_executor/node_executor.h" #include #include namespace ge { namespace hybrid { + +class TaskCompiler { + public: + TaskCompiler() = default; + virtual ~TaskCompiler() = default; + virtual Status CompileOp(const NodePtr &node, std::vector &tasks) = 0; + virtual Status Initialize() = 0; +}; + class AiCoreNodeTaskRegistry { public: ~AiCoreNodeTaskRegistry() = default; @@ -65,8 +73,33 @@ class AiCoreNodeExecutor : public NodeExecutor { private: static Status GenNodeKey(const NodePtr &node, std::string &node_key); - std::unique_ptr compiler_; + std::unique_ptr compiler_; +}; + +using CreateFn = TaskCompiler *(*)(); +class TaskCompilerFactory { + public: + static TaskCompilerFactory &GetInstance(); + void Register(CreateFn fn); + std::unique_ptr GetTaskCompiler(); + + private: + CreateFn compiler_func_; +}; + +class CompilerFunctionRegistrar { + public: + CompilerFunctionRegistrar(CreateFn fn); + ~CompilerFunctionRegistrar() = default; }; } // namespace hybrid } // namespace ge -#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ + +#define REGISTER_TASK_COMPILER(compiler) \ + static ::ge::hybrid::CompilerFunctionRegistrar register_compiler_function \ + __attribute__((unused)) = \ + ::ge::hybrid::CompilerFunctionRegistrar([]()->::ge::hybrid::TaskCompiler* { \ + return new (std::nothrow) compiler(); \ + }) \ + +#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc index ed92ada7..26a41737 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -18,6 +18,7 @@ #include "framework/common/debug/log.h" #include "graph/debug/ge_attr_define.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "init/gelib.h" namespace ge { namespace hybrid { @@ -25,11 +26,22 @@ namespace { uintptr_t kWeightBase = 0x10000000; uintptr_t kMemBase = 0x20000000; uint64_t kFakeSize = 0x10000000UL; +REGISTER_TASK_COMPILER(AiCoreTaskCompiler); } std::mutex AiCoreTaskCompiler::mu_; -AiCoreTaskCompiler::AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store) - : aic_kernel_store_(std::move(aic_kernel_store)) {} +Status AiCoreTaskCompiler::Initialize() { + auto ge_lib = GELib::GetInstance(); + GE_CHECK_NOTNULL(ge_lib); + if (!ge_lib->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed."); + return GE_CLI_GE_NOT_INITIALIZED; + } + auto &kernel_manager = ge_lib->OpsKernelManagerObj(); + aic_kernel_store_ = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); + GE_CHECK_NOTNULL(aic_kernel_store_); + return SUCCESS; +} Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const { GE_CHECK_NOTNULL(node); diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index 38ed458f..bf948349 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -19,15 +19,17 @@ #include #include "opskernel_manager/ops_kernel_manager.h" +#include "aicore_node_executor.h" namespace ge { namespace hybrid { -class AiCoreTaskCompiler { +class AiCoreTaskCompiler : public TaskCompiler { public: - explicit AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store); + AiCoreTaskCompiler() = default; ~AiCoreTaskCompiler() = default; - Status CompileOp(const NodePtr &node, std::vector &tasks); + Status CompileOp(const NodePtr &node, std::vector &tasks) override; + Status Initialize() override; private: Status DoCompileOp(const NodePtr &node) const; Status DoGenerateTask(const Node &node, std::vector &tasks); From bba5e37edd34b05ceddb6ec90ab13f6306db0c04 Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Thu, 12 Nov 2020 21:53:51 +0800 Subject: [PATCH 10/35] revert agnostic pass --- ge/graph/preprocess/graph_preprocess.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 98371426..f90c0d80 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -117,7 +117,6 @@ #include "graph/passes/variable_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" -#include "graph/passes/mark_agnostic_pass.h" namespace ge { @@ -1627,7 +1626,6 @@ Status GraphPrepare::PrepareOptimize() { try { (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); - (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass", new MarkAgnosticPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; From 8f08d868e73bcbb1b9ae7147f490a0848b848196 Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Tue, 10 Nov 2020 16:05:59 +0800 Subject: [PATCH 11/35] The model subscription function of the profile module is added to ge. --- ge/common/profiling/profiling_manager.cc | 125 ++++++++++++++++++-- ge/common/profiling/profiling_manager.h | 20 +++- ge/common/types.cc | 1 + ge/executor/ge_executor.cc | 13 +++ ge/graph/load/new_model_manager/data_dumper.h | 1 + ge/graph/load/new_model_manager/davinci_model.cc | 141 +++++++++-------------- ge/graph/load/new_model_manager/davinci_model.h | 5 +- ge/graph/load/new_model_manager/model_manager.cc | 93 +++++++++++++-- ge/graph/load/new_model_manager/model_manager.h | 5 + ge/hybrid/executor/worker/execution_engine.cc | 4 +- inc/framework/common/types.h | 9 +- inc/framework/executor/ge_executor.h | 24 ++-- 12 files changed, 314 insertions(+), 127 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index e21bcb25..6e01ee87 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -21,6 +21,7 @@ #include "framework/common/string_util.h" #include "graph/ge_context.h" #include "runtime/base.h" +#include "graph/load/new_model_manager/davinci_model.h" namespace { const char *const kJobID = "jobID"; @@ -39,10 +40,12 @@ const std::string kConfigNumsdev = "devNums"; const std::string kConfigDevIdList = "devIdList"; const std::string kProfStart = "prof_start"; const std::string kProfStop = "prof_stop"; +const std::string kProfModelSubscribe = "prof_model_subscribe"; +const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; } // namespace namespace ge { -ProfilingManager::ProfilingManager() {} +ProfilingManager::ProfilingManager() : subscribe_count_(0) {} ProfilingManager::~ProfilingManager() {} @@ -54,6 +57,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); + subscribe_count_ = 0; job_id_ = options.job_id; GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); @@ -382,7 +386,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( - const std::vector &task_desc_info, const int32_t &device_id) { + uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); if (reporter == nullptr) { @@ -401,7 +405,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(op_name).append(" ") .append(std::to_string(block_dim).append(" ") .append(std::to_string(task_id)).append(" ") - .append(std::to_string(stream_id)).append("\n")); + .append(std::to_string(stream_id)).append(" ") + .append(std::to_string(model_id)).append("\n")); Msprof::Engine::ReporterData reporter_data{}; reporter_data.deviceId = device_id; @@ -425,7 +430,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( - const std::vector &compute_graph_desc_info, const int32_t &device_id) { + uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); @@ -483,6 +488,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin data.append("\""); } + data.append(" model_id:").append(std::to_string(model_id)); + data.append("\n"); Msprof::Engine::ReporterData reporter_data{}; @@ -537,7 +544,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( - const std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { + uint32_t model_id, const std::vector &task_desc_info, + const std::vector &compute_graph_desc_info, + bool check_device) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -546,7 +555,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGI("current logic_device_id:%d", logic_device_id); - if (!is_acl_api_mode_) { + if (check_device) { auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (ret == device_id_.end()) { GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); @@ -554,9 +563,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr } } GELOGI("start ProfilingTaskDescInfo."); - ProfilingTaskDescInfo(task_desc_info, logic_device_id); + ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); GELOGI("start ProfilingGraphDescInfo."); - ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); + ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); GELOGI("Report profiling data for GE end."); #endif } @@ -581,6 +590,105 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP return module; } +void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, + uint32_t device_id, + uint64_t module) { +#ifdef DAVINCI_SUPPORT_PROFILING + if (prof_type == kProfModelSubscribe) { + if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { + subs_dev_module_[device_id].subscribe_count++; + } else { + DeviceSubsInfo dev_info; + dev_info.module = module; + dev_info.subscribe_count = 1; + subs_dev_module_[device_id] = dev_info; + } + } else if (prof_type == kProfModelUnsubscribe) { + if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { + if (subs_dev_module_[device_id].subscribe_count > 0) { + subs_dev_module_[device_id].subscribe_count--; + } + } + } else { + GELOGI("No need to update device_id module map."); + } +#endif +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelSubscribe( + uint64_t module, void *model) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; + if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { + // register framework to profiling + int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); + if (result != SUCCESS) { + GELOGE(FAILED, "Register profiling engine failed."); + return FAILED; + } + GELOGI("Prof subscribe: model load profiling on."); + } + subscribe_count_++; + + auto davinci_model = static_cast(model); + int32_t device_num = 1; + uint32_t device[1]; + device[0] = davinci_model->GetDeviceId(); + rtError_t rt_ret = rtProfilerStart(module, device_num, device); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler start failed."); + return FAILED; + } + UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module); + + // Report profiling data + Status p_ret = davinci_model->ReportProfilingData(false); + if (p_ret != SUCCESS) { + GELOGE(p_ret, "Report profiling data failed."); + return p_ret; + } +#endif + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelUnsubscribe( + void *model) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + if (subscribe_count_ == 0) { + GELOGW("The profiler has not been subscribed, you do not need to cannel the subscription."); + return SUCCESS; + } + + auto davinci_model = static_cast(model); + int32_t dev_num = 1; + uint32_t device[1]; + device[0] = davinci_model->GetDeviceId(); + auto iter = subs_dev_module_.find(device[0]); + if (iter != subs_dev_module_.end()) { + if (subs_dev_module_[device[0]].subscribe_count == 1) { + rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler stop failed."); + return FAILED; + } + } + UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module); + } + + subscribe_count_--; + if (subscribe_count_ == 0) { + int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); + if (ret != SUCCESS) { + GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); + return ret; + } + } +#endif + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); @@ -748,6 +856,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt device_id_ptr[i] = static_cast(device_list[i]); } GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); + rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Runtime profiler config proc failed."); diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 8fb59216..66cefc32 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -39,6 +39,10 @@ namespace { const std::string GE_PROFILING_MODULE = "Framework"; } // namespace namespace ge { +struct DeviceSubsInfo { + uint64_t module; + uint32_t subscribe_count; +}; // register Plugin class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { public: @@ -73,6 +77,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ge::Status InitFromOptions(const Options &options); ge::Status InitFromAclCfg(const std::string &config); ge::Status StartProfiling(int32_t iter, int32_t device_id); + void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); + ge::Status ProfModelSubscribe(uint64_t module, void *model); + ge::Status ProfModelUnsubscribe(void *model); ge::Status ProfInit(uint64_t module); ge::Status ProfFinalize(); ge::Status ProfStartProfiling(uint64_t module, const std::map &config_para); @@ -84,13 +91,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern + bool IsAclApiMode() const { return is_acl_api_mode_; } int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } - void ReportProfilingData(const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info); + void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, + const std::vector &compute_graph_desc_info, + bool check_device); void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, Msprof::Engine::ReporterData &reporter_data); - void ProfilingTaskDescInfo(const std::vector &task_desc_info, const int32_t &device_id); - void ProfilingGraphDescInfo(const std::vector &compute_graph_desc_info, + void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, + const int32_t &device_id); + void ProfilingGraphDescInfo(uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id); void SetProfilingConfig(const string &profiling_cfg); vector GetProfilingDeviceId() const { return device_id_; } @@ -122,6 +132,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { string task_trace_conf_; const ProfilingEngineImpl engine_; map device_id_module_map_; // key: device_id, value: profiling on module + map subs_dev_module_; // key: device_id, value: profiling on module + uint32_t subscribe_count_; std::mutex mutex_; }; } // namespace ge diff --git a/ge/common/types.cc b/ge/common/types.cc index 0d10f8b3..7ae0daa3 100755 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -54,6 +54,7 @@ const std::map PROFILE_COMPONENT_MAP{ {"runtime", RTS_PROFILE}, }; const std::string PROFILE_CONFIG = "config"; +const std::string PROFILE_MODEL_ID = "modelId"; REGISTER_OPTYPE_DEFINE(DATA, "Data"); REGISTER_OPTYPE_DEFINE(AIPPDATA, "AippData"); diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index ad2879c2..967bf420 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -1062,6 +1062,19 @@ Status GeExecutor::ReleaseSingleOpResource(void *stream) { return SingleOpManager::GetInstance().ReleaseResource(stream); } +Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) { + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + auto davinci_model = model_manager->GetModel(model_id); + if (davinci_model == nullptr) { + GELOGE(FAILED, "Model id: %d is invaild or model is not loaded.", model_id); + return FAILED; + } + + device_id = davinci_model->GetDeviceId(); + return SUCCESS; +} + Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { std::vector> batch_info; int32_t dynamic_type = static_cast(FIXED); diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h index 2acb963b..46ead310 100755 --- a/ge/graph/load/new_model_manager/data_dumper.h +++ b/ge/graph/load/new_model_manager/data_dumper.h @@ -86,6 +86,7 @@ class DataDumper { void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } const DumpProperties &GetDumpProperties() const { return dump_properties_; } bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; + const std::vector &GetAllOpDescInfo() const { return op_desc_info_; } // Dump exception info Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index cb37182c..318035c3 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -258,7 +258,6 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) { /// void DavinciModel::Shrink() { ge_model_.reset(); // delete object. - op_list_.clear(); } Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { @@ -653,18 +652,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_IF_BOOL_EXEC(IsBroadCastOpData(node), (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); } - // for profiling - op_name_map_ = compute_graph->GetGraphOpName(); - - vector op_name; - GE_IF_BOOL_EXEC(ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name), - GELOGI("get str of task_index_op_name")); - if (op_name_map_.empty()) { - for (size_t idx = 0; idx < op_name.size(); idx++) { - op_name_map_[idx] = op_name[idx]; - } - GELOGI("Infer profiling: op_name_size(%zu)", op_name.size()); - } GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); @@ -691,15 +678,13 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); // collect profiling for ge - if (ProfilingManager::Instance().ProfilingModelLoadOn()) { - std::vector compute_graph_desc_info; - Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); - if (ret1 != SUCCESS) { - GELOGE(ret1, "GetComputeGraphInfo failed."); - return ret1; + auto &profiling_manager = ProfilingManager::Instance(); + if (profiling_manager.ProfilingModelLoadOn()) { + Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode()); + if (p_ret != SUCCESS) { + GELOGE(p_ret, "Report profiling data failed."); + return p_ret; } - ProfilingManager::Instance().ReportProfilingData(GetTaskDescInfo(), compute_graph_desc_info); - GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed."); } Shrink(); @@ -707,6 +692,20 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size return ret; } +Status DavinciModel::ReportProfilingData(bool check_device) { + std::vector compute_graph_desc_info; + Status ret = GetComputeGraphInfo(compute_graph_desc_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetComputeGraphInfo failed."); + return ret; + } + ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info, check_device); + GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); + op_list_.clear(); + + return SUCCESS; +} + /// /// @ingroup ge /// @brief Travel all nodes and determine if destruction is required. @@ -2900,34 +2899,25 @@ Status DavinciModel::DistributeTask() { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } - // get op_name by task_index - if (task->GetCtx() != nullptr) { - auto iter = op_name_map_.find(task_index); - if (iter == op_name_map_.end()) { - continue; - } - - // else task index is found in op_name_map_ - TaskDescInfo task_desc_info; - string op_name = op_name_map_[task_index]; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op_name; - task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - task_desc_info_.emplace_back(task_desc_info); - } + // Load task info for profiling + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + task_desc_info_.emplace_back(task_desc_info); } } } @@ -3817,50 +3807,31 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea main_follow_stream_mapping_[main_stream_id].emplace_back(stream); } -Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector &graph_desc_info) { +Status DavinciModel::GetComputeGraphInfo(vector &graph_desc_info) { GELOGI("GetComputeGraphInfo start."); - for (auto &node : graph->GetAllNodes()) { + auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); + for (auto &op_desc : all_op_desc) { ComputeGraphDescInfo compute_graph_info; - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is nullptr."); - return PARAM_INVALID; + if (!om_name_.empty()) { + compute_graph_info.model_name = om_name_; + } else { + compute_graph_info.model_name = name_; } + compute_graph_info.op_name = op_desc.op_name; + compute_graph_info.op_type = op_desc.op_type; + compute_graph_info.input_format = op_desc.input_format; + compute_graph_info.input_shape = op_desc.input_shape; + compute_graph_info.input_data_type = op_desc.input_data_type; + compute_graph_info.output_format = op_desc.output_format; + compute_graph_info.output_shape = op_desc.output_shape; + compute_graph_info.output_data_type = op_desc.output_data_type; - auto op_mode = static_cast(domi::ImplyType::INVALID); - if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && - op_mode == static_cast(domi::ImplyType::TVM)) { - if (!om_name_.empty()) { - compute_graph_info.model_name = om_name_; - } else { - compute_graph_info.model_name = name_; - } - compute_graph_info.op_name = op_desc->GetName(); - compute_graph_info.op_type = op_desc->GetType(); - - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); - } - - for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op_desc->GetOutputDesc(j); - compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); - compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); - } - - graph_desc_info.emplace_back(compute_graph_info); - } + graph_desc_info.emplace_back(compute_graph_info); } GELOGI("GetComputeGraphInfo end."); return SUCCESS; } + void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 964057a4..ccf6ff25 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -439,6 +439,8 @@ class DavinciModel { Status SinkTimeProfile(const InputData ¤t_data); + Status ReportProfilingData(bool check_device = true); + void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); } @@ -830,7 +832,7 @@ class DavinciModel { Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); // get desc info of graph for profiling - Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector &graph_desc_info); + Status GetComputeGraphInfo(vector &graph_desc_info); void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); @@ -949,7 +951,6 @@ class DavinciModel { std::map used_tbe_handle_map_; // for profiling task and graph info - std::map op_name_map_; std::vector task_desc_info_; int64_t maxDumpOpNum_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index ec111c3d..a286ff5c 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -43,6 +43,8 @@ const std::string kCmdTypeProfInit = "prof_init"; const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; const std::string kCmdTypeProfStop = "prof_stop"; +const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe"; +const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe"; const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; struct CustAicpuSoBuf { @@ -334,11 +336,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + - timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond - davinci_model->SetProfileTime(MODEL_LOAD_END); - } + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond + davinci_model->SetProfileTime(MODEL_LOAD_END); } while (0); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); @@ -565,7 +565,9 @@ Status ModelManager::HandleCommand(const Command &command) { {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, - {kCmdTypeProfStop, HandleProfStopCommand}}; + {kCmdTypeProfStop, HandleProfStopCommand}, + {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand}, + {kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}}; auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { @@ -591,6 +593,77 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) { return SUCCESS; } +Status ModelManager::GetModelByCmd(const Command &command, + std::shared_ptr &davinci_model) { + if (command.cmd_params.size() < kCmdParSize) { + GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.", + command.cmd_type.c_str()); + return PARAM_INVALID; + } + + std::string map_key = command.cmd_params[0]; + std::string value = command.cmd_params[1]; + if (map_key == PROFILE_MODEL_ID) { + int32_t model_id = 0; + try { + model_id = std::stoi(value); + } catch (std::invalid_argument &) { + GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str()); + return PARAM_INVALID; + } catch (std::out_of_range &) { + GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str()); + return PARAM_INVALID; + } catch (...) { + GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str()); + return FAILED; + } + + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + davinci_model = model_manager->GetModel(static_cast(model_id)); + if (davinci_model == nullptr) { + GELOGE(FAILED, "Model id: %d is invaild or model is not loaded.", model_id); + return FAILED; + } + } else { + GELOGE(FAILED, "The model_id parameter is not found in the command."); + return FAILED; + } + + return SUCCESS; +} + +Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) { + std::shared_ptr davinci_model = nullptr; + Status ret = GetModelByCmd(command, davinci_model); + if (ret != SUCCESS) { + return ret; + } + + if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index, + static_cast(davinci_model.get())) != SUCCESS) { + GELOGE(FAILED, "Handle prof model subscribe failed."); + return FAILED; + } + + return SUCCESS; +} + +Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) { + std::shared_ptr davinci_model = nullptr; + Status ret = GetModelByCmd(command, davinci_model); + if (ret != SUCCESS) { + return ret; + } + + if (ProfilingManager::Instance().ProfModelUnsubscribe(static_cast(davinci_model.get())) != SUCCESS) { + GELOGE(FAILED, "Handle prof model unsubscribe failed."); + return FAILED; + } + + return SUCCESS; +} + Status ModelManager::HandleProfInitCommand(const Command &command) { uint64_t module_index = command.module_index; if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { @@ -973,11 +1046,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGI("Parse model %u success.", model_id); - if (ProfilingManager::Instance().ProfilingModelLoadOn()) { - davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + - timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond - davinci_model->SetProfileTime(MODEL_LOAD_END); - } + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond + davinci_model->SetProfileTime(MODEL_LOAD_END); GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++); return SUCCESS; diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index d6a89d6b..8d46e578 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -158,10 +158,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { static ge::Status HandleAclProfilingCommand(const Command &command); static ge::Status HandleProfileCommand(const Command &command); static ge::Status HandleDumpCommand(const Command &command); + static ge::Status HandleProfModelSubscribeCommand(const Command &command); + static ge::Status HandleProfModelUnsubscribeCommand(const Command &command); static ge::Status HandleProfInitCommand(const Command &command); static ge::Status HandleProfFinalizeCommand(const Command &command); static ge::Status HandleProfStartCommand(const Command &command); static ge::Status HandleProfStopCommand(const Command &command); + + static ge::Status GetModelByCmd(const Command &command, + std::shared_ptr &davinci_model); /// /// @ingroup domi_ome /// @brief get model memory usage diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 7dc65433..0b0f4090 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -257,7 +257,9 @@ Status NodeDoneCallback::ProfilingReport() { return profiling_ret; } - ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info); + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); return SUCCESS; } diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 0644b0f2..038b1cf6 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -70,6 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map PROFILE_COMPONENT_MAP; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; @@ -567,10 +568,10 @@ enum ModelCheckType { /// @brief dynamic input type /// enum DynamicInputType { - FIXED = 0, // default mode - DYNAMIC_BATCH = 1, - DYNAMIC_IMAGE = 2, - DYNAMIC_DIMS = 3 + FIXED = 0, // default mode + DYNAMIC_BATCH = 1, + DYNAMIC_IMAGE = 2, + DYNAMIC_DIMS = 3 }; /// diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index ba90fd03..17dbf928 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -38,14 +38,14 @@ class DynamicSingleOp; struct RunModelData { uint32_t index; // Data index uint32_t modelId; - std::vector blobs; // All input/output data buffer - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint64_t request_id = 0; // Request ID - uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 - uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 - uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 - std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0; // Request ID + uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty }; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { @@ -264,14 +264,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, - const std::vector &input_desc, - const std::vector &inputs, - std::vector &output_desc, + static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, std::vector &outputs); static ge::Status ReleaseSingleOpResource(void *stream); + static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, From 66b20884dae75815f56dbaf99fc04c13085387a3 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Fri, 13 Nov 2020 17:58:44 +0800 Subject: [PATCH 12/35] error message add --- ge/graph/common/transop_util.cc | 8 +++ ge/graph/common/transop_util.h | 2 + ge/graph/preprocess/graph_preprocess.cc | 64 ++++++++++++++++++++-- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 6 ++ .../preprocess/insert_op/util_insert_aipp_op.cc | 42 ++++++++++---- ge/graph/preprocess/multi_batch_copy_graph.cc | 2 + ge/graph/preprocess/multi_batch_options.cc | 4 ++ 7 files changed, 112 insertions(+), 16 deletions(-) diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc index 684ef3dc..f57f56a8 100755 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -81,5 +81,13 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { return false; } return true; + + std::string TransOpUtil::TransopMapToString() { + std::string buffer; + for (auto it = transop_index_map_.begin(); it != transop_index_map_.end(); ++it) { + buffer += it->first + ","; + } + return buffer.substr(0, buffer.size() -1); + } } } // namespace ge diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h index 8b10ad5c..3332e1fb 100644 --- a/ge/graph/common/transop_util.h +++ b/ge/graph/common/transop_util.h @@ -35,6 +35,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static bool CheckPrecisionLoss(const NodePtr &src_node); + static std::string TransopMapToString(); + private: TransOpUtil(); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 98371426..7049fba1 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -128,6 +128,9 @@ static std::map output_type_str_to_datatype = { {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; const char *const kMbatchSwitchnName = "mbatch-switch-name"; +const char *const kConstError1 = "Const is invalid scalar tensor."; +const char *const kConstError2 = "Const is invalid vector scalar."; +const char *const kConstError3 = "Const input data size is not equal with tensor desc shape"; // the size of user defined output datatype or format string after split by ":". const size_t kUserDefinedElementCount = 2; @@ -219,6 +222,9 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c auto index = TransOpUtil::GetTransOpDataIndex(node_type); if (index < 0) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19025", {"situation", "reason"}, + {"The trans node type[" + node_type + "]", "it must be " + TransOpUtil::TransopMapToString()}); GELOGE(INTERNAL_ERROR, "The trans node type %s does not exists", node_type.c_str()); return nullptr; } @@ -387,6 +393,8 @@ Status RecoverTransRoadForVar(const NodePtr &var, const VarTransRoad &road) { auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVar(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -419,6 +427,8 @@ Status RecoverTransRoadForVarRef(const std::set &nodes, const VarTransR auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVarRef(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -571,6 +581,8 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node std::string related_node_name; if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) { if (related_node_name.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15002", {"opname", "value", "reason"}, {data_node->GetName(), "flag", "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node flag, but the value is empty", data_node->GetName().c_str()); return INTERNAL_ERROR; @@ -582,6 +594,9 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node } } if (switchn_node == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15002", {"opname", "value", "reason"}, + {data_node->GetName(), related_node_name, "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can not find it on the graph", data_node->GetName().c_str(), related_node_name.c_str()); return INTERNAL_ERROR; @@ -682,6 +697,10 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No ge::GeShape old_shape = input->GetShape(); bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC)); if (!support) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str()); return FAILED; } @@ -762,6 +781,9 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(), formats::JoinToString(storage_shape).c_str()); } else { + ErrorManager::GetInstance().ATCReportErrMessage( + "15003", {"opname", "format"}, + {op_desc->GetName(), TypeUtils::FormatToSerialString(storage_format)}); GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set. " "node: [%s], storage_format [%s]", op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str()); @@ -900,9 +922,14 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { // check if is_output_adjust_hw_layout is set if (NeedUpdateFormatByOutputTypeParm(op_desc, index)) { if ((old_format != FORMAT_NCHW) && (old_format != FORMAT_NHWC) && (old_format != FORMAT_NC1HWC0)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0."); return FAILED; } + GeTensorDesc old_desc(old_shape, old_format, old_dtype); if (ProcessNetoutputNodeFp16Nc1hwc0DynShape(old_desc, net_output_input_desc, src_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0."); @@ -1035,6 +1062,10 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i } bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end()); if (!is_acceptable) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.", node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(), input_op_desc->GetType().c_str()); @@ -1127,6 +1158,9 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { } if ((index < 0) || (static_cast(index) >= user_input.size())) { + std::string situation = "data op index[" + std::to_string(index) + "]"; + std::string reason = "it must less than user_input size[" + std::to_string(user_input.size()) + "]"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(PARAM_INVALID, "user_input size = %zu, graph data op index = %ld.", user_input.size(), index); return FAILED; } @@ -1139,6 +1173,9 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { if (need_check_internal_format) { bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); if (is_internal) { + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, + {"Input format[" + TypeUtils::FormatToSerialString(format).c_str() + "] or origin_format[" + + TypeUtils::FormatToSerialString(origin_format).c_str() + "]", "it is not support"}); GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); @@ -1150,6 +1187,8 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, length); if (!type_ret) { + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, + {"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not support"}); GELOGE(PARAM_INVALID, "Input datatype %s is not support.", TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; @@ -1164,6 +1203,10 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { return FAILED); bool size_check = (size != 0 && shape_size != size); if (size_check) { + std::string situation = "input data size[" + std::to_string(size) + + "] and shape_size[" + std::to_string(size) + "]"; + std::string reason = "because size != 0 and shape_size != size"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(PARAM_INVALID, "input data size =%ld, shape_size =%ld.", size, shape_size); return FAILED; } @@ -1503,6 +1546,8 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, length); if (!type_ret) { + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, + {"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not support"}); GELOGE(PARAM_INVALID, "Input datatype %s is not support.", TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; } @@ -1512,14 +1557,19 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { if (shape_size == 0) { if (ge_tensor_desc.GetShape().GetDims().size() == 0) { // shape = [], means it's a sclar tensor. - GE_CHK_BOOL_EXEC(data_size / length == 1, return PARAM_INVALID, "Const is invalid scalar tensor."); + GE_CHK_BOOL_EXEC(data_size / length == 1, + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {kConstError1}); + return PARAM_INVALID, kConstError1); } else { // shape = [x, y, 0,...], means it's a vector tensor that value is []. - GE_CHK_BOOL_EXEC(data_size == 0, return PARAM_INVALID, "Const is invalid vector scalar."); + GE_CHK_BOOL_EXEC(data_size == 0, + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {kConstError2}); + return PARAM_INVALID, kConstError2); } } else { - GE_CHK_BOOL_EXEC(data_size == static_cast(shape_size * length) && data_size != 0, return PARAM_INVALID, - "Const input data size is not equal with tensor desc shape"); + GE_CHK_BOOL_EXEC(data_size == static_cast(shape_size * length) && data_size != 0, + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {kConstError3}); + return PARAM_INVALID, kConstError3); } return SUCCESS; } @@ -1543,6 +1593,9 @@ Status GraphPrepare::CheckUserInput(const std::vector &user_input) { return GE_GRAPH_INIT_FAILED; } if ((index < 0) || (static_cast(index) >= user_input.size())) { + std::string situation = "data op index[" + std::to_string(index) + "]"; + std::string reason = "it must less than user_input size[" + std::to_string(user_input.size()) + "]"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(GE_GRAPH_INIT_FAILED, "user_input size:%zu, data op index:%ld.", user_input.size(), index); return GE_GRAPH_INIT_FAILED; } @@ -1550,6 +1603,9 @@ Status GraphPrepare::CheckUserInput(const std::vector &user_input) { for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { if (desc.GetShape().GetDim(i) < 0) { + std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(desc.GetShape().GetDim(i)) + "]" ; + std::string reason = "it need >= 0"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i, desc.GetShape().GetDim(i)); return GE_GRAPH_INIT_FAILED; diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 729c47de..474145af 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -790,16 +790,22 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { + string errormsg = "Get data_node dims and transfer to nchw_dims failed!"; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(PARAM_INVALID, "Get data_node dims and transfer to nchw_dims failed!"); return PARAM_INVALID; } if (batch_count <= 0) { + string errormsg = "Batch count[" + std::to_sting(batch_count) + "] is invalid, it must positive."; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(PARAM_INVALID, "Batch count %ld is invalid", batch_count); return PARAM_INVALID; } int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count); if (max_dynamic_aipp_size < 0) { + string errormsg = "The dynamic aipp size is not positive"; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(PARAM_INVALID, "The dynamic aipp size is not positive."); return PARAM_INVALID; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index a1eb104d..8b33885b 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -37,6 +37,16 @@ using domi::AippOpParams; +#define AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(expr, _status, errormsg) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + return _status; \ + } \ + } while (0) + namespace ge { namespace { const char *const kMbatchSwitchnName = "mbatch-switch-name"; @@ -224,9 +234,10 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { } } } - GE_CHK_BOOL_RET_STATUS((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), PARAM_INVALID, - "Can not config part of outputs of Data node to support AIPP, config all " - "of the outputs of Data to support AIPP, or config none of them"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), + PARAM_INVALID, + "Can not config part of outputs of Data node to support AIPP, config all " + "of the outputs of Data to support AIPP, or config none of them"); std::unique_ptr aippParams(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(aippParams); @@ -238,16 +249,19 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); if (aippMode == domi::AippOpParams::static_) { - GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, - "The input_format of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, - "The src_image_size_w of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, - "The src_image_size_h of all aipp_ops after one Data should be the same"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + aippParams->input_format() == currAippParam->input_format(), + PARAM_INVALID, "The input_format of all aipp_ops after one Data should be the same"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + aippParams->src_image_size_w() == currAippParam->src_image_size_w(), + PARAM_INVALID, "The src_image_size_w of all aipp_ops after one Data should be the same"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + aippParams->src_image_size_h() == currAippParam->src_image_size_h(), + PARAM_INVALID, "The src_image_size_h of all aipp_ops after one Data should be the same"); } else { - GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), - PARAM_INVALID, - "The max_src_image_size of all aipp_ops after one Data should be the same"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + aippParams->max_src_image_size() == currAippParam->max_src_image_size(), + PARAM_INVALID, "The max_src_image_size of all aipp_ops after one Data should be the same"); } }); } @@ -290,6 +304,8 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { for (auto &switchn : updated_switchn) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { + string errormesg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(INTERNAL_ERROR, "Failed to find relative data node by switchn %s", switchn->GetName().c_str()); return INTERNAL_ERROR; } @@ -477,6 +493,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { + string errormesg = "No max size found from switchn node[" + switchn->GetName()+ "]"; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(INTERNAL_ERROR, "No max size found from switchn node %s", switchn->GetName().c_str()); return INTERNAL_ERROR; } diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c0ba89f4..8f42ad24 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -595,6 +595,8 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector &start_ } auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); if (!IsAllDimsPositive(dims)) { + ErrorManager::GetInstance().ATCReportErrMessage("E15004", {"opname", "shape"}, + {node->GetName(), formats::ShapeToString(dims)}); GELOGE(INTERNAL_ERROR, "Failed to copy multi batch graph, the node %s still has unknown shape %s", node->GetName().c_str(), formats::ShapeToString(dims).c_str()); return INTERNAL_ERROR; diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 9909b0dc..cc0d2d5b 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -124,6 +124,8 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, auto tmp_index = cur_data_index; for (size_t i = 0; i < static_cast(dynamic_dims_num); ++i) { if (tmp_index >= dynamic_gear_info.size()) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10045", {"name", "shape"}, {data_name, formats::JoinToString(data_shape)}); GELOGE(PARAM_INVALID, "Data: %s shape: %s make dynamic dims overflow", data_name.c_str(), formats::JoinToString(data_shape).c_str()); return FAILED; @@ -131,6 +133,8 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, one_gear.push_back(dynamic_gear_info[tmp_index++]); } } else { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10046", {"name", "shape"}, {data_name, formats::JoinToString(data_shape)}); GELOGE(PARAM_INVALID, "Dynamic dims num of data: %s shape: %s can not be more than one gear dynamic info size", data_name.c_str(), formats::JoinToString(data_shape).c_str()); return FAILED; From 55745911a43bc92e9283cf0eecece752d34a49db Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Fri, 13 Nov 2020 18:34:36 +0800 Subject: [PATCH 13/35] error message add --- ge/graph/common/transop_util.cc | 13 +++++++------ ge/graph/preprocess/graph_preprocess.cc | 19 ++++++++----------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc index f57f56a8..c86e69dd 100755 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -81,13 +81,14 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { return false; } return true; +} - std::string TransOpUtil::TransopMapToString() { - std::string buffer; - for (auto it = transop_index_map_.begin(); it != transop_index_map_.end(); ++it) { - buffer += it->first + ","; - } - return buffer.substr(0, buffer.size() -1); +std::string TransOpUtil::TransopMapToString() { + std::string buffer; + for (auto it = transop_index_map_.begin(); it != transop_index_map_.end(); ++it) { + buffer += it->first + " "; } + return buffer; } + } // namespace ge diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 7049fba1..c5fca249 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -128,9 +128,6 @@ static std::map output_type_str_to_datatype = { {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; const char *const kMbatchSwitchnName = "mbatch-switch-name"; -const char *const kConstError1 = "Const is invalid scalar tensor."; -const char *const kConstError2 = "Const is invalid vector scalar."; -const char *const kConstError3 = "Const input data size is not equal with tensor desc shape"; // the size of user defined output datatype or format string after split by ":". const size_t kUserDefinedElementCount = 2; @@ -1063,9 +1060,8 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end()); if (!is_acceptable) { ErrorManager::GetInstance().ATCReportErrMessage( - "E19014", {"opname", "value", "reason"}, - {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", - "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); + "E15005", {"opname", "optype", "opname1", "optype1"}, + {op_desc->GetName(), node->GetType(), input_op_desc->GetName(), input_op_desc->GetType()}); GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.", node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(), input_op_desc->GetType().c_str()); @@ -1558,18 +1554,19 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { if (ge_tensor_desc.GetShape().GetDims().size() == 0) { // shape = [], means it's a sclar tensor. GE_CHK_BOOL_EXEC(data_size / length == 1, - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {kConstError1}); - return PARAM_INVALID, kConstError1); + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid scalar tensor."}); + return PARAM_INVALID, "Const is invalid scalar tensor."); } else { // shape = [x, y, 0,...], means it's a vector tensor that value is []. GE_CHK_BOOL_EXEC(data_size == 0, - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {kConstError2}); + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid vector scalar."}); return PARAM_INVALID, kConstError2); } } else { GE_CHK_BOOL_EXEC(data_size == static_cast(shape_size * length) && data_size != 0, - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {kConstError3}); - return PARAM_INVALID, kConstError3); + ErrorManager::GetInstance().ATCReportErrMessage( + "E10043", {"reason"}, {"Const input data size is not equal with tensor desc shape"}); + return PARAM_INVALID, "Const input data size is not equal with tensor desc shape"); } return SUCCESS; } From c756068df0aeab881c6b054318043292134eb6e9 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Fri, 13 Nov 2020 18:38:59 +0800 Subject: [PATCH 14/35] error message add --- ge/graph/preprocess/graph_preprocess.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index c5fca249..2d708035 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1560,7 +1560,7 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { // shape = [x, y, 0,...], means it's a vector tensor that value is []. GE_CHK_BOOL_EXEC(data_size == 0, ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid vector scalar."}); - return PARAM_INVALID, kConstError2); + return PARAM_INVALID, "Const is invalid vector scalar."); } } else { GE_CHK_BOOL_EXEC(data_size == static_cast(shape_size * length) && data_size != 0, From 6c595433be71b820c8947f618931cac25138a435 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Fri, 13 Nov 2020 18:53:55 +0800 Subject: [PATCH 15/35] error message add --- ge/graph/common/transop_util.h | 2 +- ge/graph/preprocess/graph_preprocess.cc | 4 ++-- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 2 +- ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h index 3332e1fb..6284f754 100644 --- a/ge/graph/common/transop_util.h +++ b/ge/graph/common/transop_util.h @@ -35,7 +35,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static bool CheckPrecisionLoss(const NodePtr &src_node); - static std::string TransopMapToString(); + std::string TransopMapToString(); private: TransOpUtil(); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 2d708035..041d2227 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1170,8 +1170,8 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); if (is_internal) { ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, - {"Input format[" + TypeUtils::FormatToSerialString(format).c_str() + "] or origin_format[" + - TypeUtils::FormatToSerialString(origin_format).c_str() + "]", "it is not support"}); + {"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" + + TypeUtils::FormatToSerialString(origin_format) + "]", "it is not support"}); GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 474145af..8ea1f7a9 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -796,7 +796,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { return PARAM_INVALID; } if (batch_count <= 0) { - string errormsg = "Batch count[" + std::to_sting(batch_count) + "] is invalid, it must positive."; + string errormsg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(PARAM_INVALID, "Batch count %ld is invalid", batch_count); return PARAM_INVALID; diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 8b33885b..2f90b6a4 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -304,7 +304,7 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { for (auto &switchn : updated_switchn) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { - string errormesg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; + string errormsg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(INTERNAL_ERROR, "Failed to find relative data node by switchn %s", switchn->GetName().c_str()); return INTERNAL_ERROR; @@ -493,7 +493,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { - string errormesg = "No max size found from switchn node[" + switchn->GetName()+ "]"; + string errormsg = "No max size found from switchn node[" + switchn->GetName()+ "]"; ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); GELOGE(INTERNAL_ERROR, "No max size found from switchn node %s", switchn->GetName().c_str()); return INTERNAL_ERROR; From 79c1c49964016babb04113adfd8c8669d992a5b5 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Fri, 13 Nov 2020 19:45:04 +0800 Subject: [PATCH 16/35] error message add --- ge/graph/common/transop_util.cc | 4 ++-- ge/graph/common/transop_util.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc index c86e69dd..9b513fe6 100755 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -85,8 +85,8 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { std::string TransOpUtil::TransopMapToString() { std::string buffer; - for (auto it = transop_index_map_.begin(); it != transop_index_map_.end(); ++it) { - buffer += it->first + " "; + for (auto &key : Instance().transop_index_map_) { + buffer += key.first + " "; } return buffer; } diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h index 6284f754..3332e1fb 100644 --- a/ge/graph/common/transop_util.h +++ b/ge/graph/common/transop_util.h @@ -35,7 +35,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static bool CheckPrecisionLoss(const NodePtr &src_node); - std::string TransopMapToString(); + static std::string TransopMapToString(); private: TransOpUtil(); From cb7d37caaa7cb3649f5153065e8b5e12d0b59d70 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Fri, 13 Nov 2020 19:54:33 +0800 Subject: [PATCH 17/35] error message add --- ge/graph/preprocess/graph_preprocess.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 041d2227..16b0e88f 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -593,7 +593,7 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node if (switchn_node == nullptr) { ErrorManager::GetInstance().ATCReportErrMessage( "E15002", {"opname", "value", "reason"}, - {data_node->GetName(), related_node_name, "but the value is empty"}); + {data_node->GetName(), related_node_name, "but can not find it on the graph"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can not find it on the graph", data_node->GetName().c_str(), related_node_name.c_str()); return INTERNAL_ERROR; From 47d52f0d5c476d01c6e017df8b1b92b3dd9ceea7 Mon Sep 17 00:00:00 2001 From: y00500818 Date: Sat, 14 Nov 2020 09:29:49 +0800 Subject: [PATCH 18/35] zero copy optimize the struct of addr offset --- ge/graph/load/new_model_manager/zero_copy_task.cc | 2 +- ge/graph/load/new_model_manager/zero_copy_task.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 35169726..2079034e 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -45,7 +45,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { if (it == task_addr_offset_.end()) { task_addr_offset_[addr] = {offset}; } else { - it->second.push_back(offset); + it->second.insert(offset); } GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr, diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h index 57ccdbaf..d0bb2b6d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/ge/graph/load/new_model_manager/zero_copy_task.h @@ -103,7 +103,7 @@ class ZeroCopyTask { bool is_updated_; string batch_label_; //
- map> task_addr_offset_; + map> task_addr_offset_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ From 61b9b3a8ec4e26fe198cf9743ec42df5f1361218 Mon Sep 17 00:00:00 2001 From: lianghao Date: Sat, 14 Nov 2020 10:54:27 +0800 Subject: [PATCH 19/35] delete atc memory limit --- ge/graph/load/new_model_manager/davinci_model.cc | 5 ++++- ge/offline/main.cc | 20 -------------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index f310e18e..d00c2eda 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -88,6 +88,7 @@ const uint32_t kDataMemAlignSizeCompare = 64; const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; +const int32_t kInvalidStream = -1; inline bool IsDataOp(const std::string &node_type) { return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; @@ -610,7 +611,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_DISMISS_GUARD(stream); stream_list_.push_back(stream); - GELOGD("Stream index:%u, stream:%p.", i, stream); + int32_t rt_stream_id = kInvalidStream; + (void)rtGetStreamId(stream, &rt_stream_id); + GELOGI("Logical stream index:%u, stream:%p, rtstream: %d.", i, stream, rt_stream_id); } for (uint32_t i = 0; i < EventNum(); i++) { diff --git a/ge/offline/main.cc b/ge/offline/main.cc index d72040d7..94514f95 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -32,7 +32,6 @@ #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" #include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" #include "graph/op_desc.h" #include "graph/utils/graph_utils.h" #include "graph/utils/type_utils.h" @@ -64,8 +63,6 @@ using std::vector; static bool is_dynamic_input = false; -// 310 limited 8G size -const char *const kGraphMemoryManagerMallocMaxSize = "8*1024*1024*1024"; const char *const kModeSupport = "only support 0(model to framework model), " "1(framework model to json), 3(only pre-check), 5(pbtxt to json)"; const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; @@ -908,13 +905,6 @@ domi::Status GenerateModel(std::map &options, std::string output return domi::FAILED; } - geRet = ge::VarManager::Instance(0)->SetMemoryMallocSize(options); - if (geRet != ge::SUCCESS) { - GELOGE(ge::FAILED, "SetMemoryMallocSize failed."); - (void)ge::GELib::GetInstance()->Finalize(); - return domi::FAILED; - } - ge::Graph graph; std::vector inputs; if (FLAGS_framework == domi::MINDSPORE) { @@ -1016,7 +1006,6 @@ static void SetEnvForSingleOp(std::map &options) { options.emplace(ge::OP_SELECT_IMPL_MODE, FLAGS_op_select_implmode); options.emplace(ge::OPTYPELIST_FOR_IMPLMODE, FLAGS_optypelist_for_implmode); options.emplace(ge::AUTO_TUNE_MODE, FLAGS_auto_tune_mode); - options.emplace(ge::GRAPH_MEMORY_MAX_SIZE, kGraphMemoryManagerMallocMaxSize); options.emplace(ge::OP_DEBUG_LEVEL, to_string(FLAGS_op_debug_level)); options.emplace(ge::DEBUG_DIR, FLAGS_debug_dir); options.emplace(ge::OP_COMPILER_CACHE_DIR, FLAGS_op_compiler_cache_dir); @@ -1053,13 +1042,6 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { return domi::FAILED; } - ret = ge::VarManager::Instance(0)->SetMemoryMallocSize(options); - if (ret != ge::SUCCESS) { - GELOGE(ge::FAILED, "SetMemoryMallocSize failed."); - (void)ge::GELib::GetInstance()->Finalize(); - return domi::FAILED; - } - vector build_params; if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) { DOMI_LOGE("parse single op json file failed"); @@ -1158,8 +1140,6 @@ domi::Status GenerateOmModel() { (FLAGS_enable_compress_weight == "true") ? ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse)); - options.insert(std::pair(string(ge::GRAPH_MEMORY_MAX_SIZE), kGraphMemoryManagerMallocMaxSize)); - options.insert(std::pair(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); options.insert(std::pair(string(ge::DEBUG_DIR), FLAGS_debug_dir)); From a4bf3c4f925d10751bc4c583641e659aaaada77b Mon Sep 17 00:00:00 2001 From: l00444296 Date: Sat, 14 Nov 2020 12:27:29 +0800 Subject: [PATCH 20/35] Feature:Support user options of aclgrphParse interface --- metadef | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadef b/metadef index d090ec83..9bbce07b 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit d090ec8335c091d7481675ed99c50e83c4dae853 +Subproject commit 9bbce07b846858fa30ef2bd7c662894e20f83ef1 From a21e7569bd7585818914635ec478c13a9399a0ab Mon Sep 17 00:00:00 2001 From: l00444296 Date: Sat, 14 Nov 2020 13:55:02 +0800 Subject: [PATCH 21/35] Feature:Support user options of aclgrphParse interface --- metadef | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadef b/metadef index 9bbce07b..e2a36e47 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 9bbce07b846858fa30ef2bd7c662894e20f83ef1 +Subproject commit e2a36e47259da923702971dcfa8a78d100b74902 From 4788c9a1e0d464959fdc8326231bc92865cfce05 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 16:33:55 +0800 Subject: [PATCH 22/35] error message add --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 160 ++++++++++----------- .../preprocess/insert_op/util_insert_aipp_op.cc | 20 +-- inc/framework/common/debug/log.h | 17 +++ 3 files changed, 97 insertions(+), 100 deletions(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 8ea1f7a9..e6b30639 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -53,16 +53,6 @@ } \ } while (0) -#define AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(expr, _status, errormsg) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ - return _status; \ - } \ - } while (0) - namespace { const int32_t DEFAULT_MATRIX_R0C0_YUV2RGB = 298; const int32_t DEFAULT_MATRIX_R0C1_YUV2RGB = 0; @@ -537,87 +527,87 @@ Status AippOp::SetDefaultParams() { Status AippOp::ValidateParams() { GE_CHECK_NOTNULL(aipp_params_); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, - "When insert AIPP op, aipp_mode must be configured as static or dynamic "); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_2 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_3 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, - "The parameter output_bias_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, - "The parameter output_bias_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, - "The parameter output_bias_2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, - "The parameter input_bias_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, - "The parameter input_bias_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, - "The parameter input_bias_2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, - "The parameter input_edge_idx can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, + "When insert AIPP op, aipp_mode must be configured as static or dynamic "); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_2 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_3 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, + "The parameter output_bias_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, + "The parameter output_bias_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, + "The parameter output_bias_2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, + "The parameter input_bias_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, + "The parameter input_bias_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, + "The parameter input_bias_2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, + "The parameter input_edge_idx can not be configed repeatedly"); const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode(); if (aipp_mode == domi::AippOpParams::dynamic) { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aipp_params_->max_src_image_size() > 0, PARAM_INVALID, "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); } else { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, - "Input format of AIPP conf is undefined"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, - "Src_image_size_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, - "Src_image_size_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, - "Load_start_pos_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, - "Load_start_pos_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, - "Crop_size_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, - "Resize_output_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, - "Resize_output_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, - "Left_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, - "Right_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, - "Top_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, - "Bottom_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, + "Input format of AIPP conf is undefined"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, + "Src_image_size_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, + "Src_image_size_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, + "Load_start_pos_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, + "Load_start_pos_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, + "Crop_size_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, + "Resize_output_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, + "Resize_output_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, + "Left_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, + "Right_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, + "Top_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, + "Bottom_padding_size must not be configed smaller than 0"); } return SUCCESS; diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 2f90b6a4..0fd742cd 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -37,16 +37,6 @@ using domi::AippOpParams; -#define AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(expr, _status, errormsg) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ - return _status; \ - } \ - } while (0) - namespace ge { namespace { const char *const kMbatchSwitchnName = "mbatch-switch-name"; @@ -234,7 +224,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { } } } - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), + GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), PARAM_INVALID, "Can not config part of outputs of Data node to support AIPP, config all " "of the outputs of Data to support AIPP, or config none of them"); @@ -249,17 +239,17 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); if (aippMode == domi::AippOpParams::static_) { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, "The input_format of all aipp_ops after one Data should be the same"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, "The src_image_size_w of all aipp_ops after one Data should be the same"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, "The src_image_size_h of all aipp_ops after one Data should be the same"); } else { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aippParams->max_src_image_size() == currAippParam->max_src_image_size(), PARAM_INVALID, "The max_src_image_size of all aipp_ops after one Data should be the same"); } diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 6d449919..965cdb7b 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -253,4 +253,21 @@ exec_expr1; \ } +#define GE_ERRORLOG_AND_ERRORMSG(expr, _status, errormsg) \ + { \ + GELOGE(_status, errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + } + + +#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + return _status; \ + } \ + } while (0) + #endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ From a61864bcfc28a80508f790eef0b49d23f7792fad Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 16:40:43 +0800 Subject: [PATCH 23/35] error message add --- inc/framework/common/debug/log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 965cdb7b..92e7bee7 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -253,7 +253,7 @@ exec_expr1; \ } -#define GE_ERRORLOG_AND_ERRORMSG(expr, _status, errormsg) \ +#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ { \ GELOGE(_status, errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ From 9396238bfaf9086b2643e2ad26776636f343903f Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 16:48:07 +0800 Subject: [PATCH 24/35] error message add --- ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 0fd742cd..6a2eb333 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -124,10 +124,7 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { if (another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_name and related_input_rank!" " Please ensure param is the same with the first aipp config(related_input_name)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_name)."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg) return PARAM_INVALID; } if (item->related_input_name() == another_item->related_input_name()) { From 086309d1e9140db26fa0d725982810c9f67bd8a1 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 17:11:55 +0800 Subject: [PATCH 25/35] error message add --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 23 +++++++++------------- .../preprocess/insert_op/util_insert_aipp_op.cc | 21 +++++--------------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index e6b30639..e7c1fce7 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -306,9 +306,8 @@ NodePtr AippOp::FindDataByIndex(const ComputeGraphPtr &graph, int rank) { } return node; } - GELOGE(PARAM_INVALID, "Can not find the data node by index %d", rank); string errormsg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank); - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return nullptr; } Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr &target, @@ -353,10 +352,10 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr } if (!edge_indexes.empty() && (*edge_indexes.rbegin() >= data_node->GetOutDataNodes().size())) { - GELOGE(PARAM_INVALID, "input_edge_idx %u should smaller than out edge size of target input %zu", - *edge_indexes.rbegin(), data_node->GetOutDataNodes().size()); - string errormsg = "The aipp parameter input_edge_idx should be smaller than the target input's outnodes."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + string errormsg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) + + "] should be smaller than the target input[" + + std::to_string(data_node->GetOutDataNodes().size()) +"]'s outnodes."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return PARAM_INVALID; } target = data_node; @@ -429,8 +428,7 @@ Status AippOp::ConvertRelatedInputNameToRank() { if (!convert_flag) { string error_msg = "Top name " + related_input_name + "convert rank failed, Please" " ensure top name in aipp config is the top name of data node."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, "Top name[%s] converts rank failed.", related_input_name.c_str()); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return PARAM_INVALID; } @@ -781,22 +779,19 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { string errormsg = "Get data_node dims and transfer to nchw_dims failed!"; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); - GELOGE(PARAM_INVALID, "Get data_node dims and transfer to nchw_dims failed!"); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, errormsg); return PARAM_INVALID; } if (batch_count <= 0) { string errormsg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); - GELOGE(PARAM_INVALID, "Batch count %ld is invalid", batch_count); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, errormsg); return PARAM_INVALID; } int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count); if (max_dynamic_aipp_size < 0) { string errormsg = "The dynamic aipp size is not positive"; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); - GELOGE(PARAM_INVALID, "The dynamic aipp size is not positive."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, errormsg); return PARAM_INVALID; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 6a2eb333..923f0604 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -130,10 +130,7 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { if (item->related_input_name() == another_item->related_input_name()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return PARAM_INVALID; } } @@ -153,19 +150,13 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { if (!another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_rank and related_input_name!" " Please ensure param is the same with the first aipp config(related_input_rank)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_rank)."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return PARAM_INVALID; } if (item->related_input_rank() == another_item->related_input_rank()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return PARAM_INVALID; } } @@ -292,8 +283,7 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { string errormsg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); - GELOGE(INTERNAL_ERROR, "Failed to find relative data node by switchn %s", switchn->GetName().c_str()); + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, errormsg); return INTERNAL_ERROR; } GE_RETURN_IF_ERROR(UpdateDataBySwitchN(switchn, data_iter->second)); @@ -481,8 +471,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { string errormsg = "No max size found from switchn node[" + switchn->GetName()+ "]"; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); - GELOGE(INTERNAL_ERROR, "No max size found from switchn node %s", switchn->GetName().c_str()); + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, errormsg); return INTERNAL_ERROR; } auto output_desc = switchn->GetOpDesc()->MutableOutputDesc(max_index); From 8e67c970b839025d4d6292bb99fbf2eaa60b7aeb Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 14 Nov 2020 17:35:21 +0800 Subject: [PATCH 26/35] multi batch --- ge/graph/build/memory/block_mem_assigner.cc | 9 +++++++++ ge/graph/preprocess/multi_batch_copy_graph.cc | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 2d30c57e..f89a9c13 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -880,6 +880,15 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GELOGI("Unreusable block."); continue; } + std::string batch_label; + if (reusable_block->IsSameLabel(batch_label)) { + std::string op_label; + (void)ge::AttrUtils::GerStr(node_op_desc, Attr_NAME_BATCH_LABEL, op_label); + if (batch_label != op_label) { + GELOGI("label diff, op name %s", node_op_desc->GetName().c_str()); + continue; + } + } // A node can reuse blocks of the same stream and preorder streams if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c0ba89f4..1ea551bf 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1025,6 +1025,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { + const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); + if (multi_batch_with_case != nullptr) { + PassManager pass_manager; + GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); + return pass_manager.Run(graph); + } + std::vector> shapes; if (!InitDynamicParams(shapes)) { GELOGD("There is no multi-batch options, no need to process multi-batch copy"); From a8c720f8e8537018fd0cd78e929547b05e387022 Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 14 Nov 2020 17:46:08 +0800 Subject: [PATCH 27/35] multi batch --- ge/graph/build/memory/block_mem_assigner.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index f89a9c13..cdca7fb7 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -883,7 +883,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, std::string batch_label; if (reusable_block->IsSameLabel(batch_label)) { std::string op_label; - (void)ge::AttrUtils::GerStr(node_op_desc, Attr_NAME_BATCH_LABEL, op_label); + (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label); if (batch_label != op_label) { GELOGI("label diff, op name %s", node_op_desc->GetName().c_str()); continue; From bdf6cf91cb45dce39ecc3b7bef522decc479a7fa Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 18:08:00 +0800 Subject: [PATCH 28/35] error message add --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 8 ++++---- ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 12 ++++++------ inc/framework/common/debug/log.h | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index e7c1fce7..7fb127c3 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -307,7 +307,7 @@ NodePtr AippOp::FindDataByIndex(const ComputeGraphPtr &graph, int rank) { return node; } string errormsg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return nullptr; } Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr &target, @@ -428,7 +428,7 @@ Status AippOp::ConvertRelatedInputNameToRank() { if (!convert_flag) { string error_msg = "Top name " + related_input_name + "convert rank failed, Please" " ensure top name in aipp config is the top name of data node."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } @@ -779,7 +779,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { string errormsg = "Get data_node dims and transfer to nchw_dims failed!"; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, errormsg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (batch_count <= 0) { @@ -791,7 +791,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count); if (max_dynamic_aipp_size < 0) { string errormsg = "The dynamic aipp size is not positive"; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, errormsg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 923f0604..d7ae2e27 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -124,13 +124,13 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { if (another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_name and related_input_rank!" " Please ensure param is the same with the first aipp config(related_input_name)."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg) + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (item->related_input_name() == another_item->related_input_name()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" " param is different in different aipp config."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } } @@ -150,13 +150,13 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { if (!another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_rank and related_input_name!" " Please ensure param is the same with the first aipp config(related_input_rank)."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (item->related_input_rank() == another_item->related_input_rank()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" " param is different in different aipp config."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } } @@ -283,7 +283,7 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { string errormsg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; - GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, errormsg); + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } GE_RETURN_IF_ERROR(UpdateDataBySwitchN(switchn, data_iter->second)); @@ -471,7 +471,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { string errormsg = "No max size found from switchn node[" + switchn->GetName()+ "]"; - GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, errormsg); + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } auto output_desc = switchn->GetOpDesc()->MutableOutputDesc(max_index); diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 92e7bee7..73990c68 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -255,7 +255,7 @@ #define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ { \ - GELOGE(_status, errormsg); \ + GELOGE(_status, "%s", errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ } @@ -264,7 +264,7 @@ do { \ bool b = (expr); \ if (!b) { \ - GELOGE(_status, errormsg); \ + GELOGE(_status, "%s", errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ return _status; \ } \ From bd1a9dbf1ddd56a8eb79720e1182a545b3203ab0 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 18:10:16 +0800 Subject: [PATCH 29/35] error message add --- inc/framework/common/debug/log.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 73990c68..b398a5b8 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -255,7 +255,7 @@ #define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ { \ - GELOGE(_status, "%s", errormsg); \ + GELOGE(_status, "%s", errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ } @@ -264,7 +264,7 @@ do { \ bool b = (expr); \ if (!b) { \ - GELOGE(_status, "%s", errormsg); \ + GELOGE(_status, "%s", errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ return _status; \ } \ From 692cf7fd3daf522116cac8bb482ffb95d65d7f9f Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 18:29:29 +0800 Subject: [PATCH 30/35] error message add --- inc/framework/common/debug/log.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index b398a5b8..53678cf0 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -83,12 +83,12 @@ } while (0); // If expr is not GRAPH_SUCCESS, print the log and return FAILED -#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ - do { \ - if ((expr) != ge::GRAPH_SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - return FAILED; \ - } \ +#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ + do { \ + if ((expr) != ge::GRAPH_SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + return FAILED; \ + } \ } while (0); // If expr is not SUCCESS, print the log and execute a custom statement @@ -99,13 +99,13 @@ } while (0); // If expr is not true, print the log and return the specified status -#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, __VA_ARGS__); \ - return _status; \ - } \ +#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, __VA_ARGS__); \ + return _status; \ + } \ } while (0); // If expr is not true, print the log and return the specified status @@ -259,7 +259,6 @@ ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ } - #define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ do { \ bool b = (expr); \ From affd87b88dd611c0ac49fabc85f2e302ba5087d9 Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 18:32:26 +0800 Subject: [PATCH 31/35] error message add --- inc/framework/common/debug/log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 53678cf0..e259f43b 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -28,7 +28,7 @@ #if !defined(__ANDROID__) && !defined(ANDROID) #define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) #else -#include +#include #if defined(BUILD_VERSION_PERF) #define DOMI_LOGE(fmt, ...) #else From 011444a6139ace042a4e75e1fbcabd57ce633df0 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Sat, 14 Nov 2020 18:35:09 +0800 Subject: [PATCH 32/35] Feature:Support user options of aclgrphParse interface --- ge/graph/passes/net_output_pass.cc | 8 +++++++- ge/graph/passes/net_output_pass.h | 1 + ge/session/omg.cc | 4 ++++ metadef | 2 +- parser | 2 +- 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index e3f2b71a..c6ab062a 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -103,6 +103,12 @@ Status NetOutputPass::GetOutputNode(const ge::ComputeGraphPtr &graph, std::vecto GELOGI("user set out node [%s] is found in user def targets, out node is prio!", ele.first->GetName().c_str()); targets_.erase(iter); } + + auto op_desc = ele.first->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (op_desc->HasAttr(ATTR_ATC_USER_DEFINE_OUTPUT_NODES)) { + is_user_define_ouput_nodes = true; + } output_nodes_info.push_back({ele.first, ele.second, -1}); } GELOGI("Output node set by user or leaf node, size:%zu.", output_nodes_info.size()); @@ -414,7 +420,7 @@ Status NetOutputPass::ProcessWithNetoutput(const ge::ComputeGraphPtr &graph, con Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraphPtr &graph, const ge::NodePtr &net_out_node) { GE_CHECK_NOTNULL(net_out_node); - if (!GetLocalOmgContext().user_out_nodes.empty()) { + if (!GetLocalOmgContext().user_out_nodes.empty() || is_user_define_ouput_nodes) { GELOGI("No need to add ctrl edge to netoutput because user out nodes have been set."); return SUCCESS; } diff --git a/ge/graph/passes/net_output_pass.h b/ge/graph/passes/net_output_pass.h index b959bd96..ab190169 100644 --- a/ge/graph/passes/net_output_pass.h +++ b/ge/graph/passes/net_output_pass.h @@ -220,6 +220,7 @@ class NetOutputPass : public GraphPass { bool is_include_special_node_ = false; std::set targets_; friend class ReUpdateNetOutputPass; + bool is_user_define_ouput_nodes = false; }; } // namespace ge #endif // GE_GRAPH_PASSES_NET_OUTPUT_PASS_H_ diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 104b3d00..16449363 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -485,6 +485,10 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str()); return domi::FAILED; } + + // add user_define_output_nodes attr. + (void)ge::AttrUtils::SetStr(op_desc, ATTR_ATC_USER_DEFINE_OUTPUT_NODES, "true"); + if (i < output_formats.size()) { if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) { GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str()); diff --git a/metadef b/metadef index e2a36e47..37465b85 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit e2a36e47259da923702971dcfa8a78d100b74902 +Subproject commit 37465b85d30b67a0edcc6ea4acd2f11a9697c7af diff --git a/parser b/parser index 0b7d8c9f..5fa1f3ed 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 0b7d8c9ffba6de83c4232db4f2e105ba51cd6296 +Subproject commit 5fa1f3ed9b1785b9fd1623d624de91838dff615e From 0c67afc56e17b15fcf6f775731767914efdb56cc Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Sat, 14 Nov 2020 18:49:52 +0800 Subject: [PATCH 33/35] error message add --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 12 ++++++------ ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 7fb127c3..c638f783 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -306,7 +306,7 @@ NodePtr AippOp::FindDataByIndex(const ComputeGraphPtr &graph, int rank) { } return node; } - string errormsg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank); + string error_msg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank); GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return nullptr; } @@ -352,7 +352,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr } if (!edge_indexes.empty() && (*edge_indexes.rbegin() >= data_node->GetOutDataNodes().size())) { - string errormsg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) + + string error_msg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) + "] should be smaller than the target input[" + std::to_string(data_node->GetOutDataNodes().size()) +"]'s outnodes."; GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); @@ -778,19 +778,19 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { - string errormsg = "Get data_node dims and transfer to nchw_dims failed!"; + string error_msg = "Get data_node dims and transfer to nchw_dims failed!"; GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (batch_count <= 0) { - string errormsg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, errormsg); + string error_msg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); return PARAM_INVALID; } int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count); if (max_dynamic_aipp_size < 0) { - string errormsg = "The dynamic aipp size is not positive"; + string error_msg = "The dynamic aipp size is not positive"; GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index d7ae2e27..1b926e4b 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -282,7 +282,7 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { for (auto &switchn : updated_switchn) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { - string errormsg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; + string error_msg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } @@ -470,7 +470,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { - string errormsg = "No max size found from switchn node[" + switchn->GetName()+ "]"; + string error_msg = "No max size found from switchn node[" + switchn->GetName()+ "]"; GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } From 272b16959aef165522e68a593d603d8c7abf18fa Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Mon, 16 Nov 2020 09:21:21 +0800 Subject: [PATCH 34/35] error message add --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index c638f783..38d1efcf 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -355,7 +355,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr string error_msg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) + "] should be smaller than the target input[" + std::to_string(data_node->GetOutDataNodes().size()) +"]'s outnodes."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } target = data_node; @@ -784,7 +784,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { } if (batch_count <= 0) { string error_msg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg_c_str()); return PARAM_INVALID; } From 121b9f7a2150d13201450869d169fc7721b0a7cb Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Mon, 16 Nov 2020 09:29:34 +0800 Subject: [PATCH 35/35] error message add --- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 38d1efcf..98712a82 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -784,7 +784,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { } if (batch_count <= 0) { string error_msg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg_c_str()); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; }