diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index e21bcb25..6e01ee87 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -21,6 +21,7 @@
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
+#include "graph/load/new_model_manager/davinci_model.h"
 
 namespace {
 const char *const kJobID = "jobID";
@@ -39,10 +40,12 @@ const std::string kConfigNumsdev = "devNums";
 const std::string kConfigDevIdList = "devIdList";
 const std::string kProfStart = "prof_start";
 const std::string kProfStop = "prof_stop";
+const std::string kProfModelSubscribe = "prof_model_subscribe";
+const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
 }  // namespace
 
 namespace ge {
-ProfilingManager::ProfilingManager() {}
+ProfilingManager::ProfilingManager() : subscribe_count_(0) {}
 
 ProfilingManager::~ProfilingManager() {}
 
@@ -54,6 +57,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingManager::Instance() {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   vector<int32_t>().swap(device_id_);
+  subscribe_count_ = 0;
   job_id_ = options.job_id;
 
   GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str());
@@ -382,7 +386,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() {
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
-    const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
+    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
   if (reporter == nullptr) {
@@ -401,7 +405,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
           .append(op_name).append(" ")
           .append(std::to_string(block_dim).append(" ")
           .append(std::to_string(task_id)).append(" ")
-          .append(std::to_string(stream_id)).append("\n"));
+          .append(std::to_string(stream_id)).append(" ")
+          .append(std::to_string(model_id)).append("\n"));
 
     Msprof::Engine::ReporterData reporter_data{};
     reporter_data.deviceId = device_id;
@@ -425,7 +430,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
-    const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
+    uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
   GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;);
@@ -483,6 +488,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
       data.append("\"");
     }
 
+    data.append(" model_id:").append(std::to_string(model_id));
+
     data.append("\n");
 
     Msprof::Engine::ReporterData reporter_data{};
@@ -537,7 +544,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit(const std::string &module) const {
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
-    const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
+    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
+    const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
+    bool check_device) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   int32_t logic_device_id = 0;
   rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -546,7 +555,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
     return;
   }
   GELOGI("current logic_device_id:%d", logic_device_id);
-  if (!is_acl_api_mode_) {
+  if (check_device) {
     auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
     if (ret == device_id_.end()) {
       GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
@@ -554,9 +563,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
     }
   }
   GELOGI("start ProfilingTaskDescInfo.");
-  ProfilingTaskDescInfo(task_desc_info, logic_device_id);
+  ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
   GELOGI("start ProfilingGraphDescInfo.");
-  ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id);
+  ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
   GELOGI("Report profiling data for GE end.");
 #endif
 }
@@ -581,6 +590,105 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() {
   return module;
 }
 
+void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type,
+                                                      uint32_t device_id,
+                                                      uint64_t module) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  if (prof_type == kProfModelSubscribe) {
+    if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) {
+      subs_dev_module_[device_id].subscribe_count++;
+    } else {
+      DeviceSubsInfo dev_info;
+      dev_info.module = module;
+      dev_info.subscribe_count = 1;
+      subs_dev_module_[device_id] = dev_info;
+    }
+  } else if (prof_type == kProfModelUnsubscribe) {
+    if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) {
+      if (subs_dev_module_[device_id].subscribe_count > 0) {
+        subs_dev_module_[device_id].subscribe_count--;
+      }
+    }
+  } else {
+    GELOGI("No need to update device_id module map.");
+  }
+#endif
+}
+
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelSubscribe(
+    uint64_t module, void *model) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  std::lock_guard<std::mutex> lock(mutex_);
+  uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK;
+  if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) {
+    // register framework to profiling
+    int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
+    if (result != SUCCESS) {
+      GELOGE(FAILED, "Register profiling engine failed.");
+      return FAILED;
+    }
+    GELOGI("Prof subscribe: model load profiling on.");
+  }
+  subscribe_count_++;
+
+  auto davinci_model = static_cast<DavinciModel *>(model);
+  int32_t device_num = 1;
+  uint32_t device[1];
+  device[0] = davinci_model->GetDeviceId();
+  rtError_t rt_ret = rtProfilerStart(module, device_num, device);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(FAILED, "Runtime profiler start failed.");
+    return FAILED;
+  }
+  UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module);
+
+  // Report profiling data
+  Status p_ret = davinci_model->ReportProfilingData(false);
+  if (p_ret != SUCCESS) {
+    GELOGE(p_ret, "Report profiling data failed.");
+    return p_ret;
+  }
+#endif
+  return SUCCESS;
+}
+
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelUnsubscribe(
+    void *model) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  std::lock_guard<std::mutex> lock(mutex_);
+  if (subscribe_count_ == 0) {
+    GELOGW("The profiler has not been subscribed, there is no need to cancel the subscription.");
+    return SUCCESS;
+  }
+
+  auto davinci_model = static_cast<DavinciModel *>(model);
+  int32_t dev_num = 1;
+  uint32_t device[1];
+  device[0] = davinci_model->GetDeviceId();
+  auto iter = subs_dev_module_.find(device[0]);
+  if (iter != subs_dev_module_.end()) {
+    if (subs_dev_module_[device[0]].subscribe_count == 1) {
+      rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device);
+      if (rt_ret != RT_ERROR_NONE) {
+        GELOGE(FAILED, "Runtime profiler stop failed.");
+        return FAILED;
+      }
+    }
+    UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module);
+  }
+
+  subscribe_count_--;
+  if (subscribe_count_ == 0) {
+    int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
+    if (ret != SUCCESS) {
+      GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret);
+      return ret;
+    }
+  }
+#endif
+  return SUCCESS;
+}
+
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   std::lock_guard<std::mutex> lock(mutex_);
@@ -748,6 +856,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfStartProfiling(
     device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
   }
   GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);
+
   rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(FAILED, "Runtime profiler config proc failed.");
diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h
index 8fb59216..66cefc32 100755
--- a/ge/common/profiling/profiling_manager.h
+++ b/ge/common/profiling/profiling_manager.h
@@ -39,6 +39,10 @@ namespace {
 const std::string GE_PROFILING_MODULE = "Framework";
 }  // namespace
 namespace ge {
+struct DeviceSubsInfo {
+  uint64_t module;
+  uint32_t subscribe_count;
+};
 // register Plugin
 class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf {
  public:
@@ -73,6 +77,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   ge::Status InitFromOptions(const Options &options);
   ge::Status InitFromAclCfg(const std::string &config);
   ge::Status StartProfiling(int32_t iter, int32_t device_id);
+  void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);
+  ge::Status ProfModelSubscribe(uint64_t module, void *model);
+  ge::Status ProfModelUnsubscribe(void *model);
   ge::Status ProfInit(uint64_t module);
   ge::Status ProfFinalize();
   ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
@@ -84,13 +91,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   bool ProfilingModelLoadOn() const { return is_load_profiling_; }
   bool ProfilingModelExecuteOn() const;
   bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }  // only used by command pattern
+  bool IsAclApiMode() const { return is_acl_api_mode_; }
   int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; }
-  void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info,
-                           const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
+  void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
+                           const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
+                           bool check_device);
   void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter,
               Msprof::Engine::ReporterData &reporter_data);
-  void ProfilingTaskDescInfo(const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id);
-  void ProfilingGraphDescInfo(const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
+  void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
+                             const int32_t &device_id);
+  void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
                               const int32_t &device_id);
   void SetProfilingConfig(const string &profiling_cfg);
   vector<int32_t> GetProfilingDeviceId() const { return device_id_; }
@@ -122,6 +132,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   string task_trace_conf_;
   const ProfilingEngineImpl engine_;
   map<int32_t, uint64_t> device_id_module_map_;  // key: device_id, value: profiling on module
+  map<uint32_t, DeviceSubsInfo> subs_dev_module_;  // key: device_id, value: profiling on module
+  uint32_t subscribe_count_;
   std::mutex mutex_;
 };
 }  // namespace ge
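Usage sketch (not part of the patch): how a caller might drive the new per-model subscription. `model` is assumed to be a pointer to an already loaded DavinciModel, and the mask value is illustrative. The first subscriber carrying PROF_MODEL_LOAD_MASK registers the Msprof engine; the last unsubscriber tears it down via the `subscribe_count_` refcount above.

    // Hypothetical caller, assuming `model` is a loaded DavinciModel *.
    ge::ProfilingManager &mgr = ge::ProfilingManager::Instance();
    uint64_t module = PROF_MODEL_LOAD_MASK;  // illustrative: enable model-load profiling
    if (mgr.ProfModelSubscribe(module, model) != ge::SUCCESS) {
      return;  // engine registration or rtProfilerStart failed
    }
    // ... run the model; its records are reported tagged with model_id ...
    (void)mgr.ProfModelUnsubscribe(model);  // last subscriber on the device -> rtProfilerStop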
diff --git a/ge/common/types.cc b/ge/common/types.cc
index 0d10f8b3..7ae0daa3 100755
--- a/ge/common/types.cc
+++ b/ge/common/types.cc
@@ -54,6 +54,7 @@ const std::map<std::string, std::string> PROFILE_COMPONENT_MAP{
     {"runtime", RTS_PROFILE},
 };
 const std::string PROFILE_CONFIG = "config";
+const std::string PROFILE_MODEL_ID = "modelId";
 
 REGISTER_OPTYPE_DEFINE(DATA, "Data");
 REGISTER_OPTYPE_DEFINE(AIPPDATA, "AippData");
diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc
index ad2879c2..967bf420 100755
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -1062,6 +1062,19 @@ Status GeExecutor::ReleaseSingleOpResource(void *stream) {
   return SingleOpManager::GetInstance().ReleaseResource(stream);
 }
 
+Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  auto davinci_model = model_manager->GetModel(model_id);
+  if (davinci_model == nullptr) {
+    GELOGE(FAILED, "Model id: %u is invalid or model is not loaded.", model_id);
+    return FAILED;
+  }
+
+  device_id = davinci_model->GetDeviceId();
+  return SUCCESS;
+}
+
 Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) {
   std::vector<std::vector<int64_t>> batch_info;
   int32_t dynamic_type = static_cast<int32_t>(FIXED);
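A minimal caller sketch for the new executor query; the model id is illustrative and assumed to be already loaded:

    ge::GeExecutor executor;
    uint32_t device_id = 0U;
    if (executor.GetDeviceIdByModelId(1U, device_id) == ge::SUCCESS) {  // 1U: illustrative id
      GELOGI("Model 1 runs on device %u.", device_id);
    }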
diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc
index fad7d0c2..51f31003 100644
--- a/ge/graph/build/label_allocator.cc
+++ b/ge/graph/build/label_allocator.cc
@@ -32,11 +32,6 @@ Status LabelAllocator::AssignFunctionalLabels() {
     return INTERNAL_ERROR;
   }
 
-  if (compute_graph_->GetGraphUnknownFlag()) {
-    GELOGD("Graph[%s] is unknown graph, skip label allocator.", compute_graph_->GetName().c_str());
-    return SUCCESS;
-  }
-
   // Add label task for sub graph.
   GELOGI("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str());
   std::set<NodePtr> functional_nodes;
@@ -62,7 +57,7 @@ Status LabelAllocator::AssignFunctionalLabels() {
   }
 
   (void)AttrUtils::SetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_index);
-  GELOGI("AssignFunctionalLabels success.");
+  GELOGI("AssignFunctionalLabels success, Num: %u.", label_index);
   return SUCCESS;
 }
 
@@ -72,13 +67,29 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<NodePtr> &functional_nodes) {
     return false;
   }
 
-  NodePtr parent = graph->GetParentNode();
-  if (parent == nullptr) {
-    GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", graph->GetName().c_str());
+  if (graph->GetGraphUnknownFlag()) {
+    GELOGD("Graph[%s] is unknown graph, skip label allocator.", graph->GetName().c_str());
+    return true;
+  }
+
+  NodePtr func_node = graph->GetParentNode();
+  if (func_node == nullptr) {
+    GELOGE(INTERNAL_ERROR, "Parent functional node not set: %s.", graph->GetName().c_str());
     return false;
   }
 
-  (void)functional_nodes.insert(parent);  // unique functional node.
+  ComputeGraphPtr owner_graph = func_node->GetOwnerComputeGraph();
+  if (owner_graph == nullptr) {
+    GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", func_node->GetName().c_str());
+    return false;
+  }
+
+  if (owner_graph->GetGraphUnknownFlag()) {
+    GELOGD("Graph[%s] is unknown graph, skip label allocator.", owner_graph->GetName().c_str());
+    return true;
+  }
+
+  (void)functional_nodes.insert(func_node);  // unique functional node.
   return true;
 }
 }  // namespace ge
diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc
index 2d30c57e..cdca7fb7 100755
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -880,6 +880,15 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
         GELOGI("Unreusable block.");
         continue;
       }
+      std::string batch_label;
+      if (reusable_block->IsSameLabel(batch_label)) {
+        std::string op_label;
+        (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label);
+        if (batch_label != op_label) {
+          GELOGI("label diff, op name %s", node_op_desc->GetName().c_str());
+          continue;
+        }
+      }
 
       // A node can reuse blocks of the same stream and preorder streams
       if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) {
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 56a5b4dc..e8657a49 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -416,6 +416,14 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
                    return FAILED);
   GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_,
          p2p_mem_offset_, zero_copy_mem_size_);
+  string fp_ceiling_mode;
+  if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) {
+    if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
+      GELOGE(FAILED, "Failed to set attr ATTR_FP_CEILING_MODE");
+      return FAILED;
+    }
+    GELOGI("Set attr ATTR_FP_CEILING_MODE to model, value is %s.", fp_ceiling_mode.c_str());
+  }
 
   string ge_core_type;
   Status ret = ge::GetContext().GetOption(kCoreType, ge_core_type);
@@ -690,8 +698,8 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
   GE_TIMESTAMP_END(AssignLogicalStreams, "GraphBuilder::AssignLogicalStreams");
 
   // Assign functional op labels.
-  label_num_ = 0;
-  (void)AttrUtils::GetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_num_);
+  auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
+  (void)AttrUtils::GetInt(*root_graph, ATTR_MODEL_LABEL_NUM, label_num_);
 
   GE_TIMESTAMP_START(AssignMemory);
   MemoryAssigner mem_assigner(compute_graph_);
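Illustrative only (hypothetical op descs, not GE code): the new check in ApplyMemory keeps blocks from being reused across different batch branches; two ops can share a block only when their ATTR_NAME_BATCH_LABEL values match.

    (void)ge::AttrUtils::SetStr(op_a_desc, ATTR_NAME_BATCH_LABEL, "Batch_0");  // op_a_desc: hypothetical
    (void)ge::AttrUtils::SetStr(op_b_desc, ATTR_NAME_BATCH_LABEL, "Batch_1");  // op_b_desc: hypothetical
    // A reusable block carrying label "Batch_0" is now skipped when op_b applies
    // for memory, so the two dynamic-batch branches never alias one buffer.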
diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc
old mode 100644
new mode 100755
index 684ef3dc..9b513fe6
--- a/ge/graph/common/transop_util.cc
+++ b/ge/graph/common/transop_util.cc
@@ -82,4 +82,13 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) {
   }
   return true;
 }
+
+std::string TransOpUtil::TransopMapToString() {
+  std::string buffer;
+  for (auto &key : Instance().transop_index_map_) {
+    buffer += key.first + " ";
+  }
+  return buffer;
+}
+
 }  // namespace ge
diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h
index 8b10ad5c..3332e1fb 100644
--- a/ge/graph/common/transop_util.h
+++ b/ge/graph/common/transop_util.h
@@ -35,6 +35,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil {
 
   static bool CheckPrecisionLoss(const NodePtr &src_node);
 
+  static std::string TransopMapToString();
+
  private:
   TransOpUtil();
 
diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h
index 2acb963b..46ead310 100755
--- a/ge/graph/load/new_model_manager/data_dumper.h
+++ b/ge/graph/load/new_model_manager/data_dumper.h
@@ -86,6 +86,7 @@ class DataDumper {
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
+  const std::vector<OpDescInfo> &GetAllOpDescInfo() const { return op_desc_info_; }
 
   // Dump exception info
   Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file);
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index cb37182c..d00c2eda 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -88,6 +88,7 @@ const uint32_t kDataMemAlignSizeCompare = 64;
 const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024;
 const uint32_t kDumpFlagOfL1Fusion = 0;
 const char *const kDefaultBatchLable = "Batch_default";
+const int32_t kInvalidStream = -1;
 
 inline bool IsDataOp(const std::string &node_type) {
   return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE;
@@ -258,7 +259,6 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) {
 ///
 void DavinciModel::Shrink() {
   ge_model_.reset();  // delete object.
-  op_list_.clear();
 }
 
 Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
@@ -611,7 +611,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
     GE_DISMISS_GUARD(stream);
     stream_list_.push_back(stream);
-    GELOGD("Stream index:%u, stream:%p.", i, stream);
+    int32_t rt_stream_id = kInvalidStream;
+    (void)rtGetStreamId(stream, &rt_stream_id);
+    GELOGI("Logical stream index:%u, stream:%p, rtstream: %d.", i, stream, rt_stream_id);
   }
 
   for (uint32_t i = 0; i < EventNum(); i++) {
@@ -653,18 +655,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
     GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
                     (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
   }
-  // for profiling
-  op_name_map_ = compute_graph->GetGraphOpName();
-
-  vector<string> op_name;
-  GE_IF_BOOL_EXEC(ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name),
-                  GELOGI("get str of task_index_op_name"));
-  if (op_name_map_.empty()) {
-    for (size_t idx = 0; idx < op_name.size(); idx++) {
-      op_name_map_[idx] = op_name[idx];
-    }
-    GELOGI("Infer profiling: op_name_size(%zu)", op_name.size());
-  }
 
   GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed");
 
@@ -676,7 +666,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
   auto all_dump_model = GetDumpProperties().GetAllDumpModel();
   bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
   bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
-  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
+  bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) ||
+                          findByOmName || findByModelName;
+  if (dump_l1fusion_op) {
     // malloc 2M for dump l1fusion op
     GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR));
 
@@ -690,16 +682,21 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
   need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer();
   (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_);
 
+  string fp_ceiling_mode;
+  if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
+    GELOGI("Get attr ATTR_FP_CEILING_MODE from model, value is %s.", fp_ceiling_mode.c_str());
+    // mode 0: Do not perform saturation processing. By default, IEEE754 is used.
+    GE_CHK_RT_RET(rtSetCtxINFMode((fp_ceiling_mode != "0")));
+  }
+
   // collect profiling for ge
-  if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
-    std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
-    Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info);
-    if (ret1 != SUCCESS) {
-      GELOGE(ret1, "GetComputeGraphInfo failed.");
-      return ret1;
+  auto &profiling_manager = ProfilingManager::Instance();
+  if (profiling_manager.ProfilingModelLoadOn()) {
+    Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode());
+    if (p_ret != SUCCESS) {
+      GELOGE(p_ret, "Report profiling data failed.");
+      return p_ret;
     }
-    ProfilingManager::Instance().ReportProfilingData(GetTaskDescInfo(), compute_graph_desc_info);
-    GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed.");
   }
 
   Shrink();
@@ -707,6 +704,20 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
   return ret;
 }
 
+Status DavinciModel::ReportProfilingData(bool check_device) {
+  std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
+  Status ret = GetComputeGraphInfo(compute_graph_desc_info);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "GetComputeGraphInfo failed.");
+    return ret;
+  }
+  ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info,
+                                                   check_device);
+  GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed.");
+  op_list_.clear();
+
+  return SUCCESS;
+}
+
 ///
 /// @ingroup ge
 /// @brief Travel all nodes and determine if destruction is required.
@@ -2900,34 +2911,25 @@ Status DavinciModel::DistributeTask() {
         SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
       }
     }
-    // get op_name by task_index
-    if (task->GetCtx() != nullptr) {
-      auto iter = op_name_map_.find(task_index);
-      if (iter == op_name_map_.end()) {
-        continue;
-      }
-
-      // else task index is found in op_name_map_
-      TaskDescInfo task_desc_info;
-      string op_name = op_name_map_[task_index];
-      if (!om_name_.empty()) {
-        task_desc_info.model_name = om_name_;
-      } else {
-        task_desc_info.model_name = name_;
-      }
-      task_desc_info.op_name = op_name;
-      task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim();
-      task_desc_info.task_id = task->GetTaskID();
-      task_desc_info.stream_id = task->GetStreamId();
-      task_desc_info_.emplace_back(task_desc_info);
-      if (flag) {
-        if (task->GetSktTaskID() != 0xFFFFFFFF) {
-          TaskDescInfo task_desc_info;
-          string op_name = "super_kernel_" + to_string(task_index);
-          task_desc_info.op_name = op_name;
-          task_desc_info.task_id = task->GetSktTaskID();
-          task_desc_info_.emplace_back(task_desc_info);
-        }
+    // Load task info for profiling
+    TaskDescInfo task_desc_info;
+    if (!om_name_.empty()) {
+      task_desc_info.model_name = om_name_;
+    } else {
+      task_desc_info.model_name = name_;
+    }
+    task_desc_info.op_name = op->GetName();
+    task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim();
+    task_desc_info.task_id = task->GetTaskID();
+    task_desc_info.stream_id = task->GetStreamId();
+    task_desc_info_.emplace_back(task_desc_info);
+    if (flag) {
+      if (task->GetSktTaskID() != 0xFFFFFFFF) {
+        TaskDescInfo task_desc_info;
+        string op_name = "super_kernel_" + to_string(task_index);
+        task_desc_info.op_name = op_name;
+        task_desc_info.task_id = task->GetSktTaskID();
+        task_desc_info_.emplace_back(task_desc_info);
       }
     }
   }
@@ -3817,50 +3819,31 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream) {
     main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
 }
 
-Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) {
+Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
   GELOGI("GetComputeGraphInfo start.");
-  for (auto &node : graph->GetAllNodes()) {
+  auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
+  for (auto &op_desc : all_op_desc) {
     ComputeGraphDescInfo compute_graph_info;
-    auto op_desc = node->GetOpDesc();
-    if (op_desc == nullptr) {
-      GELOGE(PARAM_INVALID, "op_desc is nullptr.");
-      return PARAM_INVALID;
+    if (!om_name_.empty()) {
+      compute_graph_info.model_name = om_name_;
+    } else {
+      compute_graph_info.model_name = name_;
     }
+    compute_graph_info.op_name = op_desc.op_name;
+    compute_graph_info.op_type = op_desc.op_type;
+    compute_graph_info.input_format = op_desc.input_format;
+    compute_graph_info.input_shape = op_desc.input_shape;
+    compute_graph_info.input_data_type = op_desc.input_data_type;
+    compute_graph_info.output_format = op_desc.output_format;
+    compute_graph_info.output_shape = op_desc.output_shape;
+    compute_graph_info.output_data_type = op_desc.output_data_type;
 
-    auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
-    if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
-        op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
-      if (!om_name_.empty()) {
-        compute_graph_info.model_name = om_name_;
-      } else {
-        compute_graph_info.model_name = name_;
-      }
-      compute_graph_info.op_name = op_desc->GetName();
-      compute_graph_info.op_type = op_desc->GetType();
-
-      for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
-        GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
-        if (input_desc == nullptr) {
-          continue;
-        }
-        compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
-        compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
-        compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
-      }
-
-      for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
-        GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
-        compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
-        compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
-        compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
-      }
-
-      graph_desc_info.emplace_back(compute_graph_info);
-    }
+    graph_desc_info.emplace_back(compute_graph_info);
   }
   GELOGI("GetComputeGraphInfo end.");
   return SUCCESS;
 }
+
 void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
   if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
     tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 964057a4..ccf6ff25 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -439,6 +439,8 @@ class DavinciModel {
 
   Status SinkTimeProfile(const InputData &current_data);
 
+  Status ReportProfilingData(bool check_device = true);
+
   void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
     data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
   }
@@ -830,7 +832,7 @@ class DavinciModel {
   Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
 
   // get desc info of graph for profiling
-  Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info);
+  Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);
 
   void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);
 
@@ -949,7 +951,6 @@ class DavinciModel {
   std::map<std::string, uint32_t> used_tbe_handle_map_;
 
   // for profiling task and graph info
-  std::map<uint32_t, std::string> op_name_map_;
   std::vector<TaskDescInfo> task_desc_info_;
 
   int64_t maxDumpOpNum_;
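For reference, the task record emitted by ProfilingTaskDescInfo now carries the model id as a trailing field; a sketch of the append chain with made-up values:

    std::string data;
    data.append("resnet50").append(" ")            // model_name (illustrative)
        .append("conv1").append(" ")               // op_name (illustrative)
        .append(std::to_string(32)).append(" ")    // block_dim
        .append(std::to_string(17)).append(" ")    // task_id
        .append(std::to_string(2)).append(" ")     // stream_id
        .append(std::to_string(1)).append("\n");   // model_id (new trailing field)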
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index ec111c3d..a286ff5c 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -43,6 +43,8 @@ const std::string kCmdTypeProfInit = "prof_init";
 const std::string kCmdTypeProfFinalize = "prof_finalize";
 const std::string kCmdTypeProfStart = "prof_start";
 const std::string kCmdTypeProfStop = "prof_stop";
+const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe";
+const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe";
 const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
 const char *const kDeleteCustOp = "deleteCustOp";
 struct CustAicpuSoBuf {
@@ -334,11 +336,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::GeRootModel> &ge_root_model,
-    if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
-      davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
-                                                       timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
-      davinci_model->SetProfileTime(MODEL_LOAD_END);
-    }
+    davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
+                                                     timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
+    davinci_model->SetProfileTime(MODEL_LOAD_END);
   } while (0);
 
   GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
@@ -565,7 +565,9 @@ Status ModelManager::HandleCommand(const Command &command) {
       {kCmdTypeProfile, HandleProfileCommand},         {kCmdTypeDump, HandleDumpCommand},
       {kCmdTypeProfiling, HandleAclProfilingCommand},  {kCmdTypeProfInit, HandleProfInitCommand},
       {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand},
-      {kCmdTypeProfStop, HandleProfStopCommand}};
+      {kCmdTypeProfStop, HandleProfStopCommand},
+      {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
+      {kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}};
 
   auto iter = cmds.find(command.cmd_type);
   if (iter == cmds.end()) {
@@ -591,6 +593,77 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) {
   return SUCCESS;
 }
 
+Status ModelManager::GetModelByCmd(const Command &command,
+                                   std::shared_ptr<DavinciModel> &davinci_model) {
+  if (command.cmd_params.size() < kCmdParSize) {
+    GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must be at least 2.",
+           command.cmd_type.c_str());
+    return PARAM_INVALID;
+  }
+
+  std::string map_key = command.cmd_params[0];
+  std::string value = command.cmd_params[1];
+  if (map_key == PROFILE_MODEL_ID) {
+    int32_t model_id = 0;
+    try {
+      model_id = std::stoi(value);
+    } catch (std::invalid_argument &) {
+      GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str());
+      return PARAM_INVALID;
+    } catch (std::out_of_range &) {
+      GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str());
+      return PARAM_INVALID;
+    } catch (...) {
+      GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str());
+      return FAILED;
+    }
+
+    auto model_manager = ModelManager::GetInstance();
+    GE_CHECK_NOTNULL(model_manager);
+    davinci_model = model_manager->GetModel(static_cast<uint32_t>(model_id));
+    if (davinci_model == nullptr) {
+      GELOGE(FAILED, "Model id: %d is invalid or model is not loaded.", model_id);
+      return FAILED;
+    }
+  } else {
+    GELOGE(FAILED, "The model_id parameter is not found in the command.");
+    return FAILED;
+  }
+
+  return SUCCESS;
+}
+
+Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) {
+  std::shared_ptr<DavinciModel> davinci_model = nullptr;
+  Status ret = GetModelByCmd(command, davinci_model);
+  if (ret != SUCCESS) {
+    return ret;
+  }
+
+  if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index,
+                                                      static_cast<void *>(davinci_model.get())) != SUCCESS) {
+    GELOGE(FAILED, "Handle prof model subscribe failed.");
+    return FAILED;
+  }
+
+  return SUCCESS;
+}
+
+Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) {
+  std::shared_ptr<DavinciModel> davinci_model = nullptr;
+  Status ret = GetModelByCmd(command, davinci_model);
+  if (ret != SUCCESS) {
+    return ret;
+  }
+
+  if (ProfilingManager::Instance().ProfModelUnsubscribe(static_cast<void *>(davinci_model.get())) != SUCCESS) {
+    GELOGE(FAILED, "Handle prof model unsubscribe failed.");
+    return FAILED;
+  }
+
+  return SUCCESS;
+}
+
 Status ModelManager::HandleProfInitCommand(const Command &command) {
   uint64_t module_index = command.module_index;
   if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) {
@@ -973,11 +1046,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model_data,
 
   GELOGI("Parse model %u success.", model_id);
 
-  if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
-    davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
-                                                     timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
-    davinci_model->SetProfileTime(MODEL_LOAD_END);
-  }
+  davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
+                                                   timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
+  davinci_model->SetProfileTime(MODEL_LOAD_END);
 
   GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++);
   return SUCCESS;
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index d6a89d6b..8d46e578 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -158,10 +158,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   static ge::Status HandleAclProfilingCommand(const Command &command);
   static ge::Status HandleProfileCommand(const Command &command);
   static ge::Status HandleDumpCommand(const Command &command);
+  static ge::Status HandleProfModelSubscribeCommand(const Command &command);
+  static ge::Status HandleProfModelUnsubscribeCommand(const Command &command);
   static ge::Status HandleProfInitCommand(const Command &command);
   static ge::Status HandleProfFinalizeCommand(const Command &command);
   static ge::Status HandleProfStartCommand(const Command &command);
   static ge::Status HandleProfStopCommand(const Command &command);
+
+  static ge::Status GetModelByCmd(const Command &command,
+                                  std::shared_ptr<DavinciModel> &davinci_model);
   ///
   /// @ingroup domi_ome
   /// @brief get model memory usage
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc
index 35169726..2079034e 100755
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_task.cc
@@ -45,7 +45,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) {
   if (it == task_addr_offset_.end()) {
     task_addr_offset_[addr] = {offset};
   } else {
-    it->second.push_back(offset);
+    it->second.insert(offset);
   }
 
   GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr,
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h
index 57ccdbaf..d0bb2b6d 100644
--- a/ge/graph/load/new_model_manager/zero_copy_task.h
+++ b/ge/graph/load/new_model_manager/zero_copy_task.h
@@ -103,7 +103,7 @@ class ZeroCopyTask {
   bool is_updated_;
   string batch_label_;
   //
-  map<uintptr_t, vector<size_t>> task_addr_offset_;
+  map<uintptr_t, set<size_t>> task_addr_offset_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_
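Standalone illustration (not GE code) of why the container changed: std::set makes repeated registration of the same args offset idempotent, where the old vector kept duplicates and patched the same args entry twice.

    std::map<uintptr_t, std::set<size_t>> task_addr_offset;
    task_addr_offset[0x1000].insert(8);
    task_addr_offset[0x1000].insert(8);   // duplicate offset is silently dropped
    task_addr_offset[0x1000].insert(16);
    // offsets for 0x1000 are now {8, 16}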
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 282cd7a6..6c08b4d0 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -133,6 +133,22 @@ bool IsTailingOptimization() {
   GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default.");
   return false;
 }
+
+ge::Status CheckFpCeilingMode() {
+  static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
+  string mode;
+  auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode);
+  if (ret == ge::GRAPH_SUCCESS) {
+    if (kValidFpCeilingMode.count(mode) == 0) {
+      GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", mode.c_str());
+      return ge::GE_GRAPH_OPTIONS_INVALID;
+    }
+    GELOGI("The parameter fp_ceiling_mode is set to %s.", mode.c_str());
+    return ge::SUCCESS;
+  }
+  GELOGW("The parameter fp_ceiling_mode is not set.");
+  return ge::SUCCESS;
+}
 }  // namespace
 
 namespace ge {
@@ -168,6 +184,12 @@ Status GraphManager::Initialize(const std::map<std::string, std::string> &options) {
     return ret;
   }
 
+  ret = CheckFpCeilingMode();
+  if (ret != SUCCESS) {
+    GELOGE(ret, "[Initialize] Check fp-ceiling-mode options failed.");
+    return ret;
+  }
+
   ret = graph_context_->Initialize(options);
   if (ret != SUCCESS) {
     GELOGE(ret, "[Initialize] GraphContext initialize failed.");
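End-to-end sketch of the new option (the options map is illustrative): "ge.fpCeilingMode" is validated here at GraphManager::Initialize, stamped onto the model as ATTR_FP_CEILING_MODE by ModelBuilder::BuildModelDef above, and applied at model load through rtSetCtxINFMode, where any non-"0" value enables saturation mode.

    std::map<std::string, std::string> options;
    options["ge.fpCeilingMode"] = "1";  // valid values per CheckFpCeilingMode: "0", "1", "2"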
diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc
index a9e3f4c4..8bb16286 100755
--- a/ge/graph/passes/memcpy_addr_async_pass.cc
+++ b/ge/graph/passes/memcpy_addr_async_pass.cc
@@ -25,6 +25,10 @@ namespace ge {
 Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) {
   GE_CHECK_NOTNULL(graph);
 
+  if (graph->GetGraphUnknownFlag()) {
+    GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str());
+    return SUCCESS;
+  }
   int64_t value = 0;
   rtError_t rt_ret = rtGetRtCapability(FEATURE_TYPE_MEMCPY, MEMCPY_INFO_SUPPORT_ZEROCOPY, &value);
@@ -201,9 +205,10 @@ NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &graph,
                                                        const OutDataAnchorPtr &out_data_anchor,
                                                        const NodePtr &out_of_user_data) {
   GELOGD("Start CreateMemcpyAddrAsyncNode.");
+  static uint32_t new_node_index = 0;
   OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
   GE_CHK_BOOL_EXEC(pre_op_desc != nullptr, return nullptr, "Op_desc of pre node is invalid.");
-  std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC;
+  std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC + "_" + std::to_string(new_node_index++);
 
   OpDescPtr op_desc = MakeShared<OpDesc>(node_name, MEMCPYADDRASYNC);
   GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr);
diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc
index e3f2b71a..c6ab062a 100644
--- a/ge/graph/passes/net_output_pass.cc
+++ b/ge/graph/passes/net_output_pass.cc
@@ -103,6 +103,12 @@ Status NetOutputPass::GetOutputNode(const ge::ComputeGraphPtr &graph, std::vector<RetvalInfo> &output_nodes_info) {
       GELOGI("user set out node [%s] is found in user def targets, out node is prio!", ele.first->GetName().c_str());
       targets_.erase(iter);
     }
+
+    auto op_desc = ele.first->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    if (op_desc->HasAttr(ATTR_ATC_USER_DEFINE_OUTPUT_NODES)) {
+      is_user_define_output_nodes = true;
+    }
     output_nodes_info.push_back({ele.first, ele.second, -1});
   }
   GELOGI("Output node set by user or leaf node, size:%zu.", output_nodes_info.size());
@@ -414,7 +420,7 @@ Status NetOutputPass::ProcessWithNetoutput(const ge::ComputeGraphPtr &graph, const ge::NodePtr &net_out_node) {
 Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraphPtr &graph,
                                                           const ge::NodePtr &net_out_node) {
   GE_CHECK_NOTNULL(net_out_node);
-  if (!GetLocalOmgContext().user_out_nodes.empty()) {
+  if (!GetLocalOmgContext().user_out_nodes.empty() || is_user_define_output_nodes) {
     GELOGI("No need to add ctrl edge to netoutput because user out nodes have been set.");
     return SUCCESS;
   }
diff --git a/ge/graph/passes/net_output_pass.h b/ge/graph/passes/net_output_pass.h
index b959bd96..ab190169 100644
--- a/ge/graph/passes/net_output_pass.h
+++ b/ge/graph/passes/net_output_pass.h
@@ -220,6 +220,7 @@ class NetOutputPass : public GraphPass {
   bool is_include_special_node_ = false;
   std::set<NodePtr> targets_;
   friend class ReUpdateNetOutputPass;
+  bool is_user_define_output_nodes = false;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_PASSES_NET_OUTPUT_PASS_H_
diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index 98371426..45156e46 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -117,7 +117,6 @@
 #include "graph/passes/variable_op_pass.h"
 #include "graph/passes/variable_prepare_op_pass.h"
 #include "graph/passes/variable_ref_delete_op_pass.h"
-#include "graph/passes/mark_agnostic_pass.h"
 
 namespace ge {
 
@@ -219,6 +218,9 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, const GeTensorDesc &input,
 
   auto index = TransOpUtil::GetTransOpDataIndex(node_type);
   if (index < 0) {
+    ErrorManager::GetInstance().ATCReportErrMessage(
+        "E19025", {"situation", "reason"},
+        {"The trans node type[" + node_type + "]", "it must be " + TransOpUtil::TransopMapToString()});
     GELOGE(INTERNAL_ERROR, "The trans node type %s does not exists", node_type.c_str());
     return nullptr;
   }
@@ -387,6 +389,8 @@ Status RecoverTransRoadForVar(const NodePtr &var, const VarTransRoad &road) {
     auto trans_name = var->GetName() + "_trans_" + std::to_string(index++);
     auto ret = RecoverOneTransNodeForVar(trans_name, *iter, last_node, last_node);
     if (ret != SUCCESS) {
+      ErrorManager::GetInstance().ATCReportErrMessage(
+          "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type});
       GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s",
             var->GetName().c_str(), index, iter->node_type.c_str());
       return INTERNAL_ERROR;
@@ -419,6 +423,8 @@ Status RecoverTransRoadForVarRef(const std::set<NodePtr> &nodes, const VarTransRoad &road) {
     auto trans_name = var->GetName() + "_trans_" + std::to_string(index++);
     auto ret = RecoverOneTransNodeForVarRef(trans_name, *iter, last_node, last_node);
     if (ret != SUCCESS) {
+      ErrorManager::GetInstance().ATCReportErrMessage(
+          "E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type});
       GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s",
             var->GetName().c_str(), index, iter->node_type.c_str());
       return INTERNAL_ERROR;
@@ -571,6 +577,8 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, NodePtr &switchn_node) {
   std::string related_node_name;
   if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) {
     if (related_node_name.empty()) {
+      ErrorManager::GetInstance().ATCReportErrMessage(
+          "E15002", {"opname", "value", "reason"}, {data_node->GetName(), "flag", "but the value is empty"});
      GELOGE(INTERNAL_ERROR, "The data node %s has switchn node flag, but the value is empty",
             data_node->GetName().c_str());
      return INTERNAL_ERROR;
@@ -582,6 +590,9 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, NodePtr &switchn_node) {
     }
   }
   if (switchn_node == nullptr) {
+    ErrorManager::GetInstance().ATCReportErrMessage(
+        "E15002", {"opname", "value", "reason"},
+        {data_node->GetName(), related_node_name, "but can not find it on the graph"});
     GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can not find it on the graph",
            data_node->GetName().c_str(), related_node_name.c_str());
     return INTERNAL_ERROR;
@@ -682,6 +693,10 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, NodePtr &switchn_node) {
   ge::GeShape old_shape = input->GetShape();
   bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC));
   if (!support) {
+    ErrorManager::GetInstance().ATCReportErrMessage(
+        "E19014", {"opname", "value", "reason"},
+        {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]",
+         "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"});
     GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str());
     return FAILED;
   }
@@ -762,6 +777,9 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tensor_desc_ptr,
            op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(),
            formats::JoinToString(storage_shape).c_str());
   } else {
+    ErrorManager::GetInstance().ATCReportErrMessage(
+        "E15003", {"opname", "format"},
+        {op_desc->GetName(), TypeUtils::FormatToSerialString(storage_format)});
     GELOGE(PARAM_INVALID,
            "Update node by storage format failed, storage_shape not set. "
            "node: [%s], storage_format [%s]",
           op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str());
@@ -900,9 +918,14 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) {
     // check if is_output_adjust_hw_layout is set
     if (NeedUpdateFormatByOutputTypeParm(op_desc, index)) {
       if ((old_format != FORMAT_NCHW) && (old_format != FORMAT_NHWC) && (old_format != FORMAT_NC1HWC0)) {
+        ErrorManager::GetInstance().ATCReportErrMessage(
+            "E19014", {"opname", "value", "reason"},
+            {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]",
+             "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"});
         GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0.");
         return FAILED;
       }
+
       GeTensorDesc old_desc(old_shape, old_format, old_dtype);
       if (ProcessNetoutputNodeFp16Nc1hwc0DynShape(old_desc, net_output_input_desc, src_node) != SUCCESS) {
         GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0.");
@@ -1035,6 +1058,9 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &input_name,
   }
   bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end());
   if (!is_acceptable) {
+    ErrorManager::GetInstance().ATCReportErrMessage(
+        "E15005", {"opname", "optype", "opname1", "optype1"},
+        {op_desc->GetName(), node->GetType(), input_op_desc->GetName(), input_op_desc->GetType()});
     GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.",
            node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(),
           input_op_desc->GetType().c_str());
@@ -1127,6 +1153,9 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
     }
 
     if ((index < 0) || (static_cast<size_t>(index) >= user_input.size())) {
+      std::string situation = "data op index[" + std::to_string(index) + "]";
+      std::string reason = "it must be less than user_input size[" + std::to_string(user_input.size()) + "]";
+      ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
       GELOGE(PARAM_INVALID, "user_input size = %zu, graph data op index = %ld.", user_input.size(), index);
       return FAILED;
     }
@@ -1139,6 +1168,9 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
     if (need_check_internal_format) {
       bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format);
       if (is_internal) {
+        ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"},
+            {"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" +
+            TypeUtils::FormatToSerialString(origin_format) + "]", "it is not supported"});
         GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.",
                TypeUtils::FormatToSerialString(format).c_str(),
                TypeUtils::FormatToSerialString(origin_format).c_str());
@@ -1150,6 +1182,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
     uint32_t length = 1;
     bool type_ret = TypeUtils::GetDataTypeLength(data_type, length);
     if (!type_ret) {
+      ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"},
+          {"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not supported"});
       GELOGE(PARAM_INVALID, "Input datatype %s is not support.",
              TypeUtils::DataTypeToSerialString(data_type).c_str());
       return FAILED;
@@ -1164,6 +1198,10 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
                     return FAILED);
     bool size_check = (size != 0 && shape_size != size);
     if (size_check) {
+      std::string situation = "input data size[" + std::to_string(size) +
+          "] and shape_size[" + std::to_string(shape_size) + "]";
+      std::string reason = "because size != 0 and shape_size != size";
+      ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
      GELOGE(PARAM_INVALID, "input data size =%ld, shape_size =%ld.", size, shape_size);
      return FAILED;
     }
@@ -1503,6 +1541,8 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) {
   uint32_t length = 1;
   bool type_ret = TypeUtils::GetDataTypeLength(data_type, length);
   if (!type_ret) {
+    ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"},
+        {"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not supported"});
     GELOGE(PARAM_INVALID, "Input datatype %s is not support.", TypeUtils::DataTypeToSerialString(data_type).c_str());
     return FAILED;
   }
@@ -1512,14 +1552,20 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) {
   if (shape_size == 0) {
     if (ge_tensor_desc.GetShape().GetDims().size() == 0) {
       // shape = [], means it's a sclar tensor.
-      GE_CHK_BOOL_EXEC(data_size / length == 1, return PARAM_INVALID, "Const is invalid scalar tensor.");
+      GE_CHK_BOOL_EXEC(data_size / length == 1,
+          ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid scalar tensor."});
+          return PARAM_INVALID, "Const is invalid scalar tensor.");
     } else {
       // shape = [x, y, 0,...], means it's a vector tensor that value is [].
-      GE_CHK_BOOL_EXEC(data_size == 0, return PARAM_INVALID, "Const is invalid vector scalar.");
+      GE_CHK_BOOL_EXEC(data_size == 0,
+          ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid vector scalar."});
+          return PARAM_INVALID, "Const is invalid vector scalar.");
     }
   } else {
-    GE_CHK_BOOL_EXEC(data_size == static_cast<size_t>(shape_size * length) && data_size != 0, return PARAM_INVALID,
-                     "Const input data size is not equal with tensor desc shape");
+    GE_CHK_BOOL_EXEC(data_size == static_cast<size_t>(shape_size * length) && data_size != 0,
+        ErrorManager::GetInstance().ATCReportErrMessage(
+            "E10043", {"reason"}, {"Const input data size is not equal with tensor desc shape"});
+        return PARAM_INVALID, "Const input data size is not equal with tensor desc shape");
   }
 
   return SUCCESS;
 }
@@ -1543,6 +1589,9 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
       return GE_GRAPH_INIT_FAILED;
     }
     if ((index < 0) || (static_cast<size_t>(index) >= user_input.size())) {
+      std::string situation = "data op index[" + std::to_string(index) + "]";
+      std::string reason = "it must be less than user_input size[" + std::to_string(user_input.size()) + "]";
+      ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
      GELOGE(GE_GRAPH_INIT_FAILED, "user_input size:%zu, data op index:%ld.", user_input.size(), index);
      return GE_GRAPH_INIT_FAILED;
     }
@@ -1550,6 +1599,9 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
 
     for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) {
       if (desc.GetShape().GetDim(i) < 0) {
+        std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(desc.GetShape().GetDim(i)) + "]";
+        std::string reason = "it must be >= 0";
+        ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
        GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i,
               desc.GetShape().GetDim(i));
        return GE_GRAPH_INIT_FAILED;
@@ -1627,7 +1679,6 @@ Status GraphPrepare::PrepareOptimize() {
   try {
     (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass);
     (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass);
-    (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass", new MarkAgnosticPass);
   } catch (std::bad_alloc &e) {
     GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs.");
     return INTERNAL_ERROR;
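The pattern this file now applies throughout, shown with illustrative values: report the structured ATC error first (placeholder names must match those registered for the error code), then emit the developer log.

    ErrorManager::GetInstance().ATCReportErrMessage(
        "E19025", {"situation", "reason"},
        {"data op index[3]", "it must be less than user_input size[2]"});  // values illustrative
    GELOGE(PARAM_INVALID, "user_input size = 2, graph data op index = 3.");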
error_msg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return nullptr; } Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr &target, @@ -363,10 +352,10 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr } if (!edge_indexes.empty() && (*edge_indexes.rbegin() >= data_node->GetOutDataNodes().size())) { - GELOGE(PARAM_INVALID, "input_edge_idx %u should smaller than out edge size of target input %zu", - *edge_indexes.rbegin(), data_node->GetOutDataNodes().size()); - string errormsg = "The aipp parameter input_edge_idx should be smaller than the target input's outnodes."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + string error_msg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) + + "] should be smaller than the target input[" + + std::to_string(data_node->GetOutDataNodes().size()) +"]'s outnodes."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } target = data_node; @@ -439,8 +428,7 @@ Status AippOp::ConvertRelatedInputNameToRank() { if (!convert_flag) { string error_msg = "Top name " + related_input_name + "convert rank failed, Please" " ensure top name in aipp config is the top name of data node."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, "Top name[%s] converts rank failed.", related_input_name.c_str()); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } @@ -537,87 +525,87 @@ Status AippOp::SetDefaultParams() { Status AippOp::ValidateParams() { GE_CHECK_NOTNULL(aipp_params_); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, - "When insert AIPP op, aipp_mode must be configured as static or dynamic "); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_2 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_3 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c2 can 
not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, - "The parameter output_bias_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, - "The parameter output_bias_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, - "The parameter output_bias_2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, - "The parameter input_bias_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, - "The parameter input_bias_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, - "The parameter input_bias_2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, - "The parameter input_edge_idx can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, + "When insert AIPP op, aipp_mode must be configured as static or dynamic "); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_2 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_3 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c1 can not be configed 
repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, + "The parameter output_bias_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, + "The parameter output_bias_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, + "The parameter output_bias_2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, + "The parameter input_bias_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, + "The parameter input_bias_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, + "The parameter input_bias_2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, + "The parameter input_edge_idx can not be configed repeatedly"); const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode(); if (aipp_mode == domi::AippOpParams::dynamic) { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aipp_params_->max_src_image_size() > 0, PARAM_INVALID, "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); } else { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, - "Input format of AIPP conf is undefined"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, - "Src_image_size_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, - "Src_image_size_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, - "Load_start_pos_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, - "Load_start_pos_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, - "Crop_size_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, - "Resize_output_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, - "Resize_output_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, - "Left_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, - "Right_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, - "Top_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, - "Bottom_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, + "Input format of AIPP conf is undefined"); + + 
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, + "Src_image_size_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, + "Src_image_size_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, + "Load_start_pos_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, + "Load_start_pos_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, + "Crop_size_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, + "Resize_output_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, + "Resize_output_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, + "Left_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, + "Right_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, + "Top_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, + "Bottom_padding_size must not be configed smaller than 0"); } return SUCCESS; @@ -790,17 +778,20 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { - GELOGE(PARAM_INVALID, "Get data_node dims and transfer to nchw_dims failed!"); + string error_msg = "Get data_node dims and transfer to nchw_dims failed!"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (batch_count <= 0) { - GELOGE(PARAM_INVALID, "Batch count %ld is invalid", batch_count); + string error_msg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count); if (max_dynamic_aipp_size < 0) { - GELOGE(PARAM_INVALID, "The dynamic aipp size is not positive."); + string error_msg = "The dynamic aipp size is not positive"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index a1eb104d..1b926e4b 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -124,19 +124,13 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { if (another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_name and related_input_rank!" " Please ensure param is the same with the first aipp config(related_input_name)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" 
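// Sketch: CreateAippData above now emits a descriptive message for each failed
// guard. The same guard chain in isolation; CalcMaxSizeSketch is a hypothetical
// stand-in for GE's CalcMaxSize, and treating a negative result as arithmetic
// overflow is an assumption made only for this sketch.
#include <cstdint>
#include <iostream>

int64_t CalcMaxSizeSketch(int64_t batch_count) {
  // Negative return mirrors the "< 0 means invalid" check in the patch.
  const int64_t kPerBatch = 1 << 20;
  if (batch_count > INT64_MAX / kPerBatch) return -1;
  return batch_count * kPerBatch;
}

bool PrepareDynamicAipp(int64_t batch_count) {
  if (batch_count <= 0) {
    std::cerr << "Batch count[" << batch_count << "] is invalid, it must be positive.\n";
    return false;
  }
  int64_t max_size = CalcMaxSizeSketch(batch_count);
  if (max_size < 0) {
    std::cerr << "The dynamic aipp size is not positive\n";
    return false;
  }
  return true;
}

int main() { return PrepareDynamicAipp(8) ? 0 : 1; }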
- " Please ensure param is the same with the first aipp config(related_input_name)."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (item->related_input_name() == another_item->related_input_name()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } } @@ -156,19 +150,13 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { if (!another_item->related_input_name().empty()) { string error_msg = "Can not both set related_input_rank and related_input_name!" " Please ensure param is the same with the first aipp config(related_input_rank)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_rank)."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (item->related_input_rank() == another_item->related_input_rank()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } } @@ -224,9 +212,10 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { } } } - GE_CHK_BOOL_RET_STATUS((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), PARAM_INVALID, - "Can not config part of outputs of Data node to support AIPP, config all " - "of the outputs of Data to support AIPP, or config none of them"); + GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), + PARAM_INVALID, + "Can not config part of outputs of Data node to support AIPP, config all " + "of the outputs of Data to support AIPP, or config none of them"); std::unique_ptr aippParams(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(aippParams); @@ -238,16 +227,19 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); if (aippMode == domi::AippOpParams::static_) { - GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, - "The input_format of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, - "The src_image_size_w of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, - "The src_image_size_h of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG( + aippParams->input_format() == currAippParam->input_format(), + PARAM_INVALID, "The input_format of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG( 
+ aippParams->src_image_size_w() == currAippParam->src_image_size_w(), + PARAM_INVALID, "The src_image_size_w of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG( + aippParams->src_image_size_h() == currAippParam->src_image_size_h(), + PARAM_INVALID, "The src_image_size_h of all aipp_ops after one Data should be the same"); } else { - GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), - PARAM_INVALID, - "The max_src_image_size of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG( + aippParams->max_src_image_size() == currAippParam->max_src_image_size(), + PARAM_INVALID, "The max_src_image_size of all aipp_ops after one Data should be the same"); } }); } @@ -290,7 +282,8 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { for (auto &switchn : updated_switchn) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { - GELOGE(INTERNAL_ERROR, "Failed to find relative data node by switchn %s", switchn->GetName().c_str()); + string error_msg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } GE_RETURN_IF_ERROR(UpdateDataBySwitchN(switchn, data_iter->second)); @@ -477,7 +470,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { - GELOGE(INTERNAL_ERROR, "No max size found from switchn node %s", switchn->GetName().c_str()); + string error_msg = "No max size found from switchn node[" + switchn->GetName()+ "]"; + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } auto output_desc = switchn->GetOpDesc()->MutableOutputDesc(max_index); diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c0ba89f4..83739f57 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -595,6 +595,8 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector &start_ } auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); if (!IsAllDimsPositive(dims)) { + ErrorManager::GetInstance().ATCReportErrMessage("E15004", {"opname", "shape"}, + {node->GetName(), formats::ShapeToString(dims)}); GELOGE(INTERNAL_ERROR, "Failed to copy multi batch graph, the node %s still has unknown shape %s", node->GetName().c_str(), formats::ShapeToString(dims).c_str()); return INTERNAL_ERROR; @@ -1025,6 +1027,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { + const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); + if (multi_batch_with_case != nullptr) { + PassManager pass_manager; + GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); + return pass_manager.Run(graph); + } + std::vector> shapes; if (!InitDynamicParams(shapes)) { GELOGD("There is no multi-batch options, no need to process multi-batch copy"); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 9909b0dc..cc0d2d5b 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -124,6 +124,8 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, auto tmp_index = cur_data_index; for 
(size_t i = 0; i < static_cast(dynamic_dims_num); ++i) { if (tmp_index >= dynamic_gear_info.size()) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10045", {"name", "shape"}, {data_name, formats::JoinToString(data_shape)}); GELOGE(PARAM_INVALID, "Data: %s shape: %s make dynamic dims overflow", data_name.c_str(), formats::JoinToString(data_shape).c_str()); return FAILED; @@ -131,6 +133,8 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, one_gear.push_back(dynamic_gear_info[tmp_index++]); } } else { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10046", {"name", "shape"}, {data_name, formats::JoinToString(data_shape)}); GELOGE(PARAM_INVALID, "Dynamic dims num of data: %s shape: %s can not be more than one gear dynamic info size", data_name.c_str(), formats::JoinToString(data_shape).c_str()); return FAILED; diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index fc98e8a5..c3274465 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -100,7 +100,9 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetOutputDesc(0); + GeTensorDesc output_tensor_desc(attr_output_tensor_desc); + output_tensor_desc.SetShape(output_shape); GeTensorPtr output_ptr = MakeShared(output_tensor_desc); if (output_ptr == nullptr) { GELOGW("make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str()); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index d230b949..e275150a 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -259,7 +259,9 @@ Status NodeDoneCallback::ProfilingReport() { return profiling_ret; } - ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info); + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 4c32f131..dbd784c6 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -17,8 +17,6 @@ #include "aicore_node_executor.h" #include "cce/taskdown_common.hpp" #include "hybrid/executor/hybrid_execution_context.h" -#include "init/gelib.h" -#include "hybrid/executor/hybrid_execution_context.h" namespace ge { namespace hybrid { @@ -28,19 +26,10 @@ AiCoreNodeTask::AiCoreNodeTask(std::vector> &&task } Status AiCoreNodeExecutor::Initialize() { - auto ge_lib = GELib::GetInstance(); - GE_CHECK_NOTNULL(ge_lib); - if (!ge_lib->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed."); - return GE_CLI_GE_NOT_INITIALIZED; + compiler_ = TaskCompilerFactory::GetInstance().GetTaskCompiler(); + if (compiler_ != nullptr) { + GE_CHK_STATUS_RET(compiler_->Initialize(), "Failed to init aicore task compiler."); } - - auto &kernel_manager = ge_lib->OpsKernelManagerObj(); - auto aic_ops_store = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); - GE_CHECK_NOTNULL(aic_ops_store); - - compiler_.reset(new(std::nothrow)AiCoreTaskCompiler(aic_ops_store)); - GE_CHECK_NOTNULL(compiler_); return SUCCESS; } @@ -120,6 +109,12 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, GE_CHECK_NOTNULL(op_desc); GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", 
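// Sketch: the slice_kernel.cc hunk above copy-constructs the output tensor
// descriptor from the op's recorded output and only then overrides the shape,
// so data type and format survive constant folding instead of being reset to
// defaults. DescSketch is a stand-in, not ge::GeTensorDesc:
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct DescSketch {
  std::string dtype = "DT_FLOAT";
  std::string format = "NCHW";
  std::vector<int64_t> shape;
};

int main() {
  DescSketch recorded{"DT_INT32", "NHWC", {4, 8, 8, 3}};  // attr->GetOutputDesc(0)
  // Copy everything, then override only what the kernel computed.
  DescSketch out = recorded;  // like GeTensorDesc(attr_output_tensor_desc)
  out.shape = {2, 8, 8, 3};   // like output_tensor_desc.SetShape(output_shape)
  std::cout << out.dtype << "/" << out.format << " rank=" << out.shape.size() << '\n';
  return 0;
}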
node->GetName().c_str()); + auto ori_node_name = node->GetName(); + if (compiler_ == nullptr) { + GELOGE(FAILED, "[%s] Can not find any valid aicore task compiler.", ori_node_name.c_str()); + return FAILED; + } + AiCoreNodeTaskRegistry ®istry = AiCoreNodeTaskRegistry::GetInstance(); std::string shape_key; GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str()); @@ -133,7 +128,6 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, } std::vector task_defs; - auto ori_node_name = node->GetName(); op_desc->SetName(ori_node_name + "_" + shape_key); GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", ori_node_name.c_str()); op_desc->SetName(ori_node_name); @@ -239,5 +233,23 @@ bool AiCoreNodeTask::IsNoOp(TaskContext &task_context) { return true; } + +TaskCompilerFactory &TaskCompilerFactory::GetInstance() { + static TaskCompilerFactory instance; + return instance; +} + +void TaskCompilerFactory::Register(CreateFn fn) { + compiler_func_ = fn; +} + +std::unique_ptr TaskCompilerFactory::GetTaskCompiler() { + auto compiler_instance = std::unique_ptr(compiler_func_()); + return compiler_instance; +} + +CompilerFunctionRegistrar::CompilerFunctionRegistrar(CreateFn fn) { + TaskCompilerFactory::GetInstance().Register(fn); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h index 374782dc..989090e9 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -18,13 +18,21 @@ #define GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "hybrid/node_executor/aicore/aicore_task_compiler.h" #include "hybrid/node_executor/node_executor.h" #include #include namespace ge { namespace hybrid { + +class TaskCompiler { + public: + TaskCompiler() = default; + virtual ~TaskCompiler() = default; + virtual Status CompileOp(const NodePtr &node, std::vector &tasks) = 0; + virtual Status Initialize() = 0; +}; + class AiCoreNodeTaskRegistry { public: ~AiCoreNodeTaskRegistry() = default; @@ -65,8 +73,33 @@ class AiCoreNodeExecutor : public NodeExecutor { private: static Status GenNodeKey(const NodePtr &node, std::string &node_key); - std::unique_ptr compiler_; + std::unique_ptr compiler_; +}; + +using CreateFn = TaskCompiler *(*)(); +class TaskCompilerFactory { + public: + static TaskCompilerFactory &GetInstance(); + void Register(CreateFn fn); + std::unique_ptr GetTaskCompiler(); + + private: + CreateFn compiler_func_; +}; + +class CompilerFunctionRegistrar { + public: + CompilerFunctionRegistrar(CreateFn fn); + ~CompilerFunctionRegistrar() = default; }; } // namespace hybrid } // namespace ge -#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ + +#define REGISTER_TASK_COMPILER(compiler) \ + static ::ge::hybrid::CompilerFunctionRegistrar register_compiler_function \ + __attribute__((unused)) = \ + ::ge::hybrid::CompilerFunctionRegistrar([]()->::ge::hybrid::TaskCompiler* { \ + return new (std::nothrow) compiler(); \ + }) \ + +#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc index ed92ada7..26a41737 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -18,6 +18,7 @@ #include 
"framework/common/debug/log.h" #include "graph/debug/ge_attr_define.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "init/gelib.h" namespace ge { namespace hybrid { @@ -25,11 +26,22 @@ namespace { uintptr_t kWeightBase = 0x10000000; uintptr_t kMemBase = 0x20000000; uint64_t kFakeSize = 0x10000000UL; +REGISTER_TASK_COMPILER(AiCoreTaskCompiler); } std::mutex AiCoreTaskCompiler::mu_; -AiCoreTaskCompiler::AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store) - : aic_kernel_store_(std::move(aic_kernel_store)) {} +Status AiCoreTaskCompiler::Initialize() { + auto ge_lib = GELib::GetInstance(); + GE_CHECK_NOTNULL(ge_lib); + if (!ge_lib->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed."); + return GE_CLI_GE_NOT_INITIALIZED; + } + auto &kernel_manager = ge_lib->OpsKernelManagerObj(); + aic_kernel_store_ = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); + GE_CHECK_NOTNULL(aic_kernel_store_); + return SUCCESS; +} Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const { GE_CHECK_NOTNULL(node); diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index 38ed458f..bf948349 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -19,15 +19,17 @@ #include #include "opskernel_manager/ops_kernel_manager.h" +#include "aicore_node_executor.h" namespace ge { namespace hybrid { -class AiCoreTaskCompiler { +class AiCoreTaskCompiler : public TaskCompiler { public: - explicit AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store); + AiCoreTaskCompiler() = default; ~AiCoreTaskCompiler() = default; - Status CompileOp(const NodePtr &node, std::vector &tasks); + Status CompileOp(const NodePtr &node, std::vector &tasks) override; + Status Initialize() override; private: Status DoCompileOp(const NodePtr &node) const; Status DoGenerateTask(const Node &node, std::vector &tasks); diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 85a742b2..8a5cb610 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -56,6 +56,7 @@ const int kDefaultDeviceIdForInfer = -1; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); +const char *const kGlobalOptionFpCeilingModeDefault = "2"; } // namespace static std::shared_ptr instancePtr_ = nullptr; @@ -79,6 +80,11 @@ Status GELib::Initialize(const map &options) { return ret; } instancePtr_->SetDefaultPrecisionMode(new_options); + + if (new_options.find("ge.fpCeilingMode") == new_options.end()) { + new_options["ge.fpCeilingMode"] = kGlobalOptionFpCeilingModeDefault; + } + GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GE_TIMESTAMP_START(Init); diff --git a/ge/offline/main.cc b/ge/offline/main.cc index d72040d7..94514f95 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -32,7 +32,6 @@ #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" #include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" #include "graph/op_desc.h" #include "graph/utils/graph_utils.h" #include "graph/utils/type_utils.h" @@ -64,8 +63,6 @@ using std::vector; static bool is_dynamic_input = false; -// 310 limited 8G size -const char *const kGraphMemoryManagerMallocMaxSize = "8*1024*1024*1024"; const char *const kModeSupport = "only support 0(model to framework model), " 
"1(framework model to json), 3(only pre-check), 5(pbtxt to json)"; const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; @@ -908,13 +905,6 @@ domi::Status GenerateModel(std::map &options, std::string output return domi::FAILED; } - geRet = ge::VarManager::Instance(0)->SetMemoryMallocSize(options); - if (geRet != ge::SUCCESS) { - GELOGE(ge::FAILED, "SetMemoryMallocSize failed."); - (void)ge::GELib::GetInstance()->Finalize(); - return domi::FAILED; - } - ge::Graph graph; std::vector inputs; if (FLAGS_framework == domi::MINDSPORE) { @@ -1016,7 +1006,6 @@ static void SetEnvForSingleOp(std::map &options) { options.emplace(ge::OP_SELECT_IMPL_MODE, FLAGS_op_select_implmode); options.emplace(ge::OPTYPELIST_FOR_IMPLMODE, FLAGS_optypelist_for_implmode); options.emplace(ge::AUTO_TUNE_MODE, FLAGS_auto_tune_mode); - options.emplace(ge::GRAPH_MEMORY_MAX_SIZE, kGraphMemoryManagerMallocMaxSize); options.emplace(ge::OP_DEBUG_LEVEL, to_string(FLAGS_op_debug_level)); options.emplace(ge::DEBUG_DIR, FLAGS_debug_dir); options.emplace(ge::OP_COMPILER_CACHE_DIR, FLAGS_op_compiler_cache_dir); @@ -1053,13 +1042,6 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { return domi::FAILED; } - ret = ge::VarManager::Instance(0)->SetMemoryMallocSize(options); - if (ret != ge::SUCCESS) { - GELOGE(ge::FAILED, "SetMemoryMallocSize failed."); - (void)ge::GELib::GetInstance()->Finalize(); - return domi::FAILED; - } - vector build_params; if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) { DOMI_LOGE("parse single op json file failed"); @@ -1158,8 +1140,6 @@ domi::Status GenerateOmModel() { (FLAGS_enable_compress_weight == "true") ? ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse)); - options.insert(std::pair(string(ge::GRAPH_MEMORY_MAX_SIZE), kGraphMemoryManagerMallocMaxSize)); - options.insert(std::pair(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); options.insert(std::pair(string(ge::DEBUG_DIR), FLAGS_debug_dir)); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 104b3d00..16449363 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -485,6 +485,10 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str()); return domi::FAILED; } + + // add user_define_output_nodes attr. + (void)ge::AttrUtils::SetStr(op_desc, ATTR_ATC_USER_DEFINE_OUTPUT_NODES, "true"); + if (i < output_formats.size()) { if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) { GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str()); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index b66c5b78..37e2dccf 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -339,6 +339,7 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL, + OP_DEBUG_LEVEL, DEBUG_DIR, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE}; diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 6d449919..e259f43b 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -28,7 +28,7 @@ #if !defined(__ANDROID__) && !defined(ANDROID) #define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) #else -#include +#include #if defined(BUILD_VERSION_PERF) #define DOMI_LOGE(fmt, ...) 
#else @@ -83,12 +83,12 @@ } while (0); // If expr is not GRAPH_SUCCESS, print the log and return FAILED -#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ - do { \ - if ((expr) != ge::GRAPH_SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - return FAILED; \ - } \ +#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ + do { \ + if ((expr) != ge::GRAPH_SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + return FAILED; \ + } \ } while (0); // If expr is not SUCCESS, print the log and execute a custom statement @@ -99,13 +99,13 @@ } while (0); // If expr is not true, print the log and return the specified status -#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, __VA_ARGS__); \ - return _status; \ - } \ +#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, __VA_ARGS__); \ + return _status; \ + } \ } while (0); // If expr is not true, print the log and return the specified status @@ -253,4 +253,20 @@ exec_expr1; \ } +#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ + { \ + GELOGE(_status, "%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + } + +#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, "%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + return _status; \ + } \ + } while (0) + #endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 0644b0f2..038b1cf6 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -70,6 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map PROFILE_COMPONENT_MAP; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; @@ -567,10 +568,10 @@ enum ModelCheckType { /// @brief dynamic input type /// enum DynamicInputType { - FIXED = 0, // default mode - DYNAMIC_BATCH = 1, - DYNAMIC_IMAGE = 2, - DYNAMIC_DIMS = 3 + FIXED = 0, // default mode + DYNAMIC_BATCH = 1, + DYNAMIC_IMAGE = 2, + DYNAMIC_DIMS = 3 }; /// diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index ba90fd03..17dbf928 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -38,14 +38,14 @@ class DynamicSingleOp; struct RunModelData { uint32_t index; // Data index uint32_t modelId; - std::vector blobs; // All input/output data buffer - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint64_t request_id = 0; // Request ID - uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 - uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 - uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 - std::vector dynamic_dims; // Dynamic dims scene, set dynamic 
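// Sketch: the two macros added to log.h above differ in shape on purpose:
// GE_ERRORLOG_AND_ERRORMSG is a bare statement block (log + report, no control
// flow), while GE_CHK_LOG_AND_ERRORMSG wraps do { ... } while (0) and returns
// _status from the enclosing function when expr is false. Stand-in versions of
// both, with std::cerr in place of GELOGE/ATCReportErrMessage:
#include <iostream>

#define SK_LOG_AND_MSG(msg) \
  { std::cerr << msg << '\n'; }

#define SK_CHK_LOG_AND_MSG(expr, status, msg) \
  do {                                        \
    if (!(expr)) {                            \
      std::cerr << msg << '\n';               \
      return (status);                        \
    }                                         \
  } while (0)

int Validate(int size) {
  // do/while(0) keeps the check safe inside unbraced if/else chains.
  SK_CHK_LOG_AND_MSG(size <= 1, 1, "The parameter can not be configured repeatedly");
  return 0;
}

int main() { return Validate(2); }  // exits 1: the guard fires and returns early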
dims, not supported by default:empty + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0; // Request ID + uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty }; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { @@ -264,14 +264,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, - const std::vector &input_desc, - const std::vector &inputs, - std::vector &output_desc, + static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, std::vector &outputs); static ge::Status ReleaseSingleOpResource(void *stream); + static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, diff --git a/metadef b/metadef index 0d0d2fb0..37465b85 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 0d0d2fb016d44f9a575ad8f8c2cb8858bba3acec +Subproject commit 37465b85d30b67a0edcc6ea4acd2f11a9697c7af diff --git a/parser b/parser index 84ea76e9..5fa1f3ed 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 84ea76e94054fcfac5b80ded6e0ec4db1f37d3e0 +Subproject commit 5fa1f3ed9b1785b9fd1623d624de91838dff615e
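// Sketch: ge_executor.h above adds a static GetDeviceIdByModelId(uint32_t,
// uint32_t &) lookup from a loaded model id to the device it occupies. A caller
// sketch; the stand-in below only mimics the signature and the 0-means-SUCCESS
// convention, while the real mapping lives inside the model manager.
#include <cstdint>
#include <iostream>

static int GetDeviceIdByModelIdSketch(uint32_t model_id, uint32_t &device_id) {
  device_id = model_id % 8;  // fake mapping, for the sketch only
  return 0;                  // 0 == SUCCESS
}

int main() {
  uint32_t device_id = 0;
  if (GetDeviceIdByModelIdSketch(1U, device_id) != 0) {
    std::cerr << "get device id by model id failed\n";
    return 1;
  }
  std::cout << "model runs on device " << device_id << '\n';
  return 0;
}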