From 772d6bc743d4b54dd7874a1bbdc659d8b8d42bf9 Mon Sep 17 00:00:00 2001 From: xulei Date: Thu, 5 Aug 2021 11:49:43 +0800 Subject: [PATCH] fix bug Signed-off-by: xulei --- CMakeLists.txt | 3 - ge/CMakeLists.txt | 8 +- ge/client/ge_api.cc | 93 ++++++- ge/common/CMakeLists.txt | 1 + ge/common/profiling/command_handle.cc | 268 +++++++++++++++++++++ .../{ge_runner_profiling.cc => command_handle.h} | 18 +- ge/common/profiling/ge_profiling.cc | 234 +----------------- ge/common/profiling/profiling_init.cc | 247 +++++++++++++++++++ ge/common/profiling/profiling_init.h | 54 +++++ ge/common/profiling/profiling_manager.cc | 139 ++--------- ge/common/profiling/profiling_manager.h | 18 +- ge/common/profiling/profiling_properties.cc | 124 ++++++++++ ge/common/profiling/profiling_properties.h | 58 +++++ ge/executor/CMakeLists.txt | 1 + ge/executor/ge_executor.cc | 5 +- ge/ge_inference.mk | 1 - ge/ge_runner.mk | 1 - ge/graph/build/task_generator.cc | 16 +- ge/graph/load/model_manager/model_manager.cc | 28 ++- ge/graph/manager/graph_manager.cc | 4 - ge/init/gelib.cc | 24 +- ge/init/gelib.h | 1 - ge/session/inner_session.cc | 10 + inc/framework/common/profiling/ge_profiling.h | 26 +- .../common/profiling/ge_runner_profiling.h | 24 -- tests/depends/profiler/src/profiler_stub.cc | 8 + tests/depends/runtime/src/runtime_stub.cc | 12 + tests/ut/ge/CMakeLists.txt | 5 + tests/ut/ge/graph/load/davinci_model_unittest.cc | 6 +- .../ge/profiling/ge_profiling_manager_unittest.cc | 34 ++- tests/ut/ge/profiling/profiling_init_unittest.cc | 76 ++++++ .../ge/profiling/profiling_properties_unittest.cc | 72 ++++++ third_party/fwkacllib/inc/runtime/base.h | 27 +++ .../fwkacllib/inc/toolchain/prof_callback.h | 9 - 34 files changed, 1171 insertions(+), 484 deletions(-) create mode 100644 ge/common/profiling/command_handle.cc rename ge/common/profiling/{ge_runner_profiling.cc => command_handle.h} (61%) create mode 100644 ge/common/profiling/profiling_init.cc create mode 100644 
ge/common/profiling/profiling_init.h create mode 100644 ge/common/profiling/profiling_properties.cc create mode 100644 ge/common/profiling/profiling_properties.h delete mode 100644 inc/framework/common/profiling/ge_runner_profiling.h create mode 100644 tests/ut/ge/profiling/profiling_init_unittest.cc create mode 100644 tests/ut/ge/profiling/profiling_properties_unittest.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 60509838..c8165b54 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,6 @@ else () set(STATIC_ACL_LIB ${GE_LIB_PATH}) find_module(slog libalog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) - find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) find_module(hccl libhccl.so ${GE_LIB_PATH}) find_module(adump_server libadump_server.a ${GE_LIB_PATH}) find_module(runtime libruntime.so ${GE_LIB_PATH}) @@ -106,7 +105,6 @@ else () elseif(PLATFORM STREQUAL "inference") find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) - find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) @@ -120,7 +118,6 @@ else () find_module(runtime libruntime.so ${ASCEND_ATC_DIR}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_ATC_DIR}/stub) - find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) else() message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index f98297d8..a541adc7 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -116,6 +116,7 @@ set(EXECUTOR_SRC_LIST "common/ge/plugin_manager.cc" "common/profiling/ge_profiling.cc" "common/profiling/profiling_manager.cc" + "common/profiling/command_handle.cc" "executor/ge_executor.cc" 
"ge_local_engine/engine/host_cpu_engine.cc" "graph/build/memory/var_mem_assign_util.cc" @@ -259,10 +260,9 @@ set(EXECUTOR_SRC_LIST ################################################################## set(COMPILER_SRC_LIST "analyzer/analyzer.cc" - "common/dump/dump_op.cc" + #"common/dump/dump_op.cc" "common/ge/op_tiling_manager.cc" "common/ge/plugin_manager.cc" - "common/profiling/profiling_manager.cc" "engine_manager/dnnengine_manager.cc" "ge_local_engine/engine/host_cpu_engine.cc" "ge_opt_info/ge_opt_info.cc" @@ -473,7 +473,7 @@ set(RUNNER_SRC_LIST "client/ge_api.cc" "session/inner_session.cc" "session/session_manager.cc" - "common/profiling/ge_runner_profiling.cc" + "common/profiling/profiling_init.cc" "graph/manager/memory_api.cc" "graph/manager/util/hcom_util.cc" "graph/load/model_manager/task_info/hccl_task_info.cc" @@ -568,6 +568,8 @@ target_link_libraries(ge_runner PRIVATE graph ge_common ascend_protobuf + ge_executor_shared + msprofiler_fwk_share register c_sec slog diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index e4a016b3..fef1e51e 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -35,6 +35,11 @@ #include "common/util/error_manager/error_manager.h" #include "toolchain/plog.h" #include "ir_build/option_utils.h" +#include "framework/common/ge_types.h" +#include "external/ge/ge_api_types.h" +#include "graph/ge_context.h" +#include "common/profiling/profiling_init.h" +#include "common/profiling/profiling_properties.h" using domi::OpRegistry; using std::map; @@ -43,6 +48,89 @@ using std::vector; namespace { const int32_t kMaxStrLen = 128; +const int kDecimal = 10; +const int kDefaultDeviceIdForTrain = 0; +const int kDefaultDeviceIdForInfer = -1; +void InitOptions(const map &option_map, ge::Options &options) { + GELOGD("InitOptions start"); + options.session_id = 0; + auto is_train_mode = false; + auto iter = option_map.find(ge::OPTION_GRAPH_RUN_MODE); + if (iter != option_map.end()) { + if 
(ge::GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= ge::TRAIN) { + is_train_mode = true; + } + } + iter = option_map.find(ge::OPTION_EXEC_SESSION_ID); + if (iter != option_map.end()) { + options.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); + } + options.device_id = is_train_mode ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; + iter = option_map.find(ge::OPTION_EXEC_DEVICE_ID); + if (iter != option_map.end()) { + options.device_id = static_cast(std::strtol(iter->second.c_str(), nullptr, kDecimal)); + } + iter = option_map.find(ge::OPTION_EXEC_JOB_ID); + if (iter != option_map.end()) { + options.job_id = iter->second.c_str(); + } + options.isUseHcom = false; + iter = option_map.find(ge::OPTION_EXEC_IS_USEHCOM); + if (iter != option_map.end()) { + std::istringstream(iter->second) >> options.isUseHcom; + } + options.isUseHvd = false; + iter = option_map.find(ge::OPTION_EXEC_IS_USEHVD); + if (iter != option_map.end()) { + std::istringstream(iter->second) >> options.isUseHvd; + } + options.deployMode = false; + iter = option_map.find(ge::OPTION_EXEC_DEPLOY_MODE); + if (iter != option_map.end()) { + std::istringstream(iter->second) >> options.deployMode; + } + iter = option_map.find(ge::OPTION_EXEC_POD_NAME); + if (iter != option_map.end()) { + options.podName = iter->second.c_str(); + } + iter = option_map.find(ge::OPTION_EXEC_PROFILING_MODE); + if (iter != option_map.end()) { + options.profiling_mode = iter->second.c_str(); + } + iter = option_map.find(ge::OPTION_EXEC_PROFILING_OPTIONS); + if (iter != option_map.end()) { + options.profiling_options = iter->second.c_str(); + } + iter = option_map.find(ge::OPTION_EXEC_RANK_ID); + if (iter != option_map.end()) { + options.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal); + } + iter = option_map.find(ge::OPTION_EXEC_RANK_TABLE_FILE); + if (iter != option_map.end()) { + options.rankTableFile = iter->second.c_str(); + } + options.enable_atomic = true; + iter 
= option_map.find(ge::OPTION_EXEC_ATOMIC_FLAG); + GE_IF_BOOL_EXEC(iter != option_map.end(), + options.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal)); + GELOGD("ge InnerInitialize, the enable_atomic_flag in options_ is %d", options.enable_atomic); +} + +void InitProfiling(ge::Options &options) { + GELOGD("InitProfiling start"); + ge::GetContext().Init(); + // Profiling init + if (ge::ProfilingInit::Instance().Init(options) != ge::SUCCESS) { + GELOGW("Profiling init failed."); + } +} + +void ShutDownProfiling() { + GELOGD("Profiling shut down"); + if (ge::ProfilingProperties::Instance().ProfilingOn()) { + ge::ProfilingInit::Instance().ShutDownProfiling(); + } +} } // namespace static bool g_ge_initialized = false; @@ -128,6 +216,9 @@ Status GEInitializeImpl(const std::map &options) { if (CheckOptionsValid(options) != SUCCESS) { return FAILED; } + ge::Options str_options; + InitOptions(options, str_options); + InitProfiling(str_options); GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid"); ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOpsProtoInit); @@ -208,7 +299,7 @@ Status GEFinalize() { GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize"); return SUCCESS; } - + ShutDownProfiling(); ErrorManager::GetInstance().SetStage(error_message::kFinalize, error_message::kFinalize); ErrorManager::GetInstance().GenWorkStreamIdDefault(); GELOGT(TRACE_INIT, "GEFinalize start"); diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 99d6ead3..a1844051 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -50,6 +50,7 @@ set(SRC_LIST "${GE_CODE_DIR}/ge/common/transop_util.cc" "${GE_CODE_DIR}/ge/common/types.cc" "${GE_CODE_DIR}/ge/common/util.cc" + "${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL) diff --git a/ge/common/profiling/command_handle.cc b/ge/common/profiling/command_handle.cc new file mode 100644 index 
00000000..247422b6 --- /dev/null +++ b/ge/common/profiling/command_handle.cc @@ -0,0 +1,268 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_handle.h" +#include "runtime/base.h" +#include "common/profiling/profiling_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/omg/omg_inner_types.h" +#include "graph/load/graph_loader.h" + +namespace { +const uint32_t kDeviceListIndex = 3; +const uint32_t kCommandNum = 6; +const int kMaxDevNum = 64; +const std::string kDeviceNums = "devNums"; +const std::string kDeviceIdList = "devIdList"; +const std::string kProfilingInit = "prof_init"; +const std::string kProfilingFinalize = "prof_finalize"; +const std::string kProfilingStart = "prof_start"; +const std::string kProfilingStop = "prof_stop"; +const std::string kProfilingModelSubscribe = "prof_model_subscribe"; +const std::string kProfilingModelUnsubscribe = "prof_model_cancel_subscribe"; +const std::string kProfilingModelId = "modelId"; + +enum ProfCommandHandleType { + kProfCommandhandleInit = 0, + kProfCommandhandleStart, + kProfCommandhandleStop, + kProfCommandhandleFinalize, + kProfCommandhandleModelSubscribe, + kProfCommandhandleModelUnsubscribe +}; + +const std::map kProfCommandTypeMap = { + {kProfCommandhandleInit, kProfilingInit}, + {kProfCommandhandleStart, 
kProfilingStart}, + {kProfCommandhandleStop, kProfilingStop}, + {kProfCommandhandleFinalize, kProfilingFinalize}, + {kProfCommandhandleModelSubscribe, kProfilingModelSubscribe}, + {kProfCommandhandleModelUnsubscribe, kProfilingModelUnsubscribe}}; + +bool IsProfTypeValid(uint32_t type) { + if (type < 0 || type >= kCommandNum) { + GELOGE(ge::PARAM_INVALID, "[Check][Type]Type %u is invalid", type); + return false; + } + GELOGD("Type is %u", type); + return true; +} + +bool IsProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { + if (deviceid_list == nullptr) { + GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr"); + REPORT_INNER_ERROR("E19999", "Device id list is nullptr"); + return false; + } + if (device_nums == 0 || device_nums > kMaxDevNum) { + GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); + REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); + return false; + } + + // real device num + int32_t dev_count = 0; + rtError_t rt_err = rtGetDeviceCount(&dev_count); + if (rt_err != RT_ERROR_NONE) { + GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); + REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); + return false; + } + + if (device_nums > static_cast(dev_count)) { + GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", device_nums, dev_count); + REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", device_nums, dev_count); + return false; + } + + std::set record; + for (size_t i = 0; i < device_nums; ++i) { + uint32_t dev_id = deviceid_list[i]; + if (dev_id >= static_cast(dev_count)) { + GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", dev_id, dev_count); + REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); + return false; + } + if (record.count(dev_id) > 0) { + 
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); + REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id); + return false; + } + record.insert(dev_id); + } + return true; +} + +bool TransProfConfigToParam(const rtProfCommandHandle &profCommand, vector &prof_config_params) { + prof_config_params.clear(); + prof_config_params.emplace_back(kDeviceNums); + prof_config_params.emplace_back(std::to_string(profCommand.devNums)); + prof_config_params.emplace_back(kDeviceIdList); + std::string devID = ""; + if (profCommand.devNums == 0) { + GELOGE(ge::FAILED, "[Check][Param]The device num is invalid."); + return false; + } + for (uint32_t i = 0; i < profCommand.devNums; i++) { + devID.append(std::to_string(profCommand.devIdList[i])); + if (i != profCommand.devNums - 1) { + devID.append(","); + } + } + + prof_config_params.push_back(devID); + return true; +} + +ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe, uint32_t graph_id, + vector &prof_params) { + if (type == kProfCommandhandleModelUnsubscribe && is_subscribe) { + prof_params.clear(); + prof_params.emplace_back(kProfilingModelId); + uint32_t model_id = graph_id; + if (is_subscribe) { + auto &profiling_manager = ge::ProfilingManager::Instance(); + auto ret = profiling_manager.GetModelIdFromGraph(graph_id, model_id); + if (ret != ge::SUCCESS) { + GELOGE(ret, "[Get][GraphId]graph_id:%u not not found", graph_id); + return ret; + } + } + prof_params.emplace_back(std::to_string(model_id)); + } + return ge::SUCCESS; +} + +rtError_t NeedHandleStartEnd(ProfCommandHandleType type, rtProfCommandHandle_t *prof_config_param, + std::vector &prof_params) { + if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { + if (!IsProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { + return ge::FAILED; + } + if (!TransProfConfigToParam(*prof_config_param, prof_params)) { + GELOGE(ge::PARAM_INVALID, 
"[Check][Param]Transfer profilerConfig to string vector failed"); + REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed"); + return ge::PARAM_INVALID; + } + } + return ge::SUCCESS; +} + +rtError_t NeedHandleModelSubscribe(ProfCommandHandleType type, rtProfCommandHandle_t *prof_config_param, + std::vector &prof_params) { + if (type == kProfCommandhandleModelSubscribe) { + auto &profiling_manager = ge::ProfilingManager::Instance(); + auto is_train = domi::GetContext().train_flag; + if (is_train) { + profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true); + return ge::SUCCESS; + } + prof_params.clear(); + prof_params.push_back(kProfilingModelId); + prof_params.push_back(std::to_string(prof_config_param->modelId)); + } + return ge::SUCCESS; +} + +rtError_t ExecuteCommand(ProfCommandHandleType type, + std::map::const_iterator iter, + rtProfCommandHandle_t *prof_config_param, std::vector &prof_params) { + ge::GraphLoader graph_loader; + ge::Command command; + command.cmd_params.clear(); + command.cmd_type = iter->second; + command.cmd_params = prof_params; + if (type != kProfCommandhandleFinalize) { + command.module_index = prof_config_param->profSwitch; + } + GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), + command.module_index); + if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { + GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); + } + ge::Status ret = graph_loader.CommandHandle(command); + if (ret != ge::SUCCESS) { + GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u", + iter->second.c_str(), ret); + REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u", + iter->second.c_str(), ret); + return ge::FAILED; + } + + GELOGI("Successfully execute profiling command type: %d, command 
0x%lx.", type, command.module_index); + return ge::SUCCESS; +} + +rtError_t HandleCtrlSwitch(void *data) { + auto &profiling_manager = ge::ProfilingManager::Instance(); + rtProfCommandHandle_t *prof_config_param = reinterpret_cast(data); + if (!IsProfTypeValid(prof_config_param->type)) { + GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand is invalid."); + return ge::FAILED; + } + auto type = static_cast(prof_config_param->type); + if (type != kProfCommandhandleFinalize) { + GE_CHECK_NOTNULL(data); + } + auto iter = kProfCommandTypeMap.find(type); + if (iter == kProfCommandTypeMap.end()) { + GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand type is invalid."); + return ge::PARAM_INVALID; + } + std::vector prof_params; + ge::Status ret = NeedHandleStartEnd(type, prof_config_param, prof_params); + if (ret != ge::SUCCESS) { + return ret; + } + ret = NeedHandleModelSubscribe(type, prof_config_param, prof_params); + if (ret != ge::SUCCESS) { + return ret; + } + + auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe; + // GraphId is actually stored in prof_config_param + auto graph_id = prof_config_param->modelId; + ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params); + if (ret != ge::SUCCESS) { + GELOGE(ret, "[Check][Param]graph_id:%u not not found", graph_id); + REPORT_INPUT_ERROR( + "E10001", std::vector({"value", "parameter", "reason"}), + std::vector({std::to_string(graph_id), "GraphToModelMap", "graph_id does not exist!"})); + return ge::FAILED; + } + return ExecuteCommand(type, iter, prof_config_param, prof_params); +} +} // namespace + +namespace ge { +rtError_t CommandHandle(uint32_t rt_type, void *data, uint32_t len) { + if (data == nullptr) { + GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand is invalid."); + return ge::FAILED; + } + auto &profiling_manager = ge::ProfilingManager::Instance(); + if (rt_type == RT_PROF_CTRL_REPORTER) { + profiling_manager.SetMsprofReporterCallback(reinterpret_cast(data)); + 
GELOGD("return with MsprofReporterCallback"); + return ge::SUCCESS; + } else if (rt_type == RT_PROF_CTRL_SWITCH) { + return HandleCtrlSwitch(data); + } + return ge::FAILED; +} +} // namespace ge \ No newline at end of file diff --git a/ge/common/profiling/ge_runner_profiling.cc b/ge/common/profiling/command_handle.h similarity index 61% rename from ge/common/profiling/ge_runner_profiling.cc rename to ge/common/profiling/command_handle.h index f74ce384..ccc14b82 100644 --- a/ge/common/profiling/ge_runner_profiling.cc +++ b/ge/common/profiling/command_handle.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "framework/common/profiling/ge_runner_profiling.h" -#include "init/gelib.h" +#ifndef GE_COMMON_PROFILING_COMMAND_HANDLE_H_ +#define GE_COMMON_PROFILING_COMMAND_HANDLE_H_ -bool IsInitialize() { - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) { - return false; - } - return true; +#include "ge/ge_api_error_codes.h" +#include "runtime/base.h" + +namespace ge { +GE_FUNC_VISIBILITY rtError_t CommandHandle(uint32_t rt_type, void *data, uint32_t len); } +#endif // GE_COMMON_PROFILING_COMMAND_HANDLE_H_ diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index a5857b35..0b21b467 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -19,245 +19,15 @@ #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" -#include "graph/load/graph_loader.h" -#include "graph/ge_context.h" -#include "init/gelib.h" #include "framework/common/ge_inner_error_codes.h" #include "common/model/ge_model.h" 
-#include "framework/omg/omg_inner_types.h" namespace { -const uint32_t kDeviceListIndex = 3; -const std::string kDeviceNums = "devNums"; -const std::string kDeviceIdList = "devIdList"; -const std::string kProfilingInit = "prof_init"; -const std::string kProfilingFinalize = "prof_finalize"; -const std::string kProfilingStart = "prof_start"; -const std::string kProfilingStop = "prof_stop"; -const std::string kProfModelSubscribe = "prof_model_subscribe"; -const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; -const std::string kRtSetDeviceRegName = "profiling"; -const std::string kPofilingModelId = "modelId"; - -const std::map kProfCommandTypeMap = { - {kProfCommandhandleInit, kProfilingInit}, - {kProfCommandhandleStart, kProfilingStart}, - {kProfCommandhandleStop, kProfilingStop}, - {kProfCommandhandleFinalize, kProfilingFinalize}, - {kProfCommandhandleModelSubscribe, kProfModelSubscribe}, - {kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; - const uint64_t kModelId = ge::INVALID_MODEL_ID; const uint16_t kStepStart = 0; const uint16_t kStepEnd = 1; - -ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe, - uint32_t graph_id, vector &prof_params) { - if (type == kProfCommandhandleModelUnsubscribe && is_subscribe) { - prof_params.clear(); - prof_params.emplace_back(kPofilingModelId); - uint32_t model_id = 0; - auto ret = ge::ProfilingManager::Instance().GetModelIdFromGraph(graph_id, model_id); - if (ret != ge::SUCCESS) { - GELOGE(ret, "graph_id:%u not not found", graph_id); - return ret; - } - prof_params.emplace_back(std::to_string(model_id)); - } - return ge::SUCCESS; -} } // namespace -bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector &prof_config_params) { - prof_config_params.clear(); - prof_config_params.emplace_back(kDeviceNums); - prof_config_params.emplace_back(std::to_string(profCommand.devNums)); - prof_config_params.emplace_back(kDeviceIdList); - std::string devID = ""; - if 
(profCommand.devNums == 0) { - GELOGW("The device num is invalid."); - return false; - } - for (uint32_t i = 0; i < profCommand.devNums; i++) { - devID.append(std::to_string(profCommand.devIdList[i])); - if (i != profCommand.devNums - 1) { - devID.append(","); - } - } - - prof_config_params.push_back(devID); - return true; -} - -bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { - if (deviceid_list == nullptr) { - GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr"); - REPORT_INNER_ERROR("E19999", "Device id list is nullptr"); - return false; - } - if (device_nums == 0 || device_nums > MAX_DEV_NUM) { - GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); - REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); - return false; - } - - // real device num - int32_t dev_count = 0; - rtError_t rt_err = rtGetDeviceCount(&dev_count); - if (rt_err != RT_ERROR_NONE) { - GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); - REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); - return false; - } - - if (device_nums > static_cast(dev_count)) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", - device_nums, dev_count); - REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", - device_nums, dev_count); - return false; - } - - std::set record; - for (size_t i = 0; i < device_nums; ++i) { - uint32_t dev_id = deviceid_list[i]; - if (dev_id >= static_cast(dev_count)) { - GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", - dev_id, dev_count); - REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); - return false; - } - if (record.count(dev_id) > 0) { - GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); - REPORT_CALL_ERROR("E19999", "Device id %u is not unique, 
duplicatedly set", dev_id); - return false; - } - record.insert(dev_id); - } - return true; -} - -ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { - if (func == nullptr) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]Msprof ctrl callback is nullptr"); - REPORT_INNER_ERROR("E19999", "Msprof ctrl callback is nullptr"); - return ge::PARAM_INVALID; - } - if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { - GELOGW("Msprof ctrl callback is exist, just ignore it."); - } else { - ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); - } - return ge::SUCCESS; -} - -ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { - if (func == nullptr) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofSetDeviceCallback callback is nullptr"); - REPORT_INNER_ERROR("E19999", "MsprofSetDeviceCallback callback is nullptr"); - return ge::PARAM_INVALID; - } - // Pass MsprofSetDeviceCallback to runtime - ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast(func)); - if (rt_ret != ge::SUCCESS) { - GELOGE(rt_ret, "[Pass][MsprofSetDeviceCallback]To runtime failed, ret 0x%X", rt_ret); - REPORT_CALL_ERROR("E19999", "Pass MsprofSetDeviceCallback to runtime failed, ret 0x%X", rt_ret); - return rt_ret; - } - return ge::SUCCESS; -} - -ge::Status RegProfReporterCallback(MsprofReporterCallback func) { - if (func == nullptr) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); - REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); - return ge::PARAM_INVALID; - } - if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) { - GELOGW("Msprof reporter callback is exist, just ignore it."); - } else { - GELOGI("GE register Msprof reporter callback."); - ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); - // Pass MsprofReporterCallback to runtime - ge::Status rt_ret = rtSetMsprofReporterCallback(func); 
- if (rt_ret != ge::SUCCESS) { - GELOGE(rt_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u", - rt_ret); - REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u", - rt_ret); - return rt_ret; - } - // Pass MsprofReporterCallback to hccl - } - return ge::SUCCESS; -} - -ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) { - if (type != kProfCommandhandleFinalize) { - GE_CHECK_NOTNULL(data); - } - ProfCommandHandleData *prof_config_param = reinterpret_cast(data); - auto iter = kProfCommandTypeMap.find(type); - if (iter == kProfCommandTypeMap.end()) { - GELOGW("The prof comand type is invalid."); - return ge::PARAM_INVALID; - } - std::vector prof_params; - if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { - if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { - return ge::FAILED; - } - - if (!TransProfConfigToParam(*prof_config_param, prof_params)) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed"); - REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed"); - return ge::PARAM_INVALID; - } - } - auto &profiling_manager = ge::ProfilingManager::Instance(); - auto is_train = domi::GetContext().train_flag; - if (type == kProfCommandhandleModelSubscribe && is_train) { - profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true); - return ge::SUCCESS; - } - auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe; - // GraphId is actually stored in prof_config_param - auto graph_id = prof_config_param->modelId; - ge::Status ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params); - if (ret != ge::SUCCESS) { - GELOGE(ret, "graph_id:%u not not found", graph_id); - REPORT_INPUT_ERROR("E10001", std::vector({"value", "parameter", "reason"}), - std::vector({std::to_string(graph_id), - "GraphToModelMap", - 
"graph_id does not exist!"})); - return ge::FAILED; - } - ge::GraphLoader graph_loader; - ge::Command command; - command.cmd_params.clear(); - command.cmd_type = iter->second; - command.cmd_params = prof_params; - if (type != kProfCommandhandleFinalize) { - command.module_index = prof_config_param->profSwitch; - } - GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), - command.module_index); - if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { - GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); - } - ret = graph_loader.CommandHandle(command); - if (ret != ge::SUCCESS) { - GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u", - iter->second.c_str(), ret); - REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u", - iter->second.c_str(), ret); - return ge::FAILED; - } - - GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index); - return ge::SUCCESS; -} - ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream) { static bool is_first_run = true; int32_t device_id = 0; @@ -289,3 +59,7 @@ ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id) { return ge::ProfilingManager::Instance().GetDeviceIdFromGraph(graph_id, device_id); } + +void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id) { + ge::ProfilingManager::Instance().SetGraphIdToDeviceMap(graph_id, device_id); +} diff --git a/ge/common/profiling/profiling_init.cc b/ge/common/profiling/profiling_init.cc new file mode 100644 index 00000000..e0f1aa35 --- /dev/null +++ b/ge/common/profiling/profiling_init.cc @@ -0,0 +1,247 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profiling_init.h" + +#include "common/properties_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "common/profiling/profiling_properties.h" +#include "runtime/base.h" +#include "common/profiling/command_handle.h" +#include "common/profiling/profiling_manager.h" + +namespace { +const char *const kTrainingTrace = "training_trace"; +const char *const kFpPoint = "fp_point"; +const char *const kBpPoint = "bp_point"; +} + +namespace ge { +ProfilingInit &ProfilingInit::Instance() { + static ProfilingInit profiling_init; + return profiling_init; +} + +ge::Status ProfilingInit::Init(const Options &options) { + GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); + + struct MsprofGeOptions prof_conf = {{0}}; + bool is_execute_profiling = false; + Status ret = InitFromOptions(options, prof_conf, is_execute_profiling); + if (ret != SUCCESS) { + GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret); + REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret); + return ret; + } + ProfRegisterCtrlCallback(); + if (is_execute_profiling) { + int32_t cb_ret = MsprofInit(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), + static_cast(&prof_conf), sizeof(MsprofGeOptions)); + if (cb_ret != 0) { + GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); + 
REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); + return FAILED; + } + GELOGI("Profiling init success"); + } + else { + GELOGI("The profiling is off, skip the initialization"); + } + return SUCCESS; +} + +ge::Status ProfilingInit::ProfRegisterCtrlCallback() { + rtProfCtrlHandle callback = CommandHandle; + rtError_t rt_ret = rtProfRegisterCtrlCallback(GE, callback);  // hand CommandHandle to the runtime as the GE module's profiling control callback + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Register CtrlCallBack failed, ret 0x%X", rt_ret); + return FAILED; + } + return SUCCESS; +} + +ge::Status ProfilingInit::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, + bool &is_execute_profiling) { + // enable profiling by env + char env_profiling_mode[MMPA_MAX_PATH] = {0x00}; + + if (options.profiling_mode == "1" && !options.profiling_options.empty()) { + // enable profiling by ge option + if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), + MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { + GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", options.profiling_options.c_str()); + REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", options.profiling_options.c_str()); + return INTERNAL_ERROR; + } + is_execute_profiling = true; + GELOGI("The profiling in options is %s, %s. 
origin option: %s", options.profiling_mode.c_str(), prof_conf.options, + options.profiling_options.c_str()); + } else { + (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); + (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); + // The env is invalid + if ((strcmp("true", env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) { + return SUCCESS; + } + // enable profiling by env + is_execute_profiling = true; + GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options); + } + + ProfilingProperties::Instance().SetExecuteProfiling(is_execute_profiling); + ProfilingProperties::Instance().SetLoadProfiling(true); + if (!is_execute_profiling) { + return SUCCESS; + } + + // Parse json str for bp fp + Status ret = ParseOptions(prof_conf.options); + if (ret != ge::SUCCESS) { + GELOGE(ge::PARAM_INVALID, "[Parse][Options]Parse training trace param %s failed, error_code %u", prof_conf.options, + ret); + REPORT_CALL_ERROR("E19999", "Parse training trace param %s failed, error_code %u", prof_conf.options, ret); + return ge::PARAM_INVALID; + } + + if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != + EOK) { + GELOGE(INTERNAL_ERROR, "[Copy][JobId]Failed, original job_id %s", options.job_id.c_str()); + REPORT_CALL_ERROR("E19999", "Copy job_id %s failed", options.job_id.c_str()); + return INTERNAL_ERROR; + } + GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str()); + return ge::SUCCESS; +} + +ge::Status ProfilingInit::ParseOptions(const std::string &options) { + if (options.empty()) { + GELOGE(ge::PARAM_INVALID, "[Check][Param]Profiling options is empty"); + REPORT_INNER_ERROR("E19999", "Profiling options is empty"); + return ge::PARAM_INVALID; + } + try { + Json prof_options = Json::parse(options); + if (options.find(kTrainingTrace) == std::string::npos) { + return ge::SUCCESS; + } + std::string 
training_trace; + if (prof_options.contains(kTrainingTrace)) { + training_trace = prof_options[kTrainingTrace]; + } + if (training_trace.empty()) { + GELOGI("Training trace will not take effect."); + return ge::SUCCESS; + } + GELOGI("GE profiling training trace:%s", training_trace.c_str()); + if (training_trace != "on") { + GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", training_trace.c_str()); + REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str()); + return ge::PARAM_INVALID; + } + string fp_point; + string bp_point; + if (prof_options.contains(kFpPoint)) { + fp_point = prof_options[kFpPoint]; + } + if (prof_options.contains(kBpPoint)) { + bp_point = prof_options[kBpPoint]; + } + if (!fp_point.empty() && !bp_point.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point.c_str(), fp_point.c_str()); + } + ProfilingProperties::Instance().SetTrainingTrace(true); + ProfilingProperties::Instance().SetFpBpPoint(fp_point,bp_point); + } catch (...) 
{ + GELOGE(FAILED, "[Check][Param]Json prof_conf options is invalid"); + REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid"); + return ge::PARAM_INVALID; + } + return ge::SUCCESS; +} + +void ProfilingInit::StopProfiling() { + uint64_t module = GetProfilingModule(); + // The following if-branch is not executed in the normal case; it only runs in case ProfStopProfiling was abnormal + const auto device_id = ProfilingManager::Instance().GetDeviceID(); + int32_t device_num = static_cast(device_id.size()); + if (device_num != 0) { + auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); + if (device_id_ptr == nullptr) { + GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); + REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); + return; + } + for (int32_t i = 0; i < device_num; i++) { + device_id_ptr[i] = static_cast(device_id[i]); + } + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); + } + } + + // stop profiling by finalizing msprof + int32_t cb_ret = MsprofFinalize(); + if (cb_ret != 0) { + GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); + return; + } + GELOGI("Stop Profiling success."); +} + +void ProfilingInit::ShutDownProfiling() { + StopProfiling(); + ProfilingManager::Instance().PluginUnInit(); + ProfilingProperties::Instance().ClearProperties(); +} + +uint64_t ProfilingInit::GetProfilingModule() { + uint64_t module = PROF_MODEL_EXECUTE_MASK | + PROF_RUNTIME_API_MASK | + PROF_RUNTIME_TRACE_MASK | + PROF_SCHEDULE_TIMELINE_MASK | + PROF_SCHEDULE_TRACE_MASK | + PROF_TASK_TIME_MASK | + PROF_SUBTASK_TIME_MASK | + PROF_AICPU_TRACE_MASK | + PROF_AICORE_METRICS_MASK | + PROF_AIVECTORCORE_METRICS_MASK | + PROF_MODEL_LOAD_MASK; + return module; +} + +Status ProfilingInit::SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) { + auto 
rt_ret = rtSetDeviceIdByGeModelIdx(model_id, device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(ge::FAILED, "[Set][Device]Set Device id failed"); + return ge::FAILED; + } + return ge::SUCCESS; +} + +Status ProfilingInit::UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) { + auto rt_ret = rtUnsetDeviceIdByGeModelIdx(model_id, device_id);  // inverse of SetDeviceIdByModelId; failures must be logged as an unset, not a set + if (rt_ret != RT_ERROR_NONE) { + GELOGE(ge::FAILED, "[Unset][Device]Unset Device id failed"); + return ge::FAILED; + } + return ge::SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/ge/common/profiling/profiling_init.h b/ge/common/profiling/profiling_init.h new file mode 100644 index 00000000..52531521 --- /dev/null +++ b/ge/common/profiling/profiling_init.h @@ -0,0 +1,54 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_COMMON_PROFILING_PROFILING_INIT_H_ +#define GE_COMMON_PROFILING_PROFILING_INIT_H_ + +#include +#include +#include + +#include "common/profiling/profiling_properties.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" +#include "toolchain/prof_callback.h" + +using std::map; +using std::string; +using std::vector; +using Json = nlohmann::json; + +namespace ge { +class ProfilingInit { + public: + static ProfilingInit &Instance(); + Status Init(const Options &options); + void StopProfiling(); + Status ProfRegisterCtrlCallback(); + void ShutDownProfiling(); + Status SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + Status UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + + private: + ProfilingInit() = default; + ~ProfilingInit() = default; + Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, bool &is_execute_profiling); + Status ParseOptions(const std::string &options); + uint64_t GetProfilingModule(); +}; +} // namespace ge + +#endif // GE_COMMON_PROFILING_PROFILING_INIT_H_ diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index e8f41cc4..a8c7ea97 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -25,11 +25,14 @@ #include "runtime/base.h" #include "graph/load/model_manager/davinci_model.h" #include "mmpa/mmpa_api.h" +#include "graph/load/graph_loader.h" namespace { const char *const kTrainingTrace = "training_trace"; const char *const kFpPoint = "fp_point"; const char *const kBpPoint = "bp_point"; +const uint64_t kProfModelExecuteMask = 0x0010; +const uint64_t kProfModelLoadMask = 0x8000000000000000; #ifdef DAVINCI_SUPPORT_PROFILING const int32_t kMaxDeviceNum = 256; @@ -65,14 +68,15 @@ const std::string kIdx = "idx"; } // namespace namespace ge { + ProfilingManager::ProfilingManager() : is_load_profiling_(false), is_execute_profiling_(false), 
is_training_trace_(false), subscribe_count_(0), - prof_cb_({nullptr, nullptr}), index_id_(UINT64_MAX), - subscribe_info_({false, 0, 0}) { + subscribe_info_({false, 0, 0}), + reporter_callback_(nullptr) { } ProfilingManager::~ProfilingManager() {} @@ -82,45 +86,6 @@ ProfilingManager &ProfilingManager::Instance() { return profiling_manager; } -ge::Status ProfilingManager::Init(const Options &options) { -#ifdef DAVINCI_SUPPORT_PROFILING - vector().swap(device_id_); - subscribe_count_ = 0; - GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); - - struct MsprofGeOptions prof_conf = {{ 0 }}; - Status ret = InitFromOptions(options, prof_conf); - if (ret != SUCCESS) { - GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret); - REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret); - return ret; - } - - if (is_execute_profiling_) { - if (prof_cb_.msprofCtrlCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); - REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); - return ge::PARAM_INVALID; - } - int32_t cb_ret = prof_cb_.msprofCtrlCallback( - static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), - static_cast(&prof_conf), sizeof(MsprofGeOptions)); - if (cb_ret != 0) { - GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", - static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); - REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", - static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), - cb_ret); - return FAILED; - } - GELOGI("Profiling init success"); - } else { - GELOGI("The profiling is off, skip the initialization"); - } -#endif - return SUCCESS; -} - ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) { #ifdef DAVINCI_SUPPORT_PROFILING // enable profiling by env @@ -221,44 +186,6 @@ ge::Status 
ProfilingManager::ParseOptions(const std::string &options) { return ge::SUCCESS; } -void ProfilingManager::StopProfiling() { -#ifdef DAVINCI_SUPPORT_PROFILING - uint64_t module = GetProfilingModule(); - // The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal - int32_t device_num = static_cast(device_id_.size()); - if (device_num != 0) { - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); - if (device_id_ptr == nullptr) { - GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); - REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); - return; - } - for (int32_t i = 0; i < device_num; i++) { - device_id_ptr[i] = static_cast(device_id_[i]); - } - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); - } - } - - // stop profiling - if (prof_cb_.msprofCtrlCallback == nullptr) { - GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); - REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); - return; - } - int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), - nullptr, 0); - if (cb_ret != 0) { - GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", - static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); - return; - } - GELOGI("Stop Profiling success."); -#endif -} - void ProfilingManager::ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json) { #ifdef DAVINCI_SUPPORT_PROFILING for (size_t i = 0; i < task.input_format.size(); i++) { @@ -440,21 +367,6 @@ void ProfilingManager::ReportProfilingData(uint32_t model_id, const std::vector< #endif } -uint64_t ProfilingManager::GetProfilingModule() { - uint64_t module = PROF_MODEL_EXECUTE_MASK | - PROF_RUNTIME_API_MASK | - PROF_RUNTIME_TRACE_MASK | - PROF_SCHEDULE_TIMELINE_MASK | - 
PROF_SCHEDULE_TRACE_MASK | - PROF_TASK_TIME_MASK | - PROF_SUBTASK_TIME_MASK | - PROF_AICPU_TRACE_MASK | - PROF_AICORE_METRICS_MASK | - PROF_AIVECTORCORE_METRICS_MASK | - PROF_MODEL_LOAD_MASK; - return module; -} - void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) { #ifdef DAVINCI_SUPPORT_PROFILING if (prof_type == kProfModelSubscribe) { @@ -485,8 +397,8 @@ void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uin Status ProfilingManager::ProfModelSubscribe(uint64_t module, void *model) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); - uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; - if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { + uint64_t model_load_mask = module & kProfModelLoadMask; + if ((subscribe_count_ == 0) && (model_load_mask == kProfModelLoadMask)) { // register framework to profiling // register Framework to profiling int32_t cb_ret = PluginInit(); @@ -566,9 +478,9 @@ Status ProfilingManager::ProfModelUnsubscribe(void *model) { Status ProfilingManager::ProfInit(uint64_t module) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); - uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; + uint64_t model_load_mask = module & kProfModelLoadMask; - if (model_load_mask == PROF_MODEL_LOAD_MASK) { + if (model_load_mask == kProfModelLoadMask) { // register Framework to profiling int32_t cb_ret = PluginInit(); if (cb_ret != 0) { @@ -611,7 +523,7 @@ Status ProfilingManager::ProfFinalize() { CleanSubscribeInfo(); int32_t dev_num = -1; - rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); + rtError_t rt_ret = rtProfilerStop(kProfModelLoadMask, dev_num, nullptr); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "[Stop][Profiler]Malloc buffer failed, ret 0x%X", rt_ret); REPORT_CALL_ERROR("E19999", "Malloc buffer failed when stop profiling, ret 0x%X", rt_ret); @@ -780,7 +692,7 @@ Status 
ProfilingManager::ProfStartProfiling(uint64_t module, const std::map(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), nullptr, 0); @@ -912,7 +824,7 @@ Status ProfilingManager::PluginInit() { return INTERNAL_ERROR; } - cb_ret = prof_cb_.msprofReporterCallback( + cb_ret = reporter_callback_( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_DATA_MAX_LEN), &reporter_max_len_, sizeof(uint32_t)); @@ -927,12 +839,12 @@ Status ProfilingManager::PluginInit() { void ProfilingManager::PluginUnInit() const { #ifdef DAVINCI_SUPPORT_PROFILING - if (prof_cb_.msprofReporterCallback == nullptr) { + if (reporter_callback_ == nullptr) { GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); return; } - int32_t cb_ret = prof_cb_.msprofReporterCallback( + int32_t cb_ret = reporter_callback_( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), nullptr, 0); @@ -942,13 +854,14 @@ void ProfilingManager::PluginUnInit() const { #endif } -Status ProfilingManager::CallMsprofReport(ReporterData &reporter_data) const { - if (prof_cb_.msprofReporterCallback == nullptr) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( + ReporterData &reporter_data) const { + if (reporter_callback_ == nullptr) { GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); return ge::PARAM_INVALID; } - return prof_cb_.msprofReporterCallback( + return reporter_callback_( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), static_cast(&reporter_data), sizeof(ReporterData)); diff --git 
a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 86371d51..96f52ffe 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -26,8 +26,8 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_types.h" #include "external/register/register_types.h" -#include "toolchain/prof_callback.h" #include "runtime/stream.h" +#include "toolchain/prof_callback.h" using std::map; using std::string; @@ -52,7 +52,6 @@ namespace { const uint64_t PROF_HCCL_TRACE_MASK = 0x1000; const uint64_t PROF_DATA_PROCESS_MASK = 0x2000; const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000; - } // namespace namespace ge { class OpDesc; @@ -68,24 +67,17 @@ struct ProfSubscribeInfo { uint32_t graph_id; }; -struct MsprofCallback { - MsprofCtrlCallback msprofCtrlCallback; - MsprofReporterCallback msprofReporterCallback; -}; - class ProfilingManager { public: ProfilingManager(); virtual ~ProfilingManager(); static ProfilingManager &Instance(); - Status Init(const Options &options); Status ProfInit(uint64_t module); Status ProfFinalize(); Status ProfStartProfiling(uint64_t module, const std::map &config_para); Status ProfStopProfiling(uint64_t module, const std::map &config_para); Status ProfModelSubscribe(uint64_t module, void *model); Status ProfModelUnsubscribe(void *model); - void StopProfiling(); bool ProfilingTrainingTraceOn() const { return is_training_trace_; } // report model load profiling data flag, data contain task desc info, step info, model load fusion op info bool ProfilingModelLoadOn() const { return is_load_profiling_; } @@ -100,9 +92,8 @@ class ProfilingManager { Status PluginInit(); void PluginUnInit() const; Status CallMsprofReport(ReporterData &reporter_data) const; - struct MsprofCallback &GetMsprofCallback() { return prof_cb_; } - void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } - void 
SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } + const MsprofReporterCallback GetMsprofReporterCallback() const { return reporter_callback_; } + void SetMsprofReporterCallback(MsprofReporterCallback func) { reporter_callback_ = func; } void GetFpBpPoint(std::string &fp_point, std::string &bp_point); void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); @@ -116,6 +107,7 @@ class ProfilingManager { void CleanSubscribeInfo(); void SetGraphIdToModelMap(uint32_t graph_id, uint32_t model_id) { model_id_map_[graph_id] = model_id; } Status GetModelIdFromGraph(uint32_t graph_id, uint32_t &model_id); + const vector &GetDeviceID() const { return device_id_; } private: Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); @@ -139,7 +131,6 @@ class ProfilingManager { uint32_t subscribe_count_; std::mutex mutex_; std::mutex mutex_report_; - MsprofCallback prof_cb_; std::string fp_point_; std::string bp_point_; uint32_t reporter_max_len_ = 0; @@ -147,6 +138,7 @@ class ProfilingManager { std::map device_id_map_; // key: graph_id, value: device_id std::map model_id_map_; // key: graph_id, value: model_id ProfSubscribeInfo subscribe_info_; + MsprofReporterCallback reporter_callback_; }; } // namespace ge #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ diff --git a/ge/common/profiling/profiling_properties.cc b/ge/common/profiling/profiling_properties.cc new file mode 100644 index 00000000..d38fec66 --- /dev/null +++ b/ge/common/profiling/profiling_properties.cc @@ -0,0 +1,124 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profiling_properties.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "graph/ge_context.h" + +namespace { +const uint64_t kMsProfOptionsMaxlen = 2048; +const char *const kFpPoint = "fp_point"; +const char *const kBpPoint = "bp_point"; +} // namespace + +namespace ge{ + +ProfilingProperties& ProfilingProperties::Instance() { + static ProfilingProperties profiling_properties; + return profiling_properties; +} + +void ProfilingProperties::SetLoadProfiling(bool is_load_profiling) { + std::lock_guardlock(mutex_); + is_load_profiling_ = is_load_profiling; +} +bool ProfilingProperties::IsLoadProfiling() { + std::lock_guardlock(mutex_); + return is_load_profiling_; +} + +void ProfilingProperties::SetExecuteProfiling(bool is_exec_profiling) { + std::lock_guardlock(mutex_); + is_execute_profiling_ = is_exec_profiling; +} + +bool ProfilingProperties::IsExecuteProfiling() { + std::lock_guardlock(mutex_); + return is_execute_profiling_; +} + +void ProfilingProperties::SetTrainingTrace(bool is_train_trace) { + std::lock_guardlock(mutex_); + is_training_trace_ = is_train_trace; +} + +void ProfilingProperties::GetFpBpPoint(std::string &fp_point, std::string &bp_point) { + // Env or options mode: fp_point_/bp_point_ have already been initialized on profiling init + std::lock_guardlock(mutex_); + if (!fp_point_.empty() && !bp_point_.empty()) { + fp_point = fp_point_; + bp_point = bp_point_; + GELOGI("Bp Fp have been initialized in env or options. 
bp_point: %s, fp_point: %s", bp_point.c_str(), + fp_point.c_str()); + return; + } + // ProfApi mode and training trace is set + // Parse options first + char env_profiling_options[kMsProfOptionsMaxlen] = {0x00}; + bool is_profiling_valid = false; + std::string profiling_options; + if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS && + !profiling_options.empty()) { + is_profiling_valid = true; + } else { + INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, kMsProfOptionsMaxlen); + if (ret != EN_OK) { + GELOGI("PROFILING_OPTIONS env is not exist."); + return; + } + GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); + profiling_options = env_profiling_options; + is_profiling_valid = true; + } + if (is_profiling_valid) { + try { + Json prof_options = Json::parse(profiling_options); + if (prof_options.contains(kFpPoint)) { + fp_point_ = prof_options[kFpPoint]; + } + if (prof_options.contains(kBpPoint)) { + bp_point_ = prof_options[kBpPoint]; + } + fp_point = fp_point_; + bp_point = bp_point_; + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); + } + } catch (...) 
{ + GELOGW("Json prof options is invalid."); + return; + } + } + + return; +} + +void ProfilingProperties::SetFpBpPoint(const std::string &fp_point, const std::string &bp_point) { + std::lock_guardlock(mutex_); + fp_point_ = fp_point; + bp_point_ = bp_point; +} + +void ProfilingProperties::ClearProperties() { + std::lock_guardlock(mutex_); + is_load_profiling_ = false; + is_execute_profiling_ = false; + is_training_trace_ = false; + fp_point_.clear(); + bp_point_.clear(); +} +} // namespace ge \ No newline at end of file diff --git a/ge/common/profiling/profiling_properties.h b/ge/common/profiling/profiling_properties.h new file mode 100644 index 00000000..5ca17e4b --- /dev/null +++ b/ge/common/profiling/profiling_properties.h @@ -0,0 +1,58 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_COMMON_PROFILING_PROPERTIES_H_ +#define GE_COMMON_PROFILING_PROPERTIES_H_ + +#include +#include +#include +#include + +#include "framework/common/ge_types.h" + +using Json = nlohmann::json; + +namespace ge { +class ProfilingProperties { + public: + static ProfilingProperties &Instance(); + void SetLoadProfiling(bool is_load_profiling); + bool IsLoadProfiling(); + void SetExecuteProfiling(bool is_execute_profiling); + bool IsExecuteProfiling(); + void SetTrainingTrace(bool is_train_trace); + bool ProfilingTrainingTraceOn() const { return is_training_trace_; } + bool IsTrainTrace() const { return is_training_trace_; }  // defined inline: profiling_properties.cc provides no out-of-line definition + void SetFpBpPoint(const std::string &fp_point, const std::string &bp_point); + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } + void GetFpBpPoint(std::string &fp_point, std::string &bp_point); + void ClearProperties(); + + private: + ProfilingProperties() = default; + ~ProfilingProperties() = default; + std::mutex mutex_; + std::mutex point_mutex_; + bool is_load_profiling_ = false; + bool is_execute_profiling_ = false; + bool is_training_trace_ = false; + std::string fp_point_; + std::string bp_point_; +}; +} // namespace ge + +#endif // GE_COMMON_PROFILING_PROPERTIES_H_ diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 54cb7639..f8660d43 100755 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -1,6 +1,7 @@ set(SRC_LIST "ge_executor.cc" "../common/profiling/profiling_manager.cc" + "../common/profiling/command_handle.cc" "../common/dump/dump_op.cc" "../common/dump/opdebug_register.cc" "../common/dump/exception_dumper.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 76cde2b9..ceb29b4e 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -33,6 +33,8 @@ #include "opskernel_manager/ops_kernel_builder_manager.h" #include "graph/opsproto_manager.h" #include "ge_local_engine/engine/host_cpu_engine.h" +#include "runtime/base.h" +#include 
"common/profiling/command_handle.h" using std::string; using std::vector; @@ -250,7 +252,6 @@ Status GeExecutor::Initialize() { GELOGW("Already initialized, no need to be initialized again."); return ge::SUCCESS; } - OpTilingManager::GetInstance().LoadSo(); Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); @@ -277,7 +278,6 @@ Status GeExecutor::Initialize() { profiling_options.device_id = 0; // job id need to be set, the value is meaningless; profiling_options.job_id = "1"; - ProfilingManager::Instance().Init(profiling_options); isInit_ = true; GELOGI("Init GeExecutor over."); @@ -295,7 +295,6 @@ Status GeExecutor::Finalize() { // Stop profiling if (ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().StopProfiling(); ProfilingManager::Instance().PluginUnInit(); } diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 3fd8be1a..2ec2d593 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -29,7 +29,6 @@ COMMON_LOCAL_SRC_FILES := \ common/dump/dump_manager.cc \ common/dump/dump_op.cc \ common/dump/dump_server.cc \ - common/helper/model_cache_helper.cc \ ge_local_engine/engine/host_cpu_engine.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index d6462542..1ef2d00a 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -24,7 +24,6 @@ LIBGE_LOCAL_SRC_FILES := \ common/fp16_t.cc \ common/ge/plugin_manager.cc\ common/ge/op_tiling_manager.cc\ - common/helper/model_cache_helper.cc \ common/profiling/profiling_manager.cc \ common/dump/dump_manager.cc \ common/dump/dump_properties.cc \ diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index abb409c4..9d49eb49 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -17,7 +17,7 @@ #include "graph/build/task_generator.h" #include #include -#include "common/profiling/profiling_manager.h" +#include "common/profiling/profiling_properties.h" #include "framework/common/types.h" #include 
"framework/common/util.h" #include "framework/common/debug/ge_log.h" @@ -945,7 +945,7 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint vector &all_reduce_nodes, std::string &fp_point_str, std::string &bp_point_str) const { - ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str); + ProfilingProperties::Instance().GetFpBpPoint(fp_point_str, bp_point_str); Status ret = SUCCESS; if (fp_point_str.empty()) { @@ -976,8 +976,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi vector &all_reduce_nodes) const { GE_CHECK_NOTNULL(graph); const char *profiling_mode = std::getenv(kProfilingMode); - bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || - ProfilingManager::Instance().ProfilingTrainingTraceOn(); + bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || + ProfilingProperties::Instance().ProfilingTrainingTraceOn(); if (!is_profiling) { GELOGD("Profiling is not open."); return SUCCESS; @@ -1071,8 +1071,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const vector &all_reduce_nodes, uint32_t node_index, vector &task_def_list) { const char *profiling_mode = std::getenv(kProfilingMode); - bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || - ProfilingManager::Instance().ProfilingTrainingTraceOn(); + bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || + ProfilingProperties::Instance().ProfilingTrainingTraceOn(); bool is_insert_fp_profiling_task = false; (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); bool is_insert_bp_profiling_task = false; @@ -1167,8 +1167,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P vector &task_def_list) { GE_CHECK_NOTNULL(op_desc); const char *profiling_mode = 
std::getenv(kProfilingMode); - bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || - ProfilingManager::Instance().ProfilingTrainingTraceOn(); + bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || + ProfilingProperties::Instance().ProfilingTrainingTraceOn(); bool is_insert_bp_profiling_task = false; (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); bool is_insert_end_profiling_task = false; diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index d0d88e66..6de9e9a8 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -27,6 +27,7 @@ #include "graph/load/model_manager/davinci_model.h" #include "common/model/ge_root_model.h" #include "common/formats/utils/formats_trans_utils.h" +#include "framework/omg/omg_inner_types.h" namespace ge { thread_local uint32_t device_count = 0; @@ -330,6 +331,17 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr(tmp_device_id); + rtSetDeviceIdByGeModelIdx(model_id, device_id); + } auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); string om_name; if (IsNeedHybridLoad(*ge_root_model)) { @@ -448,6 +460,10 @@ Status ModelManager::Unload(uint32_t model_id) { } else { GELOGI("Unload model %u success.no need reset device,device_count: %u", model_id, device_count); } + uint32_t device_id = 0; + if (!domi::GetContext().train_flag) { + rtUnsetDeviceIdByGeModelIdx(model_id, device_id); + } std::lock_guard lock(exeception_infos_mutex_); exception_infos_.clear(); return SUCCESS; @@ -1146,7 +1162,17 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGE(ret, "[Load][RootModel] failed, ret:%d, model_id:%u.", ret, model_id); return ret; } - + if (!domi::GetContext().train_flag) { + int32_t tmp_device_id = 0; + 
rtError_t rt_ret = rtGetDevice(&tmp_device_id); + if (rt_ret != RT_ERROR_NONE || tmp_device_id < 0) { + GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); + return ge::FAILED; + } + uint32_t device_id = static_cast(tmp_device_id); + rtSetDeviceIdByGeModelIdx(model_id, device_id); + } if (model_helper.GetModelType()) { bool is_shape_unknown = false; GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index fa140bfe..eecf00e7 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -109,7 +109,6 @@ #include "register/custom_pass_helper.h" #include "external/graph/types.h" #include "common/util/error_manager/error_manager.h" -#include "common/profiling/profiling_manager.h" namespace { const char *const kSummary = "Summary"; @@ -462,9 +461,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options, const OmgContext &omg_context) { IncreaseGraphCount(graph_id); - auto device_id = GetContext().DeviceId(); - GELOGD("Device id is %u", device_id); - ProfilingManager::Instance().SetGraphIdToDeviceMap(graph_id, device_id); // validation for adding graphs of same graph_id in multi-thread secenario // 1.previous thread owns same graph_id has finished the AddGraph procession if (GetAddGraphCondition(graph_id) == kDoneAdded) { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 2491715b..feb1e4e8 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -43,6 +43,7 @@ #include "runtime/kernel.h" #include "opskernel_manager/ops_kernel_builder_manager.h" #include "external/runtime/rt_error_codes.h" +#include "common/profiling/profiling_init.h" using Json = nlohmann::json; @@ -194,7 +195,6 @@ Status GELib::SystemInitialize(const map &options) { InitOptions(options); // In train and infer, profiling 
is always needed. - InitProfiling(this->options_); // 1.`is_train_mode_` means case: train // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer // these two case with logical device id @@ -206,16 +206,6 @@ Status GELib::SystemInitialize(const map &options) { return status; } -void GELib::InitProfiling(Options &options) { - GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id); - std::lock_guard lock(status_mutex_); - GetContext().Init(); - // Profiling init - if (ProfilingManager::Instance().Init(options) != SUCCESS) { - GELOGW("Profiling init failed."); - } -} - void GELib::SetDefaultPrecisionMode(map &new_options) { auto iter = new_options.find(PRECISION_MODE); if (iter != new_options.end()) { @@ -463,9 +453,6 @@ Status GELib::Finalize() { GELOGI("Analyzer finalization"); Analyzer::GetInstance()->Finalize(); - // Shut down profiling - ShutDownProfiling(); - if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { GELOGI("System ShutDown."); mid_state = SystemShutdownWithOptions(this->options_); @@ -494,15 +481,6 @@ Status GELib::Finalize() { return SUCCESS; } -void GELib::ShutDownProfiling() { - std::lock_guard lock(status_mutex_); - - if (ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().StopProfiling(); - ProfilingManager::Instance().PluginUnInit(); - } -} - // Get Singleton Instance std::shared_ptr GELib::GetInstance() { return instancePtr_; } diff --git a/ge/init/gelib.h b/ge/init/gelib.h index 226dd4c8..e2d79416 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -65,7 +65,6 @@ class GE_FUNC_VISIBILITY GELib { bool IsTrainMode() { return is_train_mode_; } void InitProfiling(Options &options); - void ShutDownProfiling(); Status InitSystemWithoutOptions(); Status InitSystemWithOptions(Options &options); diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index b9c44ef1..779de19e 100755 --- 
a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -36,6 +36,7 @@ #include "runtime/mem.h" #include "ir_build/option_utils.h" #include "common/profiling/profiling_manager.h" +#include "common/profiling/profiling_init.h" namespace ge { namespace { @@ -288,6 +289,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector &inpu GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); if (mutex_.try_lock()) { std::lock_guard lock(mutex_, std::adopt_lock); + auto device_id = GetContext().DeviceId(); + GELOGD("device id is %u", device_id); + ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id); if (!init_flag_) { GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.", session_id_, graph_id); @@ -339,6 +343,9 @@ Status InnerSession::RunGraphWithStreamAsync(uint32_t graph_id, rtStream_t strea "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); return GE_SESS_INIT_FAILED; } + auto device_id = GetContext().DeviceId(); + GELOGD("device id is %u", device_id); + ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id); UpdateThreadContext(graph_id); vector ge_inputs; for (auto &item : inputs) { @@ -382,6 +389,9 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) { session_id_, graph_id); return GE_SESS_INIT_FAILED; } + auto device_id = GetContext().DeviceId(); + GELOGD("remove device id %u", device_id); + ProfilingInit::Instance().UnsetDeviceIdByModelId(graph_id, device_id); UpdateThreadContext(graph_id); Status ret = graph_manager_.RemoveGraph(graph_id); if (ret != SUCCESS) { diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index c87c082c..e9b207af 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -18,32 +18,8 @@ #define INC_FRAMEWORK_COMMON_GE_PROFILING_H_ #include
"ge/ge_api_error_codes.h" -#include "toolchain/prof_callback.h" #include "runtime/base.h" -const int MAX_DEV_NUM = 64; - -enum ProfCommandHandleType { - kProfCommandhandleInit = 0, - kProfCommandhandleStart, - kProfCommandhandleStop, - kProfCommandhandleFinalize, - kProfCommandhandleModelSubscribe, - kProfCommandhandleModelUnsubscribe -}; - -struct ProfCommandHandleData { - uint64_t profSwitch; - uint32_t devNums; // length of device id list - uint32_t devIdList[MAX_DEV_NUM]; - uint32_t modelId; -}; - -GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); -GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); -GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func); -GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); - /// /// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading /// @return Status result @@ -52,4 +28,6 @@ GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); +GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id); + #endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ diff --git a/inc/framework/common/profiling/ge_runner_profiling.h b/inc/framework/common/profiling/ge_runner_profiling.h deleted file mode 100644 index 27e19bce..00000000 --- a/inc/framework/common/profiling/ge_runner_profiling.h +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ -#define INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ - -#include "framework/common/profiling/ge_profiling.h" - -GE_FUNC_VISIBILITY bool IsInitialize(); - -#endif // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ diff --git a/tests/depends/profiler/src/profiler_stub.cc b/tests/depends/profiler/src/profiler_stub.cc index 0b8eaa88..85718483 100644 --- a/tests/depends/profiler/src/profiler_stub.cc +++ b/tests/depends/profiler/src/profiler_stub.cc @@ -40,3 +40,11 @@ rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback) { rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback) { return 0; } + +int32_t MsprofInit(uint32_t dataType, void *data, uint32_t dataLen) { + return 0; +} + +int32_t MsprofFinalize() { + return 0; +} diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 32df7552..20913c7c 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -552,6 +552,18 @@ rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t return RT_ERROR_NONE; } +rtError_t rtSetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId) { + return RT_ERROR_NONE; +} + +rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId) { + return RT_ERROR_NONE; +} + +rtError_t rtProfRegisterCtrlCallback(uint32_t logId, rtProfCtrlHandle callback) { + return RT_ERROR_NONE; +} + #ifdef __cplusplus } #endif diff --git 
a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index a7afee3f..a189763a 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -113,6 +113,9 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" + "${GE_CODE_DIR}/ge/common/profiling/profiling_init.cc" + "${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc" + "${GE_CODE_DIR}/ge/common/profiling/command_handle.cc" "${GE_CODE_DIR}/ge/common/profiling/ge_profiling.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" @@ -717,6 +720,8 @@ set(SINGLE_OP_TEST_FILES set(PROFILING_MNG_TEST_FILES "profiling/ge_profiling_manager_unittest.cc" + "profiling/profiling_properties_unittest.cc" + "profiling/profiling_init_unittest.cc" ) set(HYBRID_TEST_FILES diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 62204f6c..dee33fc9 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -896,7 +896,7 @@ TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) { } TEST_F(UtestDavinciModel, Sink_model_profile) { - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; + ProfilingManager::Instance().reporter_callback_ = MsprofReport; ProfileInfo profile; profile.fusion_info.op_name = "relu"; @@ -909,7 +909,7 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { } TEST_F(UtestDavinciModel, Sink_time_profile) { - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; + ProfilingManager::Instance().reporter_callback_ = MsprofReport; DavinciModel model(0, nullptr); InputData current_data; model.SinkTimeProfile(current_data); @@ -1031,7 +1031,7 @@ TEST_F(UtestDavinciModel, NnExecute) { input_data.blobs = output_data.blobs;
EXPECT_EQ(input_data.blobs.size(), 1); - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; + ProfilingManager::Instance().reporter_callback_ = MsprofReport; ProfilingManager::Instance().device_id_.emplace_back(0); model.task_list_.resize(1); EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); diff --git a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc index 35879df8..76b14139 100644 --- a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc +++ b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc @@ -26,6 +26,7 @@ #define protected public #define private public #include "common/profiling/profiling_manager.h" +#include "common/profiling/command_handle.h" #include "graph/ge_local_context.h" #include "inc/framework/common/profiling/ge_profiling.h" #include "graph/manager/graph_manager.h" @@ -37,6 +38,17 @@ using namespace ge; using namespace std; +namespace { +enum ProfCommandHandleType { + kProfCommandhandleInit = 0, + kProfCommandhandleStart, + kProfCommandhandleStop, + kProfCommandhandleFinalize, + kProfCommandhandleModelSubscribe, + kProfCommandhandleModelUnsubscribe +}; +} + class UtestGeProfilinganager : public testing::Test { protected: void SetUp() override {} @@ -97,11 +109,11 @@ TEST_F(UtestGeProfilinganager, ParseOptions) { } TEST_F(UtestGeProfilinganager, plungin_init_) { - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = ReporterCallback; + ProfilingManager::Instance().reporter_callback_ = ReporterCallback; Status ret = ProfilingManager::Instance().PluginInit(); EXPECT_EQ(ret, INTERNAL_ERROR); - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = nullptr; + ProfilingManager::Instance().reporter_callback_ = nullptr; } TEST_F(UtestGeProfilinganager, report_data_) { @@ -169,31 +181,33 @@ TEST_F(UtestGeProfilinganager, get_device_from_graph) { OmgContext context; Status ret = graph_manager.AddGraph(graph_id, 
graph, options, context); EXPECT_EQ(ret, ge::SUCCESS); + ProfSetGraphIdToDeviceMap(graph_id, device_id); ret = ProfGetDeviceFormGraphId(graph_id, device_id); EXPECT_EQ(ret, ge::SUCCESS); } TEST_F(UtestGeProfilinganager, handle_subscribe_info) { - ProfCommandHandleType prof_type = kProfCommandhandleModelSubscribe; - ProfCommandHandleData prof_data; + uint32_t prof_type = RT_PROF_CTRL_SWITCH; + rtProfCommandHandle prof_data; prof_data.profSwitch = 0; prof_data.modelId = 1; + prof_data.type = 0; domi::GetContext().train_flag = true; - auto prof_ptr = std::make_shared(prof_data); - Status ret = ProfCommandHandle(prof_type, static_cast(prof_ptr.get()), sizeof(prof_data)); + auto prof_ptr = std::make_shared(prof_data); + Status ret = CommandHandle(prof_type, static_cast(prof_ptr.get()), sizeof(prof_data)); EXPECT_EQ(ret, ge::SUCCESS); } TEST_F(UtestGeProfilinganager, handle_unsubscribe_info) { - ProfCommandHandleType prof_type = kProfCommandhandleModelUnsubscribe; - ProfCommandHandleData prof_data; + uint32_t prof_type = kProfCommandhandleModelUnsubscribe; + rtProfCommandHandle prof_data; prof_data.profSwitch = 0; prof_data.modelId = 1; domi::GetContext().train_flag = true; auto &profiling_manager = ge::ProfilingManager::Instance(); profiling_manager.SetSubscribeInfo(0, 1, true); - auto prof_ptr = std::make_shared(prof_data); - Status ret = ProfCommandHandle(prof_type, static_cast(prof_ptr.get()), sizeof(prof_data)); + auto prof_ptr = std::make_shared(prof_data); + Status ret = CommandHandle(prof_type, static_cast(prof_ptr.get()), sizeof(prof_data)); profiling_manager.CleanSubscribeInfo(); } diff --git a/tests/ut/ge/profiling/profiling_init_unittest.cc b/tests/ut/ge/profiling/profiling_init_unittest.cc new file mode 100644 index 00000000..ccf90eda --- /dev/null +++ b/tests/ut/ge/profiling/profiling_init_unittest.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#define protected public +#define private public +#include "common/profiling/profiling_init.h" +#include "graph/ge_local_context.h" +#include "graph/manager/graph_manager.h" +#undef protected +#undef private + +using namespace ge; +using namespace std; + +class UtestGeProfilingInit : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(UtestGeProfilingInit, test_init) { + setenv("PROFILING_MODE", "true", true); + Options options; + options.device_id = 0; + options.job_id = "0"; + options.profiling_mode = "1"; + options.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})"; + auto &profiling_init = ge::ProfilingInit::Instance(); + auto ret = profiling_init.Init(options); + EXPECT_EQ(ret, ge::SUCCESS); +} + +TEST_F(UtestGeProfilingInit, test_stop) { + auto &profiling_init = ge::ProfilingInit::Instance(); + profiling_init.StopProfiling(); +} + +TEST_F(UtestGeProfilingInit, test_shut) { + auto &profiling_init = ge::ProfilingInit::Instance(); + profiling_init.ShutDownProfiling(); +} + +TEST_F(UtestGeProfilingInit, test_set_deviceId) { + uint32_t model_id = 0; + uint32_t device_id = 0; + auto &profiling_init = ge::ProfilingInit::Instance(); + auto ret = profiling_init.SetDeviceIdByModelId(model_id, device_id); +} + 
+TEST_F(UtestGeProfilingInit, test_unset_deviceId) { + uint32_t model_id = 0; + uint32_t device_id = 0; + auto &profiling_init = ge::ProfilingInit::Instance(); + auto ret = profiling_init.UnsetDeviceIdByModelId(model_id, device_id); +} diff --git a/tests/ut/ge/profiling/profiling_properties_unittest.cc b/tests/ut/ge/profiling/profiling_properties_unittest.cc new file mode 100644 index 00000000..f97da098 --- /dev/null +++ b/tests/ut/ge/profiling/profiling_properties_unittest.cc @@ -0,0 +1,72 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define protected public +#define private public +#include "common/profiling/profiling_properties.h" +#include "graph/ge_local_context.h" +#include "graph/manager/graph_manager.h" +#undef protected +#undef private + +using namespace ge; +using namespace std; + +class UtestGeProfilingProperties : public testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +TEST_F(UtestGeProfilingProperties, test_execute_profiling) { + auto &profiling_properties = ge::ProfilingProperties::Instance(); + profiling_properties.SetExecuteProfiling(true); + auto is_execute = profiling_properties.IsExecuteProfiling(); + EXPECT_EQ(is_execute, true); +} + +TEST_F(UtestGeProfilingProperties, test_training_trace) { + auto &profiling_properties = ge::ProfilingProperties::Instance(); + profiling_properties.SetTrainingTrace(true); + auto is_train_trace = profiling_properties.ProfilingTrainingTraceOn(); + EXPECT_EQ(is_train_trace, true); +} + +TEST_F(UtestGeProfilingProperties, test_fpbp_point) { + auto &profiling_properties = ge::ProfilingProperties::Instance(); + std::string fp_point = "fp"; + std::string bp_point = "bp"; + profiling_properties.SetFpBpPoint(fp_point, bp_point); + profiling_properties.GetFpBpPoint(fp_point, bp_point); + EXPECT_EQ(fp_point, "fp"); + EXPECT_EQ(bp_point, "bp"); +} + +TEST_F(UtestGeProfilingProperties, test_profiling_on) { + auto &profiling_properties = ge::ProfilingProperties::Instance(); + profiling_properties.SetExecuteProfiling(true); + profiling_properties.SetLoadProfiling(true); + auto profiling_on = profiling_properties.ProfilingOn(); + EXPECT_EQ(profiling_on, true); +} diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 7fc1cdea..2dad826f 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -33,6 +33,7 @@ extern "C" { #endif #endif +#define
RT_PROF_MAX_DEV_NUM 64 typedef int32_t rtError_t; static const int32_t RT_ERROR_NONE = 0; // success @@ -80,6 +81,13 @@ typedef enum tagRtLimitType { RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms } rtLimitType_t; +typedef enum { + RT_PROF_CTRL_INVALID = 0, + RT_PROF_CTRL_SWITCH, + RT_PROF_CTRL_REPORTER, + RT_PROF_CTRL_BUTT, +} rtProfCtrlType_t; + typedef struct rtExceptionInfo { uint32_t taskid; uint32_t streamid; @@ -88,6 +96,15 @@ typedef struct rtExceptionInfo { uint32_t retcode; } rtExceptionInfo; +typedef struct rtProfCommandHandle { + uint64_t profSwitch; + uint64_t profSwitchHi; + uint32_t devNums; + uint32_t devIdList[RT_PROF_MAX_DEV_NUM]; + uint32_t modelId; + uint32_t type; +} rtProfCommandHandle_t; + typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); @@ -118,6 +135,8 @@ typedef void *rtLabel_t; */ typedef void *rtModel_t; +typedef rtError_t (*rtProfCtrlHandle)(uint32_t type, void *data, uint32_t len); + /** * @ingroup profiling_base * @brief runtime handle. 
@@ -357,6 +376,14 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ */ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); +RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t logId, rtProfCtrlHandle callback); + +RTS_API rtError_t rtSetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); + +RTS_API rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); + +RTS_API rtError_t rtGetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index 5073cfb1..969dc1cb 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -114,15 +114,6 @@ enum MsprofCtrlCallbackType { MSPROF_CTRL_PROF_SWITCH_OFF // for prof switch off }; -#define MSPROF_MAX_DEV_NUM (64) - -struct MsprofCommandHandle { - uint64_t profSwitch; - uint32_t devNums; // length of device id list - uint32_t devIdList[MSPROF_MAX_DEV_NUM]; - uint32_t modelId; -}; - /** * @name MsprofCtrlCallback * @brief callback to start/stop profiling