@@ -84,7 +84,6 @@ else () | |||
set(STATIC_ACL_LIB ${GE_LIB_PATH}) | |||
find_module(slog libalog.so ${GE_LIB_PATH}) | |||
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) | |||
find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) | |||
find_module(hccl libhccl.so ${GE_LIB_PATH}) | |||
find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | |||
find_module(runtime libruntime.so ${GE_LIB_PATH}) | |||
@@ -106,7 +105,6 @@ else () | |||
elseif(PLATFORM STREQUAL "inference") | |||
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||
find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
if(PRODUCT STREQUAL "flr3") | |||
elseif(PRODUCT STREQUAL "flr1") | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
@@ -120,7 +118,6 @@ else () | |||
find_module(runtime libruntime.so ${ASCEND_ATC_DIR}) | |||
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_ATC_DIR}/stub) | |||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
else() | |||
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||
endif() | |||
@@ -116,6 +116,7 @@ set(EXECUTOR_SRC_LIST | |||
"common/ge/plugin_manager.cc" | |||
"common/profiling/ge_profiling.cc" | |||
"common/profiling/profiling_manager.cc" | |||
"common/profiling/command_handle.cc" | |||
"executor/ge_executor.cc" | |||
"ge_local_engine/engine/host_cpu_engine.cc" | |||
"graph/build/memory/var_mem_assign_util.cc" | |||
@@ -259,10 +260,9 @@ set(EXECUTOR_SRC_LIST | |||
################################################################## | |||
set(COMPILER_SRC_LIST | |||
"analyzer/analyzer.cc" | |||
"common/dump/dump_op.cc" | |||
#"common/dump/dump_op.cc" | |||
"common/ge/op_tiling_manager.cc" | |||
"common/ge/plugin_manager.cc" | |||
"common/profiling/profiling_manager.cc" | |||
"engine_manager/dnnengine_manager.cc" | |||
"ge_local_engine/engine/host_cpu_engine.cc" | |||
"ge_opt_info/ge_opt_info.cc" | |||
@@ -473,7 +473,7 @@ set(RUNNER_SRC_LIST | |||
"client/ge_api.cc" | |||
"session/inner_session.cc" | |||
"session/session_manager.cc" | |||
"common/profiling/ge_runner_profiling.cc" | |||
"common/profiling/profiling_init.cc" | |||
"graph/manager/memory_api.cc" | |||
"graph/manager/util/hcom_util.cc" | |||
"graph/load/model_manager/task_info/hccl_task_info.cc" | |||
@@ -568,6 +568,8 @@ target_link_libraries(ge_runner PRIVATE | |||
graph | |||
ge_common | |||
ascend_protobuf | |||
ge_executor_shared | |||
msprofiler_fwk_share | |||
register | |||
c_sec | |||
slog | |||
@@ -35,6 +35,11 @@ | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "toolchain/plog.h" | |||
#include "ir_build/option_utils.h" | |||
#include "framework/common/ge_types.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "graph/ge_context.h" | |||
#include "common/profiling/profiling_init.h" | |||
#include "common/profiling/profiling_properties.h" | |||
using domi::OpRegistry; | |||
using std::map; | |||
@@ -43,6 +48,89 @@ using std::vector; | |||
namespace { | |||
const int32_t kMaxStrLen = 128; | |||
const int kDecimal = 10; | |||
const int kDefaultDeviceIdForTrain = 0; | |||
const int kDefaultDeviceIdForInfer = -1; | |||
void InitOptions(const map<string, string> &option_map, ge::Options &options) { | |||
GELOGD("InitOptions start"); | |||
options.session_id = 0; | |||
auto is_train_mode = false; | |||
auto iter = option_map.find(ge::OPTION_GRAPH_RUN_MODE); | |||
if (iter != option_map.end()) { | |||
if (ge::GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= ge::TRAIN) { | |||
is_train_mode = true; | |||
} | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_SESSION_ID); | |||
if (iter != option_map.end()) { | |||
options.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | |||
} | |||
options.device_id = is_train_mode ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; | |||
iter = option_map.find(ge::OPTION_EXEC_DEVICE_ID); | |||
if (iter != option_map.end()) { | |||
options.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal)); | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_JOB_ID); | |||
if (iter != option_map.end()) { | |||
options.job_id = iter->second.c_str(); | |||
} | |||
options.isUseHcom = false; | |||
iter = option_map.find(ge::OPTION_EXEC_IS_USEHCOM); | |||
if (iter != option_map.end()) { | |||
std::istringstream(iter->second) >> options.isUseHcom; | |||
} | |||
options.isUseHvd = false; | |||
iter = option_map.find(ge::OPTION_EXEC_IS_USEHVD); | |||
if (iter != option_map.end()) { | |||
std::istringstream(iter->second) >> options.isUseHvd; | |||
} | |||
options.deployMode = false; | |||
iter = option_map.find(ge::OPTION_EXEC_DEPLOY_MODE); | |||
if (iter != option_map.end()) { | |||
std::istringstream(iter->second) >> options.deployMode; | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_POD_NAME); | |||
if (iter != option_map.end()) { | |||
options.podName = iter->second.c_str(); | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_PROFILING_MODE); | |||
if (iter != option_map.end()) { | |||
options.profiling_mode = iter->second.c_str(); | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_PROFILING_OPTIONS); | |||
if (iter != option_map.end()) { | |||
options.profiling_options = iter->second.c_str(); | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_RANK_ID); | |||
if (iter != option_map.end()) { | |||
options.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | |||
} | |||
iter = option_map.find(ge::OPTION_EXEC_RANK_TABLE_FILE); | |||
if (iter != option_map.end()) { | |||
options.rankTableFile = iter->second.c_str(); | |||
} | |||
options.enable_atomic = true; | |||
iter = option_map.find(ge::OPTION_EXEC_ATOMIC_FLAG); | |||
GE_IF_BOOL_EXEC(iter != option_map.end(), | |||
options.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal)); | |||
GELOGD("ge InnerInitialize, the enable_atomic_flag in options_ is %d", options.enable_atomic); | |||
} | |||
// Initialises the GE context and then the profiling subsystem from `options`.
// A profiling initialisation failure is only logged as a warning; nothing is returned.
void InitProfiling(ge::Options &options) {
  GELOGD("InitProfiling start");
  ge::GetContext().Init();

  // Profiling init
  const auto init_status = ge::ProfilingInit::Instance().Init(options);
  if (init_status == ge::SUCCESS) {
    return;
  }
  GELOGW("Profiling init failed.");
}
void ShutDownProfiling() { | |||
GELOGD("Profiling shut down"); | |||
if (ge::ProfilingProperties::Instance().ProfilingOn()) { | |||
ge::ProfilingInit::Instance().ShutDownProfiling(); | |||
} | |||
} | |||
} // namespace | |||
static bool g_ge_initialized = false; | |||
@@ -128,6 +216,9 @@ Status GEInitializeImpl(const std::map<string, string> &options) { | |||
if (CheckOptionsValid(options) != SUCCESS) { | |||
return FAILED; | |||
} | |||
ge::Options str_options; | |||
InitOptions(options, str_options); | |||
InitProfiling(str_options); | |||
GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid"); | |||
ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOpsProtoInit); | |||
@@ -208,7 +299,7 @@ Status GEFinalize() { | |||
GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize"); | |||
return SUCCESS; | |||
} | |||
ShutDownProfiling(); | |||
ErrorManager::GetInstance().SetStage(error_message::kFinalize, error_message::kFinalize); | |||
ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
GELOGT(TRACE_INIT, "GEFinalize start"); | |||
@@ -50,6 +50,7 @@ set(SRC_LIST | |||
"${GE_CODE_DIR}/ge/common/transop_util.cc" | |||
"${GE_CODE_DIR}/ge/common/types.cc" | |||
"${GE_CODE_DIR}/ge/common/util.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc" | |||
) | |||
if (NOT ENABLE_D AND NOT ENABLE_ACL) | |||
@@ -0,0 +1,268 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "command_handle.h" | |||
#include "runtime/base.h" | |||
#include "common/profiling/profiling_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "framework/omg/omg_inner_types.h" | |||
#include "graph/load/graph_loader.h" | |||
namespace { | |||
// Position of the comma separated device-id list inside the parameter vector built for
// start/stop commands: [kDeviceNums, <count>, kDeviceIdList, <id list>].
const uint32_t kDeviceListIndex = 3;
// Upper bound accepted for the number of devices in one request. Unsigned because it is only
// ever compared against unsigned device counts.
const uint32_t kMaxDevNum = 64;

// Keys used in the command parameter vector.
const std::string kDeviceNums = "devNums";
const std::string kDeviceIdList = "devIdList";
const std::string kProfilingModelId = "modelId";

// Command names forwarded through ge::Command::cmd_type.
const std::string kProfilingInit = "prof_init";
const std::string kProfilingFinalize = "prof_finalize";
const std::string kProfilingStart = "prof_start";
const std::string kProfilingStop = "prof_stop";
const std::string kProfilingModelSubscribe = "prof_model_subscribe";
const std::string kProfilingModelUnsubscribe = "prof_model_cancel_subscribe";

// Command types carried in the `type` field of the profiling command payload.
enum ProfCommandHandleType {
  kProfCommandhandleInit = 0,
  kProfCommandhandleStart,
  kProfCommandhandleStop,
  kProfCommandhandleFinalize,
  kProfCommandhandleModelSubscribe,
  kProfCommandhandleModelUnsubscribe
};

// Number of valid command types. Derived from the last enumerator so that it cannot drift
// when the enum is extended (keep kProfCommandhandleModelUnsubscribe last).
const uint32_t kCommandNum = static_cast<uint32_t>(kProfCommandhandleModelUnsubscribe) + 1U;

// Maps every command type to the command name understood by the graph loader.
const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = {
    {kProfCommandhandleInit, kProfilingInit},
    {kProfCommandhandleStart, kProfilingStart},
    {kProfCommandhandleStop, kProfilingStop},
    {kProfCommandhandleFinalize, kProfilingFinalize},
    {kProfCommandhandleModelSubscribe, kProfilingModelSubscribe},
    {kProfCommandhandleModelUnsubscribe, kProfilingModelUnsubscribe}};
bool IsProfTypeValid(uint32_t type) { | |||
if (type < 0 || type >= kCommandNum) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Type]Type %u is invalid", type); | |||
return false; | |||
} | |||
GELOGD("Type is %u", type); | |||
return true; | |||
} | |||
bool IsProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
if (deviceid_list == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "Device id list is nullptr"); | |||
return false; | |||
} | |||
if (device_nums == 0 || device_nums > kMaxDevNum) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); | |||
REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); | |||
return false; | |||
} | |||
// real device num | |||
int32_t dev_count = 0; | |||
rtError_t rt_err = rtGetDeviceCount(&dev_count); | |||
if (rt_err != RT_ERROR_NONE) { | |||
GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); | |||
REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); | |||
return false; | |||
} | |||
if (device_nums > static_cast<uint32_t>(dev_count)) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", device_nums, dev_count); | |||
REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", device_nums, dev_count); | |||
return false; | |||
} | |||
std::set<uint32_t> record; | |||
for (size_t i = 0; i < device_nums; ++i) { | |||
uint32_t dev_id = deviceid_list[i]; | |||
if (dev_id >= static_cast<uint32_t>(dev_count)) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", dev_id, dev_count); | |||
REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); | |||
return false; | |||
} | |||
if (record.count(dev_id) > 0) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); | |||
REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id); | |||
return false; | |||
} | |||
record.insert(dev_id); | |||
} | |||
return true; | |||
} | |||
bool TransProfConfigToParam(const rtProfCommandHandle &profCommand, vector<string> &prof_config_params) { | |||
prof_config_params.clear(); | |||
prof_config_params.emplace_back(kDeviceNums); | |||
prof_config_params.emplace_back(std::to_string(profCommand.devNums)); | |||
prof_config_params.emplace_back(kDeviceIdList); | |||
std::string devID = ""; | |||
if (profCommand.devNums == 0) { | |||
GELOGE(ge::FAILED, "[Check][Param]The device num is invalid."); | |||
return false; | |||
} | |||
for (uint32_t i = 0; i < profCommand.devNums; i++) { | |||
devID.append(std::to_string(profCommand.devIdList[i])); | |||
if (i != profCommand.devNums - 1) { | |||
devID.append(","); | |||
} | |||
} | |||
prof_config_params.push_back(devID); | |||
return true; | |||
} | |||
// Prepares the parameters of a model-unsubscribe command.
//
// Nothing happens unless `type` is kProfCommandhandleModelUnsubscribe and a subscription is
// active (`is_subscribe`). In that case `prof_params` is rebuilt as
// [kProfilingModelId, <model id>], where the model id is looked up from `graph_id`.
//
// Returns ge::SUCCESS, or the status of the failed lookup; on failure `prof_params` holds
// only the key.
ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe, uint32_t graph_id,
                           vector<string> &prof_params) {
  if (type != kProfCommandhandleModelUnsubscribe || !is_subscribe) {
    return ge::SUCCESS;
  }

  prof_params.clear();
  prof_params.emplace_back(kProfilingModelId);

  uint32_t model_id = graph_id;
  auto &profiling_manager = ge::ProfilingManager::Instance();
  auto ret = profiling_manager.GetModelIdFromGraph(graph_id, model_id);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Get][GraphId]graph_id:%u not found", graph_id);
    return ret;
  }
  prof_params.emplace_back(std::to_string(model_id));
  return ge::SUCCESS;
}
// For start/stop commands, validates the device list in `prof_config_param` and converts it
// into `prof_params`. Any other command type is a no-op.
//
// Returns ge::SUCCESS, ge::FAILED when the device list is invalid, or ge::PARAM_INVALID when
// the conversion fails.
rtError_t NeedHandleStartEnd(ProfCommandHandleType type, rtProfCommandHandle_t *prof_config_param,
                             std::vector<string> &prof_params) {
  const bool is_start_or_stop = (type == kProfCommandhandleStart) || (type == kProfCommandhandleStop);
  if (!is_start_or_stop) {
    return ge::SUCCESS;
  }

  if (!IsProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
    return ge::FAILED;
  }

  if (TransProfConfigToParam(*prof_config_param, prof_params)) {
    return ge::SUCCESS;
  }

  GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed");
  REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed");
  return ge::PARAM_INVALID;
}
// Handles the model-subscribe command; every other command type is a no-op.
//
// When the domi context's train_flag is set, the subscription (profSwitch, modelId) is only
// recorded in the ProfilingManager. Otherwise `prof_params` is rebuilt as
// [kProfilingModelId, <modelId>]. Always returns ge::SUCCESS.
rtError_t NeedHandleModelSubscribe(ProfCommandHandleType type, rtProfCommandHandle_t *prof_config_param,
                                   std::vector<string> &prof_params) {
  if (type != kProfCommandhandleModelSubscribe) {
    return ge::SUCCESS;
  }

  auto &profiling_manager = ge::ProfilingManager::Instance();
  if (domi::GetContext().train_flag) {
    profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true);
    return ge::SUCCESS;
  }

  prof_params.clear();
  prof_params.emplace_back(kProfilingModelId);
  prof_params.emplace_back(std::to_string(prof_config_param->modelId));
  return ge::SUCCESS;
}
// Builds a ge::Command from the resolved command name (`iter->second`) and `prof_params`
// and hands it to GraphLoader::CommandHandle.
//
// type:              command type; for everything but finalize the profSwitch of
//                    `prof_config_param` is copied into the command's module_index.
// iter:              entry of kProfCommandTypeMap matching `type`.
// prof_config_param: payload of the request; not read for finalize.
// prof_params:       parameters prepared by the Need* helpers; for start/stop it must hold
//                    at least kDeviceListIndex + 1 entries because they are logged.
// Returns ge::SUCCESS, or ge::FAILED when the graph loader rejects the command.
rtError_t ExecuteCommand(ProfCommandHandleType type,
                         std::map<ProfCommandHandleType, std::string>::const_iterator iter,
                         rtProfCommandHandle_t *prof_config_param, std::vector<string> &prof_params) {
  ge::GraphLoader graph_loader;
  const std::string &cmd_name = iter->second;

  ge::Command command;
  command.cmd_type = cmd_name;
  command.cmd_params = prof_params;
  if (type != kProfCommandhandleFinalize) {
    command.module_index = prof_config_param->profSwitch;
  }
  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", cmd_name.c_str(),
         command.module_index);

  const bool is_start_or_stop = (type == kProfCommandhandleStart) || (type == kProfCommandhandleStop);
  if (is_start_or_stop) {
    GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
  }

  const ge::Status ret = graph_loader.CommandHandle(command);
  if (ret == ge::SUCCESS) {
    GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index);
    return ge::SUCCESS;
  }

  GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u",
         cmd_name.c_str(), ret);
  REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u",
                    cmd_name.c_str(), ret);
  return ge::FAILED;
}
// Dispatches a profiling switch request.
//
// `data` is interpreted as an rtProfCommandHandle_t. Its `type` selects the command; the
// Need* helpers prepare the command parameters and ExecuteCommand forwards the result to
// the graph loader.
//
// Returns ge::SUCCESS on success, ge::PARAM_INVALID for a null payload or unmapped type,
// ge::FAILED for an out-of-range type or an unknown graph id, or the status of the failing
// helper.
rtError_t HandleCtrlSwitch(void *data) {
  // The payload is dereferenced immediately below, so it must be validated first and for
  // every command type.
  GE_CHECK_NOTNULL(data);

  auto &profiling_manager = ge::ProfilingManager::Instance();
  rtProfCommandHandle_t *prof_config_param = reinterpret_cast<rtProfCommandHandle_t *>(data);
  if (!IsProfTypeValid(prof_config_param->type)) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand is invalid.");
    return ge::FAILED;
  }

  auto type = static_cast<ProfCommandHandleType>(prof_config_param->type);
  auto iter = kProfCommandTypeMap.find(type);
  if (iter == kProfCommandTypeMap.end()) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand type is invalid.");
    return ge::PARAM_INVALID;
  }

  std::vector<string> prof_params;
  ge::Status ret = NeedHandleStartEnd(type, prof_config_param, prof_params);
  if (ret != ge::SUCCESS) {
    return ret;
  }

  // NOTE(review): in training mode NeedHandleModelSubscribe only records the subscription
  // and returns SUCCESS, so control still reaches ExecuteCommand with empty prof_params.
  // Confirm that executing the subscribe command in that case is intended.
  ret = NeedHandleModelSubscribe(type, prof_config_param, prof_params);
  if (ret != ge::SUCCESS) {
    return ret;
  }

  auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe;
  // GraphId is actually stored in prof_config_param
  auto graph_id = prof_config_param->modelId;
  ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Check][Param]graph_id:%u not found", graph_id);
    REPORT_INPUT_ERROR(
        "E10001", std::vector<std::string>({"value", "parameter", "reason"}),
        std::vector<std::string>({std::to_string(graph_id), "GraphToModelMap", "graph_id does not exist!"}));
    return ge::FAILED;
  }

  return ExecuteCommand(type, iter, prof_config_param, prof_params);
}
} // namespace | |||
namespace ge { | |||
// Entry point for profiling control requests.
//
// rt_type: RT_PROF_CTRL_REPORTER stores `data` as the Msprof reporter callback;
//          RT_PROF_CTRL_SWITCH treats `data` as a command payload and dispatches it.
// data:    request payload, must not be null.
// len:     not used by this function.
// Returns ge::SUCCESS on success; ge::FAILED for a null payload or any other rt_type;
// otherwise the result of HandleCtrlSwitch.
rtError_t CommandHandle(uint32_t rt_type, void *data, uint32_t len) {
  if (data == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand is invalid.");
    return ge::FAILED;
  }

  auto &profiling_manager = ge::ProfilingManager::Instance();
  if (rt_type == RT_PROF_CTRL_REPORTER) {
    profiling_manager.SetMsprofReporterCallback(reinterpret_cast<MsprofReporterCallback>(data));
    GELOGD("return with MsprofReporterCallback");
    return ge::SUCCESS;
  }

  if (rt_type == RT_PROF_CTRL_SWITCH) {
    return HandleCtrlSwitch(data);
  }

  return ge::FAILED;
}
} // namespace ge |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -14,13 +14,13 @@ | |||
* limitations under the License. | |||
*/ | |||
#include "framework/common/profiling/ge_runner_profiling.h" | |||
#include "init/gelib.h" | |||
#ifndef GE_COMMON_PROFILING_COMMAND_HANDLE_H_ | |||
#define GE_COMMON_PROFILING_COMMAND_HANDLE_H_ | |||
bool IsInitialize() { | |||
std::shared_ptr<ge::GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) { | |||
return false; | |||
} | |||
return true; | |||
#include "ge/ge_api_error_codes.h" | |||
#include "runtime/base.h" | |||
namespace ge { | |||
GE_FUNC_VISIBILITY rtError_t CommandHandle(uint32_t rt_type, void *data, uint32_t len); | |||
} | |||
#endif // GE_COMMON_PROFILING_COMMAND_HANDLE_H_ |
@@ -19,245 +19,15 @@ | |||
#include "common/profiling/profiling_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/debug/log.h" | |||
#include "graph/load/graph_loader.h" | |||
#include "graph/ge_context.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "common/model/ge_model.h" | |||
#include "framework/omg/omg_inner_types.h" | |||
namespace { | |||
const uint32_t kDeviceListIndex = 3; | |||
const std::string kDeviceNums = "devNums"; | |||
const std::string kDeviceIdList = "devIdList"; | |||
const std::string kProfilingInit = "prof_init"; | |||
const std::string kProfilingFinalize = "prof_finalize"; | |||
const std::string kProfilingStart = "prof_start"; | |||
const std::string kProfilingStop = "prof_stop"; | |||
const std::string kProfModelSubscribe = "prof_model_subscribe"; | |||
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
const std::string kRtSetDeviceRegName = "profiling"; | |||
const std::string kPofilingModelId = "modelId"; | |||
const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = { | |||
{kProfCommandhandleInit, kProfilingInit}, | |||
{kProfCommandhandleStart, kProfilingStart}, | |||
{kProfCommandhandleStop, kProfilingStop}, | |||
{kProfCommandhandleFinalize, kProfilingFinalize}, | |||
{kProfCommandhandleModelSubscribe, kProfModelSubscribe}, | |||
{kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; | |||
const uint64_t kModelId = ge::INVALID_MODEL_ID; | |||
const uint16_t kStepStart = 0; | |||
const uint16_t kStepEnd = 1; | |||
// Prepares the parameters of a model-unsubscribe command.
//
// Nothing happens unless `type` is kProfCommandhandleModelUnsubscribe and a subscription is
// active. In that case `prof_params` is rebuilt as [kPofilingModelId, <model id>], with the
// model id looked up from `graph_id`.
//
// Returns ge::SUCCESS, or the status of the failed lookup.
ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe,
                           uint32_t graph_id, vector<string> &prof_params) {
  if (type != kProfCommandhandleModelUnsubscribe || !is_subscribe) {
    return ge::SUCCESS;
  }

  prof_params.clear();
  prof_params.emplace_back(kPofilingModelId);

  uint32_t model_id = 0;
  auto ret = ge::ProfilingManager::Instance().GetModelIdFromGraph(graph_id, model_id);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "graph_id:%u not found", graph_id);
    return ret;
  }
  prof_params.emplace_back(std::to_string(model_id));
  return ge::SUCCESS;
}
} // namespace | |||
bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) { | |||
prof_config_params.clear(); | |||
prof_config_params.emplace_back(kDeviceNums); | |||
prof_config_params.emplace_back(std::to_string(profCommand.devNums)); | |||
prof_config_params.emplace_back(kDeviceIdList); | |||
std::string devID = ""; | |||
if (profCommand.devNums == 0) { | |||
GELOGW("The device num is invalid."); | |||
return false; | |||
} | |||
for (uint32_t i = 0; i < profCommand.devNums; i++) { | |||
devID.append(std::to_string(profCommand.devIdList[i])); | |||
if (i != profCommand.devNums - 1) { | |||
devID.append(","); | |||
} | |||
} | |||
prof_config_params.push_back(devID); | |||
return true; | |||
} | |||
bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
if (deviceid_list == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "Device id list is nullptr"); | |||
return false; | |||
} | |||
if (device_nums == 0 || device_nums > MAX_DEV_NUM) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); | |||
REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); | |||
return false; | |||
} | |||
// real device num | |||
int32_t dev_count = 0; | |||
rtError_t rt_err = rtGetDeviceCount(&dev_count); | |||
if (rt_err != RT_ERROR_NONE) { | |||
GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); | |||
REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); | |||
return false; | |||
} | |||
if (device_nums > static_cast<uint32_t>(dev_count)) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", | |||
device_nums, dev_count); | |||
REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", | |||
device_nums, dev_count); | |||
return false; | |||
} | |||
std::set<uint32_t> record; | |||
for (size_t i = 0; i < device_nums; ++i) { | |||
uint32_t dev_id = deviceid_list[i]; | |||
if (dev_id >= static_cast<uint32_t>(dev_count)) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", | |||
dev_id, dev_count); | |||
REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); | |||
return false; | |||
} | |||
if (record.count(dev_id) > 0) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); | |||
REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id); | |||
return false; | |||
} | |||
record.insert(dev_id); | |||
} | |||
return true; | |||
} | |||
// Stores `func` as the Msprof control callback in the ProfilingManager.
//
// A callback that is already registered is kept and only a warning is logged.
// Returns ge::PARAM_INVALID when `func` is null, ge::SUCCESS otherwise.
ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]Msprof ctrl callback is nullptr");
    REPORT_INNER_ERROR("E19999", "Msprof ctrl callback is nullptr");
    return ge::PARAM_INVALID;
  }

  const bool already_registered =
      ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr;
  if (already_registered) {
    GELOGW("Msprof ctrl callback is exist, just ignore it.");
    return ge::SUCCESS;
  }

  ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
  return ge::SUCCESS;
}
// Registers `func` with the runtime as a device state callback under kRtSetDeviceRegName.
//
// Returns ge::PARAM_INVALID when `func` is null, the runtime's status when the registration
// fails, ge::SUCCESS otherwise.
ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofSetDeviceCallback callback is nullptr");
    REPORT_INNER_ERROR("E19999", "MsprofSetDeviceCallback callback is nullptr");
    return ge::PARAM_INVALID;
  }

  // Pass MsprofSetDeviceCallback to runtime
  const ge::Status rt_ret =
      rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
  if (rt_ret == ge::SUCCESS) {
    return ge::SUCCESS;
  }

  GELOGE(rt_ret, "[Pass][MsprofSetDeviceCallback]To runtime failed, ret 0x%X", rt_ret);
  REPORT_CALL_ERROR("E19999", "Pass MsprofSetDeviceCallback to runtime failed, ret 0x%X", rt_ret);
  return rt_ret;
}
// Stores `func` as the Msprof reporter callback in the ProfilingManager and forwards it to
// the runtime.
//
// A callback that is already registered is kept and only a warning is logged.
// Returns ge::PARAM_INVALID when `func` is null, the runtime's status when forwarding
// fails (the callback has already been stored in the manager at that point), ge::SUCCESS
// otherwise.
ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr");
    REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr");
    return ge::PARAM_INVALID;
  }

  const bool already_registered =
      ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr;
  if (already_registered) {
    GELOGW("Msprof reporter callback is exist, just ignore it.");
    return ge::SUCCESS;
  }

  GELOGI("GE register Msprof reporter callback.");
  ge::ProfilingManager::Instance().SetMsprofReporterCallback(func);

  // Pass MsprofReporterCallback to runtime
  const ge::Status rt_ret = rtSetMsprofReporterCallback(func);
  if (rt_ret != ge::SUCCESS) {
    GELOGE(rt_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u",
           rt_ret);
    REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u",
                      rt_ret);
    return rt_ret;
  }
  // Pass MsprofReporterCallback to hccl
  return ge::SUCCESS;
}
// Executes one profiling command.
//
// type: command to execute; must be a key of kProfCommandTypeMap.
// data: ProfCommandHandleData payload. It may be null only for the finalize command, so the
//       payload is never read on the finalize path.
// len:  not used by this function.
//
// Returns ge::SUCCESS on success, ge::PARAM_INVALID for a null payload, an unknown type or
// a failed parameter conversion, and ge::FAILED for an invalid device list, an unknown
// graph id or a command rejected by the graph loader.
ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) {
  if (type != kProfCommandhandleFinalize) {
    GE_CHECK_NOTNULL(data);
  }
  ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data);
  auto iter = kProfCommandTypeMap.find(type);
  if (iter == kProfCommandTypeMap.end()) {
    GELOGW("The prof comand type is invalid.");
    return ge::PARAM_INVALID;
  }

  std::vector<string> prof_params;
  if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
    if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
      return ge::FAILED;
    }
    if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
      GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed");
      REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed");
      return ge::PARAM_INVALID;
    }
  }

  auto &profiling_manager = ge::ProfilingManager::Instance();
  auto is_train = domi::GetContext().train_flag;
  if (type == kProfCommandhandleModelSubscribe && is_train) {
    // In training mode the subscription is only recorded; no command is executed.
    profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true);
    return ge::SUCCESS;
  }

  // Only an unsubscribe request needs the graph id. Reading it for every type would
  // dereference a null payload on the finalize path.
  if (type == kProfCommandhandleModelUnsubscribe) {
    auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe;
    // GraphId is actually stored in prof_config_param
    auto graph_id = prof_config_param->modelId;
    ge::Status unsubscribe_ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params);
    if (unsubscribe_ret != ge::SUCCESS) {
      GELOGE(unsubscribe_ret, "graph_id:%u not found", graph_id);
      REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"value", "parameter", "reason"}),
                         std::vector<std::string>({std::to_string(graph_id),
                                                   "GraphToModelMap",
                                                   "graph_id does not exist!"}));
      return ge::FAILED;
    }
  }

  ge::GraphLoader graph_loader;
  ge::Command command;
  command.cmd_params.clear();
  command.cmd_type = iter->second;
  command.cmd_params = prof_params;
  if (type != kProfCommandhandleFinalize) {
    command.module_index = prof_config_param->profSwitch;
  }
  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(),
         command.module_index);
  if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
    GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
  }

  ge::Status ret = graph_loader.CommandHandle(command);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u",
           iter->second.c_str(), ret);
    REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u",
                      iter->second.c_str(), ret);
    return ge::FAILED;
  }

  GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index);
  return ge::SUCCESS;
}
ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream) { | |||
static bool is_first_run = true; | |||
int32_t device_id = 0; | |||
@@ -289,3 +59,7 @@ ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream | |||
// Looks up the device id recorded for `graph_id` in the ProfilingManager.
// device_id: [out] receives the result; the manager's status is returned unchanged.
ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id) {
  auto &manager = ge::ProfilingManager::Instance();
  return manager.GetDeviceIdFromGraph(graph_id, device_id);
}
// Forwards the (graph_id, device_id) pair to ProfilingManager::SetGraphIdToDeviceMap.
void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id) {
  auto &manager = ge::ProfilingManager::Instance();
  manager.SetGraphIdToDeviceMap(graph_id, device_id);
}
@@ -0,0 +1,247 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "profiling_init.h" | |||
#include "common/properties_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/debug/log.h" | |||
#include "common/profiling/profiling_properties.h" | |||
#include "runtime/base.h" | |||
#include "common/profiling/command_handle.h" | |||
#include "common/profiling/profiling_manager.h" | |||
namespace {
// Key names looked up in the profiling options JSON by ProfilingInit::ParseOptions.
constexpr const char *kTrainingTrace = "training_trace";
constexpr const char *kFpPoint = "fp_point";
constexpr const char *kBpPoint = "bp_point";
}
namespace ge { | |||
// Returns the single ProfilingInit object.
// The object is a function-local static, so it is created on the first call.
ProfilingInit &ProfilingInit::Instance() {
  static ProfilingInit instance;
  return instance;
}
// Initializes profiling.
//
// Steps:
//   1. InitFromOptions() fills `prof_conf` and decides whether execute profiling is on.
//   2. The runtime control callback is registered (best effort).
//   3. If execute profiling is on, MsprofInit() is called with the collected options.
//
// options: GE options; job_id and the profiling fields are read by this function
//          and by InitFromOptions().
// Returns SUCCESS when profiling is off or was initialized, the status of
// InitFromOptions() when that fails, and FAILED when MsprofInit() reports an error.
ge::Status ProfilingInit::Init(const Options &options) {
  GELOGI("ProfilingInit::Init job_id:%s", options.job_id.c_str());

  struct MsprofGeOptions prof_conf = {{0}};
  bool is_execute_profiling = false;
  const Status ret = InitFromOptions(options, prof_conf, is_execute_profiling);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret);
    REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret);
    return ret;
  }

  // Registration stays best effort (initialization continues as before), but the
  // result is no longer dropped silently.
  if (ProfRegisterCtrlCallback() != SUCCESS) {
    GELOGW("Register profiling ctrl callback failed, continue profiling init");
  }

  if (!is_execute_profiling) {
    GELOGI("The profiling is off, skip the initialization");
    return SUCCESS;
  }

  const uint32_t init_type = static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS);
  const int32_t cb_ret = MsprofInit(init_type, static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
  if (cb_ret != 0) {
    GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", init_type, cb_ret);
    REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", init_type, cb_ret);
    return FAILED;
  }
  GELOGI("Profiling init success");
  return SUCCESS;
}
// Registers CommandHandle with the runtime as the profiling control callback
// for the GE module via rtProfRegisterCtrlCallback().
//
// Returns SUCCESS when the runtime accepts the registration, FAILED otherwise.
ge::Status ProfilingInit::ProfRegisterCtrlCallback() {
  rtProfCtrlHandle callback = CommandHandle;
  const rtError_t rt_ret = rtProfRegisterCtrlCallback(GE, callback);
  if (rt_ret != RT_ERROR_NONE) {
    // Include the runtime error code so the failure can be diagnosed from the log.
    GELOGE(FAILED, "Register CtrlCallBack failed, ret 0x%X", rt_ret);
    REPORT_CALL_ERROR("E19999", "Register CtrlCallBack failed, ret 0x%X", rt_ret);
    return FAILED;
  }
  return SUCCESS;
}
// Decides whether execute profiling is enabled and fills `prof_conf`.
//
// Source of the configuration, in priority order:
//   1. GE options: profiling_mode == "1" and a non-empty profiling_options string.
//   2. Environment: PROFILING_MODE == "true" and a non-empty PROFILING_OPTIONS.
// If neither source is valid the function returns SUCCESS without touching
// `is_execute_profiling` or ProfilingProperties.
//
// options:              GE options (profiling_mode, profiling_options, job_id are read).
// prof_conf:            receives the options string and the job id.
// is_execute_profiling: set to true when profiling is enabled by either source.
// Returns SUCCESS, INTERNAL_ERROR when a string copy fails, or PARAM_INVALID
// when the options string cannot be parsed by ParseOptions().
ge::Status ProfilingInit::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf,
                                          bool &is_execute_profiling) {
  if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
    // enable profiling by ge option
    if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
                  MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
      GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", options.profiling_options.c_str());
      REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", options.profiling_options.c_str());
      return INTERNAL_ERROR;
    }
    is_execute_profiling = true;
    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
           options.profiling_options.c_str());
  } else {
    // fall back to the environment; the buffers stay zero-filled when a variable is missing
    char env_profiling_mode[MMPA_MAX_PATH] = {0x00};
    (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
    (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
    // The env is invalid
    if ((strcmp("true", env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) {
      return SUCCESS;
    }
    // enable profiling by env
    is_execute_profiling = true;
    GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options);
  }

  // Every path that reaches this point has set is_execute_profiling to true
  // (the only other exits above are early returns), so no further check is needed.
  ProfilingProperties::Instance().SetExecuteProfiling(is_execute_profiling);
  ProfilingProperties::Instance().SetLoadProfiling(true);

  // Parse json str for bp fp
  const Status ret = ParseOptions(prof_conf.options);
  if (ret != ge::SUCCESS) {
    GELOGE(ge::PARAM_INVALID, "[Parse][Options]Parse training trace param %s failed, error_code %u", prof_conf.options,
           ret);
    REPORT_CALL_ERROR("E19999", "Parse training trace param %s failed, error_code %u", prof_conf.options, ret);
    return ge::PARAM_INVALID;
  }

  if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) !=
      EOK) {
    GELOGE(INTERNAL_ERROR, "[Copy][JobId]Failed, original job_id %s", options.job_id.c_str());
    REPORT_CALL_ERROR("E19999", "Copy job_id %s failed", options.job_id.c_str());
    return INTERNAL_ERROR;
  }
  GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
  return ge::SUCCESS;
}
// Parses the profiling options JSON and stores the training-trace settings in
// ProfilingProperties.
//
// Behavior:
//   - empty string                         -> PARAM_INVALID
//   - text without "training_trace"        -> SUCCESS, nothing stored
//   - "training_trace" missing or empty    -> SUCCESS, nothing stored
//   - "training_trace" other than "on"     -> PARAM_INVALID
//   - "training_trace" == "on"             -> training trace enabled, fp_point/bp_point
//                                             stored (either may be empty)
//   - any exception while parsing/reading  -> PARAM_INVALID
//
// options: JSON text of the profiling options.
ge::Status ProfilingInit::ParseOptions(const std::string &options) {
  if (options.empty()) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]Profiling options is empty");
    REPORT_INNER_ERROR("E19999", "Profiling options is empty");
    return ge::PARAM_INVALID;
  }
  try {
    Json prof_options = Json::parse(options);
    // Cheap textual pre-check: without the key name in the text there is nothing to configure.
    if (options.find(kTrainingTrace) == std::string::npos) {
      return ge::SUCCESS;
    }
    std::string training_trace;
    if (prof_options.contains(kTrainingTrace)) {
      // get<>() throws when the value is not a string; handled by the catch below.
      training_trace = prof_options[kTrainingTrace].get<std::string>();
    }
    if (training_trace.empty()) {
      GELOGI("Training trace will not take effect.");
      return ge::SUCCESS;
    }
    GELOGI("GE profiling training trace:%s", training_trace.c_str());
    if (training_trace != "on") {
      GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", training_trace.c_str());
      REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str());
      return ge::PARAM_INVALID;
    }
    std::string fp_point;
    std::string bp_point;
    if (prof_options.contains(kFpPoint)) {
      fp_point = prof_options[kFpPoint].get<std::string>();
    }
    if (prof_options.contains(kBpPoint)) {
      bp_point = prof_options[kBpPoint].get<std::string>();
    }
    if (!fp_point.empty() && !bp_point.empty()) {
      GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point.c_str(), fp_point.c_str());
    }
    ProfilingProperties::Instance().SetTrainingTrace(true);
    ProfilingProperties::Instance().SetFpBpPoint(fp_point, bp_point);
  } catch (...) {
    // Log the same code that is returned to the caller.
    GELOGE(ge::PARAM_INVALID, "[Check][Param]Json prof_conf options is invalid");
    REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid");
    return ge::PARAM_INVALID;
  }
  return ge::SUCCESS;
}
void ProfilingInit::StopProfiling() { | |||
uint64_t module = GetProfilingModule(); | |||
// The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal | |||
const auto device_id = ProfilingManager::Instance().GetDeviceID(); | |||
int32_t device_num = static_cast<int32_t>(device_id.size()); | |||
if (device_num != 0) { | |||
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]); | |||
if (device_id_ptr == nullptr) { | |||
GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); | |||
REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); | |||
return; | |||
} | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id_ptr[i] = static_cast<uint32_t>(device_id[i]); | |||
} | |||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); | |||
} | |||
} | |||
// stop profiling | |||
int32_t cb_ret = MsprofFinalize(); | |||
if (cb_ret != 0) { | |||
GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", | |||
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); | |||
return; | |||
} | |||
GELOGI("Stop Profiling success."); | |||
} | |||
void ProfilingInit::ShutDownProfiling() { | |||
StopProfiling(); | |||
ProfilingManager::Instance().PluginUnInit(); | |||
ProfilingProperties::Instance().ClearProperties(); | |||
} | |||
uint64_t ProfilingInit::GetProfilingModule() { | |||
uint64_t module = PROF_MODEL_EXECUTE_MASK | | |||
PROF_RUNTIME_API_MASK | | |||
PROF_RUNTIME_TRACE_MASK | | |||
PROF_SCHEDULE_TIMELINE_MASK | | |||
PROF_SCHEDULE_TRACE_MASK | | |||
PROF_TASK_TIME_MASK | | |||
PROF_SUBTASK_TIME_MASK | | |||
PROF_AICPU_TRACE_MASK | | |||
PROF_AICORE_METRICS_MASK | | |||
PROF_AIVECTORCORE_METRICS_MASK | | |||
PROF_MODEL_LOAD_MASK; | |||
return module; | |||
} | |||
// Forwards a model-id / device-id pair to rtSetDeviceIdByGeModelIdx().
//
// model_id:  model identifier passed to the runtime.
// device_id: device identifier passed to the runtime.
// Returns SUCCESS when the runtime reports RT_ERROR_NONE, FAILED otherwise.
Status ProfilingInit::SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
  if (rtSetDeviceIdByGeModelIdx(model_id, device_id) == RT_ERROR_NONE) {
    return ge::SUCCESS;
  }
  GELOGE(ge::FAILED, "[Set][Device]Set Device id failed");
  return ge::FAILED;
}
// Forwards a model-id / device-id pair to rtUnsetDeviceIdByGeModelIdx().
//
// model_id:  model identifier passed to the runtime.
// device_id: device identifier passed to the runtime.
// Returns SUCCESS when the runtime reports RT_ERROR_NONE, FAILED otherwise.
Status ProfilingInit::UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
  auto rt_ret = rtUnsetDeviceIdByGeModelIdx(model_id, device_id);
  if (rt_ret != RT_ERROR_NONE) {
    // The message previously said "Set", which made unset failures
    // indistinguishable from SetDeviceIdByModelId() failures in the log.
    GELOGE(ge::FAILED, "[Unset][Device]Unset Device id failed");
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
} // namespace ge |
@@ -0,0 +1,54 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_COMMON_PROFILING_PROFILING_INIT_H_ | |||
#define GE_COMMON_PROFILING_PROFILING_INIT_H_ | |||
#include <vector> | |||
#include <nlohmann/json.hpp> | |||
#include <string> | |||
#include "common/profiling/profiling_properties.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "framework/common/ge_types.h" | |||
#include "toolchain/prof_callback.h" | |||
using std::map; | |||
using std::string; | |||
using std::vector; | |||
using Json = nlohmann::json; | |||
namespace ge { | |||
class ProfilingInit { | |||
public: | |||
static ProfilingInit &Instance(); | |||
Status Init(const Options &options); | |||
void StopProfiling(); | |||
Status ProfRegisterCtrlCallback(); | |||
void ShutDownProfiling(); | |||
Status SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
Status UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
private: | |||
ProfilingInit() = default; | |||
~ProfilingInit() = default; | |||
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, bool &is_execute_profiling); | |||
Status ParseOptions(const std::string &options); | |||
uint64_t GetProfilingModule(); | |||
}; | |||
} // namespace ge | |||
#endif // GE_COMMON_PROFILING_PROFILING_INIT_H_ |
@@ -25,11 +25,14 @@ | |||
#include "runtime/base.h" | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "mmpa/mmpa_api.h" | |||
#include "graph/load/graph_loader.h" | |||
namespace { | |||
const char *const kTrainingTrace = "training_trace"; | |||
const char *const kFpPoint = "fp_point"; | |||
const char *const kBpPoint = "bp_point"; | |||
const uint64_t kProfModelExecuteMask = 0x0010; | |||
const uint64_t kProfModelLoadMask = 0x8000000000000000; | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
const int32_t kMaxDeviceNum = 256; | |||
@@ -65,14 +68,15 @@ const std::string kIdx = "idx"; | |||
} // namespace | |||
namespace ge { | |||
ProfilingManager::ProfilingManager() | |||
: is_load_profiling_(false), | |||
is_execute_profiling_(false), | |||
is_training_trace_(false), | |||
subscribe_count_(0), | |||
prof_cb_({nullptr, nullptr}), | |||
index_id_(UINT64_MAX), | |||
subscribe_info_({false, 0, 0}) { | |||
subscribe_info_({false, 0, 0}), | |||
reporter_callback_(nullptr) { | |||
} | |||
ProfilingManager::~ProfilingManager() {} | |||
@@ -82,45 +86,6 @@ ProfilingManager &ProfilingManager::Instance() { | |||
return profiling_manager; | |||
} | |||
ge::Status ProfilingManager::Init(const Options &options) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
vector<int32_t>().swap(device_id_); | |||
subscribe_count_ = 0; | |||
GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); | |||
struct MsprofGeOptions prof_conf = {{ 0 }}; | |||
Status ret = InitFromOptions(options, prof_conf); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret); | |||
REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret); | |||
return ret; | |||
} | |||
if (is_execute_profiling_) { | |||
if (prof_cb_.msprofCtrlCallback == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); | |||
return ge::PARAM_INVALID; | |||
} | |||
int32_t cb_ret = prof_cb_.msprofCtrlCallback( | |||
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), | |||
static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions)); | |||
if (cb_ret != 0) { | |||
GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", | |||
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); | |||
REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", | |||
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), | |||
cb_ret); | |||
return FAILED; | |||
} | |||
GELOGI("Profiling init success"); | |||
} else { | |||
GELOGI("The profiling is off, skip the initialization"); | |||
} | |||
#endif | |||
return SUCCESS; | |||
} | |||
ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
// enable profiling by env | |||
@@ -221,44 +186,6 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||
return ge::SUCCESS; | |||
} | |||
void ProfilingManager::StopProfiling() { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
uint64_t module = GetProfilingModule(); | |||
// The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal | |||
int32_t device_num = static_cast<int32_t>(device_id_.size()); | |||
if (device_num != 0) { | |||
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]); | |||
if (device_id_ptr == nullptr) { | |||
GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); | |||
REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); | |||
return; | |||
} | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]); | |||
} | |||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); | |||
} | |||
} | |||
// stop profiling | |||
if (prof_cb_.msprofCtrlCallback == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); | |||
return; | |||
} | |||
int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), | |||
nullptr, 0); | |||
if (cb_ret != 0) { | |||
GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", | |||
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); | |||
return; | |||
} | |||
GELOGI("Stop Profiling success."); | |||
#endif | |||
} | |||
void ProfilingManager::ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
for (size_t i = 0; i < task.input_format.size(); i++) { | |||
@@ -440,21 +367,6 @@ void ProfilingManager::ReportProfilingData(uint32_t model_id, const std::vector< | |||
#endif | |||
} | |||
uint64_t ProfilingManager::GetProfilingModule() { | |||
uint64_t module = PROF_MODEL_EXECUTE_MASK | | |||
PROF_RUNTIME_API_MASK | | |||
PROF_RUNTIME_TRACE_MASK | | |||
PROF_SCHEDULE_TIMELINE_MASK | | |||
PROF_SCHEDULE_TRACE_MASK | | |||
PROF_TASK_TIME_MASK | | |||
PROF_SUBTASK_TIME_MASK | | |||
PROF_AICPU_TRACE_MASK | | |||
PROF_AICORE_METRICS_MASK | | |||
PROF_AIVECTORCORE_METRICS_MASK | | |||
PROF_MODEL_LOAD_MASK; | |||
return module; | |||
} | |||
void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
if (prof_type == kProfModelSubscribe) { | |||
@@ -485,8 +397,8 @@ void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uin | |||
Status ProfilingManager::ProfModelSubscribe(uint64_t module, void *model) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; | |||
if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { | |||
uint64_t model_load_mask = module & kProfModelLoadMask; | |||
if ((subscribe_count_ == 0) && (model_load_mask == kProfModelLoadMask)) { | |||
// register framework to profiling | |||
// register Framework to profiling | |||
int32_t cb_ret = PluginInit(); | |||
@@ -566,9 +478,9 @@ Status ProfilingManager::ProfModelUnsubscribe(void *model) { | |||
Status ProfilingManager::ProfInit(uint64_t module) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; | |||
uint64_t model_load_mask = module & kProfModelLoadMask; | |||
if (model_load_mask == PROF_MODEL_LOAD_MASK) { | |||
if (model_load_mask == kProfModelLoadMask) { | |||
// register Framework to profiling | |||
int32_t cb_ret = PluginInit(); | |||
if (cb_ret != 0) { | |||
@@ -611,7 +523,7 @@ Status ProfilingManager::ProfFinalize() { | |||
CleanSubscribeInfo(); | |||
int32_t dev_num = -1; | |||
rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); | |||
rtError_t rt_ret = rtProfilerStop(kProfModelLoadMask, dev_num, nullptr); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "[Stop][Profiler]Malloc buffer failed, ret 0x%X", rt_ret); | |||
REPORT_CALL_ERROR("E19999", "Malloc buffer failed when stop profiling, ret 0x%X", rt_ret); | |||
@@ -780,7 +692,7 @@ Status ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std: | |||
"device num %d, ret 0x%X", module, device_num, rt_ret); | |||
return FAILED; | |||
} | |||
if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { | |||
if ((module & kProfModelExecuteMask) == kProfModelExecuteMask) { | |||
for (int32_t i = 0; i < device_num; i++) { | |||
if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { | |||
device_id_.push_back(device_list[i]); | |||
@@ -788,7 +700,7 @@ Status ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std: | |||
} | |||
GELOGI("Prof start: ge execute model start profiling."); | |||
} | |||
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||
if ((module & kProfModelLoadMask) == kProfModelLoadMask) { | |||
GELOGW("Prof start: load model module is invalid."); | |||
} | |||
UpdateDeviceIdModuleMap(kProfStart, module, device_list); | |||
@@ -829,8 +741,8 @@ Status ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std:: | |||
"device num %d, ret 0x%X", module, device_num, rt_ret); | |||
return FAILED; | |||
} | |||
uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; | |||
if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { | |||
uint64_t execute_model_mask = module & kProfModelExecuteMask; | |||
if (execute_model_mask == kProfModelExecuteMask) { | |||
for (int32_t i = 0; i < device_num; i++) { | |||
auto iter = std::find(device_id_.begin(), device_id_.end(), device_list[i]); | |||
if (iter != device_id_.end()) { | |||
@@ -839,7 +751,7 @@ Status ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std:: | |||
} | |||
GELOGI("Prof stop: ge execute model stop profiling."); | |||
} | |||
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||
if ((module & kProfModelLoadMask) == kProfModelLoadMask) { | |||
GELOGW("Prof stop: load model module is invalid."); | |||
} | |||
UpdateDeviceIdModuleMap(kProfStop, module, device_list); | |||
@@ -896,13 +808,13 @@ bool ProfilingManager::ProfilingModelExecuteOn() const { | |||
return execute_model_prof_on; | |||
} | |||
Status ProfilingManager::PluginInit() { | |||
if (prof_cb_.msprofReporterCallback == nullptr) { | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() { | |||
if (reporter_callback_ == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
return ge::PARAM_INVALID; | |||
} | |||
int32_t cb_ret = prof_cb_.msprofReporterCallback( | |||
int32_t cb_ret = reporter_callback_( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), | |||
nullptr, 0); | |||
@@ -912,7 +824,7 @@ Status ProfilingManager::PluginInit() { | |||
return INTERNAL_ERROR; | |||
} | |||
cb_ret = prof_cb_.msprofReporterCallback( | |||
cb_ret = reporter_callback_( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_DATA_MAX_LEN), | |||
&reporter_max_len_, sizeof(uint32_t)); | |||
@@ -927,12 +839,12 @@ Status ProfilingManager::PluginInit() { | |||
void ProfilingManager::PluginUnInit() const { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
if (prof_cb_.msprofReporterCallback == nullptr) { | |||
if (reporter_callback_ == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
return; | |||
} | |||
int32_t cb_ret = prof_cb_.msprofReporterCallback( | |||
int32_t cb_ret = reporter_callback_( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), | |||
nullptr, 0); | |||
@@ -942,13 +854,14 @@ void ProfilingManager::PluginUnInit() const { | |||
#endif | |||
} | |||
Status ProfilingManager::CallMsprofReport(ReporterData &reporter_data) const { | |||
if (prof_cb_.msprofReporterCallback == nullptr) { | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( | |||
ReporterData &reporter_data) const { | |||
if (reporter_callback_ == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
return ge::PARAM_INVALID; | |||
} | |||
return prof_cb_.msprofReporterCallback( | |||
return reporter_callback_( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), | |||
static_cast<void *>(&reporter_data), sizeof(ReporterData)); | |||
@@ -26,8 +26,8 @@ | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "framework/common/ge_types.h" | |||
#include "external/register/register_types.h" | |||
#include "toolchain/prof_callback.h" | |||
#include "runtime/stream.h" | |||
#include "toolchain/prof_callback.h" | |||
using std::map; | |||
using std::string; | |||
@@ -52,7 +52,6 @@ namespace { | |||
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000; | |||
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000; | |||
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000; | |||
} // namespace | |||
namespace ge { | |||
class OpDesc; | |||
@@ -68,24 +67,17 @@ struct ProfSubscribeInfo { | |||
uint32_t graph_id; | |||
}; | |||
struct MsprofCallback { | |||
MsprofCtrlCallback msprofCtrlCallback; | |||
MsprofReporterCallback msprofReporterCallback; | |||
}; | |||
class ProfilingManager { | |||
public: | |||
ProfilingManager(); | |||
virtual ~ProfilingManager(); | |||
static ProfilingManager &Instance(); | |||
Status Init(const Options &options); | |||
Status ProfInit(uint64_t module); | |||
Status ProfFinalize(); | |||
Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
Status ProfModelSubscribe(uint64_t module, void *model); | |||
Status ProfModelUnsubscribe(void *model); | |||
void StopProfiling(); | |||
bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | |||
// report model load profiling data flag, data contain task desc info, step info, model load fusion op info | |||
bool ProfilingModelLoadOn() const { return is_load_profiling_; } | |||
@@ -100,9 +92,8 @@ class ProfilingManager { | |||
Status PluginInit(); | |||
void PluginUnInit() const; | |||
Status CallMsprofReport(ReporterData &reporter_data) const; | |||
struct MsprofCallback &GetMsprofCallback() { return prof_cb_; } | |||
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | |||
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | |||
const MsprofReporterCallback GetMsprofReporterCallback() const { return reporter_callback_; } | |||
void SetMsprofReporterCallback(MsprofReporterCallback func) { reporter_callback_ = func; } | |||
void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | |||
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||
@@ -116,6 +107,7 @@ class ProfilingManager { | |||
void CleanSubscribeInfo(); | |||
void SetGraphIdToModelMap(uint32_t graph_id, uint32_t model_id) { model_id_map_[graph_id] = model_id; } | |||
Status GetModelIdFromGraph(uint32_t graph_id, uint32_t &model_id); | |||
const vector<int32_t> &GetDeviceID() const { return device_id_; } | |||
private: | |||
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | |||
@@ -139,7 +131,6 @@ class ProfilingManager { | |||
uint32_t subscribe_count_; | |||
std::mutex mutex_; | |||
std::mutex mutex_report_; | |||
MsprofCallback prof_cb_; | |||
std::string fp_point_; | |||
std::string bp_point_; | |||
uint32_t reporter_max_len_ = 0; | |||
@@ -147,6 +138,7 @@ class ProfilingManager { | |||
std::map<uint32_t, uint32_t> device_id_map_; // key: graph_id, value: device_id | |||
std::map<uint32_t, uint32_t> model_id_map_; // key: graph_id, value: model_id | |||
ProfSubscribeInfo subscribe_info_; | |||
MsprofReporterCallback reporter_callback_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ |
@@ -0,0 +1,124 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "profiling_properties.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/debug/log.h" | |||
#include "graph/ge_context.h" | |||
namespace {
// Size of the buffer that receives the PROFILING_OPTIONS environment variable.
const uint64_t kMsProfOptionsMaxlen = 2048;
// Key names looked up in the profiling options JSON.
const char *const kFpPoint = "fp_point";
const char *const kBpPoint = "bp_point";
}  // namespace
namespace ge{ | |||
// Returns the single ProfilingProperties object.
// The object is a function-local static, so it is created on the first call.
ProfilingProperties &ProfilingProperties::Instance() {
  static ProfilingProperties instance;
  return instance;
}
void ProfilingProperties::SetLoadProfiling(bool is_load_profiling) { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
is_load_profiling_ = is_load_profiling; | |||
} | |||
bool ProfilingProperties::IsLoadProfiling() { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
return is_load_profiling_; | |||
} | |||
void ProfilingProperties::SetExecuteProfiling(bool is_exec_profiling) { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
is_execute_profiling_ = is_exec_profiling; | |||
} | |||
bool ProfilingProperties::IsExecuteProfiling() { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
return is_execute_profiling_; | |||
} | |||
void ProfilingProperties::SetTrainingTrace(bool is_train_trace) { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
is_training_trace_ = is_train_trace; | |||
} | |||
void ProfilingProperties::GetFpBpPoint(std::string &fp_point, std::string &bp_point) { | |||
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
if (!fp_point_.empty() && !bp_point_.empty()) { | |||
fp_point = fp_point_; | |||
bp_point = bp_point_; | |||
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), | |||
fp_point.c_str()); | |||
return; | |||
} | |||
// ProfApi mode and training trace is set | |||
// Parse options first | |||
char env_profiling_options[kMsProfOptionsMaxlen] = {0x00}; | |||
bool is_profiling_valid = false; | |||
std::string profiling_options; | |||
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS && | |||
!profiling_options.empty()) { | |||
is_profiling_valid = true; | |||
} else { | |||
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, kMsProfOptionsMaxlen); | |||
if (ret != EN_OK) { | |||
GELOGI("PROFILING_OPTIONS env is not exist."); | |||
return; | |||
} | |||
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); | |||
profiling_options = env_profiling_options; | |||
is_profiling_valid = true; | |||
} | |||
if (is_profiling_valid) { | |||
try { | |||
Json prof_options = Json::parse(profiling_options); | |||
if (prof_options.contains(kFpPoint)) { | |||
fp_point_ = prof_options[kFpPoint]; | |||
} | |||
if (prof_options.contains(kBpPoint)) { | |||
bp_point_ = prof_options[kBpPoint]; | |||
} | |||
fp_point = fp_point_; | |||
bp_point = bp_point_; | |||
if (!fp_point_.empty() && !bp_point_.empty()) { | |||
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||
} | |||
} catch (...) { | |||
GELOGW("Json prof options is invalid."); | |||
return; | |||
} | |||
} | |||
return; | |||
} | |||
void ProfilingProperties::SetFpBpPoint(const std::string &fp_point, const std::string &bp_point) { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
fp_point_ = fp_point; | |||
bp_point_ = bp_point; | |||
} | |||
void ProfilingProperties::ClearProperties() { | |||
std::lock_guard<std::mutex>lock(mutex_); | |||
is_load_profiling_ = false; | |||
is_execute_profiling_ = false; | |||
is_training_trace_ = false; | |||
fp_point_.clear(); | |||
bp_point_.clear(); | |||
} | |||
} // namespace ge |
@@ -0,0 +1,58 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_COMMON_PROFILING_PROPERTIES_H_ | |||
#define GE_COMMON_PROFILING_PROPERTIES_H_ | |||
#include <nlohmann/json.hpp> | |||
#include <mutex> | |||
#include <string> | |||
#include <vector> | |||
#include "framework/common/ge_types.h" | |||
using Json = nlohmann::json; | |||
namespace ge { | |||
// Holder of the profiling switches (load / execute / training trace) and the
// fp/bp point names. Instances are only reachable through Instance(); the
// constructor and destructor are private.
class ProfilingProperties {
 public:
  static ProfilingProperties &Instance();

  // Load-profiling switch.
  void SetLoadProfiling(bool is_load_profiling);
  bool IsLoadProfiling();

  // Execute-profiling switch.
  void SetExecuteProfiling(bool is_execute_profiling);
  bool IsExecuteProfiling();

  // Training-trace switch.
  void SetTrainingTrace(bool is_training_trace);
  bool IsTrainTrace();

  // NOTE(review): the two inline getters below read the flags without taking
  // mutex_, while ClearProperties() writes the same flags under it -- confirm
  // that unsynchronized reads are acceptable for the callers.
  bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
  // True only when both load profiling and execute profiling are enabled.
  bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }

  // Forward/backward point names.
  void SetFpBpPoint(const std::string &fp_point, const std::string &bp_point);
  void GetFpBpPoint(std::string &fp_point, std::string &bp_point);

  // Switches every flag off and empties both point names.
  void ClearProperties();

 private:
  ProfilingProperties() = default;
  ~ProfilingProperties() = default;

  std::mutex mutex_;
  std::mutex point_mutex_;
  bool is_load_profiling_ = false;
  bool is_execute_profiling_ = false;
  bool is_training_trace_ = false;
  std::string fp_point_;
  std::string bp_point_;
};
} // namespace ge | |||
#endif // GE_COMMON_PROFILING_PROPERTIES_H_ |
@@ -1,6 +1,7 @@ | |||
set(SRC_LIST | |||
"ge_executor.cc" | |||
"../common/profiling/profiling_manager.cc" | |||
"../common/profiling/command_handle.cc" | |||
"../common/dump/dump_op.cc" | |||
"../common/dump/opdebug_register.cc" | |||
"../common/dump/exception_dumper.cc" | |||
@@ -33,6 +33,8 @@ | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "graph/opsproto_manager.h" | |||
#include "ge_local_engine/engine/host_cpu_engine.h" | |||
#include "runtime/base.h" | |||
#include "common/profiling/command_handle.h" | |||
using std::string; | |||
using std::vector; | |||
@@ -250,7 +252,6 @@ Status GeExecutor::Initialize() { | |||
GELOGW("Already initialized, no need to be initialized again."); | |||
return ge::SUCCESS; | |||
} | |||
OpTilingManager::GetInstance().LoadSo(); | |||
Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); | |||
@@ -277,7 +278,6 @@ Status GeExecutor::Initialize() { | |||
profiling_options.device_id = 0; | |||
// job id need to be set, the value is meaningless; | |||
profiling_options.job_id = "1"; | |||
ProfilingManager::Instance().Init(profiling_options); | |||
isInit_ = true; | |||
GELOGI("Init GeExecutor over."); | |||
@@ -295,7 +295,6 @@ Status GeExecutor::Finalize() { | |||
// Stop profiling | |||
if (ProfilingManager::Instance().ProfilingOn()) { | |||
ProfilingManager::Instance().StopProfiling(); | |||
ProfilingManager::Instance().PluginUnInit(); | |||
} | |||
@@ -29,7 +29,6 @@ COMMON_LOCAL_SRC_FILES := \ | |||
common/dump/dump_manager.cc \ | |||
common/dump/dump_op.cc \ | |||
common/dump/dump_server.cc \ | |||
common/helper/model_cache_helper.cc \ | |||
ge_local_engine/engine/host_cpu_engine.cc \ | |||
@@ -24,7 +24,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
common/fp16_t.cc \ | |||
common/ge/plugin_manager.cc\ | |||
common/ge/op_tiling_manager.cc\ | |||
common/helper/model_cache_helper.cc \ | |||
common/profiling/profiling_manager.cc \ | |||
common/dump/dump_manager.cc \ | |||
common/dump/dump_properties.cc \ | |||
@@ -17,7 +17,7 @@ | |||
#include "graph/build/task_generator.h" | |||
#include <string> | |||
#include <utility> | |||
#include "common/profiling/profiling_manager.h" | |||
#include "common/profiling/profiling_properties.h" | |||
#include "framework/common/types.h" | |||
#include "framework/common/util.h" | |||
#include "framework/common/debug/ge_log.h" | |||
@@ -945,7 +945,7 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint | |||
vector<uint32_t> &all_reduce_nodes, std::string &fp_point_str, | |||
std::string &bp_point_str) const { | |||
ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str); | |||
ProfilingProperties::Instance().GetFpBpPoint(fp_point_str, bp_point_str); | |||
Status ret = SUCCESS; | |||
if (fp_point_str.empty()) { | |||
@@ -976,8 +976,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
vector<uint32_t> &all_reduce_nodes) const { | |||
GE_CHECK_NOTNULL(graph); | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || | |||
ProfilingProperties::Instance().ProfilingTrainingTraceOn(); | |||
if (!is_profiling) { | |||
GELOGD("Profiling is not open."); | |||
return SUCCESS; | |||
@@ -1071,8 +1071,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||
vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
vector<domi::TaskDef> &task_def_list) { | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || | |||
ProfilingProperties::Instance().ProfilingTrainingTraceOn(); | |||
bool is_insert_fp_profiling_task = false; | |||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); | |||
bool is_insert_bp_profiling_task = false; | |||
@@ -1167,8 +1167,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||
vector<domi::TaskDef> &task_def_list) { | |||
GE_CHECK_NOTNULL(op_desc); | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || | |||
ProfilingProperties::Instance().ProfilingTrainingTraceOn(); | |||
bool is_insert_bp_profiling_task = false; | |||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | |||
bool is_insert_end_profiling_task = false; | |||
@@ -27,6 +27,7 @@ | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "common/model/ge_root_model.h" | |||
#include "common/formats/utils/formats_trans_utils.h" | |||
#include "framework/omg/omg_inner_types.h" | |||
namespace ge { | |||
thread_local uint32_t device_count = 0; | |||
@@ -330,6 +331,17 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
GenModelId(&model_id); | |||
GELOGD("Generate new model_id:%u", model_id); | |||
} | |||
if (!domi::GetContext().train_flag) { | |||
int32_t tmp_device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&tmp_device_id); | |||
if (rt_ret != RT_ERROR_NONE || tmp_device_id < 0) { | |||
GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); | |||
REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); | |||
return ge::FAILED; | |||
} | |||
uint32_t device_id = static_cast<uint32_t>(tmp_device_id); | |||
rtSetDeviceIdByGeModelIdx(model_id, device_id); | |||
} | |||
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
string om_name; | |||
if (IsNeedHybridLoad(*ge_root_model)) { | |||
@@ -448,6 +460,10 @@ Status ModelManager::Unload(uint32_t model_id) { | |||
} else { | |||
GELOGI("Unload model %u success.no need reset device,device_count: %u", model_id, device_count); | |||
} | |||
uint32_t device_id = 0; | |||
if (!domi::GetContext().train_flag) { | |||
rtUnsetDeviceIdByGeModelIdx(model_id, device_id); | |||
} | |||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | |||
exception_infos_.clear(); | |||
return SUCCESS; | |||
@@ -1146,7 +1162,17 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
GELOGE(ret, "[Load][RootModel] failed, ret:%d, model_id:%u.", ret, model_id); | |||
return ret; | |||
} | |||
if (!domi::GetContext().train_flag) { | |||
int32_t tmp_device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&tmp_device_id); | |||
if (rt_ret != RT_ERROR_NONE || tmp_device_id < 0) { | |||
GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); | |||
REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); | |||
return ge::FAILED; | |||
} | |||
uint32_t device_id = static_cast<uint32_t>(tmp_device_id); | |||
rtSetDeviceIdByGeModelIdx(model_id, device_id); | |||
} | |||
if (model_helper.GetModelType()) { | |||
bool is_shape_unknown = false; | |||
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), | |||
@@ -109,7 +109,6 @@ | |||
#include "register/custom_pass_helper.h" | |||
#include "external/graph/types.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "common/profiling/profiling_manager.h" | |||
namespace { | |||
const char *const kSummary = "Summary"; | |||
@@ -462,9 +461,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
const std::map<std::string, std::string> &options, | |||
const OmgContext &omg_context) { | |||
IncreaseGraphCount(graph_id); | |||
auto device_id = GetContext().DeviceId(); | |||
GELOGD("Device id is %u", device_id); | |||
ProfilingManager::Instance().SetGraphIdToDeviceMap(graph_id, device_id); | |||
// validation for adding graphs of same graph_id in multi-thread secenario | |||
// 1.previous thread owns same graph_id has finished the AddGraph procession | |||
if (GetAddGraphCondition(graph_id) == kDoneAdded) { | |||
@@ -43,6 +43,7 @@ | |||
#include "runtime/kernel.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "external/runtime/rt_error_codes.h" | |||
#include "common/profiling/profiling_init.h" | |||
using Json = nlohmann::json; | |||
@@ -194,7 +195,6 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
InitOptions(options); | |||
// In train and infer, profiling is always needed. | |||
InitProfiling(this->options_); | |||
// 1.`is_train_mode_` means case: train | |||
// 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | |||
// these two case with logical device id | |||
@@ -206,16 +206,6 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
return status; | |||
} | |||
// Initializes the GE context and then the profiling manager with `options`,
// holding status_mutex_ for both steps. A profiling initialization failure is
// only reported as a warning; it is not propagated to the caller.
void GELib::InitProfiling(Options &options) {
  GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id);
  const std::lock_guard<std::mutex> guard(status_mutex_);
  GetContext().Init();
  const auto init_status = ProfilingManager::Instance().Init(options);
  if (init_status == SUCCESS) {
    return;
  }
  GELOGW("Profiling init failed.");
}
void GELib::SetDefaultPrecisionMode(map<string, string> &new_options) { | |||
auto iter = new_options.find(PRECISION_MODE); | |||
if (iter != new_options.end()) { | |||
@@ -463,9 +453,6 @@ Status GELib::Finalize() { | |||
GELOGI("Analyzer finalization"); | |||
Analyzer::GetInstance()->Finalize(); | |||
// Shut down profiling | |||
ShutDownProfiling(); | |||
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||
GELOGI("System ShutDown."); | |||
mid_state = SystemShutdownWithOptions(this->options_); | |||
@@ -494,15 +481,6 @@ Status GELib::Finalize() { | |||
return SUCCESS; | |||
} | |||
void GELib::ShutDownProfiling() { | |||
std::lock_guard<std::mutex> lock(status_mutex_); | |||
if (ProfilingManager::Instance().ProfilingOn()) { | |||
ProfilingManager::Instance().StopProfiling(); | |||
ProfilingManager::Instance().PluginUnInit(); | |||
} | |||
} | |||
// Returns the shared pointer stored in instancePtr_ (the GELib singleton).
std::shared_ptr<GELib> GELib::GetInstance() {
  return instancePtr_;
}
@@ -65,7 +65,6 @@ class GE_FUNC_VISIBILITY GELib { | |||
bool IsTrainMode() { return is_train_mode_; } | |||
void InitProfiling(Options &options); | |||
void ShutDownProfiling(); | |||
Status InitSystemWithoutOptions(); | |||
Status InitSystemWithOptions(Options &options); | |||
@@ -36,6 +36,7 @@ | |||
#include "runtime/mem.h" | |||
#include "ir_build/option_utils.h" | |||
#include "common/profiling/profiling_manager.h" | |||
#include "common/profiling/profiling_init.h" | |||
namespace ge { | |||
namespace { | |||
@@ -288,6 +289,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu | |||
GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); | |||
if (mutex_.try_lock()) { | |||
std::lock_guard<std::mutex> lock(mutex_, std::adopt_lock); | |||
auto device_id = GetContext().DeviceId(); | |||
GELOGD("device is is %u", device_id); | |||
ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id); | |||
if (!init_flag_) { | |||
GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.", | |||
session_id_, graph_id); | |||
@@ -339,6 +343,9 @@ Status InnerSession::RunGraphWithStreamAsync(uint32_t graph_id, rtStream_t strea | |||
"session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); | |||
return GE_SESS_INIT_FAILED; | |||
} | |||
auto device_id = GetContext().DeviceId(); | |||
GELOGD("device id is %u", device_id); | |||
ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id); | |||
UpdateThreadContext(graph_id); | |||
vector<GeTensor> ge_inputs; | |||
for (auto &item : inputs) { | |||
@@ -382,6 +389,9 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) { | |||
session_id_, graph_id); | |||
return GE_SESS_INIT_FAILED; | |||
} | |||
auto device_id = GetContext().DeviceId(); | |||
GELOGD("remove device id %u", device_id); | |||
ProfilingInit::Instance().UnsetDeviceIdByModelId(graph_id, device_id); | |||
UpdateThreadContext(graph_id); | |||
Status ret = graph_manager_.RemoveGraph(graph_id); | |||
if (ret != SUCCESS) { | |||
@@ -18,32 +18,8 @@ | |||
#define INC_FRAMEWORK_COMMON_GE_PROFILING_H_ | |||
#include "ge/ge_api_error_codes.h" | |||
#include "toolchain/prof_callback.h" | |||
#include "runtime/base.h" | |||
const int MAX_DEV_NUM = 64; | |||
// Identifiers of the profiling commands; values are consecutive from 0.
enum ProfCommandHandleType {
  kProfCommandhandleInit = 0,
  kProfCommandhandleStart = 1,
  kProfCommandhandleStop = 2,
  kProfCommandhandleFinalize = 3,
  kProfCommandhandleModelSubscribe = 4,
  kProfCommandhandleModelUnsubscribe = 5
};
// Payload passed along with a profiling command.
// Field order is part of the layout and must not change.
struct ProfCommandHandleData {
  uint64_t profSwitch;
  // Length of the device id list.
  uint32_t devNums;
  uint32_t devIdList[MAX_DEV_NUM];
  uint32_t modelId;
};
GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); | |||
GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); | |||
GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func); | |||
GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); | |||
/// | |||
/// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading | |||
/// @return Status result | |||
@@ -52,4 +28,6 @@ GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id | |||
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); | |||
GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id); | |||
#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ |
@@ -1,24 +0,0 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ | |||
#define INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ | |||
#include "framework/common/profiling/ge_profiling.h" | |||
GE_FUNC_VISIBILITY bool IsInitialize(); | |||
#endif // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ |
@@ -40,3 +40,11 @@ rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback) { | |||
rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback) { | |||
return 0; | |||
} | |||
// Test stub: ignores all arguments and always returns 0.
int32_t MsprofInit(uint32_t dataType, void *data, uint32_t dataLen) {
  (void)dataType;
  (void)data;
  (void)dataLen;
  return 0;
}
// Test stub: performs no work and always returns 0.
int32_t MsprofFinalize() {
  const int32_t stub_result = 0;
  return stub_result;
}
@@ -552,6 +552,18 @@ rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t | |||
return RT_ERROR_NONE; | |||
} | |||
// Test stub: ignores the model index / device id pair and returns RT_ERROR_NONE.
rtError_t rtSetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId) {
  (void)modelIdx;
  (void)deviceId;
  return RT_ERROR_NONE;
}
// Test stub: ignores the model index / device id pair and returns RT_ERROR_NONE.
rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId) {
  (void)modelIdx;
  (void)deviceId;
  return RT_ERROR_NONE;
}
// Test stub: does not store or invoke the callback; always returns RT_ERROR_NONE.
rtError_t rtProfRegisterCtrlCallback(uint32_t logId, rtProfCtrlHandle callback) {
  (void)logId;
  (void)callback;
  return RT_ERROR_NONE;
}
#ifdef __cplusplus | |||
} | |||
#endif |
@@ -113,6 +113,9 @@ set(COMMON_SRC_FILES | |||
"${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" | |||
"${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/profiling_init.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/command_handle.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/ge_profiling.cc" | |||
"${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" | |||
"${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" | |||
@@ -717,6 +720,8 @@ set(SINGLE_OP_TEST_FILES | |||
# Unit-test sources for the profiling components.
# Every entry spells out its file extension: extension-less source names rely
# on CMake's legacy extension guessing (policy CMP0115) and were inconsistent
# with the first entry.
# NOTE(review): assumes the two newer tests use the ".cc" suffix like the
# existing one -- confirm against the files on disk.
set(PROFILING_MNG_TEST_FILES
    "profiling/ge_profiling_manager_unittest.cc"
    "profiling/profiling_properties_unittest.cc"
    "profiling/profiling_init_unittest.cc"
)
set(HYBRID_TEST_FILES | |||
@@ -896,7 +896,7 @@ TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) { | |||
} | |||
TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
ProfilingManager::Instance().reporter_callback_ = MsprofReport; | |||
ProfileInfo profile; | |||
profile.fusion_info.op_name = "relu"; | |||
@@ -909,7 +909,7 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
} | |||
TEST_F(UtestDavinciModel, Sink_time_profile) { | |||
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
ProfilingManager::Instance().reporter_callback_ = MsprofReport; | |||
DavinciModel model(0, nullptr); | |||
InputData current_data; | |||
model.SinkTimeProfile(current_data); | |||
@@ -1031,7 +1031,7 @@ TEST_F(UtestDavinciModel, NnExecute) { | |||
input_data.blobs = output_data.blobs; | |||
EXPECT_EQ(input_data.blobs.size(), 1); | |||
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
ProfilingManager::Instance().reporter_callback_ = MsprofReport; | |||
ProfilingManager::Instance().device_id_.emplace_back(0); | |||
model.task_list_.resize(1); | |||
EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); | |||
@@ -26,6 +26,7 @@ | |||
#define protected public | |||
#define private public | |||
#include "common/profiling/profiling_manager.h" | |||
#include "common/profiling/command_handle.h" | |||
#include "graph/ge_local_context.h" | |||
#include "inc/framework/common/profiling/ge_profiling.h" | |||
#include "graph/manager/graph_manager.h" | |||
@@ -37,6 +38,17 @@ | |||
using namespace ge; | |||
using namespace std; | |||
namespace {
// Profiling command identifiers used by the tests in this file; values are
// consecutive from 0.
enum ProfCommandHandleType {
  kProfCommandhandleInit = 0,
  kProfCommandhandleStart = 1,
  kProfCommandhandleStop = 2,
  kProfCommandhandleFinalize = 3,
  kProfCommandhandleModelSubscribe = 4,
  kProfCommandhandleModelUnsubscribe = 5
};
}  // namespace
class UtestGeProfilinganager : public testing::Test { | |||
protected: | |||
void SetUp() override {} | |||
@@ -97,11 +109,11 @@ TEST_F(UtestGeProfilinganager, ParseOptions) { | |||
} | |||
// PluginInit is expected to return INTERNAL_ERROR with the reporter callback
// installed. Only reporter_callback_ is touched: the writes to the legacy
// prof_cb_.msprofReporterCallback member were stale duplicates of the same
// assignment.
TEST_F(UtestGeProfilinganager, plungin_init_) {
  ProfilingManager::Instance().reporter_callback_ = ReporterCallback;
  Status ret = ProfilingManager::Instance().PluginInit();
  EXPECT_EQ(ret, INTERNAL_ERROR);
  // Restore the singleton so the callback does not leak into later tests.
  ProfilingManager::Instance().reporter_callback_ = nullptr;
}
TEST_F(UtestGeProfilinganager, report_data_) { | |||
@@ -169,31 +181,33 @@ TEST_F(UtestGeProfilinganager, get_device_from_graph) { | |||
OmgContext context; | |||
Status ret = graph_manager.AddGraph(graph_id, graph, options, context); | |||
EXPECT_EQ(ret, ge::SUCCESS); | |||
ProfSetGraphIdToDeviceMap(graph_id, device_id); | |||
ret = ProfGetDeviceFormGraphId(graph_id, device_id); | |||
EXPECT_EQ(ret, ge::SUCCESS); | |||
} | |||
// Sends a RT_PROF_CTRL_SWITCH command carrying an rtProfCommandHandle payload
// through CommandHandle and expects success.
// The previous body declared prof_type, prof_data, prof_ptr and ret twice
// (once per API generation); only the rtProfCommandHandle / CommandHandle
// variant is kept, since it is the one that has the `type` field set below.
TEST_F(UtestGeProfilinganager, handle_subscribe_info) {
  uint32_t prof_type = RT_PROF_CTRL_SWITCH;
  // Value-initialize so the fields not set explicitly (profSwitchHi, devNums,
  // devIdList) are zero instead of indeterminate.
  rtProfCommandHandle prof_data{};
  prof_data.profSwitch = 0;
  prof_data.modelId = 1;
  prof_data.type = 0;
  domi::GetContext().train_flag = true;
  auto prof_ptr = std::make_shared<rtProfCommandHandle>(prof_data);
  Status ret = CommandHandle(prof_type, static_cast<void *>(prof_ptr.get()), sizeof(prof_data));
  EXPECT_EQ(ret, ge::SUCCESS);
}
// Registers subscribe info on the profiling manager, sends an unsubscribe
// command through CommandHandle, then cleans the subscribe info again.
// The previous body declared prof_type, prof_data, prof_ptr and ret twice
// (once per API generation); only the rtProfCommandHandle / CommandHandle
// variant is kept.
TEST_F(UtestGeProfilinganager, handle_unsubscribe_info) {
  // NOTE(review): the subscribe test passes RT_PROF_CTRL_SWITCH as the command
  // type and puts the sub-command in prof_data.type; here the sub-command value
  // is passed as the command type itself -- confirm which form is intended.
  uint32_t prof_type = kProfCommandhandleModelUnsubscribe;
  // Value-initialize so `type`, `devNums` and `devIdList` are zero instead of
  // indeterminate when the handler reads the payload.
  rtProfCommandHandle prof_data{};
  prof_data.profSwitch = 0;
  prof_data.modelId = 1;
  domi::GetContext().train_flag = true;
  auto &profiling_manager = ge::ProfilingManager::Instance();
  profiling_manager.SetSubscribeInfo(0, 1, true);
  auto prof_ptr = std::make_shared<rtProfCommandHandle>(prof_data);
  Status ret = CommandHandle(prof_type, static_cast<void *>(prof_ptr.get()), sizeof(prof_data));
  // The result is intentionally not asserted (the original did not either).
  (void)ret;
  profiling_manager.CleanSubscribeInfo();
}
@@ -0,0 +1,76 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <bits/stdc++.h> | |||
#include <dirent.h> | |||
#include <gtest/gtest.h> | |||
#include <fstream> | |||
#include <map> | |||
#include <string> | |||
#define protected public | |||
#define private public | |||
#include "common/profiling/profiling_init.h" | |||
#include "graph/ge_local_context.h" | |||
#include "graph/manager/graph_manager.h" | |||
#undef protected | |||
#undef private | |||
using namespace ge; | |||
using namespace std; | |||
// Fixture for the ProfilingInit tests; it adds no per-test setup or teardown.
class UtestGeProfilingInit : public testing::Test {};
// Initializes profiling with PROFILING_MODE set in the environment and a full
// JSON option string, and expects Init to succeed.
TEST_F(UtestGeProfilingInit, test_init) {
  setenv("PROFILING_MODE", "true", true);

  Options profiling_opts;
  profiling_opts.device_id = 0;
  profiling_opts.job_id = "0";
  profiling_opts.profiling_mode = "1";
  profiling_opts.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})";

  auto init_status = ge::ProfilingInit::Instance().Init(profiling_opts);
  EXPECT_EQ(init_status, ge::SUCCESS);
}
// Smoke test: only checks that StopProfiling can be called.
TEST_F(UtestGeProfilingInit, test_stop) {
  ge::ProfilingInit::Instance().StopProfiling();
}
// Smoke test: only checks that ShutDownProfiling can be called.
TEST_F(UtestGeProfilingInit, test_shut) {
  ge::ProfilingInit::Instance().ShutDownProfiling();
}
// Smoke test: binds model 0 to device 0 through ProfilingInit.
// NOTE(review): the returned status is not asserted -- consider pinning the
// expected value once the stub behaviour is confirmed.
TEST_F(UtestGeProfilingInit, test_set_deviceId) {
  uint32_t device_id = 0;
  uint32_t model_id = 0;
  auto ret = ge::ProfilingInit::Instance().SetDeviceIdByModelId(model_id, device_id);
  (void)ret;
}
// Smoke test: removes the model 0 / device 0 binding through ProfilingInit.
// NOTE(review): the returned status is not asserted -- consider pinning the
// expected value once the stub behaviour is confirmed.
TEST_F(UtestGeProfilingInit, test_unset_deviceId) {
  uint32_t device_id = 0;
  uint32_t model_id = 0;
  auto ret = ge::ProfilingInit::Instance().UnsetDeviceIdByModelId(model_id, device_id);
  (void)ret;
}
@@ -0,0 +1,72 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <bits/stdc++.h> | |||
#include <dirent.h> | |||
#include <gtest/gtest.h> | |||
#include <fstream> | |||
#include <map> | |||
#include <string> | |||
#define protected public | |||
#define private public | |||
#include "common/profiling/profiling_properties.h" | |||
#include "graph/ge_local_context.h" | |||
#include "graph/manager/graph_manager.h" | |||
#undef protected | |||
#undef private | |||
using namespace ge; | |||
using namespace std; | |||
class UtestGeProfilingProperties : public testing::Test { | |||
protected: | |||
void SetUp() override {} | |||
void TearDown() override {} | |||
}; | |||
// Enabling execute profiling must be visible through IsExecuteProfiling().
TEST_F(UtestGeProfilingProperties, test_execute_profiling) {
  auto &props = ge::ProfilingProperties::Instance();
  props.SetExecuteProfiling(true);
  EXPECT_EQ(props.IsExecuteProfiling(), true);
}
// Enabling training trace must be visible through ProfilingTrainingTraceOn().
TEST_F(UtestGeProfilingProperties, test_training_trace) {
  auto &props = ge::ProfilingProperties::Instance();
  props.SetTrainingTrace(true);
  EXPECT_EQ(props.ProfilingTrainingTraceOn(), true);
}
// Round-trips the fp/bp point names through SetFpBpPoint / GetFpBpPoint.
// The values are read back into fresh, empty strings: previously the output
// variables already held "fp"/"bp" before the call, so the expectations passed
// even if GetFpBpPoint wrote nothing.
TEST_F(UtestGeProfilingProperties, test_fpbp_point) {
  auto &profiling_properties = ge::ProfilingProperties::Instance();
  profiling_properties.SetFpBpPoint("fp", "bp");

  std::string fp_point;
  std::string bp_point;
  profiling_properties.GetFpBpPoint(fp_point, bp_point);
  EXPECT_EQ(fp_point, "fp");
  EXPECT_EQ(bp_point, "bp");
}
// ProfilingOn() must report true once both the load and the execute switches
// are enabled.
TEST_F(UtestGeProfilingProperties, test_profiling_on) {
  auto &props = ge::ProfilingProperties::Instance();
  props.SetLoadProfiling(true);
  props.SetExecuteProfiling(true);
  EXPECT_EQ(props.ProfilingOn(), true);
}
@@ -33,6 +33,7 @@ extern "C" { | |||
#endif | |||
#endif | |||
#define RT_PROF_MAX_DEV_NUM 64 | |||
typedef int32_t rtError_t; | |||
static const int32_t RT_ERROR_NONE = 0; // success | |||
@@ -80,6 +81,13 @@ typedef enum tagRtLimitType { | |||
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms | |||
} rtLimitType_t; | |||
/* Profiling control command types; values are consecutive from 0. */
typedef enum {
  RT_PROF_CTRL_INVALID = 0,
  RT_PROF_CTRL_SWITCH = 1,
  RT_PROF_CTRL_REPORTER = 2,
  RT_PROF_CTRL_BUTT = 3,
} rtProfCtrlType_t;
typedef struct rtExceptionInfo { | |||
uint32_t taskid; | |||
uint32_t streamid; | |||
@@ -88,6 +96,15 @@ typedef struct rtExceptionInfo { | |||
uint32_t retcode; | |||
} rtExceptionInfo; | |||
typedef struct rtProfCommandHandle { | |||
uint64_t profSwitch; | |||
uint64_t profSwitchHi; | |||
uint32_t devNums; | |||
uint32_t devIdList[RT_PROF_MAX_DEV_NUM]; | |||
uint32_t modelId; | |||
uint32_t type; | |||
} rtProfCommandHandle_t; | |||
typedef void (*rtErrorCallback)(rtExceptionType); | |||
typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | |||
@@ -118,6 +135,8 @@ typedef void *rtLabel_t; | |||
*/ | |||
typedef void *rtModel_t; | |||
typedef rtError_t (*rtProfCtrlHandle)(uint32_t type, void *data, uint32_t len); | |||
/** | |||
* @ingroup profiling_base | |||
* @brief runtime handle. | |||
@@ -357,6 +376,14 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ | |||
*/ | |||
RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t logId, rtProfCtrlHandle callback); | |||
RTS_API rtError_t rtSetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); | |||
RTS_API rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); | |||
RTS_API rtError_t rtGetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -114,15 +114,6 @@ enum MsprofCtrlCallbackType { | |||
MSPROF_CTRL_PROF_SWITCH_OFF // for prof switch off | |||
}; | |||
/* Capacity of MsprofCommandHandle::devIdList. */
#define MSPROF_MAX_DEV_NUM (64)

/*
 * Payload of a profiling command.
 * Field order is part of the layout and must not change.
 */
struct MsprofCommandHandle {
  uint64_t profSwitch;
  /* length of device id list */
  uint32_t devNums;
  uint32_t devIdList[MSPROF_MAX_DEV_NUM];
  uint32_t modelId;
};
/** | |||
* @name MsprofCtrlCallback | |||
* @brief callback to start/stop profiling | |||