Browse Source

Pre Merge pull request !2031 from zhupuxu/profiling

pull/2031/MERGE
zhupuxu Gitee 3 years ago
parent
commit
cda5d0d4d6
22 changed files with 880 additions and 423 deletions
  1. +6
    -3
      ge/CMakeLists.txt
  2. +88
    -0
      ge/client/ge_api.cc
  3. +1
    -0
      ge/common/CMakeLists.txt
  4. +0
    -230
      ge/common/profiling/ge_profiling.cc
  5. +0
    -26
      ge/common/profiling/ge_runner_profiling.cc
  6. +246
    -0
      ge/common/profiling/profiling_init.cc
  7. +54
    -0
      ge/common/profiling/profiling_init.h
  8. +236
    -89
      ge/common/profiling/profiling_manager.cc
  9. +21
    -15
      ge/common/profiling/profiling_manager.h
  10. +110
    -0
      ge/common/profiling/profiling_properties.cc
  11. +62
    -0
      ge/common/profiling/profiling_properties.h
  12. +0
    -2
      ge/executor/ge_executor.cc
  13. +8
    -8
      ge/graph/build/task_generator.cc
  14. +1
    -1
      ge/graph/build/task_generator.h
  15. +1
    -1
      ge/graph/execute/graph_execute.cc
  16. +0
    -4
      ge/graph/manager/graph_manager.cc
  17. +9
    -11
      ge/init/gelib.cc
  18. +10
    -0
      ge/session/inner_session.cc
  19. +0
    -24
      inc/framework/common/profiling/ge_profiling.h
  20. +0
    -0
      inc/framework/common/profiling/profiling_init.h
  21. +27
    -0
      third_party/fwkacllib/inc/runtime/base.h
  22. +0
    -9
      third_party/fwkacllib/inc/toolchain/prof_callback.h

+ 6
- 3
ge/CMakeLists.txt View File

@@ -107,6 +107,8 @@ target_link_libraries(ge_proto_client PRIVATE
)
endif ()

set(

##################################################################
set(EXECUTOR_SRC_LIST
"common/dump/dump_op.cc"
@@ -259,10 +261,11 @@ set(EXECUTOR_SRC_LIST
##################################################################
set(COMPILER_SRC_LIST
"analyzer/analyzer.cc"
"common/dump/dump_op.cc"
#"common/dump/dump_op.cc"
"common/ge/op_tiling_manager.cc"
"common/ge/plugin_manager.cc"
"common/profiling/profiling_manager.cc"
#"common/helper/model_cache_helper.cc"
#"common/profiling/profiling_manager.cc"
"engine_manager/dnnengine_manager.cc"
"ge_local_engine/engine/host_cpu_engine.cc"
"ge_opt_info/ge_opt_info.cc"
@@ -473,7 +476,7 @@ set(RUNNER_SRC_LIST
"client/ge_api.cc"
"session/inner_session.cc"
"session/session_manager.cc"
"common/profiling/ge_runner_profiling.cc"
"common/profiling/profiling_init.cc"
"graph/manager/memory_api.cc"
"graph/manager/util/hcom_util.cc"
"graph/load/model_manager/task_info/hccl_task_info.cc"


+ 88
- 0
ge/client/ge_api.cc View File

@@ -35,6 +35,10 @@
#include "common/util/error_manager/error_manager.h"
#include "toolchain/plog.h"
#include "ir_build/option_utils.h"
#include "framework/common/ge_types.h"
#include "external/ge/ge_api_types.h"
#include "graph/ge_context.h"
#include "common/profiling/profiling_init.h"

using domi::OpRegistry;
using std::map;
@@ -43,6 +47,84 @@ using std::vector;

namespace {
// Length limit constant; its use is not visible in this part of the file.
const int32_t kMaxStrLen = 128;
// Radix handed to std::strtol / std::strtoll when numeric option strings are parsed.
const int kDecimal = 10;
// Device id used by InitOptions when the run mode is a training mode and no device id option is given.
const int kDefaultDeviceIdForTrain = 0;
// Device id used by InitOptions for non-training run modes when no device id option is given.
const int kDefaultDeviceIdForInfer = -1;
// NOTE(review): presumably the default for the fp-ceiling-mode global option; the use site is not visible here.
const char *const kGlobalOptionFpCeilingModeDefault = "2";

void InitOptions(const map<string, string> &option_map, ge::Options &options) {
GELOGI("InitOptions start");
options.session_id = 0;
auto is_train_mode = false;
auto iter = option_map.find(ge::OPTION_GRAPH_RUN_MODE);
if (iter != option_map.end()) {
if (ge::GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= ge::TRAIN) {
is_train_mode = true;
}
}
iter = option_map.find(ge::OPTION_EXEC_SESSION_ID);
if (iter != option_map.end()) {
options.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal);
}
options.device_id = is_train_mode ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer;
iter = option_map.find(ge::OPTION_EXEC_DEVICE_ID);
if (iter != option_map.end()) {
options.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal));
}
iter = option_map.find(ge::OPTION_EXEC_JOB_ID);
if (iter != option_map.end()) {
options.job_id = iter->second.c_str();
}
options.isUseHcom = false;
iter = option_map.find(ge::OPTION_EXEC_IS_USEHCOM);
if (iter != option_map.end()) {
std::istringstream(iter->second) >> options.isUseHcom;
}
options.isUseHvd = false;
iter = option_map.find(ge::OPTION_EXEC_IS_USEHVD);
if (iter != option_map.end()) {
std::istringstream(iter->second) >> options.isUseHvd;
}
options.deployMode = false;
iter = option_map.find(ge::OPTION_EXEC_DEPLOY_MODE);
if (iter != option_map.end()) {
std::istringstream(iter->second) >> options.deployMode;
}
iter = option_map.find(ge::OPTION_EXEC_POD_NAME);
if (iter != option_map.end()) {
options.podName = iter->second.c_str();
}
iter = option_map.find(ge::OPTION_EXEC_PROFILING_MODE);
if (iter != option_map.end()) {
options.profiling_mode = iter->second.c_str();
}
iter = option_map.find(ge::OPTION_EXEC_PROFILING_OPTIONS);
if (iter != option_map.end()) {
options.profiling_options = iter->second.c_str();
}
iter = option_map.find(ge::OPTION_EXEC_RANK_ID);
if (iter != option_map.end()) {
options.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal);
}
iter = option_map.find(ge::OPTION_EXEC_RANK_TABLE_FILE);
if (iter != option_map.end()) {
options.rankTableFile = iter->second.c_str();
}
options.enable_atomic = true;
iter = option_map.find(ge::OPTION_EXEC_ATOMIC_FLAG);
GE_IF_BOOL_EXEC(iter != option_map.end(),
options.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal));
GELOGI("ge InnerInitialize, the enable_atomic_flag in options_ is %d", options.enable_atomic);
}

// Initializes the GE context and then profiling from `options`.
// A profiling failure is only logged as a warning; nothing is reported to the caller.
void InitProfiling(ge::Options &options) {
  GELOGI("InitProfiling start");
  ge::GetContext().Init();

  const auto profiling_status = ge::ProfilingInit::Instance().Init(options);
  if (profiling_status == ge::SUCCESS) {
    return;
  }
  GELOGW("Profiling init failed.");
}
} // namespace

static bool g_ge_initialized = false;
@@ -91,6 +173,7 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitializeImpl(const std::map<string, string> &options) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
GELOGI("GEInitializeImpl start");
GELOGT(TRACE_INIT, "GEInitialize start");
std::string path_base = ge::GELib::GetPath();
auto ret = ErrorManager::GetInstance().Init(path_base);
@@ -128,6 +211,9 @@ Status GEInitializeImpl(const std::map<string, string> &options) {
if (CheckOptionsValid(options) != SUCCESS) {
return FAILED;
}
ge::Options str_options;
InitOptions(options, str_options);
InitProfiling(str_options);
GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid");

ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOpsProtoInit);
@@ -173,6 +259,7 @@ Status GEInitializeImpl(const std::map<string, string> &options) {

// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitialize(const std::map<string, string> &options) {
GELOGI("GEInitialize with string");
ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
@@ -181,6 +268,7 @@ Status GEInitialize(const std::map<string, string> &options) {
}

Status GEInitialize(const std::map<AscendString, AscendString> &options) {
GELOGI("GEInitialize with AscendString");
ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
std::map<std::string, std::string> str_options;
for (auto &option : options) {


+ 1
- 0
ge/common/CMakeLists.txt View File

@@ -50,6 +50,7 @@ set(SRC_LIST
"${GE_CODE_DIR}/ge/common/transop_util.cc"
"${GE_CODE_DIR}/ge/common/types.cc"
"${GE_CODE_DIR}/ge/common/util.cc"
"${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc"
)

if (NOT ENABLE_D AND NOT ENABLE_ACL)


+ 0
- 230
ge/common/profiling/ge_profiling.cc View File

@@ -19,245 +19,15 @@
#include "common/profiling/profiling_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/load/graph_loader.h"
#include "graph/ge_context.h"
#include "init/gelib.h"
#include "framework/common/ge_inner_error_codes.h"
#include "common/model/ge_model.h"
#include "framework/omg/omg_inner_types.h"

namespace {
// Index of the comma-separated device id list inside the parameter vector built by
// TransProfConfigToParam (key, value, key, value layout).
const uint32_t kDeviceListIndex = 3;
// Parameter keys emitted by TransProfConfigToParam.
const std::string kDeviceNums = "devNums";
const std::string kDeviceIdList = "devIdList";
// Command names placed into ge::Command::cmd_type for each profiling command type.
const std::string kProfilingInit = "prof_init";
const std::string kProfilingFinalize = "prof_finalize";
const std::string kProfilingStart = "prof_start";
const std::string kProfilingStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
// Name under which the set-device callback is registered with the runtime.
const std::string kRtSetDeviceRegName = "profiling";
// Parameter key emitted by NeedUnsubscribe in front of the resolved model id.
const std::string kPofilingModelId = "modelId";

// Maps every supported profiling command type to its command name; ProfCommandHandle
// rejects any type that is missing from this table.
const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = {
{kProfCommandhandleInit, kProfilingInit},
{kProfCommandhandleStart, kProfilingStart},
{kProfCommandhandleStop, kProfilingStop},
{kProfCommandhandleFinalize, kProfilingFinalize},
{kProfCommandhandleModelSubscribe, kProfModelSubscribe},
{kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}};

// NOTE(review): the uses of the three constants below are not visible in this part of the file.
const uint64_t kModelId = ge::INVALID_MODEL_ID;
const uint16_t kStepStart = 0;
const uint16_t kStepEnd = 1;

// For a model-unsubscribe command issued while a subscription is active, replaces
// `prof_params` with {"modelId", <model id resolved from graph_id>}.
// For every other combination `prof_params` is left untouched.
// Returns ge::SUCCESS, or the status of the graph-to-model lookup when it fails
// (in that case `prof_params` holds only the "modelId" key).
ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe,
                           uint32_t graph_id, vector<string> &prof_params) {
  if (type != kProfCommandhandleModelUnsubscribe || !is_subscribe) {
    return ge::SUCCESS;
  }

  prof_params.clear();
  prof_params.emplace_back(kPofilingModelId);

  uint32_t model_id = 0;
  const auto ret = ge::ProfilingManager::Instance().GetModelIdFromGraph(graph_id, model_id);
  if (ret != ge::SUCCESS) {
    // Message fixed: the original text contained a duplicated "not".
    GELOGE(ret, "graph_id:%u not found", graph_id);
    return ret;
  }
  prof_params.emplace_back(std::to_string(model_id));
  return ge::SUCCESS;
}
} // namespace

// Converts the device section of `profCommand` into the flat parameter list
// {"devNums", <count>, "devIdList", "<id0>,<id1>,..."}.
// Returns false (after the first three entries were already written) when the
// device count is zero; returns true otherwise.
bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) {
  prof_config_params.clear();
  prof_config_params.emplace_back(kDeviceNums);
  prof_config_params.emplace_back(std::to_string(profCommand.devNums));
  prof_config_params.emplace_back(kDeviceIdList);

  if (profCommand.devNums == 0) {
    GELOGW("The device num is invalid.");
    return false;
  }

  // Join the ids with commas: a separator goes in front of every id but the first.
  std::string joined_ids;
  for (uint32_t idx = 0; idx < profCommand.devNums; ++idx) {
    if (idx > 0) {
      joined_ids.append(",");
    }
    joined_ids.append(std::to_string(profCommand.devIdList[idx]));
  }

  prof_config_params.push_back(joined_ids);
  return true;
}

bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
if (deviceid_list == nullptr) {
GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr");
REPORT_INNER_ERROR("E19999", "Device id list is nullptr");
return false;
}
if (device_nums == 0 || device_nums > MAX_DEV_NUM) {
GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums);
REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums);
return false;
}

// real device num
int32_t dev_count = 0;
rtError_t rt_err = rtGetDeviceCount(&dev_count);
if (rt_err != RT_ERROR_NONE) {
GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err);
REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err);
return false;
}

if (device_nums > static_cast<uint32_t>(dev_count)) {
GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]",
device_nums, dev_count);
REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]",
device_nums, dev_count);
return false;
}

std::set<uint32_t> record;
for (size_t i = 0; i < device_nums; ++i) {
uint32_t dev_id = deviceid_list[i];
if (dev_id >= static_cast<uint32_t>(dev_count)) {
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)",
dev_id, dev_count);
REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count);
return false;
}
if (record.count(dev_id) > 0) {
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id);
REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id);
return false;
}
record.insert(dev_id);
}
return true;
}

// Stores `func` as the msprof control callback of the ProfilingManager.
// Returns ge::PARAM_INVALID for a null callback. When a control callback is already
// stored, the new one is ignored with a warning and ge::SUCCESS is still returned.
ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]Msprof ctrl callback is nullptr");
    REPORT_INNER_ERROR("E19999", "Msprof ctrl callback is nullptr");
    return ge::PARAM_INVALID;
  }

  const bool already_registered =
      ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr;
  if (already_registered) {
    GELOGW("Msprof ctrl callback is exist, just ignore it.");
    return ge::SUCCESS;
  }

  ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
  return ge::SUCCESS;
}

// Forwards the msprof set-device callback to the runtime, registered under the
// name held in kRtSetDeviceRegName.
// Returns ge::PARAM_INVALID for a null callback, the runtime's return value when
// the registration fails, and ge::SUCCESS otherwise.
ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofSetDeviceCallback callback is nullptr");
    REPORT_INNER_ERROR("E19999", "MsprofSetDeviceCallback callback is nullptr");
    return ge::PARAM_INVALID;
  }

  // Hand the callback over to the runtime.
  const ge::Status register_ret =
      rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
  if (register_ret == ge::SUCCESS) {
    return ge::SUCCESS;
  }

  GELOGE(register_ret, "[Pass][MsprofSetDeviceCallback]To runtime failed, ret 0x%X", register_ret);
  REPORT_CALL_ERROR("E19999", "Pass MsprofSetDeviceCallback to runtime failed, ret 0x%X", register_ret);
  return register_ret;
}

// Stores `func` as the msprof reporter callback of the ProfilingManager and passes
// it on to the runtime.
// Returns ge::PARAM_INVALID for a null callback and the runtime's return value when
// the hand-over fails. When a reporter callback is already stored, the new one is
// ignored with a warning and ge::SUCCESS is returned.
ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr");
    REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr");
    return ge::PARAM_INVALID;
  }

  if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) {
    GELOGW("Msprof reporter callback is exist, just ignore it.");
    return ge::SUCCESS;
  }

  GELOGI("GE register Msprof reporter callback.");
  ge::ProfilingManager::Instance().SetMsprofReporterCallback(func);

  // Make the same callback known to the runtime.
  const ge::Status runtime_ret = rtSetMsprofReporterCallback(func);
  if (runtime_ret != ge::SUCCESS) {
    GELOGE(runtime_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u",
           runtime_ret);
    REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u",
                      runtime_ret);
    return runtime_ret;
  }
  // The original leaves a placeholder here for passing the callback to hccl as well.
  return ge::SUCCESS;
}

// Translates a profiling command coming from the profiling toolchain into a
// ge::Command and executes it through GraphLoader::CommandHandle.
//
// type: command type; must be a key of kProfCommandTypeMap, else ge::PARAM_INVALID.
// data: points to a ProfCommandHandleData; may be null only for the finalize command.
// len:  not used by this function.
//
// Returns ge::SUCCESS when the command was executed (or when a model-subscribe
// request in training mode was merely recorded), ge::PARAM_INVALID / ge::FAILED otherwise.
ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) {
  if (type != kProfCommandhandleFinalize) {
    GE_CHECK_NOTNULL(data);
  }
  // Null is only possible for the finalize command; every access below is guarded for that case.
  ProfCommandHandleData *prof_config_param = static_cast<ProfCommandHandleData *>(data);

  const auto iter = kProfCommandTypeMap.find(type);
  if (iter == kProfCommandTypeMap.end()) {
    GELOGW("The prof comand type is invalid.");
    return ge::PARAM_INVALID;
  }

  const bool is_start_or_stop = (type == kProfCommandhandleStart) || (type == kProfCommandhandleStop);
  std::vector<string> prof_params;
  if (is_start_or_stop) {
    if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
      return ge::FAILED;
    }

    if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
      GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed");
      REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed");
      return ge::PARAM_INVALID;
    }
  }

  auto &profiling_manager = ge::ProfilingManager::Instance();
  const auto is_train = domi::GetContext().train_flag;
  if (type == kProfCommandhandleModelSubscribe && is_train) {
    // In training mode the subscription is only recorded; no command is sent.
    profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true);
    return ge::SUCCESS;
  }

  const auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe;
  // GraphId is actually stored in prof_config_param. Finalize may arrive without
  // any payload, so the field is only read when a payload exists.
  uint32_t graph_id = 0U;
  if (prof_config_param != nullptr) {
    graph_id = prof_config_param->modelId;
  }
  ge::Status ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "graph_id:%u not found", graph_id);
    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"value", "parameter", "reason"}),
                       std::vector<std::string>({std::to_string(graph_id),
                                                 "GraphToModelMap",
                                                 "graph_id does not exist!"}));
    return ge::FAILED;
  }

  ge::GraphLoader graph_loader;
  ge::Command command;
  command.cmd_type = iter->second;
  command.cmd_params = prof_params;
  if (type != kProfCommandhandleFinalize) {
    command.module_index = prof_config_param->profSwitch;
  }
  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(),
         command.module_index);
  if (is_start_or_stop) {
    // prof_params is laid out as {key, value, key, value}: the device count is the
    // value at index 1 (index 0 is only the "devNums" key).
    const size_t device_nums_value_index = 1U;
    GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[device_nums_value_index].c_str(),
           prof_params[kDeviceListIndex].c_str());
  }

  ret = graph_loader.CommandHandle(command);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u",
           iter->second.c_str(), ret);
    REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u",
                      iter->second.c_str(), ret);
    return ge::FAILED;
  }

  GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index);
  return ge::SUCCESS;
}

ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream) {
static bool is_first_run = true;
int32_t device_id = 0;


+ 0
- 26
ge/common/profiling/ge_runner_profiling.cc View File

@@ -1,26 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "framework/common/profiling/ge_runner_profiling.h"
#include "init/gelib.h"

bool IsInitialize() {
std::shared_ptr<ge::GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) {
return false;
}
return true;
}

+ 246
- 0
ge/common/profiling/profiling_init.cc View File

@@ -0,0 +1,246 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "profiling_init.h"

#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "common/profiling/profiling_properties.h"
#include "runtime/base.h"
#include "profiling_manager.h"

// JSON keys looked up in the profiling options string by ProfilingInit::ParseOptions.
namespace {
// Key whose value must be "on" for training trace to be enabled.
const char *const kTrainingTrace = "training_trace";
// Keys of the optional fp / bp point values handed to ProfilingProperties::SetFpBpPoint.
const char *const kFpPoint = "fp_point";
const char *const kBpPoint = "bp_point";
}

namespace ge {

// Returns the single ProfilingInit object, created on first use as a function-local static.
ProfilingInit &ProfilingInit::Instance() {
  static ProfilingInit instance;
  return instance;
}

// Initializes profiling from `options`.
// The configuration is first resolved by InitFromOptions; when that reports that
// profiling should run, MsprofInit is called with the resolved MsprofGeOptions.
// Returns the InitFromOptions status on its failure, FAILED when MsprofInit returns
// non-zero, and SUCCESS otherwise (including when profiling is off).
ge::Status ProfilingInit::Init(const Options &options) {
  GELOGI("ProfilingInit::Init job_id:%s", options.job_id.c_str());

  struct MsprofGeOptions prof_conf = {{0}};
  bool profiling_requested = false;
  const Status config_ret = InitFromOptions(options, prof_conf, profiling_requested);
  if (config_ret != SUCCESS) {
    GELOGE(config_ret, "[Init][Profiling]Failed, error_code %u", config_ret);
    REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", config_ret);
    return config_ret;
  }

  if (!profiling_requested) {
    GELOGI("The profiling is off, skip the initialization");
    return SUCCESS;
  }

  const uint32_t init_type = static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS);
  const int32_t msprof_ret = MsprofInit(init_type, static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
  if (msprof_ret != 0) {
    GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", init_type, msprof_ret);
    REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", init_type, msprof_ret);
    return FAILED;
  }
  GELOGI("Profiling init success");
  return SUCCESS;
}

// Registers ProfilingManager's CommandHandle with the runtime as the control
// callback for the GE module. Returns FAILED when the runtime rejects the
// registration, SUCCESS otherwise.
ge::Status ProfilingInit::ProfRegisterCtrlCallback() {
  auto &manager = ge::ProfilingManager::Instance();
  MsprofCtrlHandle ctrl_handle = manager.CommandHandle;

  const rtError_t register_ret = rtProfRegisterCtrlCallback(GE, ctrl_handle);
  if (register_ret == RT_ERROR_NONE) {
    return SUCCESS;
  }
  GELOGE(FAILED, "register func failed");
  return FAILED;
}

// Resolves the profiling configuration and fills `prof_conf`.
//
// Sources, in priority order:
//   1. GE options: profiling_mode == "1" together with non-empty profiling_options.
//   2. Environment: PROFILING_MODE == "true" together with non-empty PROFILING_OPTIONS.
// When neither applies profiling is off.
//
// is_execute_profiling: set to whether profiling should run. The same value is
//   always stored in ProfilingProperties, including the "off" case, so that the
//   singleton never keeps a stale "on" state from an earlier initialization.
//
// Returns SUCCESS when profiling is off or fully configured, INTERNAL_ERROR when a
// string copy into `prof_conf` fails, PARAM_INVALID when the options cannot be parsed.
ge::Status ProfilingInit::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf,
                                          bool &is_execute_profiling) {
  is_execute_profiling = false;

  if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
    // enable profiling by ge option
    if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
                  MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
      GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", options.profiling_options.c_str());
      REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", options.profiling_options.c_str());
      return INTERNAL_ERROR;
    }
    is_execute_profiling = true;
    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
           options.profiling_options.c_str());
  } else {
    // Fall back to the environment; unset variables leave the zero-initialized buffers untouched.
    char env_profiling_mode[MMPA_MAX_PATH] = {0x00};
    (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
    (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
    const bool env_enabled = (strcmp("true", env_profiling_mode) == 0) && (prof_conf.options[0] != '\0');
    if (env_enabled) {
      // enable profiling by env
      is_execute_profiling = true;
      GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options);
    }
  }

  // Record the decision in both cases; previously the "off" path returned before this call.
  ProfilingProperties::Instance().SetExecuteProfiling(is_execute_profiling);
  if (!is_execute_profiling) {
    return SUCCESS;
  }

  // Parse json str for bp fp
  Status ret = ParseOptions(prof_conf.options);
  if (ret != ge::SUCCESS) {
    GELOGE(ge::PARAM_INVALID, "[Parse][Options]Parse training trace param %s failed, error_code %u", prof_conf.options,
           ret);
    REPORT_CALL_ERROR("E19999", "Parse training trace param %s failed, error_code %u", prof_conf.options, ret);
    return ge::PARAM_INVALID;
  }

  if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) !=
      EOK) {
    GELOGE(INTERNAL_ERROR, "[Copy][JobId]Failed, original job_id %s", options.job_id.c_str());
    REPORT_CALL_ERROR("E19999", "Copy job_id %s failed", options.job_id.c_str());
    return INTERNAL_ERROR;
  }
  GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
  return ge::SUCCESS;
}

// Parses the profiling options JSON text and records the training-trace settings
// in ProfilingProperties.
//
// Returns PARAM_INVALID when `options` is empty, when parsing or value extraction
// throws, or when "training_trace" has a non-empty value other than "on".
// Returns SUCCESS otherwise; training trace is only enabled when the value is "on".
ge::Status ProfilingInit::ParseOptions(const std::string &options) {
  if (options.empty()) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]Profiling options is empty");
    REPORT_INNER_ERROR("E19999", "Profiling options is empty");
    return ge::PARAM_INVALID;
  }

  try {
    // The text is parsed first, so malformed JSON is rejected even without a training-trace key.
    Json parsed = Json::parse(options);
    const bool mentions_trace = options.find(kTrainingTrace) != std::string::npos;
    if (!mentions_trace) {
      return ge::SUCCESS;
    }

    // Reads the value stored under `key`, or an empty string when the key is absent.
    const auto read_string = [&parsed](const char *key) {
      std::string value;
      if (parsed.contains(key)) {
        value = parsed[key];
      }
      return value;
    };

    const std::string trace_switch = read_string(kTrainingTrace);
    if (trace_switch.empty()) {
      GELOGI("Training trace will not take effect.");
      return ge::SUCCESS;
    }
    GELOGI("GE profiling training trace:%s", trace_switch.c_str());
    if (trace_switch != "on") {
      GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", trace_switch.c_str());
      REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", trace_switch.c_str());
      return ge::PARAM_INVALID;
    }

    const std::string fp_point = read_string(kFpPoint);
    const std::string bp_point = read_string(kBpPoint);
    const bool both_points_given = !fp_point.empty() && !bp_point.empty();
    if (both_points_given) {
      GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point.c_str(), fp_point.c_str());
    }
    ProfilingProperties::Instance().SetTrainingTrace(true);
    ProfilingProperties::Instance().SetFpBpPoint(fp_point, bp_point);
  } catch (...) {
    // Any exception raised while parsing or reading values lands here.
    GELOGE(FAILED, "[Check][Param]Json prof_conf options is invalid");
    REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid");
    return ge::PARAM_INVALID;
  }
  return ge::SUCCESS;
}

void ProfilingInit::StopProfiling() {
uint64_t module = GetProfilingModule();
// The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal
auto device_id = ProfilingProperties::Instance().GetDeviceID();
int32_t device_num = static_cast<int32_t>(device_id.size());
if (device_num != 0) {
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) {
GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null.");
REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null");
return;
}
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_id[i]);
}
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
}
}

// stop profiling
int32_t cb_ret = MsprofFinalize();
if (cb_ret != 0) {
GELOGW("call msprofCtrlCallback failed, type:%u, return:%d",
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret);
return;
}
GELOGI("Stop Profiling success.");
}

// Shuts profiling down: stops profiling first, then un-initializes the
// ProfilingManager plugin.
void ProfilingInit::ShutDownProfiling() {
  StopProfiling();
  auto &manager = ProfilingManager::Instance();
  manager.PluginUnInit();
}

// Builds the module bit mask passed to rtProfilerStop by OR-ing together the
// profiling masks listed below.
uint64_t ProfilingInit::GetProfilingModule() {
  return PROF_MODEL_EXECUTE_MASK |
         PROF_RUNTIME_API_MASK |
         PROF_RUNTIME_TRACE_MASK |
         PROF_SCHEDULE_TIMELINE_MASK |
         PROF_SCHEDULE_TRACE_MASK |
         PROF_TASK_TIME_MASK |
         PROF_SUBTASK_TIME_MASK |
         PROF_AICPU_TRACE_MASK |
         PROF_AICORE_METRICS_MASK |
         PROF_AIVECTORCORE_METRICS_MASK |
         PROF_MODEL_LOAD_MASK;
}

// Passes the (model_id, device_id) pair to the runtime through rtSetDeviceIdByModelIdx.
// Returns ge::FAILED when the runtime call does not return RT_ERROR_NONE, ge::SUCCESS otherwise.
Status ProfilingInit::SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
  if (rtSetDeviceIdByModelIdx(model_id, device_id) == RT_ERROR_NONE) {
    return ge::SUCCESS;
  }
  GELOGE(ge::FAILED, "[Set][Device]Set Device id failed");
  return ge::FAILED;
}

// Passes the (model_id, device_id) pair to the runtime.
// Returns ge::FAILED when the runtime call does not return RT_ERROR_NONE, ge::SUCCESS otherwise.
// NOTE(review): this calls rtSetDeviceIdByModelIdx, the same runtime function used by
// SetDeviceIdByModelId, and the failure text also says "Set". This looks like a
// copy-paste from the setter - confirm whether a dedicated unset runtime API should be
// called here instead.
Status ProfilingInit::UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
  if (rtSetDeviceIdByModelIdx(model_id, device_id) == RT_ERROR_NONE) {
    return ge::SUCCESS;
  }
  GELOGE(ge::FAILED, "[Unset][Device]Set Device id failed");
  return ge::FAILED;
}
} // namespace ge

+ 54
- 0
ge/common/profiling/profiling_init.h View File

@@ -0,0 +1,54 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_COMMON_PROFILING_PROFILING_INIT_H_
#define GE_COMMON_PROFILING_PROFILING_INIT_H_

#include <vector>
#include <nlohmann/json.hpp>
#include <string>

#include "common/profiling/profiling_properties.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/ge_types.h"
#include "toolchain/prof_callback.h"

using std::map;
using std::string;
using std::vector;
using Json = nlohmann::json;

namespace ge {
// Singleton that drives profiling start-up and shut-down for GE.
// Obtain the object through Instance(); construction and destruction are private.
class ProfilingInit {
 public:
  // Returns the single ProfilingInit object.
  static ProfilingInit &Instance();

  // Resolves the profiling configuration from `options` and initializes msprof when profiling is on.
  Status Init(const Options &options);
  // Registers ProfilingManager's command handler as the runtime control callback.
  Status ProfRegisterCtrlCallback();

  // Stops profiling on the recorded devices and finalizes msprof.
  void StopProfiling();
  // Calls StopProfiling() and then un-initializes the ProfilingManager plugin.
  void ShutDownProfiling();

  // Forward the (model_id, device_id) pair to the runtime.
  Status SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);
  Status UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);

 private:
  ProfilingInit() = default;
  ~ProfilingInit() = default;

  // Fills `prof_conf` from the GE options or the environment and reports whether profiling should run.
  Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, bool &is_execute_profiling);
  // Parses the profiling options JSON text for the training-trace settings.
  Status ParseOptions(const std::string &options);
  // Returns the module bit mask used when stopping the runtime profiler.
  uint64_t GetProfilingModule();
};
} // namespace ge

#endif // GE_COMMON_PROFILING_PROFILING_INIT_H_

+ 236
- 89
ge/common/profiling/profiling_manager.cc View File

@@ -25,6 +25,7 @@
#include "runtime/base.h"
#include "graph/load/model_manager/davinci_model.h"
#include "mmpa/mmpa_api.h"
#include "graph/load/graph_loader.h"

namespace {
const char *const kTrainingTrace = "training_trace";
@@ -62,17 +63,39 @@ const std::string kShape = "shape";
const std::string kIdx = "idx";

#endif
// NOTE(review): the code that uses the constants below lies outside the visible part
// of this file; the descriptions are based on names and values only - confirm.
// Presumably the index of the device id list inside a command parameter vector.
const uint32_t kDeviceListIndex = 3;
// Presumably the number of supported profiling command types (the table below has six entries).
const uint32_t kCommandNum = 6;
// Presumably the upper bound for the number of device ids in one command.
const int kMaxDevNum = 64;
// Parameter keys for the device count and the device id list.
const std::string kDeviceNums = "devNums";
const std::string kDeviceIdList = "devIdList";
// Command names associated with the profiling command types in the table below.
const std::string kProfilingInit = "prof_init";
const std::string kProfilingFinalize = "prof_finalize";
const std::string kProfilingStart = "prof_start";
const std::string kProfilingStop = "prof_stop";
const std::string kProfilingModelSubscribe = "prof_model_subscribe";
const std::string kProfilingModelUnsubscribe = "prof_model_cancel_subscribe";
// Parameter key for a model id.
const std::string kPofilingModelId = "modelId";

// Maps each profiling command type to its command name.
const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = {
{kProfCommandhandleInit, kProfilingInit},
{kProfCommandhandleStart, kProfilingStart},
{kProfCommandhandleStop, kProfilingStop},
{kProfCommandhandleFinalize, kProfilingFinalize},
{kProfCommandhandleModelSubscribe, kProfilingModelSubscribe},
{kProfCommandhandleModelUnsubscribe, kProfilingModelUnsubscribe}};
} // namespace

namespace ge {

ProfSubscribeInfo ProfilingManager::subscribe_info_ = {false, 0, 0};
MsprofReporterCallback ProfilingManager::reporter_callback_ = nullptr;

ProfilingManager::ProfilingManager()
: is_load_profiling_(false),
is_execute_profiling_(false),
is_training_trace_(false),
subscribe_count_(0),
prof_cb_({nullptr, nullptr}),
index_id_(UINT64_MAX),
subscribe_info_({false, 0, 0}) {
index_id_(UINT64_MAX) {
}

ProfilingManager::~ProfilingManager() {}
@@ -82,45 +105,6 @@ ProfilingManager &ProfilingManager::Instance() {
return profiling_manager;
}

// Initializes profiling from `options` when DAVINCI_SUPPORT_PROFILING is defined;
// otherwise does nothing and returns SUCCESS.
// Resets the recorded device ids and the subscribe count, resolves the configuration
// through InitFromOptions and, when profiling is on, invokes the registered msprof
// control callback with the resolved MsprofGeOptions.
ge::Status ProfilingManager::Init(const Options &options) {
#ifdef DAVINCI_SUPPORT_PROFILING
  vector<int32_t>().swap(device_id_);
  subscribe_count_ = 0;
  GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str());

  struct MsprofGeOptions prof_conf = {{ 0 }};
  const Status config_ret = InitFromOptions(options, prof_conf);
  if (config_ret != SUCCESS) {
    GELOGE(config_ret, "[Init][Profiling]Failed, error_code %u", config_ret);
    REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", config_ret);
    return config_ret;
  }

  if (!is_execute_profiling_) {
    GELOGI("The profiling is off, skip the initialization");
    return SUCCESS;
  }

  // Profiling is on: the control callback must have been registered beforehand.
  if (prof_cb_.msprofCtrlCallback == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr");
    REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr");
    return ge::PARAM_INVALID;
  }
  const uint32_t init_type = static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS);
  const int32_t callback_ret =
      prof_cb_.msprofCtrlCallback(init_type, static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
  if (callback_ret != 0) {
    GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", init_type, callback_ret);
    REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", init_type, callback_ret);
    return FAILED;
  }
  GELOGI("Profiling init success");
#endif
  return SUCCESS;
}

ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) {
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling by env
@@ -221,44 +205,6 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
return ge::SUCCESS;
}

// Stops profiling (only compiled in with DAVINCI_SUPPORT_PROFILING).
//
// First, if any device ids are still recorded in device_id_, asks the runtime to stop
// the profiler on them (a failure there is only logged as a warning). Then sends the
// FINALIZE request to the registered msprof control callback.
void ProfilingManager::StopProfiling() {
#ifdef DAVINCI_SUPPORT_PROFILING
  const uint64_t module = GetProfilingModule();

  // Normally the device list is already empty at this point; this branch only runs when
  // a previous ProfStopProfiling did not complete as expected.
  const int32_t pending_devices = static_cast<int32_t>(device_id_.size());
  if (pending_devices != 0) {
    std::unique_ptr<uint32_t[]> device_ids(new (std::nothrow) uint32_t[pending_devices]);
    if (device_ids == nullptr) {
      GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null.");
      REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null");
      return;
    }
    // The runtime API takes unsigned ids, while they are stored as int32_t.
    for (int32_t idx = 0; idx < pending_devices; ++idx) {
      device_ids[idx] = static_cast<uint32_t>(device_id_[idx]);
    }
    const rtError_t stop_status = rtProfilerStop(module, pending_devices, device_ids.get());
    if (stop_status != RT_ERROR_NONE) {
      GELOGW("Call rtProfilerStop failed, ret:%d", stop_status);
    }
  }

  // stop profiling
  if (prof_cb_.msprofCtrlCallback == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr");
    REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr");
    return;
  }
  const uint32_t finalize_ctrl_type = static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE);
  const int32_t callback_status = prof_cb_.msprofCtrlCallback(finalize_ctrl_type, nullptr, 0);
  if (callback_status != 0) {
    GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", finalize_ctrl_type, callback_status);
    return;
  }
  GELOGI("Stop Profiling success.");
#endif
}

void ProfilingManager::ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json) {
#ifdef DAVINCI_SUPPORT_PROFILING
for (size_t i = 0; i < task.input_format.size(); i++) {
@@ -896,13 +842,13 @@ bool ProfilingManager::ProfilingModelExecuteOn() const {
return execute_model_prof_on;
}

Status ProfilingManager::PluginInit() {
if (prof_cb_.msprofReporterCallback == nullptr) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() {
if (reporter_callback_ == nullptr) {
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr");
REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr");
return ge::PARAM_INVALID;
}
int32_t cb_ret = prof_cb_.msprofReporterCallback(
int32_t cb_ret = reporter_callback_(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT),
nullptr, 0);
@@ -912,7 +858,7 @@ Status ProfilingManager::PluginInit() {
return INTERNAL_ERROR;
}

cb_ret = prof_cb_.msprofReporterCallback(
cb_ret = reporter_callback_(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_DATA_MAX_LEN),
&reporter_max_len_, sizeof(uint32_t));
@@ -927,12 +873,12 @@ Status ProfilingManager::PluginInit() {

void ProfilingManager::PluginUnInit() const {
#ifdef DAVINCI_SUPPORT_PROFILING
if (prof_cb_.msprofReporterCallback == nullptr) {
if (reporter_callback_ == nullptr) {
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr");
REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr");
return;
}
int32_t cb_ret = prof_cb_.msprofReporterCallback(
int32_t cb_ret = reporter_callback_(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT),
nullptr, 0);
@@ -942,13 +888,14 @@ void ProfilingManager::PluginUnInit() const {
#endif
}

Status ProfilingManager::CallMsprofReport(ReporterData &reporter_data) const {
if (prof_cb_.msprofReporterCallback == nullptr) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport(
ReporterData &reporter_data) const {
if (reporter_callback_ == nullptr) {
GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr");
REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr");
return ge::PARAM_INVALID;
}
return prof_cb_.msprofReporterCallback(
return reporter_callback_(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT),
static_cast<void *>(&reporter_data), sizeof(ReporterData));
@@ -1089,4 +1036,204 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::GetMod
GELOGE(PARAM_INVALID, "[Check][GraphId]graph_id:%u does not exist!", graph_id);
return FAILED;
}

// Thin wrapper around the runtime call rtSetDeviceIdByModelIdx.
//
// @param model_id   index passed to the runtime as modelIdx.
// @param device_id  passed by reference to the runtime call.
// @return SUCCESS when the runtime reports RT_ERROR_NONE, FAILED otherwise.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::SetDeviceByModelId(
    uint32_t model_id, uint32_t &device_id) {
  const bool bound = (rtSetDeviceIdByModelIdx(model_id, device_id) == RT_ERROR_NONE);
  if (bound) {
    return SUCCESS;
  }
  REPORT_CALL_ERROR("E19999", "set device id by model_id:%u failed!", model_id);
  GELOGE(FAILED, "[Check][ModelId]set device id by model_id:%u failed!", model_id);
  return FAILED;
}

// Thin wrapper around the runtime call rtUnsetDeviceIdByModelIdx.
//
// @param model_id   index passed to the runtime as modelIdx.
// @param device_id  passed by reference to the runtime call.
// @return SUCCESS when the runtime reports RT_ERROR_NONE, FAILED otherwise.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::UnsetDeviceByModelId(
    uint32_t model_id, uint32_t &device_id) {
  const bool unbound = (rtUnsetDeviceIdByModelIdx(model_id, device_id) == RT_ERROR_NONE);
  if (unbound) {
    return SUCCESS;
  }
  REPORT_CALL_ERROR("E19999", "unset device id by model_id:%u failed!", model_id);
  GELOGE(FAILED, "[Check][ModelId]unset device id by model_id:%u failed!", model_id);
  return FAILED;
}

// Checks that a raw profiling command type is inside the known command range.
//
// @param type  raw command type value (cast to ProfCommandHandleType by the caller).
// @return true when type < kCommandNum, false (with an error log) otherwise.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::isProfTypeValid(uint32_t type) {
  // `type` is unsigned, so only the upper bound can be violated; a `< 0` test would
  // always be false and merely triggers tautological-comparison warnings.
  if (type >= kCommandNum) {
    GELOGE(ge::PARAM_INVALID, "[Check][Type]Type %u is invalid", type);
    return false;
  }
  GELOGD("Type is %u", type);
  return true;
}

// Validates the device list of a profiling start/stop request.
//
// The list is accepted only if it is non-null, its length is in [1, kMaxDevNum] and
// does not exceed the device count reported by rtGetDeviceCount, and every id is below
// that device count and appears only once.
//
// @param deviceid_list  array of device ids to check.
// @param device_nums    number of entries to read from deviceid_list.
// @return true when every check passes; false (after logging/reporting) otherwise.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::isProfConfigValid(
    const uint32_t *deviceid_list, uint32_t device_nums) {
  if (deviceid_list == nullptr) {
    GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr");
    REPORT_INNER_ERROR("E19999", "Device id list is nullptr");
    return false;
  }

  const bool count_within_limit = (device_nums != 0) && (device_nums <= kMaxDevNum);
  if (!count_within_limit) {
    GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums);
    REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums);
    return false;
  }

  // real device num
  int32_t visible_devices = 0;
  const rtError_t query_status = rtGetDeviceCount(&visible_devices);
  if (query_status != RT_ERROR_NONE) {
    GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", query_status);
    REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", query_status);
    return false;
  }

  const uint32_t device_limit = static_cast<uint32_t>(visible_devices);
  if (device_nums > device_limit) {
    GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]",
           device_nums, visible_devices);
    REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]",
                       device_nums, visible_devices);
    return false;
  }

  std::set<uint32_t> seen_ids;
  for (uint32_t idx = 0; idx < device_nums; ++idx) {
    const uint32_t candidate = deviceid_list[idx];
    if (candidate >= device_limit) {
      GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)",
             candidate, visible_devices);
      REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", candidate, visible_devices);
      return false;
    }
    // insert() reports through `.second` whether the id was new, so one lookup covers
    // both the duplicate test and the bookkeeping.
    if (!seen_ids.insert(candidate).second) {
      GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", candidate);
      REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", candidate);
      return false;
    }
  }
  return true;
}

// Converts the device information of a runtime profiling command into the flat
// key/value string list used as command parameters:
//   { kDeviceNums, "<devNums>", kDeviceIdList, "<id0>,<id1>,..." }
//
// @param profCommand         command whose devNums/devList are serialised.
// @param prof_config_params  output; always cleared first and left empty on failure.
// @return true on success; false when devNums is 0 or larger than the devList array.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::TransProfConfigToParam(
    const rtProfCommandHandle &profCommand, vector<string> &prof_config_params) {
  prof_config_params.clear();

  // Validate before producing any output so callers never see a half-filled vector, and
  // bound devNums by the real array size so the loop below cannot read past devList.
  const size_t dev_list_capacity = sizeof(profCommand.devList) / sizeof(profCommand.devList[0]);
  if (profCommand.devNums == 0 || profCommand.devNums > dev_list_capacity) {
    GELOGW("The device num is invalid.");
    return false;
  }

  std::string devID;
  for (uint32_t i = 0; i < profCommand.devNums; i++) {
    if (i != 0) {
      devID.append(",");
    }
    devID.append(std::to_string(profCommand.devList[i]));
  }

  prof_config_params.emplace_back(kDeviceNums);
  prof_config_params.emplace_back(std::to_string(profCommand.devNums));
  prof_config_params.emplace_back(kDeviceIdList);
  prof_config_params.push_back(devID);
  return true;
}

// Prepares the command parameters for a model-unsubscribe request.
//
// Only acts when `type` is kProfCommandhandleModelUnsubscribe and a subscription is
// active; in that case prof_params is replaced by { kPofilingModelId, "<model_id>" },
// where the model id is looked up from graph_id. For every other combination
// prof_params is left untouched.
//
// @param type          profiling command being handled.
// @param is_subscribe  whether a subscription is currently recorded.
// @param graph_id      graph whose model id is looked up.
// @param prof_params   output parameter list (see above); empty if the lookup fails.
// @return ge::SUCCESS, or the GetModelIdFromGraph error when the graph id is unknown.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::NeedUnsubscribe(
    ProfCommandHandleType type, bool is_subscribe, uint32_t graph_id, vector<string> &prof_params) {
  if (type != kProfCommandhandleModelUnsubscribe || !is_subscribe) {
    return ge::SUCCESS;
  }

  prof_params.clear();
  uint32_t model_id = 0;
  const auto ret = GetModelIdFromGraph(graph_id, model_id);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "graph_id:%u not found", graph_id);
    return ret;
  }
  // Fill key and value together, only after the lookup succeeded, so a failure never
  // leaves a dangling key without its value.
  prof_params.emplace_back(kPofilingModelId);
  prof_params.emplace_back(std::to_string(model_id));
  return ge::SUCCESS;
}

// Profiling control handler with the MsprofCtrlHandle signature.
//
// RT_PROF_CTRL_REPORTER: `data` is stored as the MsprofReporterCallback.
// RT_PROF_CTRL_SWITCH:   `data` is read as an rtProfCommandHandle_t. Its `type` selects
//                        a profiling command, which is translated into a ge::Command
//                        and executed through GraphLoader::CommandHandle. A
//                        model-subscribe request in training mode is only recorded via
//                        SetSubscribeInfo and not executed here.
// Any other control type is rejected.
//
// @param rt_type  kind of control message.
// @param data     payload, interpreted according to rt_type; must not be null.
// @param len      payload size; not used by this function.
// @return ge::SUCCESS (0) on success, otherwise ge::FAILED / ge::PARAM_INVALID converted
//         to rtError_t.
//
// NOTE(review): `len` is not checked against sizeof(rtProfCommandHandle_t) before the
// payload is read - confirm the caller always passes a full structure.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY rtError_t ProfilingManager::CommandHandle(
    rtProfCtrlType_t rt_type, void *data, uint32_t len) {
  if (data == nullptr) {
    GELOGE(ge::PARAM_INVALID, "The prof comand is invalid.");
    return ge::FAILED;
  }
  if (rt_type == RT_PROF_CTRL_REPORTER) {
    reporter_callback_ = reinterpret_cast<MsprofReporterCallback>(data);
    GELOGD("return with MsprofReporterCallback");
    return ge::SUCCESS;
  }
  if (rt_type != RT_PROF_CTRL_SWITCH) {
    GELOGW("Unsupported profiling control type %d.", static_cast<int32_t>(rt_type));
    return ge::FAILED;
  }

  // `data` was null-checked above, so no further pointer check is needed from here on.
  const auto *prof_config_param = static_cast<const rtProfCommandHandle_t *>(data);
  if (!isProfTypeValid(prof_config_param->type)) {
    GELOGE(ge::PARAM_INVALID, "The prof comand is invalid.");
    return ge::FAILED;
  }
  const auto type = static_cast<ProfCommandHandleType>(prof_config_param->type);
  const auto iter = kProfCommandTypeMap.find(type);
  if (iter == kProfCommandTypeMap.end()) {
    GELOGW("The prof comand type is invalid.");
    return ge::PARAM_INVALID;
  }

  const bool is_start_or_stop = (type == kProfCommandhandleStart) || (type == kProfCommandhandleStop);
  std::vector<string> prof_params;
  if (is_start_or_stop) {
    if (!isProfConfigValid(prof_config_param->devList, prof_config_param->devNums)) {
      return ge::FAILED;
    }
    if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
      GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed");
      REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed");
      return ge::PARAM_INVALID;
    }
  }

  const auto is_train = domi::GetContext().train_flag;
  if (type == kProfCommandhandleModelSubscribe && is_train) {
    // In training mode the subscription is only recorded here, not executed.
    SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true);
    return ge::SUCCESS;
  }

  const auto is_subscribe = subscribe_info_.is_subscribe;
  // GraphId is actually stored in prof_config_param
  const auto graph_id = prof_config_param->modelId;
  ge::Status ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "graph_id:%u not found", graph_id);
    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"value", "parameter", "reason"}),
                       std::vector<std::string>({std::to_string(graph_id),
                                                 "GraphToModelMap",
                                                 "graph_id does not exist!"}));
    return ge::FAILED;
  }

  GraphLoader graph_loader;
  Command command;
  command.cmd_type = iter->second;
  command.cmd_params = prof_params;
  // Finalize carries no switch mask; set the field explicitly so the logs below never
  // print a value this function did not assign.
  command.module_index = (type != kProfCommandhandleFinalize) ? prof_config_param->profSwitch : 0;
  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(),
         command.module_index);
  if (is_start_or_stop) {
    // prof_params[0] is the key label, not the count, so log the count from the payload.
    GELOGI("Profiling device nums:%u , deviceID:[%s]", prof_config_param->devNums,
           prof_params[kDeviceListIndex].c_str());
  }
  ret = graph_loader.CommandHandle(command);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u",
           iter->second.c_str(), ret);
    REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u",
                      iter->second.c_str(), ret);
    return ge::FAILED;
  }

  GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index);
  return ge::SUCCESS;
}
} // namespace ge

+ 21
- 15
ge/common/profiling/profiling_manager.h View File

@@ -52,7 +52,14 @@ namespace {
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000;
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000;
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000;

// Profiling command kinds. ProfilingManager::CommandHandle obtains one by casting the
// `type` field of the runtime's rtProfCommandHandle_t payload.
enum ProfCommandHandleType {
kProfCommandhandleInit = 0,
kProfCommandhandleStart,             // device list is validated and serialised into command params
kProfCommandhandleStop,              // handled like Start with respect to the device list
kProfCommandhandleFinalize,          // profSwitch is not copied into the command for this type
kProfCommandhandleModelSubscribe,    // in training mode only recorded via SetSubscribeInfo
kProfCommandhandleModelUnsubscribe   // model id is looked up from the graph id when subscribed
};
} // namespace
namespace ge {
class OpDesc;
@@ -68,24 +75,17 @@ struct ProfSubscribeInfo {
uint32_t graph_id;
};

struct MsprofCallback {
MsprofCtrlCallback msprofCtrlCallback;
MsprofReporterCallback msprofReporterCallback;
};

class ProfilingManager {
public:
ProfilingManager();
virtual ~ProfilingManager();
static ProfilingManager &Instance();
Status Init(const Options &options);
Status ProfInit(uint64_t module);
Status ProfFinalize();
Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
Status ProfModelSubscribe(uint64_t module, void *model);
Status ProfModelUnsubscribe(void *model);
void StopProfiling();
bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
// report model load profiling data flag, data contain task desc info, step info, model load fusion op info
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
@@ -100,9 +100,8 @@ class ProfilingManager {
Status PluginInit();
void PluginUnInit() const;
Status CallMsprofReport(ReporterData &reporter_data) const;
struct MsprofCallback &GetMsprofCallback() { return prof_cb_; }
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
const MsprofReporterCallback GetMsprofReporterCallback() const { return reporter_callback_; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { reporter_callback_ = func; }
void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
@@ -111,11 +110,14 @@ class ProfilingManager {
uint64_t GetStepInfoIndex() const { return index_id_; }
void SetGraphIdToDeviceMap(uint32_t graph_id, uint32_t device_id) { device_id_map_[graph_id] = device_id; }
Status GetDeviceIdFromGraph(uint32_t graph_id, uint32_t &device_id);
void SetSubscribeInfo(uint64_t prof_switch, uint32_t model_id, bool is_subscribe);
const ProfSubscribeInfo &GetSubscribeInfo() const { return subscribe_info_; }
static void SetSubscribeInfo(uint64_t prof_switch, uint32_t model_id, bool is_subscribe);
ProfSubscribeInfo GetSubscribeInfo() { return subscribe_info_; }
void CleanSubscribeInfo();
void SetGraphIdToModelMap(uint32_t graph_id, uint32_t model_id) { model_id_map_[graph_id] = model_id; }
Status GetModelIdFromGraph(uint32_t graph_id, uint32_t &model_id);
Status SetDeviceByModelId(uint32_t model_id, uint32_t &device_id);
Status UnsetDeviceByModelId(uint32_t model_id, uint32_t &device_id);
static rtError_t CommandHandle(rtProfCtrlType_t rt_type, void *data, uint32_t len);

private:
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
@@ -129,6 +131,10 @@ class ProfilingManager {
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);
void GetOpInputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
void GetOpOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
static bool isProfTypeValid(uint32_t type);
static bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums);
static bool TransProfConfigToParam(const rtProfCommandHandle &profCommand, vector<string> &prof_config_params);
static Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe, uint32_t graph_id, vector<string> &prof_params);

bool is_load_profiling_;
bool is_execute_profiling_;
@@ -139,14 +145,14 @@ class ProfilingManager {
uint32_t subscribe_count_;
std::mutex mutex_;
std::mutex mutex_report_;
MsprofCallback prof_cb_;
std::string fp_point_;
std::string bp_point_;
uint32_t reporter_max_len_ = 0;
uint64_t index_id_;
std::map<uint32_t, uint32_t> device_id_map_; // key: graph_id, value: device_id
std::map<uint32_t, uint32_t> model_id_map_; // key: graph_id, value: model_id
ProfSubscribeInfo subscribe_info_;
static ProfSubscribeInfo subscribe_info_;
static MsprofReporterCallback reporter_callback_;
};
} // namespace ge
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_

+ 110
- 0
ge/common/profiling/profiling_properties.cc View File

@@ -0,0 +1,110 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "profiling_properties.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/ge_context.h"

// File-local constants used by ProfilingProperties::GetFpBpPoint.
namespace {
// Size of the buffer that receives the PROFILING_OPTIONS environment variable.
const uint64_t kMsProfOptionsMaxlen = 2048;
// JSON keys looked up in the parsed profiling options.
const char *const kFpPoint = "fp_point";
const char *const kBpPoint = "bp_point";
} // namespace

namespace ge{
// Returns the process-wide ProfilingProperties object, which is held as a
// function-local static.
ProfilingProperties& ProfilingProperties::Instance() {
  static ProfilingProperties instance;
  return instance;
}

// Stores the load-profiling flag while holding mutex_.
void ProfilingProperties::SetLoadProfiling(bool is_load_profiling) {
  const std::lock_guard<std::mutex> guard(mutex_);
  is_load_profiling_ = is_load_profiling;
}

// Reads the load-profiling flag while holding mutex_.
bool ProfilingProperties::IsLoadProfiling() {
  const std::lock_guard<std::mutex> guard(mutex_);
  const bool load_enabled = is_load_profiling_;
  return load_enabled;
}

// Stores the execute-profiling flag while holding mutex_.
void ProfilingProperties::SetExecuteProfiling(bool is_exec_profiling) {
  const std::lock_guard<std::mutex> guard(mutex_);
  is_execute_profiling_ = is_exec_profiling;
}

// Reads the execute-profiling flag while holding mutex_.
bool ProfilingProperties::IsExecuteProfiling() {
  const std::lock_guard<std::mutex> guard(mutex_);
  const bool execute_enabled = is_execute_profiling_;
  return execute_enabled;
}

// Stores the training-trace flag while holding mutex_.
void ProfilingProperties::SetTrainingTrace(bool is_train_trance) {
  const std::lock_guard<std::mutex> guard(mutex_);
  is_training_trace_ = is_train_trance;
}

void ProfilingProperties::GetFpBpPoint(std::string &fp_point, std::string &bp_point) {
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init
std::lock_guard<std::mutex>lock(mutex_);
if (!fp_point_.empty() && !bp_point_.empty()) {
fp_point = fp_point_;
bp_point = bp_point_;
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(),
fp_point.c_str());
return;
}
// ProfApi mode and training trace is set
// Parse options first
char env_profiling_options[kMsProfOptionsMaxlen] = {0x00};
bool is_profiling_valid = false;
std::string profiling_options;
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS &&
!profiling_options.empty()) {
is_profiling_valid = true;
} else {
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, kMsProfOptionsMaxlen);
if (ret != EN_OK) {
GELOGI("PROFILING_OPTIONS env is not exist.");
return;
}
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options);
profiling_options = env_profiling_options;
is_profiling_valid = true;
}
if (is_profiling_valid) {
try {
Json prof_options = Json::parse(profiling_options);
if (prof_options.contains(kFpPoint)) {
fp_point_ = prof_options[kFpPoint];
}
if (prof_options.contains(kBpPoint)) {
bp_point_ = prof_options[kBpPoint];
}
fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
} catch (...) {
GELOGW("Json prof options is invalid.");
return;
}
}

return;
}

} // namespace ge

+ 62
- 0
ge/common/profiling/profiling_properties.h View File

@@ -0,0 +1,62 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_COMMON_PROFILING_PROPERTIES_H_
#define GE_COMMON_PROFILING_PROPERTIES_H_

#include <nlohmann/json.hpp>
#include <mutex>
#include <string>
#include <vector>

#include "framework/common/ge_types.h"

using Json = nlohmann::json;

namespace ge {
// Holder of the profiling switches and training-trace points, reached through
// Instance(). Flag accessors take mutex_, so setters and getters may be called from
// different threads.
//
// NOTE(review): IsTrainTrace, SetFpBpPoint, SetDeviceId and GetDeviceID are declared
// here, but no definition is visible in profiling_properties.cc - confirm they are
// defined before they are used.
class ProfilingProperties {
 public:
  // Access to the single shared instance.
  static ProfilingProperties &Instance();

  void SetLoadProfiling(bool is_load_profiling);
  bool IsLoadProfiling();
  void SetExecuteProfiling(bool is_execute_profiling);
  bool IsExecuteProfiling();
  void SetTrainingTrace(bool is_train_trance);

  // True when training trace has been switched on. Locks mutex_ because
  // SetTrainingTrace writes the flag under the same mutex.
  bool ProfilingTrainingTraceOn() const {
    std::lock_guard<std::mutex> lock(mutex_);
    return is_training_trace_;
  }
  bool IsTrainTrace();
  void SetFpBpPoint(const std::string &fp_point, const std::string &bp_point);
  void SetDeviceId(const std::vector<int32_t> &device_id);
  std::vector<int32_t> GetDeviceID();

  // True only when both load profiling and execute profiling are enabled. Both flags
  // are read under one lock so the pair is observed consistently.
  bool ProfilingOn() const {
    std::lock_guard<std::mutex> lock(mutex_);
    return is_load_profiling_ && is_execute_profiling_;
  }

  // Fetches (and caches) the fp/bp points.
  void GetFpBpPoint(std::string &fp_point, std::string &bp_point);

 private:
  // Construction and destruction are private; use Instance().
  ProfilingProperties() = default;
  ~ProfilingProperties() = default;

  // mutable so the const getters above can lock it.
  mutable std::mutex mutex_;
  std::mutex point_mutex_;
  bool is_load_profiling_ = false;
  bool is_execute_profiling_ = false;
  bool is_training_trace_ = false;
  std::string fp_point_;
  std::string bp_point_;
  std::vector<int32_t> device_id_;
};
} // namespace ge

#endif // GE_COMMON_PROFILING_PROPERTIES_H_

+ 0
- 2
ge/executor/ge_executor.cc View File

@@ -277,7 +277,6 @@ Status GeExecutor::Initialize() {
profiling_options.device_id = 0;
// job id need to be set, the value is meaningless;
profiling_options.job_id = "1";
ProfilingManager::Instance().Init(profiling_options);

isInit_ = true;
GELOGI("Init GeExecutor over.");
@@ -295,7 +294,6 @@ Status GeExecutor::Finalize() {

// Stop profiling
if (ProfilingManager::Instance().ProfilingOn()) {
ProfilingManager::Instance().StopProfiling();
ProfilingManager::Instance().PluginUnInit();
}



+ 8
- 8
ge/graph/build/task_generator.cc View File

@@ -17,7 +17,7 @@
#include "graph/build/task_generator.h"
#include <string>
#include <utility>
#include "common/profiling/profiling_manager.h"
#include "common/profiling/profiling_properties.h"
#include "framework/common/types.h"
#include "framework/common/util.h"
#include "framework/common/debug/ge_log.h"
@@ -945,7 +945,7 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint
vector<uint32_t> &all_reduce_nodes, std::string &fp_point_str,
std::string &bp_point_str) const {

ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str);
ProfilingProperties::Instance().GetFpBpPoint(fp_point_str, bp_point_str);

Status ret = SUCCESS;
if (fp_point_str.empty()) {
@@ -976,8 +976,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
vector<uint32_t> &all_reduce_nodes) const {
GE_CHECK_NOTNULL(graph);
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() ||
ProfilingProperties::Instance().ProfilingTrainingTraceOn();
if (!is_profiling) {
GELOGD("Profiling is not open.");
return SUCCESS;
@@ -1071,8 +1071,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list) {
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() ||
ProfilingProperties::Instance().ProfilingTrainingTraceOn();
bool is_insert_fp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
bool is_insert_bp_profiling_task = false;
@@ -1167,8 +1167,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
vector<domi::TaskDef> &task_def_list) {
GE_CHECK_NOTNULL(op_desc);
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() ||
ProfilingProperties::Instance().ProfilingTrainingTraceOn();
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_insert_end_profiling_task = false;


+ 1
- 1
ge/graph/build/task_generator.h View File

@@ -26,7 +26,7 @@
#include "framework/common/types.h"
#include "graph/compute_graph.h"
#include "graph/model.h"
#include "proto/task.pb.h"
#include "ge_runtime/proto/task.pb.h"
#include "runtime/rt.h"

namespace ge {


+ 1
- 1
ge/graph/execute/graph_execute.cc View File

@@ -796,7 +796,7 @@ Status GraphExecutor::GetModelByID(uint32_t model_id, std::shared_ptr<DavinciMod

Status GraphExecutor::ModelSubscribe(uint32_t graph_id) {
auto &profiling_manager = ProfilingManager::Instance();
const auto &subcribe_info = profiling_manager.GetSubscribeInfo();
auto subcribe_info = profiling_manager.GetSubscribeInfo();
if (subcribe_info.is_subscribe) {
std::shared_ptr<DavinciModel> davinci_model = nullptr;
uint32_t model_id = 0;


+ 0
- 4
ge/graph/manager/graph_manager.cc View File

@@ -109,7 +109,6 @@
#include "register/custom_pass_helper.h"
#include "external/graph/types.h"
#include "common/util/error_manager/error_manager.h"
#include "common/profiling/profiling_manager.h"

namespace {
const char *const kSummary = "Summary";
@@ -462,9 +461,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
const std::map<std::string, std::string> &options,
const OmgContext &omg_context) {
IncreaseGraphCount(graph_id);
auto device_id = GetContext().DeviceId();
GELOGD("Device id is %u", device_id);
ProfilingManager::Instance().SetGraphIdToDeviceMap(graph_id, device_id);
// validation for adding graphs of same graph_id in multi-thread secenario
// 1.previous thread owns same graph_id has finished the AddGraph procession
if (GetAddGraphCondition(graph_id) == kDoneAdded) {


+ 9
- 11
ge/init/gelib.cc View File

@@ -25,7 +25,6 @@

#include "common/ge/ge_util.h"
#include "common/ge/plugin_manager.h"
#include "common/profiling/profiling_manager.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
@@ -43,6 +42,7 @@
#include "runtime/kernel.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "external/runtime/rt_error_codes.h"
#include "common/profiling/profiling_init.h"

using Json = nlohmann::json;

@@ -194,7 +194,7 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
InitOptions(options);

// In train and infer, profiling is always needed.
InitProfiling(this->options_);
//InitProfiling(this->options_);
// 1.`is_train_mode_` means case: train
// 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer
// these two case with logical device id
@@ -206,15 +206,15 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
return status;
}

void GELib::InitProfiling(Options &options) {
/*void GELib::InitProfiling(Options &options) {
GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id);
std::lock_guard<std::mutex> lock(status_mutex_);
GetContext().Init();
// Profiling init
if (ProfilingManager::Instance().Init(options) != SUCCESS) {
GELOGW("Profiling init failed.");
}
}
//if (ProfilingInit::Instance().Init(options) != SUCCESS) {
//GELOGW("Profiling init failed.");
// }
}*/

void GELib::SetDefaultPrecisionMode(map<string, string> &new_options) {
auto iter = new_options.find(PRECISION_MODE);
@@ -496,10 +496,8 @@ Status GELib::Finalize() {

void GELib::ShutDownProfiling() {
std::lock_guard<std::mutex> lock(status_mutex_);

if (ProfilingManager::Instance().ProfilingOn()) {
ProfilingManager::Instance().StopProfiling();
ProfilingManager::Instance().PluginUnInit();
if (ProfilingProperties::Instance().ProfilingOn()) {
ge::ProfilingInit::Instance().StopProfiling();
}
}



+ 10
- 0
ge/session/inner_session.cc View File

@@ -36,6 +36,7 @@
#include "runtime/mem.h"
#include "ir_build/option_utils.h"
#include "common/profiling/profiling_manager.h"
#include "common/profiling/profiling_init.h"

namespace ge {
namespace {
@@ -288,6 +289,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id);
if (mutex_.try_lock()) {
std::lock_guard<std::mutex> lock(mutex_, std::adopt_lock);
auto device_id = GetContext().DeviceId();
GELOGD("device_id is %u", device_id);
ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
@@ -339,6 +343,9 @@ Status InnerSession::RunGraphWithStreamAsync(uint32_t graph_id, rtStream_t strea
"session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream);
return GE_SESS_INIT_FAILED;
}
auto device_id = GetContext().DeviceId();
GELOGD("device_id is %u", device_id);
ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id);
UpdateThreadContext(graph_id);
vector<GeTensor> ge_inputs;
for (auto &item : inputs) {
@@ -382,6 +389,9 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) {
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
auto device_id = GetContext().DeviceId();
GELOGD("remove device_id is %u", device_id);
ProfilingInit::Instance().UnsetDeviceIdByModelId(graph_id, device_id);
UpdateThreadContext(graph_id);
Status ret = graph_manager_.RemoveGraph(graph_id);
if (ret != SUCCESS) {


+ 0
- 24
inc/framework/common/profiling/ge_profiling.h View File

@@ -18,32 +18,8 @@
#define INC_FRAMEWORK_COMMON_GE_PROFILING_H_

#include "ge/ge_api_error_codes.h"
#include "toolchain/prof_callback.h"
#include "runtime/base.h"

const int MAX_DEV_NUM = 64;

// Command kinds accepted as the `type` argument of ProfCommandHandle.
enum ProfCommandHandleType {
kProfCommandhandleInit = 0,
kProfCommandhandleStart,
kProfCommandhandleStop,
kProfCommandhandleFinalize,
kProfCommandhandleModelSubscribe,
kProfCommandhandleModelUnsubscribe
};

// Profiling command payload.
// NOTE(review): presumably the structure behind the `data` pointer of
// ProfCommandHandle - confirm against its implementation.
struct ProfCommandHandleData {
uint64_t profSwitch;
uint32_t devNums; // length of device id list
uint32_t devIdList[MAX_DEV_NUM];  // device ids; capacity is MAX_DEV_NUM entries
uint32_t modelId;
};

GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func);
GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func);
GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func);
GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len);

///
/// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading
/// @return Status result


inc/framework/common/profiling/ge_runner_profiling.h → inc/framework/common/profiling/profiling_init.h View File


+ 27
- 0
third_party/fwkacllib/inc/runtime/base.h View File

@@ -33,6 +33,7 @@ extern "C" {
#endif
#endif

/* Number of entries in the devList array of rtProfCommandHandle_t. */
#define RT_PROF_MAX_DEV_NUM 64
typedef int32_t rtError_t;
static const int32_t RT_ERROR_NONE = 0; // success

@@ -80,6 +81,13 @@ typedef enum tagRtLimitType {
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms
} rtLimitType_t;

/**
 * @ingroup profiling_base
 * @brief control type handed to a MsprofCtrlHandle callback as its first argument.
 * NOTE(review): the meaning of each value is inferred from its name only;
 * RT_PROF_CTRL_BUTT looks like an end-of-range sentinel — confirm.
 */
typedef enum {
RT_PROF_CTRL_INVALID = 0,  // explicitly 0; the following enumerators count up from here
RT_PROF_CTRL_SWITCH,
RT_PROF_CTRL_REPORTER,
RT_PROF_CTRL_BUTT,
} rtProfCtrlType_t;

typedef struct rtExceptionInfo {
uint32_t taskid;
uint32_t streamid;
@@ -88,6 +96,15 @@ typedef struct rtExceptionInfo {
uint32_t retcode;
} rtExceptionInfo;

/**
 * @ingroup profiling_base
 * @brief plain data record describing a profiling command.
 * NOTE(review): presumably passed through the `data` pointer of a MsprofCtrlHandle
 * callback — confirm. Field order and sizes define the binary layout; do not reorder.
 */
typedef struct rtProfCommandHandle {
uint64_t profSwitch;    // NOTE(review): presumably a bitmask of profiling switches — confirm
uint64_t profSwitchHi;  // NOTE(review): presumably the upper half of the switch bitmask — confirm
uint32_t devNums;       // NOTE(review): presumably the number of valid entries in devList — confirm
uint32_t devList[RT_PROF_MAX_DEV_NUM];  // device list; holds RT_PROF_MAX_DEV_NUM entries
uint32_t modelId;
uint32_t type;          // NOTE(review): presumably the command kind — confirm which enum it carries
} rtProfCommandHandle_t;

typedef void (*rtErrorCallback)(rtExceptionType);

typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);
@@ -118,6 +135,8 @@ typedef void *rtLabel_t;
*/
typedef void *rtModel_t;

/**
 * @ingroup profiling_base
 * @brief profiling control callback type, as accepted by rtProfRegisterCtrlCallback.
 * @param [in] type  control type
 * @param [in] data  untyped buffer (NOTE(review): layout presumably depends on type — confirm)
 * @param [in] len   accompanies data (NOTE(review): presumably its size in bytes — confirm)
 * @return rtError_t
 */
typedef rtError_t (*MsprofCtrlHandle)(rtProfCtrlType_t type, void *data, uint32_t len);

/**
* @ingroup profiling_base
* @brief runtime handle.
@@ -357,6 +376,14 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_
*/
RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId);

/**
 * @ingroup profiling_base
 * @brief takes a profiling control callback together with a logId.
 * NOTE(review): presumably registers `callback` under `logId` — confirm semantics of logId.
 * @return rtError_t
 */
RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t logId, MsprofCtrlHandle callback);

/*
 * NOTE(review): the three declarations below take `uint32_t &deviceId`, a C++
 * reference, although they sit inside this header's extern "C" region. A C
 * translation unit including this header cannot parse them. Confirm whether
 * pointer parameters were intended before changing them, since callers pass
 * the argument by reference today.
 * NOTE(review): Set/Unset take a non-const reference; presumably deviceId is an
 * input there and an output only for Get — confirm.
 */
RTS_API rtError_t rtSetDeviceIdByModelIdx(uint32_t modelIdx, uint32_t &deviceId);

RTS_API rtError_t rtUnsetDeviceIdByModelIdx(uint32_t modelIdx, uint32_t &deviceId);

RTS_API rtError_t rtGetDeviceIdByModelIdx(uint32_t modelIdx, uint32_t &deviceId);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif


+ 0
- 9
third_party/fwkacllib/inc/toolchain/prof_callback.h View File

@@ -114,15 +114,6 @@ enum MsprofCtrlCallbackType {
MSPROF_CTRL_PROF_SWITCH_OFF // for prof switch off
};

/* Number of entries in the devIdList array of MsprofCommandHandle. */
#define MSPROF_MAX_DEV_NUM (64)

/* Plain data record describing a profiling command. */
struct MsprofCommandHandle {
uint64_t profSwitch;  // NOTE(review): presumably a bitmask of profiling switches — confirm
uint32_t devNums; // length of device id list
uint32_t devIdList[MSPROF_MAX_DEV_NUM];  // device id list; holds MSPROF_MAX_DEV_NUM entries
uint32_t modelId;
};

/**
* @name MsprofCtrlCallback
* @brief callback to start/stop profiling


Loading…
Cancel
Save