From 99412e16920dc79d3b378a3cd0e042a00fde68f3 Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Wed, 21 Jul 2021 16:54:34 +0800 Subject: [PATCH] modify profiling --- ge/common/profiling/profiling_init.cc | 223 ++++++++++++++++++++++++++++ ge/common/profiling/profiling_init.h | 45 ++++++ ge/common/profiling/profiling_properties.cc | 45 ++++++ ge/common/profiling/profiling_properties.h | 64 ++++++++ 4 files changed, 377 insertions(+) create mode 100644 ge/common/profiling/profiling_init.cc create mode 100644 ge/common/profiling/profiling_init.h create mode 100755 ge/common/profiling/profiling_properties.cc create mode 100755 ge/common/profiling/profiling_properties.h diff --git a/ge/common/profiling/profiling_init.cc b/ge/common/profiling/profiling_init.cc new file mode 100644 index 00000000..731a6ceb --- /dev/null +++ b/ge/common/profiling/profiling_init.cc @@ -0,0 +1,223 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "profiling_init.h"
+
+#include "common/properties_manager.h"
+#include "framework/common/debug/ge_log.h"
+#include "framework/common/debug/log.h"
+#include "common/profiling/profiling_properties.h"
+#include "runtime/base.h"
+
+namespace ge {
+
+// Returns the process-wide PorfilingInit instance (a function-local static).
+PorfilingInit &PorfilingInit::Instance() {
+  static PorfilingInit profiling_init;
+  return profiling_init;
+}
+
+// Resolves the profiling configuration through InitFromOptions() and, only when profiling is enabled, passes it to
+// msprofInit(). Returns the InitFromOptions() error unchanged, or FAILED when msprofInit() returns non-zero.
+ge::Status PorfilingInit::Init(const Options &options) {
+  GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str());
+
+  struct MsprofGeOptions prof_conf = {{0}};
+  bool is_execute_profiling = false;
+  Status ret = InitFromOptions(options, prof_conf, is_execute_profiling);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret);
+    REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret);
+    return ret;
+  }
+
+  if (is_execute_profiling) {
+    int32_t cb_ret = msprofInit(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS),
+                                static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
+    if (cb_ret != 0) {
+      GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d",
+             static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret);
+      REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d",
+                        static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret);
+      return FAILED;
+    }
+    GELOGI("Profiling init success");
+  } else {
+    GELOGI("The profiling is off, skip the initialization");
+  }
+  return SUCCESS;
+}
+
+// Fills prof_conf from the GE options (profiling_mode "1" plus non-empty profiling_options) or, failing that, from
+// the PROFILING_MODE / PROFILING_OPTIONS environment variables. is_execute_profiling becomes true only when one of
+// the two sources enables profiling; in that case the options JSON is parsed and the job id is copied as well.
+ge::Status PorfilingInit::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf,
+                                          bool &is_execute_profiling) {
+  // Receives PROFILING_MODE when the environment fallback below is taken
+  char env_profiling_mode[MMPA_MAX_PATH] = {0x00};
+
+  if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
+    // enable profiling by ge option
+    if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
+                  MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
+      GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", options.profiling_options.c_str());
+      REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", options.profiling_options.c_str());
+      return INTERNAL_ERROR;
+    }
+    is_execute_profiling = true;
+    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
+           options.profiling_options.c_str());
+  } else {
+    (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
+    (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
+    // The env is invalid
+    if ((strcmp("true", env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) {
+      return SUCCESS;
+    }
+    // enable profiling by env
+    is_execute_profiling = true;
+    GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options);
+  }
+
+  ProfilingProperties::Instance().SetExecuteProfiling(is_execute_profiling);
+  if (!is_execute_profiling) {
+    return SUCCESS;
+  }
+
+  // Parse json str for bp fp
+  Status ret = ParseOptions(prof_conf.options);
+  if (ret != ge::SUCCESS) {
+    GELOGE(ge::PARAM_INVALID, "[Parse][Options]Parse training trace param %s failed, error_code %u", prof_conf.options,
+           ret);
+    REPORT_CALL_ERROR("E19999", "Parse training trace param %s failed, error_code %u", prof_conf.options, ret);
+    return ge::PARAM_INVALID;
+  }
+
+  if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) !=
+      EOK) {
+    GELOGE(INTERNAL_ERROR, "[Copy][JobId]Failed, original job_id %s", options.job_id.c_str());
+    REPORT_CALL_ERROR("E19999", "Copy job_id %s failed", options.job_id.c_str());
+    return INTERNAL_ERROR;
+  }
+  GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
+  return ge::SUCCESS;
+}
+
+// Parses the profiling options JSON. When its kTrainingTrace entry is "on", records the training-trace flag and the
+// fp/bp points in ProfilingProperties. Empty input, malformed JSON or another non-empty value yield PARAM_INVALID.
+ge::Status PorfilingInit::ParseOptions(const std::string &options) {
+  if (options.empty()) {
+    GELOGE(ge::PARAM_INVALID, "[Check][Param]Profiling options is empty");
+    REPORT_INNER_ERROR("E19999", "Profiling options is empty");
+    return ge::PARAM_INVALID;
+  }
+  try {
+    Json prof_options = Json::parse(options);
+    if (options.find(kTrainingTrace) == std::string::npos) {
+      return ge::SUCCESS;
+    }
+    std::string training_trace;
+    if (prof_options.contains(kTrainingTrace)) {
+      training_trace = prof_options[kTrainingTrace];
+    }
+    if (training_trace.empty()) {
+      GELOGI("Training trace will not take effect.");
+      return ge::SUCCESS;
+    }
+    GELOGI("GE profiling training trace:%s", training_trace.c_str());
+    if (training_trace != "on") {
+      GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", training_trace.c_str());
+      REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str());
+      return ge::PARAM_INVALID;
+    }
+    std::string fp_point;
+    std::string bp_point;
+    if (prof_options.contains(kFpPoint)) {
+      fp_point = prof_options[kFpPoint];
+    }
+    if (prof_options.contains(kBpPoint)) {
+      bp_point = prof_options[kBpPoint];
+    }
+    if (!fp_point.empty() && !bp_point.empty()) {
+      GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point.c_str(), fp_point.c_str());
+    }
+    ProfilingProperties::Instance().SetTrainTrace(true);
+    ProfilingProperties::Instance().SetFpBpPoint(fp_point, bp_point);
+  } catch (...) {
+    GELOGE(FAILED, "[Check][Param]Json prof_conf options is invalid");
+    REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid");
+    return ge::PARAM_INVALID;
+  }
+  return ge::SUCCESS;
+}
+
+// Stops runtime profiling on every device id held by ProfilingProperties (an rtProfilerStop failure is only
+// logged), then finalizes msprof through mspuninit().
+void PorfilingInit::StopProfiling() {
+  uint64_t module = GetProfilingModule();
+  // The following if case will not be executed in normal case, in case of ProfStopProfiling is abnormal
+  auto device_id = ProfilingProperties::Instance().GetDeviceID();
+  int32_t device_num = static_cast<int32_t>(device_id.size());
+  if (device_num != 0) {
+    auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
+    if (device_id_ptr == nullptr) {
+      GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null.");
+      REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null");
+      return;
+    }
+    for (int32_t i = 0; i < device_num; i++) {
+      device_id_ptr[i] = static_cast<uint32_t>(device_id[i]);
+    }
+    rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
+    }
+  }
+
+  // stop profiling
+  int32_t cb_ret = mspuninit(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), nullptr, 0);
+  if (cb_ret != 0) {
+    GELOGW("call msprofCtrlCallback failed, type:%u, return:%d",
+           static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret);
+    return;
+  }
+  GELOGI("Stop Profiling success.");
+}
+
+// Sends MSPROF_REPORTER_UNINIT for the framework module through report_callback_; a null callback is an error.
+void PorfilingInit::PluginUnInit() const {
+  if (report_callback_ == nullptr) {
+    GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr");
+    REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr");
+    return;
+  }
+  int32_t cb_ret = report_callback_(
+      static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
+      static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT),
+      nullptr, 0);
+  if (cb_ret != 0) {
+    GELOGW("profiling plugin uninit failed, ret:%d", cb_ret);
+  }
+}
+
+// Returns the bit mask of profiling modules that StopProfiling() passes to rtProfilerStop().
+uint64_t PorfilingInit::GetProfilingModule() {
+  return PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | PROF_SCHEDULE_TIMELINE_MASK |
+         PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK |
+         PROF_AICORE_METRICS_MASK | PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK;
+}
+
+}  // namespace ge
\ No newline at end of file
diff --git a/ge/common/profiling/profiling_init.h b/ge/common/profiling/profiling_init.h
new file mode 100644
index 00000000..46630b4c
--- /dev/null
+++ b/ge/common/profiling/profiling_init.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef GE_COMMON_PROFILING_PROFILING_INIT_H_
+#define GE_COMMON_PROFILING_PROFILING_INIT_H_
+
+#include <cstdint>
+#include <string>
+
+#include "common/profiling/profiling_properties.h"
+#include "framework/common/ge_inner_error_codes.h"
+#include "framework/common/ge_types.h"
+#include "toolchain/prof_callback.h"
+
+namespace ge {
+// Process-wide singleton that starts (Init) and stops (StopProfiling) msprof-based profiling for GE.
+class PorfilingInit {
+ public:
+  static PorfilingInit &Instance();     // Returns the singleton instance.
+  Status Init(const Options &options);  // Resolves the profiling config and calls msprofInit() when enabled.
+  void StopProfiling();                 // Stops device profiling, then finalizes msprof.
+  void PluginUnInit() const;            // Sends MSPROF_REPORTER_UNINIT through report_callback_.
+
+ private:
+  PorfilingInit() = default;
+  ~PorfilingInit() = default;
+  Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, bool &is_execute_profiling);
+  Status ParseOptions(const std::string &options);  // Parses the training-trace part of the options JSON.
+  uint64_t GetProfilingModule();                    // Module bit mask used by StopProfiling().
+  MsprofReporterCallback *report_callback_ = nullptr;  // NOTE(review): never assigned; confirm '*' vs. typedef.
+};
+}  // namespace ge
+#endif  // GE_COMMON_PROFILING_PROFILING_INIT_H_
diff --git a/ge/common/profiling/profiling_properties.cc b/ge/common/profiling/profiling_properties.cc
new file mode 100755
index 00000000..a65e869f
--- /dev/null
+++ b/ge/common/profiling/profiling_properties.cc
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "profiling_properties.h"
+
+// NOTE(review): the header also declares SetTrainTrace, IsTrainTrace, SetFpBpPoint, SetDeviceId and GetDeviceID,
+// but this file does not define them; callers of those methods will fail to link until definitions are added.
+namespace ge {
+ProfilingProperties &ProfilingProperties::Instance() {  // process-wide singleton (function-local static)
+  static ProfilingProperties profiling_properties;
+  return profiling_properties;
+}
+
+void ProfilingProperties::SetLoadProfiling(bool is_load_profiling) {
+  std::lock_guard<std::mutex> lock(mutex_);  // mutex_ guards every flag accessor defined in this file
+  is_load_profiling_ = is_load_profiling;
+}
+
+bool ProfilingProperties::IsLoadProfiling() {
+  std::lock_guard<std::mutex> lock(mutex_);
+  return is_load_profiling_;
+}
+
+void ProfilingProperties::SetExecuteProfiling(bool is_exec_profiling) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  is_execute_profiling_ = is_exec_profiling;
+}
+
+bool ProfilingProperties::IsExecuteProfiling() {
+  std::lock_guard<std::mutex> lock(mutex_);
+  return is_execute_profiling_;
+}
+}  // namespace ge
\ No newline at end of file
diff --git a/ge/common/profiling/profiling_properties.h b/ge/common/profiling/profiling_properties.h
new file mode 100755
index 00000000..731daed0
--- /dev/null
+++ b/ge/common/profiling/profiling_properties.h
@@ -0,0 +1,64 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_COMMON_PROFILING_PROPERTIES_H_
+#define GE_COMMON_PROFILING_PROPERTIES_H_
+
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace ge {
+// Process-wide singleton holding the profiling switches, fp/bp points and device ids, with mutexes for access.
+class ProfilingProperties {
+ public:
+  // Returns the singleton instance.
+  static ProfilingProperties &Instance();
+
+  // Load-profiling switch.
+  void SetLoadProfiling(bool is_load_profiling);
+  bool IsLoadProfiling();
+
+  // Execute-profiling switch.
+  void SetExecuteProfiling(bool is_execute_profiling);
+  bool IsExecuteProfiling();
+
+  // Training-trace switch (presumably backed by is_training_trace_ - no definition in this patch, TODO confirm).
+  void SetTrainTrace(bool is_train_trace);
+  bool IsTrainTrace();
+
+  // fp/bp point names (presumably stored in fp_point_ / bp_point_ - TODO confirm once defined).
+  void SetFpBpPoint(const std::string &fp_point, const std::string &bp_point);
+
+  // Device ids. NOTE(review): element type restored as int32_t because callers cast elements to uint32_t - confirm.
+  void SetDeviceId(const std::vector<int32_t> &device_id);
+  std::vector<int32_t> GetDeviceID();
+
+ private:
+  ProfilingProperties() = default;
+  ~ProfilingProperties() = default;
+  std::mutex mutex_;
+  std::mutex point_mutex_;
+  bool is_load_profiling_ = false;
+  bool is_execute_profiling_ = false;
+  bool is_training_trace_ = false;
+  std::string fp_point_;
+  std::string bp_point_;
+  std::vector<int32_t> device_id_;
+};
+}  // namespace ge
+
+#endif  // GE_COMMON_PROFILING_PROPERTIES_H_