|
|
@@ -1,369 +0,0 @@ |
|
|
|
/** |
|
|
|
* Copyright 2020 Huawei Technologies Co., Ltd |
|
|
|
* |
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
* you may not use this file except in compliance with the License. |
|
|
|
* You may obtain a copy of the License at |
|
|
|
* |
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
|
|
* |
|
|
|
* Unless required by applicable law or agreed to in writing, software |
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
|
* See the License for the specific language governing permissions and |
|
|
|
* limitations under the License. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "ge/ge_prof.h" |
|
|
|
#include "ge/ge_api.h" |
|
|
|
#include "init/gelib.h" |
|
|
|
#include "common/debug/log.h" |
|
|
|
#include "framework/common/debug/ge_log.h" |
|
|
|
#include "common/profiling/profiling_manager.h" |
|
|
|
#include "graph/load/graph_loader.h" |
|
|
|
#include "toolchain/prof_acl_api.h" |
|
|
|
|
|
|
|
using std::map; |
|
|
|
using std::string; |
|
|
|
using std::vector; |
|
|
|
|
|
|
|
namespace { |
|
|
|
const uint32_t kMaxDeviceNum = 64; |
|
|
|
const uint32_t kDeviceListIndex = 3; |
|
|
|
const std::string kProfilingInit = "prof_init"; |
|
|
|
const std::string kProfilingFinalize = "prof_finalize"; |
|
|
|
const std::string kProfilingStart = "prof_start"; |
|
|
|
const std::string kProfilingStop = "prof_stop"; |
|
|
|
const std::string kDeviceNums = "devNums"; |
|
|
|
const std::string kDeviceIdList = "devIdList"; |
|
|
|
const std::string kAicoreMetrics = "aicoreMetrics"; |
|
|
|
|
|
|
|
const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToString = { |
|
|
|
{ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, |
|
|
|
{ge::kAicorePipeline, "AICORE_PIPELINE"}, |
|
|
|
{ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, |
|
|
|
{ge::kAicoreMemory, "AICORE_MEMORY"}, |
|
|
|
{ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, |
|
|
|
{ge::kAicoreStall, "AICORE_STALL"}}; |
|
|
|
} // namespace |
|
|
|
|
|
|
|
static bool g_graph_prof_init_ = false; |
|
|
|
static std::mutex g_prof_mutex_; |
|
|
|
|
|
|
|
namespace ge { |
|
|
|
struct aclgrphProfConfig { |
|
|
|
ProfConfig config; |
|
|
|
}; |
|
|
|
|
|
|
|
Status aclgrphProfInit(const char *profiler_path, uint32_t length) { |
|
|
|
GELOGT(TRACE_INIT, "Graph prof init start"); |
|
|
|
|
|
|
|
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); |
|
|
|
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { |
|
|
|
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
std::lock_guard<std::mutex> lock(g_prof_mutex_); |
|
|
|
if (g_graph_prof_init_) { |
|
|
|
GELOGW("Multi graph profiling initializations."); |
|
|
|
return GE_PROF_MULTI_INIT; |
|
|
|
} |
|
|
|
|
|
|
|
Status ret = CheckPath(profiler_path, length); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Profiling config path is invalid."); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
// if command mode is set, just return |
|
|
|
if (ProfilingManager::Instance().ProfilingOn()) { |
|
|
|
GELOGW("Graph prof init failed, cause profiling command pattern is running."); |
|
|
|
return GE_PROF_MODE_CONFLICT; |
|
|
|
} |
|
|
|
|
|
|
|
ret = ProfInit(profiler_path); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "ProfInit init fail"); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
GraphLoader graph_loader; |
|
|
|
Command command; |
|
|
|
command.cmd_params.clear(); |
|
|
|
command.cmd_type = kProfilingInit; |
|
|
|
command.module_index = PROF_MODEL_LOAD; |
|
|
|
ret = graph_loader.CommandHandle(command); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
if (!g_graph_prof_init_) { |
|
|
|
g_graph_prof_init_ = true; |
|
|
|
GELOGI("Profiling init successfully."); |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Successfully execute GraphProfInit."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status aclgrphProfFinalize() { |
|
|
|
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); |
|
|
|
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { |
|
|
|
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
std::lock_guard<std::mutex> lock(g_prof_mutex_); |
|
|
|
// if command mode is set, just return |
|
|
|
if (ProfilingManager::Instance().ProfilingOn()) { |
|
|
|
GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); |
|
|
|
return GE_PROF_MODE_CONFLICT; |
|
|
|
} |
|
|
|
|
|
|
|
if (!g_graph_prof_init_) { |
|
|
|
GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); |
|
|
|
return GE_PROF_NOT_INIT; |
|
|
|
} |
|
|
|
GraphLoader graph_loader; |
|
|
|
Command command; |
|
|
|
command.cmd_params.clear(); |
|
|
|
command.cmd_type = kProfilingFinalize; |
|
|
|
Status ret = graph_loader.CommandHandle(command); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
ret = ProfFinalize(); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Finalize profiling failed, result = %d", ret); |
|
|
|
} |
|
|
|
|
|
|
|
if (ret == SUCCESS) { |
|
|
|
g_graph_prof_init_ = false; |
|
|
|
GELOGI("Successfully execute GraphProfFinalize."); |
|
|
|
} |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<string> &prof_config_params) { |
|
|
|
prof_config_params.clear(); |
|
|
|
prof_config_params.emplace_back(kDeviceNums); |
|
|
|
prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); |
|
|
|
prof_config_params.emplace_back(kDeviceIdList); |
|
|
|
std::string devID = ""; |
|
|
|
if (profiler_config->config.devNums == 0) { |
|
|
|
GELOGW("The device num is invalid."); |
|
|
|
return false; |
|
|
|
} |
|
|
|
for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { |
|
|
|
devID.append(std::to_string(profiler_config->config.devIdList[i])); |
|
|
|
if (i != profiler_config->config.devNums - 1) { |
|
|
|
devID.append(","); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
prof_config_params.push_back(devID); |
|
|
|
prof_config_params.push_back(kAicoreMetrics); |
|
|
|
auto iter = |
|
|
|
kProfAicoreMetricsToString.find(static_cast<ProfilingAicoreMetrics>(profiler_config->config.aicoreMetrics)); |
|
|
|
if (iter == kProfAicoreMetricsToString.end()) { |
|
|
|
GELOGW("The prof aicore metrics is invalid."); |
|
|
|
return false; |
|
|
|
} |
|
|
|
prof_config_params.push_back(iter->second); |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { |
|
|
|
if (deviceid_list == nullptr) { |
|
|
|
GELOGE(PARAM_INVALID, "deviceIdList is nullptr"); |
|
|
|
return false; |
|
|
|
} |
|
|
|
if (device_nums == 0 || device_nums > kMaxDeviceNum) { |
|
|
|
GELOGE(PARAM_INVALID, "The device nums is invalid."); |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
// real device num |
|
|
|
int32_t dev_count = 0; |
|
|
|
rtError_t rt_err = rtGetDeviceCount(&dev_count); |
|
|
|
if (rt_err != RT_ERROR_NONE) { |
|
|
|
GELOGE(INTERNAL_ERROR, "Get the Device count fail."); |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
if (device_nums > static_cast<uint32_t>(dev_count)) { |
|
|
|
GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
std::unordered_set<uint32_t> record; |
|
|
|
for (size_t i = 0; i < device_nums; ++i) { |
|
|
|
uint32_t dev_id = deviceid_list[i]; |
|
|
|
if (dev_id >= static_cast<uint32_t>(dev_count)) { |
|
|
|
GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); |
|
|
|
return false; |
|
|
|
} |
|
|
|
if (record.count(dev_id) > 0) { |
|
|
|
GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); |
|
|
|
return false; |
|
|
|
} |
|
|
|
record.insert(dev_id); |
|
|
|
} |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, |
|
|
|
ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, |
|
|
|
uint64_t data_type_config) { |
|
|
|
if (!isProfConfigValid(deviceid_list, device_nums)) { |
|
|
|
return nullptr; |
|
|
|
} |
|
|
|
aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig(); |
|
|
|
if (config == nullptr) { |
|
|
|
GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail"); |
|
|
|
return nullptr; |
|
|
|
} |
|
|
|
config->config.devNums = device_nums; |
|
|
|
if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list, |
|
|
|
device_nums * sizeof(uint32_t)) != EOK) { |
|
|
|
GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums); |
|
|
|
delete config; |
|
|
|
return nullptr; |
|
|
|
} |
|
|
|
|
|
|
|
config->config.aicoreMetrics = static_cast<ProfAicoreMetrics>(aicore_metrics); |
|
|
|
config->config.dataTypeConfig = data_type_config; |
|
|
|
GELOGI("Successfully create prof config."); |
|
|
|
return config; |
|
|
|
} |
|
|
|
|
|
|
|
Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) { |
|
|
|
if (profiler_config == nullptr) { |
|
|
|
GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr"); |
|
|
|
return PARAM_INVALID; |
|
|
|
} |
|
|
|
|
|
|
|
delete profiler_config; |
|
|
|
GELOGI("Successfully destroy prof config."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { |
|
|
|
if (profiler_config == nullptr) { |
|
|
|
GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); |
|
|
|
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { |
|
|
|
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
std::lock_guard<std::mutex> lock(g_prof_mutex_); |
|
|
|
// if command mode is set, just return |
|
|
|
if (ProfilingManager::Instance().ProfilingOn()) { |
|
|
|
GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); |
|
|
|
return GE_PROF_MODE_CONFLICT; |
|
|
|
} |
|
|
|
if (!g_graph_prof_init_) { |
|
|
|
GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); |
|
|
|
return GE_PROF_NOT_INIT; |
|
|
|
} |
|
|
|
|
|
|
|
Status ret = ProfStartProfiling(&profiler_config->config); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Start profiling failed, prof result = %d", ret); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
std::vector<string> prof_params; |
|
|
|
if (!TransProfConfigToParam(profiler_config, prof_params)) { |
|
|
|
GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); |
|
|
|
return PARAM_INVALID; |
|
|
|
} |
|
|
|
|
|
|
|
GraphLoader graph_loader; |
|
|
|
Command command; |
|
|
|
command.cmd_params.clear(); |
|
|
|
command.cmd_type = kProfilingStart; |
|
|
|
command.cmd_params = prof_params; |
|
|
|
command.module_index = profiler_config->config.dataTypeConfig; |
|
|
|
GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), |
|
|
|
prof_params[kDeviceListIndex].c_str(), command.module_index); |
|
|
|
ret = graph_loader.CommandHandle(command); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Handle profiling command failed"); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Successfully execute GraphProfStartProfiling."); |
|
|
|
|
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { |
|
|
|
if (profiler_config == nullptr) { |
|
|
|
GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); |
|
|
|
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { |
|
|
|
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
std::lock_guard<std::mutex> lock(g_prof_mutex_); |
|
|
|
// if command mode is set, just return |
|
|
|
if (ProfilingManager::Instance().ProfilingOn()) { |
|
|
|
GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); |
|
|
|
return GE_PROF_MODE_CONFLICT; |
|
|
|
} |
|
|
|
if (!g_graph_prof_init_) { |
|
|
|
GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); |
|
|
|
return GE_PROF_NOT_INIT; |
|
|
|
} |
|
|
|
|
|
|
|
for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { |
|
|
|
uint64_t data_type_config; |
|
|
|
Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config); |
|
|
|
if (status != SUCCESS) { |
|
|
|
GELOGE(status, "Prof get data type config failed, prof result = %d", status); |
|
|
|
return status; |
|
|
|
} |
|
|
|
if (data_type_config != profiler_config->config.dataTypeConfig) { |
|
|
|
GELOGE(FAILED, "data type config verify failed"); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
std::vector<string> prof_params; |
|
|
|
if (!TransProfConfigToParam(profiler_config, prof_params)) { |
|
|
|
GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); |
|
|
|
return PARAM_INVALID; |
|
|
|
} |
|
|
|
|
|
|
|
GraphLoader graph_loader; |
|
|
|
Command command; |
|
|
|
command.cmd_params.clear(); |
|
|
|
command.cmd_type = kProfilingStop; |
|
|
|
command.cmd_params = prof_params; |
|
|
|
command.module_index = profiler_config->config.dataTypeConfig; |
|
|
|
GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), |
|
|
|
prof_params[kDeviceListIndex].c_str(), command.module_index); |
|
|
|
Status ret = graph_loader.CommandHandle(command); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Handle profiling command failed"); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
ret = ProfStopProfiling(&profiler_config->config); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "Stop profiling failed, prof result = %d", ret); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Successfully execute GraphProfStopProfiling."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
} // namespace ge |