Browse Source

common log optimize

pull/1572/head
liyihan2@huawei.com 4 years ago
parent
commit
822ec6c3da
4 changed files with 56 additions and 36 deletions
  1. +2
    -2
      ge/common/kernel_store.cc
  2. +2
    -2
      ge/common/op/ge_op_utils.cc
  3. +11
    -6
      ge/common/profiling/ge_profiling.cc
  4. +41
    -26
      ge/common/profiling/profiling_manager.cc

+ 2
- 2
ge/common/kernel_store.cc View File

@@ -37,8 +37,8 @@ bool KernelStore::Build() {
try { try {
buffer_.resize(total_len); buffer_.resize(total_len);
} catch (std::bad_alloc &e) { } catch (std::bad_alloc &e) {
GELOGE(ge::MEMALLOC_FAILED, "[Malloc][Memmory]All build memory failed, memory size %zu, exception %s",
total_len, e.what());
GELOGE(ge::MEMALLOC_FAILED, "[Malloc][Memmory]All build memory failed, memory size %zu, "
"exception %s", total_len, e.what());
REPORT_CALL_ERROR("E19999", "All build memory failed, memory size %zu, exception %s", REPORT_CALL_ERROR("E19999", "All build memory failed, memory size %zu, exception %s",
total_len, e.what()); total_len, e.what());
return false; return false;


+ 2
- 2
ge/common/op/ge_op_utils.cc View File

@@ -469,8 +469,8 @@ OpUtils::GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType ty
dims.push_back(shape_data[i]); dims.push_back(shape_data[i]);
} }
} else { } else {
GELOGE(PARAM_INVALID, "[Check][DataType]Invalid, type only can be DT_INT32 or DT_INT64, type is %s",
TypeUtils::DataTypeToSerialString(type).c_str());
GELOGE(PARAM_INVALID, "[Check][DataType]Invalid, type only can be DT_INT32 or DT_INT64, "
"type is %s", TypeUtils::DataTypeToSerialString(type).c_str());
REPORT_INNER_ERROR("E19999", "Data type %s check invalid, only can be DT_INT32 or DT_INT64", REPORT_INNER_ERROR("E19999", "Data type %s check invalid, only can be DT_INT32 or DT_INT64",
TypeUtils::DataTypeToSerialString(type).c_str()); TypeUtils::DataTypeToSerialString(type).c_str());
return PARAM_INVALID; return PARAM_INVALID;


+ 11
- 6
ge/common/profiling/ge_profiling.cc View File

@@ -86,8 +86,10 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
} }


if (device_nums > static_cast<uint32_t>(dev_count)) { if (device_nums > static_cast<uint32_t>(dev_count)) {
GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", device_nums, dev_count);
REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", device_nums, dev_count);
GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]",
device_nums, dev_count);
REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]",
device_nums, dev_count);
return false; return false;
} }


@@ -95,7 +97,8 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
for (size_t i = 0; i < device_nums; ++i) { for (size_t i = 0; i < device_nums; ++i) {
uint32_t dev_id = deviceid_list[i]; uint32_t dev_id = deviceid_list[i];
if (dev_id >= static_cast<uint32_t>(dev_count)) { if (dev_id >= static_cast<uint32_t>(dev_count)) {
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", dev_id, dev_count);
GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)",
dev_id, dev_count);
REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count);
return false; return false;
} }
@@ -149,8 +152,10 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
// Pass MsprofReporterCallback to runtime // Pass MsprofReporterCallback to runtime
ge::Status rt_ret = rtSetMsprofReporterCallback(func); ge::Status rt_ret = rtSetMsprofReporterCallback(func);
if (rt_ret != ge::SUCCESS) { if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u", rt_ret);
REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u", rt_ret);
GELOGE(rt_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u",
rt_ret);
REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u",
rt_ret);
return rt_ret; return rt_ret;
} }
// Pass MsprofReporterCallback to hccl // Pass MsprofReporterCallback to hccl
@@ -173,7 +178,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
return ge::FAILED; return ge::FAILED;
} }
if (!TransProfConfigToParam(*prof_config_param, prof_params)) { if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed"); GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed");
return ge::PARAM_INVALID; return ge::PARAM_INVALID;


+ 41
- 26
ge/common/profiling/profiling_manager.cc View File

@@ -105,7 +105,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d",
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret);
REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d",
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret);
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS),
cb_ret);
return FAILED; return FAILED;
} }
GELOGI("Profiling init success"); GELOGI("Profiling init success");
@@ -126,8 +127,10 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
// enable profiling by ge option // enable profiling by ge option
if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", options.profiling_options.c_str());
REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", options.profiling_options.c_str());
GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s",
options.profiling_options.c_str());
REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed",
options.profiling_options.c_str());
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }
is_execute_profiling_ = true; is_execute_profiling_ = true;
@@ -188,7 +191,8 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
} }
GELOGI("GE profiling training trace:%s", training_trace.c_str()); GELOGI("GE profiling training trace:%s", training_trace.c_str());
if (training_trace != "on") { if (training_trace != "on") {
GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", training_trace.c_str());
GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.",
training_trace.c_str());
REPORT_CALL_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str()); REPORT_CALL_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str());
return ge::PARAM_INVALID; return ge::PARAM_INVALID;
} }
@@ -292,8 +296,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
try { try {
reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) { } catch (std::exception &e) {
GELOGE(FAILED, "[Convert][ReportData]Failed to convert json to string, reason %s.", e.what());
REPORT_CALL_ERROR("E19999", "Failed to convert reported_data from json to string, reason %s", e.what());
GELOGE(FAILED, "[Convert][ReportData]Failed to convert json to string, reason %s.",
e.what());
REPORT_CALL_ERROR("E19999", "Failed to convert reported_data from json to string, reason %s",
e.what());
return ; return ;
} catch (...) { } catch (...) {
GELOGE(FAILED, "[Convert][ReportedData]Failed to convert JSON to string"); GELOGE(FAILED, "[Convert][ReportedData]Failed to convert JSON to string");
@@ -332,7 +338,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[Get][RtsInfo]Task_id and stream_id failed, ret 0x%X", rt_ret); GELOGE(RT_FAILED, "[Get][RtsInfo]Task_id and stream_id failed, ret 0x%X", rt_ret);
REPORT_CALL_ERROR("E19999", "Get task_id and stream_id failed, ret 0x%X", rt_ret);
REPORT_CALL_ERROR("E19999", "Get task_id and stream_id failed, ret 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id);
@@ -350,8 +356,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil
try { try {
reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) { } catch (std::exception &e) {
GELOGE(FAILED, "[Convert][ReportedData]Failed to convert from json to string, reason: %s", e.what());
REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string, reason: %s", e.what());
GELOGE(FAILED, "[Convert][ReportedData]Failed to convert from json to string, reason: %s",
e.what());
REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string, reason: %s",
e.what());
} catch (...) { } catch (...) {
GELOGE(FAILED, "[Convert][ReportedData]Failed to convert from json to string"); GELOGE(FAILED, "[Convert][ReportedData]Failed to convert from json to string");
REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string"); REPORT_CALL_ERROR("E19999", "Failed to convert reported data from json to string");
@@ -530,8 +538,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo
} }
UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module); UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module);
} else { } else {
GELOGE(FAILED, "[Cancel][DeviceId]The device_id %u has not been subscribed, do not need to cancel",
device[0]);
GELOGE(FAILED, "[Cancel][DeviceId]The device_id %u has not been subscribed, "
"do not need to cancel", device[0]);
REPORT_CALL_ERROR("E19999", "The device_id %u has not been subscribed, do not need to cancel", REPORT_CALL_ERROR("E19999", "The device_id %u has not been subscribed, do not need to cancel",
device[0]); device[0]);
return FAILED; return FAILED;
@@ -654,14 +662,16 @@ Status ProfilingManager::ProfParseDeviceId(const std::map<std::string, std::stri
decvice_id[i].c_str()); decvice_id[i].c_str());
return FAILED; return FAILED;
} catch (...) { } catch (...) {
GELOGE(FAILED, "[Parse][DeviceId]Faield, it cannot change to int, %s", decvice_id[i].c_str());
GELOGE(FAILED, "[Parse][DeviceId]Faield, it cannot change to int, %s",
decvice_id[i].c_str());
REPORT_CALL_ERROR("E19999", "Parse device id %s failed, it cannot change to int", REPORT_CALL_ERROR("E19999", "Parse device id %s failed, it cannot change to int",
decvice_id[i].c_str()); decvice_id[i].c_str());
return FAILED; return FAILED;
} }
} }
} else { } else {
GELOGE(FAILED, "[Parse][DeviceId]Config para not contain device id %s list", decvice_id[i].c_str());
GELOGE(FAILED, "[Parse][DeviceId]Config para not contain device id %s list",
decvice_id[i].c_str());
REPORT_CALL_ERROR("E19999", "Parse device id failed, config para not contain device id %s list", REPORT_CALL_ERROR("E19999", "Parse device id failed, config para not contain device id %s list",
decvice_id[i].c_str()); decvice_id[i].c_str());
return FAILED; return FAILED;
@@ -680,14 +690,17 @@ Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string>
device_num = std::stoi(iter->second); device_num = std::stoi(iter->second);
} catch (std::invalid_argument &) { } catch (std::invalid_argument &) {
GELOGE(FAILED, "[Parse][Param]Failed, device num %s is invalid", iter->second.c_str()); GELOGE(FAILED, "[Parse][Param]Failed, device num %s is invalid", iter->second.c_str());
REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s is invalid", iter->second.c_str());
REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s is invalid",
iter->second.c_str());
return FAILED; return FAILED;
} catch (std::out_of_range &) { } catch (std::out_of_range &) {
GELOGE(FAILED, "[Parse][Param]Failed, device num %s is out of range", iter->second.c_str()); GELOGE(FAILED, "[Parse][Param]Failed, device num %s is out of range", iter->second.c_str());
REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s is out of range", iter->second.c_str());
REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s is out of range",
iter->second.c_str());
return FAILED; return FAILED;
} catch (...) { } catch (...) {
GELOGE(FAILED, "[Parse][Param]Failed, device num %s cannot change to int", iter->second.c_str());
GELOGE(FAILED, "[Parse][Param]Failed, device num %s cannot change to int",
iter->second.c_str());
REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s cannot change to int", REPORT_CALL_ERROR("E19999", "Parse param failed, device num %s cannot change to int",
iter->second.c_str()); iter->second.c_str());
return FAILED; return FAILED;
@@ -706,8 +719,8 @@ Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string>
} }


if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) { if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) {
GELOGE(FAILED, "[Parse][Param]Failed, config para device num %d not equal to device list size %zu",
device_num, device_list.size());
GELOGE(FAILED, "[Parse][Param]Failed, config para device num %d not equal to "
"device list size %zu", device_num, device_list.size());
REPORT_CALL_ERROR("E19999", "[Parse][Param]Failed, config para device num %d " REPORT_CALL_ERROR("E19999", "[Parse][Param]Failed, config para device num %d "
"not equal to device list size %zu", device_num, device_list.size()); "not equal to device list size %zu", device_num, device_list.size());
return FAILED; return FAILED;
@@ -727,17 +740,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
int32_t device_num = 0; int32_t device_num = 0;
vector<int32_t> device_list; vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
GELOGE(FAILED, "[Start][Profiling]Prof start parse param failed, device num %d, device list size %zu",
device_num, device_list.size());
REPORT_CALL_ERROR("E19999", "Prof start parse param failed, device num %d, device list size %zu",
device_num, device_list.size());
GELOGE(FAILED, "[Start][Profiling]Prof start parse param failed, device num %d, "
"device list size %zu", device_num, device_list.size());
REPORT_CALL_ERROR("E19999", "Prof start parse param failed, device num %d, "
"device list size %zu", device_num, device_list.size());
return FAILED; return FAILED;
} }


auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]); auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) { if (device_id_ptr == nullptr) {
GELOGE(FAILED, "[Start][Profiling]Failed, device id ptr is null, device num %d", device_num); GELOGE(FAILED, "[Start][Profiling]Failed, device id ptr is null, device num %d", device_num);
REPORT_CALL_ERROR("E19999", "Start profiling failed, device id ptr is null, device num %d", device_num);
REPORT_CALL_ERROR("E19999", "Start profiling failed, device id ptr is null, device num %d",
device_num);
return FAILED; return FAILED;
} }
for (int32_t i = 0; i < device_num; i++) { for (int32_t i = 0; i < device_num; i++) {
@@ -777,8 +791,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
int32_t device_num = 0; int32_t device_num = 0;
vector<int32_t> device_list; vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
GELOGE(FAILED, "[Stop][Profiling]Prof stop parse param failed, device num %d, device list size %zu".
device_num, device_list.size());
GELOGE(FAILED, "[Stop][Profiling]Prof stop parse param failed, device num %d, "
"device list size %zu", device_num, device_list.size());
REPORT_CALL_ERROR("E19999", "Prof stop parse param failed, device num %d, device list size %zu", REPORT_CALL_ERROR("E19999", "Prof stop parse param failed, device num %d, device list size %zu",
device_num, device_list.size()); device_num, device_list.size());
return FAILED; return FAILED;
@@ -786,7 +800,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]); auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) { if (device_id_ptr == nullptr) {
GELOGE(FAILED, "[Stop][Profiling]Failed, device id ptr is null, device num %d", device_num); GELOGE(FAILED, "[Stop][Profiling]Failed, device id ptr is null, device num %d", device_num);
REPORT_CALL_ERROR("E19999", "Stop profiling failed, device id ptr is null, device num %d", device_num);
REPORT_CALL_ERROR("E19999", "Stop profiling failed, device id ptr is null, device num %d",
device_num);
return FAILED; return FAILED;
} }
for (int32_t i = 0; i < device_num; i++) { for (int32_t i = 0; i < device_num; i++) {


Loading…
Cancel
Save