|
|
@@ -710,6 +710,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size |
|
|
|
} |
|
|
|
|
|
|
|
// collect profiling for ge |
|
|
|
GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed"); |
|
|
|
auto &profiling_manager = ProfilingManager::Instance(); |
|
|
|
if (profiling_manager.ProfilingModelLoadOn()) { |
|
|
|
Status p_ret = ReportProfilingData(); |
|
|
@@ -2086,12 +2087,62 @@ Status DavinciModel::SyncVarData() { |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
inline int64_t SumSize(const vector<int64_t> &size_list) { |
|
|
|
int64_t sum_size = 0; |
|
|
|
for (const int64_t &size : size_list) { |
|
|
|
sum_size += size; |
|
|
|
Status DavinciModel::InitModelProfile() { |
|
|
|
for (const auto &task : task_list_) { |
|
|
|
GE_CHECK_NOTNULL(task); |
|
|
|
const auto &fusion_op_info = task->GetFusionOpInfo(); |
|
|
|
// when type is RT_MODEL_TASK_KERNEL, ctx is not null |
|
|
|
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size()); |
|
|
|
op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID())); |
|
|
|
} |
|
|
|
|
|
|
|
std::set<uint32_t> task_id_set; |
|
|
|
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; |
|
|
|
using Range = std::pair<CIT, CIT>; |
|
|
|
for (const auto &task : task_list_) { |
|
|
|
GE_CHECK_NOTNULL(task); |
|
|
|
const auto &fusion_op_info = task->GetFusionOpInfo(); |
|
|
|
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
uint32_t task_id = task->GetTaskID(); |
|
|
|
uint32_t op_num = fusion_op_info->original_op_names.size(); |
|
|
|
uint32_t task_count = 0; |
|
|
|
if (task_id_set.count(task->GetTaskID()) != 0) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); |
|
|
|
GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index is out of range, index: %u", fusion_op_info->op_index); |
|
|
|
|
|
|
|
ProfileInfo profile; |
|
|
|
profile.fusion_info = *fusion_op_info; |
|
|
|
Range range = op_id_map_.equal_range(fusion_op_info->op_index); |
|
|
|
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { |
|
|
|
profile.task_count++; |
|
|
|
task_id_set.insert(range_idx->second); |
|
|
|
} |
|
|
|
|
|
|
|
// memory info |
|
|
|
TaskMemInfo &mem_info = profile.memory_info; |
|
|
|
const auto input_size = ModelUtils::GetInputSize(op_desc); |
|
|
|
const auto output_size = ModelUtils::GetOutputSize(op_desc); |
|
|
|
const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc); |
|
|
|
const auto weight_size = ModelUtils::GetWeightSize(op_desc); |
|
|
|
mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0); |
|
|
|
mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0); |
|
|
|
mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0); |
|
|
|
mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0); |
|
|
|
mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size; |
|
|
|
|
|
|
|
profile_list_.emplace_back(profile); |
|
|
|
} |
|
|
|
return sum_size; |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status DavinciModel::SinkModelProfile() { |
|
|
@@ -2099,18 +2150,12 @@ Status DavinciModel::SinkModelProfile() { |
|
|
|
auto &prof_mgr = ProfilingManager::Instance(); |
|
|
|
ReporterData reporter_data{}; |
|
|
|
// report model data tag name |
|
|
|
std::string tag_name; |
|
|
|
tag_name.append("model_load_info_").append(std::to_string(this->Id())); |
|
|
|
std::string tag_name("model_load_info_" + std::to_string(this->Id())); |
|
|
|
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, |
|
|
|
return FAILED, "Sink model tag memcpy error."); |
|
|
|
|
|
|
|
// Model Header |
|
|
|
string name; |
|
|
|
if (!om_name_.empty()) { |
|
|
|
name = om_name_; |
|
|
|
} else { |
|
|
|
name = name_; |
|
|
|
} |
|
|
|
std::string name = om_name_.empty() ? name_ : om_name_; |
|
|
|
size_t name_len = name.size(); |
|
|
|
reporter_data.deviceId = device_id_; |
|
|
|
reporter_data.data = (unsigned char *)&name_len; |
|
|
@@ -2142,126 +2187,67 @@ Status DavinciModel::SinkModelProfile() { |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
int32_t task_num = task_list_.size(); |
|
|
|
std::multimap<uint32_t, uint32_t> op_id_map; |
|
|
|
std::set<uint32_t> task_id_set; |
|
|
|
for (int32_t i = 0; i < task_num; i++) { |
|
|
|
auto task = task_list_[i]; |
|
|
|
GE_CHECK_NOTNULL(task); |
|
|
|
auto fusion_op_info = task->GetFusionOpInfo(); |
|
|
|
// when type is RT_MODEL_TASK_KERNEL, ctx is not null |
|
|
|
if (fusion_op_info != nullptr) { |
|
|
|
uint32_t op_num = fusion_op_info->original_op_names.size(); |
|
|
|
uint32_t task_id = task->GetTaskID(); |
|
|
|
if (op_num > 0) { |
|
|
|
GELOGI("task.id = %u, opNum = %u", task_id, op_num); |
|
|
|
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
struct memoryInfo { |
|
|
|
int64_t input_size; |
|
|
|
int64_t output_size; |
|
|
|
int64_t weight_size; |
|
|
|
int64_t workspace_size; |
|
|
|
int64_t total_size; |
|
|
|
|
|
|
|
memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {} |
|
|
|
}; |
|
|
|
|
|
|
|
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; |
|
|
|
using Range = std::pair<CIT, CIT>; |
|
|
|
for (int32_t i = 0; i < task_num; i++) { |
|
|
|
auto task = task_list_[i]; |
|
|
|
GE_CHECK_NOTNULL(task); |
|
|
|
auto fusion_op_info = task->GetFusionOpInfo(); |
|
|
|
if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) { |
|
|
|
uint32_t task_id = task->GetTaskID(); |
|
|
|
uint32_t op_num = fusion_op_info->original_op_names.size(); |
|
|
|
uint32_t task_count = 0; |
|
|
|
if (task_id_set.count(task_id) != 0) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
uint32_t op_id = fusion_op_info->op_index; |
|
|
|
Range range = op_id_map.equal_range(op_id); |
|
|
|
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { |
|
|
|
task_count++; |
|
|
|
uint32_t task_id = range_idx->second; |
|
|
|
task_id_set.insert(task_id); |
|
|
|
} |
|
|
|
|
|
|
|
// op name after fusion |
|
|
|
string fusion_op_name = fusion_op_info->op_name; |
|
|
|
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); |
|
|
|
reporter_data.data = (unsigned char *)&fusion_op_name_len; |
|
|
|
reporter_data.dataLen = sizeof(int32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
reporter_data.data = (unsigned char *)fusion_op_name.c_str(); |
|
|
|
reporter_data.dataLen = fusion_op_name_len; |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
// original op name before fusion |
|
|
|
reporter_data.data = (unsigned char *)&op_num; |
|
|
|
reporter_data.dataLen = sizeof(int32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
for (uint32_t k = 0; k < op_num; k++) { |
|
|
|
std::string op_name = fusion_op_info->original_op_names[k]; |
|
|
|
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); |
|
|
|
reporter_data.data = (unsigned char *)&op_name_len; |
|
|
|
reporter_data.dataLen = sizeof(int32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
reporter_data.data = (unsigned char *)op_name.c_str(); |
|
|
|
reporter_data.dataLen = op_name_len; |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
} |
|
|
|
|
|
|
|
// stream id info |
|
|
|
uint32_t streamId = task->GetStreamId(); |
|
|
|
reporter_data.data = (unsigned char *)&streamId; |
|
|
|
for (const ProfileInfo &profile : profile_list_) { |
|
|
|
// op name after fusion |
|
|
|
string fusion_op_name = profile.fusion_info.op_name; |
|
|
|
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); |
|
|
|
reporter_data.data = (unsigned char *)&fusion_op_name_len; |
|
|
|
reporter_data.dataLen = sizeof(int32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
reporter_data.data = (unsigned char *)fusion_op_name.c_str(); |
|
|
|
reporter_data.dataLen = fusion_op_name_len; |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
// original op name before fusion |
|
|
|
uint32_t op_num = profile.fusion_info.original_op_names.size(); |
|
|
|
reporter_data.data = (unsigned char *)&op_num; |
|
|
|
reporter_data.dataLen = sizeof(uint32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
for (uint32_t k = 0; k < op_num; k++) { |
|
|
|
std::string op_name = profile.fusion_info.original_op_names[k]; |
|
|
|
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); |
|
|
|
reporter_data.data = (unsigned char *)&op_name_len; |
|
|
|
reporter_data.dataLen = sizeof(int32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
// memory info |
|
|
|
struct memoryInfo memory_info; |
|
|
|
uint32_t op_index = fusion_op_info->op_index; |
|
|
|
auto iter = op_list_.find(op_index); |
|
|
|
GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index); |
|
|
|
auto op_desc = iter->second; |
|
|
|
memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc)); |
|
|
|
memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc)); |
|
|
|
memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); |
|
|
|
memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); |
|
|
|
memory_info.total_size = |
|
|
|
memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; |
|
|
|
reporter_data.data = (unsigned char *)&memory_info; |
|
|
|
reporter_data.dataLen = sizeof(struct memoryInfo); |
|
|
|
reporter_data.data = (unsigned char *)op_name.c_str(); |
|
|
|
reporter_data.dataLen = op_name_len; |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
} |
|
|
|
|
|
|
|
// task info |
|
|
|
reporter_data.data = (unsigned char *)&task_count; |
|
|
|
// stream id info |
|
|
|
uint32_t streamId = profile.fusion_info.stream_id; |
|
|
|
reporter_data.data = (unsigned char *)&streamId; |
|
|
|
reporter_data.dataLen = sizeof(int32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
reporter_data.data = (unsigned char *)&profile.memory_info; |
|
|
|
reporter_data.dataLen = sizeof(profile.memory_info); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
// task info |
|
|
|
reporter_data.data = (unsigned char *)&profile.task_count; |
|
|
|
reporter_data.dataLen = sizeof(uint32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); |
|
|
|
for (CIT idx = task_range.first; idx != task_range.second; ++idx) { |
|
|
|
uint32_t task_id = idx->second; |
|
|
|
reporter_data.data = (unsigned char *)&task_id; |
|
|
|
reporter_data.dataLen = sizeof(uint32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
|
|
|
|
Range task_range = op_id_map.equal_range(op_id); |
|
|
|
for (CIT idx = task_range.first; idx != task_range.second; ++idx) { |
|
|
|
uint32_t task_id = idx->second; |
|
|
|
reporter_data.data = (unsigned char *)&task_id; |
|
|
|
reporter_data.dataLen = sizeof(uint32_t); |
|
|
|
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, |
|
|
|
"Reporter data fail, model id:%u.", this->Id()); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
return SUCCESS; |
|
|
|