Browse Source

Feature: Support single op profiling

pull/496/head
l00444296 4 years ago
parent
commit
f607364e03
5 changed files with 105 additions and 4 deletions
  1. +1
    -1
      ge/executor/ge_executor.cc
  2. +78
    -2
      ge/single_op/single_op.cc
  3. +8
    -0
      ge/single_op/single_op.h
  4. +17
    -1
      ge/single_op/single_op_model.cc
  5. +1
    -0
      ge/single_op/single_op_model.h

+ 1
- 1
ge/executor/ge_executor.cc View File

@@ -244,7 +244,7 @@ Status GeExecutor::Initialize() {
// Start profiling // Start profiling
Options profiling_options; Options profiling_options;
profiling_options.device_id = 0; profiling_options.device_id = 0;
profiling_options.job_id = "";
profiling_options.job_id = "1";
ProfilingManager::Instance().Init(profiling_options); ProfilingManager::Instance().Init(profiling_options);


isInit_ = true; isInit_ = true;


+ 78
- 2
ge/single_op/single_op.cc View File

@@ -17,6 +17,7 @@
#include "single_op/single_op.h" #include "single_op/single_op.h"


#include "common/fmk_types.h" #include "common/fmk_types.h"
#include "common/ge_types.h"
#include "common/math/math_util.h" #include "common/math/math_util.h"
#include "common/profiling/profiling_manager.h" #include "common/profiling/profiling_manager.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
@@ -164,16 +165,53 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
return ret; return ret;
} }


uint32_t index = 0;
for (auto &task : tasks_) { for (auto &task : tasks_) {
ret = task->LaunchKernel(stream_); ret = task->LaunchKernel(stream_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
} }
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(index));
index++;
} }


return ret; return ret;
} }


// Reports profiling data for the op whose name is stored at op_name_[index].
//
// The caller (ExecuteAsync) invokes this right after launching each task and
// passes the position of that task in its loop.
//
// Behaviour:
//   - Returns SUCCESS immediately when ProfilingModelExecuteOn() is false.
//   - Otherwise queries the runtime via rtGetTaskIdAndStreamID, fills one
//     TaskDescInfo and hands it to ProfilingManager::ReportProfilingData
//     together with an empty compute-graph description list.
//
// @param index  Index into op_name_ of the op to report.
// @return SUCCESS when profiling is off or the report was submitted;
//         ACL_ERROR_GE_PARAM_INVALID when index is out of range of op_name_
//         or when rtGetTaskIdAndStreamID fails.
Status SingleOp::ProfilingTaskInfo(uint32_t index) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }
  if (op_name_.size() <= index) {
    // index is uint32_t and size() is size_t: use %u / %zu so the variadic
    // arguments are read with the correct width.
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "index[%u] is out of range of op_name_ size[%zu].", index, op_name_.size());
    return ACL_ERROR_GE_PARAM_INVALID;
  }
  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name_[index].c_str(), model_name_.c_str());
  std::vector<TaskDescInfo> task_desc_info;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // NOTE(review): presumably returns the ids of the task that was just
  // launched on the current stream -- confirm against the runtime API.
  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  TaskDescInfo tmp_task_desc_info;
  tmp_task_desc_info.model_name = model_name_;
  tmp_task_desc_info.op_name = op_name_[index];
  tmp_task_desc_info.block_dim = 0;  // block_dim is always reported as 0 here
  tmp_task_desc_info.task_id = task_id;
  tmp_task_desc_info.stream_id = stream_id;
  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name_[index].c_str(), task_id, stream_id);
  task_desc_info.emplace_back(tmp_task_desc_info);

  // No compute-graph information is collected for single-op execution; an
  // empty list is passed.
  std::vector<ComputeGraphDescInfo> compute_graph_info;

  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info,
                                        !profiling_manager.IsAclApiMode());
  return SUCCESS;
}

void SingleOp::SetStream(rtStream_t stream) { void SingleOp::SetStream(rtStream_t stream) {
stream_ = stream; stream_ = stream;
} }
@@ -263,6 +301,36 @@ Status DynamicSingleOp::ExecuteTbeTask(const vector<GeTensorDesc> &input_desc,
return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_);
} }


// Submits a profiling record for the single op held by this DynamicSingleOp.
//
// When ProfilingModelExecuteOn() is false nothing is reported and SUCCESS is
// returned. Otherwise the task/stream ids are fetched from the runtime with
// rtGetTaskIdAndStreamID, packed into one TaskDescInfo along with model_name_
// and op_name_, and passed to ProfilingManager::ReportProfilingData with an
// empty compute-graph description list.
//
// @return SUCCESS when profiling is off or the record was submitted;
//         ACL_ERROR_GE_PARAM_INVALID when rtGetTaskIdAndStreamID fails.
Status DynamicSingleOp::ProfilingTaskInfo() {
  const bool profiling_on = ProfilingManager::Instance().ProfilingModelExecuteOn();
  if (!profiling_on) {
    return SUCCESS;
  }
  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name_.c_str(), model_name_.c_str());

  // Ask the runtime for the identifiers to attach to the record.
  uint32_t cur_task_id = 0;
  uint32_t cur_stream_id = 0;
  const auto rt_ret = rtGetTaskIdAndStreamID(&cur_task_id, &cur_stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  // Describe the one task owned by this op; block_dim is reported as 0.
  TaskDescInfo desc;
  desc.model_name = model_name_;
  desc.op_name = op_name_;
  desc.block_dim = 0;
  desc.task_id = cur_task_id;
  desc.stream_id = cur_stream_id;
  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name_.c_str(), cur_task_id,
         cur_stream_id);

  std::vector<TaskDescInfo> task_descs;
  task_descs.push_back(desc);

  // Compute-graph descriptions are left empty for this report.
  std::vector<ComputeGraphDescInfo> graph_descs;

  auto &manager = ProfilingManager::Instance();
  const bool check_device = !manager.IsAclApiMode();
  manager.ReportProfilingData(model_id_, task_descs, graph_descs, check_device);
  return SUCCESS;
}

Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
const vector<DataBuffer> &input_buffers, const vector<DataBuffer> &input_buffers,
vector<GeTensorDesc> &output_desc, vector<GeTensorDesc> &output_desc,
@@ -281,9 +349,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
} }


if (op_task_->GetOpTaskType() == OP_TASK_TBE) { if (op_task_->GetOpTaskType() == OP_TASK_TBE) {
return ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
if (ret == SUCCESS) {
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo());
}
return ret;
} else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) {
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
if (aicpu_ret == SUCCESS) {
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo());
}
return aicpu_ret;
} else { } else {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID,
"Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u",


+ 8
- 0
ge/single_op/single_op.h View File

@@ -42,6 +42,7 @@ class SingleOp {
Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status ProfilingTaskInfo(uint32_t index);


friend class SingleOpModel; friend class SingleOpModel;
std::mutex *stream_mutex_; std::mutex *stream_mutex_;
@@ -53,7 +54,10 @@ class SingleOp {
std::vector<uintptr_t> args_; std::vector<uintptr_t> args_;


std::vector<OpTask *> tasks_; std::vector<OpTask *> tasks_;
std::vector<std::string> op_name_;
std::vector<std::vector<uintptr_t *>> arg_table_; std::vector<std::vector<uintptr_t *>> arg_table_;
std::string model_name_;
uint32_t model_id_ = 0;
}; };


class DynamicSingleOp { class DynamicSingleOp {
@@ -79,6 +83,7 @@ class DynamicSingleOp {
const vector<void *> &inputs, const vector<void *> &inputs,
vector<GeTensorDesc> &output_desc, vector<GeTensorDesc> &output_desc,
vector<void *> &outputs); vector<void *> &outputs);
Status ProfilingTaskInfo();


std::unique_ptr<OpTask> op_task_; std::unique_ptr<OpTask> op_task_;
uintptr_t resource_id_ = 0; uintptr_t resource_id_ = 0;
@@ -86,6 +91,9 @@ class DynamicSingleOp {
rtStream_t stream_ = nullptr; rtStream_t stream_ = nullptr;
size_t num_inputs_ = 0; size_t num_inputs_ = 0;
size_t num_outputs_ = 0; size_t num_outputs_ = 0;
std::string model_name_;
std::string op_name_;
uint32_t model_id_ = 0;
}; };
} // namespace ge } // namespace ge
#endif // GE_SINGLE_OP_SINGLE_OP_H_ #endif // GE_SINGLE_OP_SINGLE_OP_H_

+ 17
- 1
ge/single_op/single_op_model.cc View File

@@ -157,6 +157,7 @@ Status SingleOpModel::LoadAllNodes() {
auto ge_model = model_helper_.GetGeModel(); auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model); GE_CHECK_NOTNULL(ge_model);
Graph graph = ge_model->GetGraph(); Graph graph = ge_model->GetGraph();
model_id_ = ge_model->GetModelId();
auto compute_graph = GraphUtils::GetComputeGraph(graph); auto compute_graph = GraphUtils::GetComputeGraph(graph);
if (compute_graph == nullptr) { if (compute_graph == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str());
@@ -222,6 +223,8 @@ Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) {
} }


single_op.args_.resize(arg_index); single_op.args_.resize(arg_index);
single_op.model_name_ = model_name_;
single_op.model_id_ = model_id_;
return SUCCESS; return SUCCESS;
} }


@@ -245,10 +248,11 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
} }
string tbe_op_name = op_list_[context.op_index()]->GetName();
single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
ParseArgTable(tbe_task, single_op); ParseArgTable(tbe_task, single_op);
single_op.tasks_.emplace_back(tbe_task); single_op.tasks_.emplace_back(tbe_task);
single_op.op_name_.emplace_back(tbe_op_name);
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task"); GELOGD("Building AICPU_CC task");
OpTask *task = nullptr; OpTask *task = nullptr;
@@ -258,7 +262,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
} }
string aicpu_op_name = op_list_[context.op_index()]->GetName();
single_op.tasks_.emplace_back(task); single_op.tasks_.emplace_back(task);
single_op.op_name_.emplace_back(aicpu_op_name);
} else { } else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
@@ -273,7 +279,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
} }
string op_name = op_list_[task_def.kernel_ex().op_index()]->GetName();
single_op.tasks_.emplace_back(aicpu_task); single_op.tasks_.emplace_back(aicpu_task);
single_op.op_name_.emplace_back(op_name);
} else { } else {
// skip // skip
GELOGD("Skip task type: %d", static_cast<int>(task_type)); GELOGD("Skip task type: %d", static_cast<int>(task_type));
@@ -393,6 +401,8 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
GELOGD("Building TBE task"); GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr; TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
string te_op_name = op_list_[context.op_index()]->GetName();
single_op.op_name_ = te_op_name;
single_op.op_task_.reset(tbe_task); single_op.op_task_.reset(tbe_task);
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task"); GELOGD("Building AICPU_CC task");
@@ -400,6 +410,8 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id);
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id));
string aicpu_op_name = op_list_[context.op_index()]->GetName();
single_op.op_name_ = aicpu_op_name;
single_op.op_task_.reset(task); single_op.op_task_.reset(task);
} else { } else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
@@ -446,6 +458,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
const TaskDef &copy_task_def = tasks[i]; const TaskDef &copy_task_def = tasks[i];
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
} }
string op_name = op_list_[task_def.kernel_ex().op_index()]->GetName();
single_op.op_name_ = op_name;
single_op.op_task_.reset(aicpu_task); single_op.op_task_.reset(aicpu_task);
} else { } else {
// skip // skip
@@ -458,6 +472,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) { Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) {
single_op.num_inputs_ = data_ops_.size(); single_op.num_inputs_ = data_ops_.size();
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
single_op.model_name_ = model_name_;
single_op.model_id_ = model_id_;
ParseOpModelParams(model_helper_, model_params_); ParseOpModelParams(model_helper_, model_params_);
return BuildTaskListForDynamicOp(single_op); return BuildTaskListForDynamicOp(single_op);
} }


+ 1
- 0
ge/single_op/single_op_model.h View File

@@ -77,6 +77,7 @@ class SingleOpModel {
void ParseArgTable(TbeOpTask *task, SingleOp &op); void ParseArgTable(TbeOpTask *task, SingleOp &op);


std::string model_name_; std::string model_name_;
uint32_t model_id_ = 0;
const void *ori_model_data_; const void *ori_model_data_;
uint32_t ori_model_size_; uint32_t ori_model_size_;




Loading…
Cancel
Save