@@ -17,6 +17,7 @@ | |||||
#include "single_op/single_op.h" | #include "single_op/single_op.h" | ||||
#include "common/fmk_types.h" | #include "common/fmk_types.h" | ||||
#include "common/ge_types.h" | |||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
@@ -164,16 +165,53 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
return ret; | return ret; | ||||
} | } | ||||
uint32_t index = 0; | |||||
for (auto &task : tasks_) { | for (auto &task : tasks_) { | ||||
ret = task->LaunchKernel(stream_); | ret = task->LaunchKernel(stream_); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(index)); | |||||
index++; | |||||
} | } | ||||
return ret; | return ret; | ||||
} | } | ||||
// Reports profiling information for the task at position `index` of this op.
//
// Returns SUCCESS immediately when ProfilingManager::ProfilingModelExecuteOn() is false.
// Otherwise it queries the runtime for a task id / stream id pair, packs them together with
// model_name_ and op_name_[index] into a single TaskDescInfo, and passes that (plus an empty
// ComputeGraphDescInfo list) to ProfilingManager::ReportProfilingData.
//
// @param index  Position in op_name_ of the op to report.
// @return SUCCESS when profiling is off or the report was submitted;
//         ACL_ERROR_GE_PARAM_INVALID when `index` is out of range of op_name_ or when
//         rtGetTaskIdAndStreamID fails.
Status SingleOp::ProfilingTaskInfo(uint32_t index) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }

  if (op_name_.size() <= index) {
    // index is uint32_t and size() is size_t; %d would be a varargs type mismatch, so use %u / %zu.
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "index[%u] is out of range of op_name_ size[%zu].", index, op_name_.size());
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name_[index].c_str(), model_name_.c_str());
  std::vector<TaskDescInfo> task_desc_info;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // NOTE(review): presumably this returns the ids of the most recently launched task — confirm
  // against the runtime API before relying on call ordering.
  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  TaskDescInfo tmp_task_desc_info;
  tmp_task_desc_info.model_name = model_name_;
  tmp_task_desc_info.op_name = op_name_[index];
  tmp_task_desc_info.block_dim = 0;
  tmp_task_desc_info.task_id = task_id;
  tmp_task_desc_info.stream_id = stream_id;
  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name_[index].c_str(), task_id, stream_id);
  task_desc_info.emplace_back(tmp_task_desc_info);

  // No compute-graph description is reported here; an empty list is passed.
  std::vector<ComputeGraphDescInfo> compute_graph_info;

  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info,
                                        !profiling_manager.IsAclApiMode());
  return SUCCESS;
}
// Stores the given runtime stream handle in stream_.
void SingleOp::SetStream(rtStream_t stream) {
  this->stream_ = stream;
}
@@ -263,6 +301,36 @@ Status DynamicSingleOp::ExecuteTbeTask(const vector<GeTensorDesc> &input_desc, | |||||
return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); | return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); | ||||
} | } | ||||
// Reports profiling information for this dynamic op's single task.
//
// Returns SUCCESS without doing anything when ProfilingModelExecuteOn() is false. Otherwise
// it fetches a task id / stream id pair from the runtime, fills one TaskDescInfo with
// model_name_, op_name_ and those ids, and submits it to ProfilingManager::ReportProfilingData
// along with an empty ComputeGraphDescInfo list.
//
// @return SUCCESS when profiling is off or the report was submitted;
//         ACL_ERROR_GE_PARAM_INVALID when rtGetTaskIdAndStreamID fails.
Status DynamicSingleOp::ProfilingTaskInfo() {
  auto &manager = ProfilingManager::Instance();
  if (!manager.ProfilingModelExecuteOn()) {
    return SUCCESS;
  }

  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name_.c_str(), model_name_.c_str());

  uint32_t cur_task_id = 0;
  uint32_t cur_stream_id = 0;
  const auto rt_ret = rtGetTaskIdAndStreamID(&cur_task_id, &cur_stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  // Describe the one task owned by this dynamic op.
  TaskDescInfo desc;
  desc.model_name = model_name_;
  desc.op_name = op_name_;
  desc.block_dim = 0;
  desc.task_id = cur_task_id;
  desc.stream_id = cur_stream_id;
  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name_.c_str(), cur_task_id, cur_stream_id);

  std::vector<TaskDescInfo> task_descs;
  task_descs.emplace_back(desc);
  // No compute-graph description is reported; an empty list is passed.
  std::vector<ComputeGraphDescInfo> graph_descs;

  const bool check_device = !manager.IsAclApiMode();
  manager.ReportProfilingData(model_id_, task_descs, graph_descs, check_device);
  return SUCCESS;
}
Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | ||||
const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
@@ -281,9 +349,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
} | } | ||||
if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | ||||
return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | |||||
auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | |||||
if (ret == SUCCESS) { | |||||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo()); | |||||
} | |||||
return ret; | |||||
} else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | ||||
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); | |||||
auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); | |||||
if (aicpu_ret == SUCCESS) { | |||||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo()); | |||||
} | |||||
return aicpu_ret; | |||||
} else { | } else { | ||||
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, | ||||
"Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | ||||
@@ -42,6 +42,7 @@ class SingleOp { | |||||
Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
Status ProfilingTaskInfo(uint32_t index); | |||||
friend class SingleOpModel; | friend class SingleOpModel; | ||||
std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
@@ -53,7 +54,10 @@ class SingleOp { | |||||
std::vector<uintptr_t> args_; | std::vector<uintptr_t> args_; | ||||
std::vector<OpTask *> tasks_; | std::vector<OpTask *> tasks_; | ||||
std::vector<std::string> op_name_; | |||||
std::vector<std::vector<uintptr_t *>> arg_table_; | std::vector<std::vector<uintptr_t *>> arg_table_; | ||||
std::string model_name_; | |||||
uint32_t model_id_ = 0; | |||||
}; | }; | ||||
class DynamicSingleOp { | class DynamicSingleOp { | ||||
@@ -79,6 +83,7 @@ class DynamicSingleOp { | |||||
const vector<void *> &inputs, | const vector<void *> &inputs, | ||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
vector<void *> &outputs); | vector<void *> &outputs); | ||||
Status ProfilingTaskInfo(); | |||||
std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
uintptr_t resource_id_ = 0; | uintptr_t resource_id_ = 0; | ||||
@@ -86,6 +91,9 @@ class DynamicSingleOp { | |||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
size_t num_inputs_ = 0; | size_t num_inputs_ = 0; | ||||
size_t num_outputs_ = 0; | size_t num_outputs_ = 0; | ||||
std::string model_name_; | |||||
std::string op_name_; | |||||
uint32_t model_id_ = 0; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_SINGLE_OP_SINGLE_OP_H_ | #endif // GE_SINGLE_OP_SINGLE_OP_H_ |
@@ -157,6 +157,7 @@ Status SingleOpModel::LoadAllNodes() { | |||||
auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
Graph graph = ge_model->GetGraph(); | Graph graph = ge_model->GetGraph(); | ||||
model_id_ = ge_model->GetModelId(); | |||||
auto compute_graph = GraphUtils::GetComputeGraph(graph); | auto compute_graph = GraphUtils::GetComputeGraph(graph); | ||||
if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); | ||||
@@ -222,6 +223,8 @@ Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) { | |||||
} | } | ||||
single_op.args_.resize(arg_index); | single_op.args_.resize(arg_index); | ||||
single_op.model_name_ = model_name_; | |||||
single_op.model_id_ = model_id_; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -245,10 +248,11 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
string tbe_op_name = op_list_[context.op_index()]->GetName(); | |||||
single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | ||||
ParseArgTable(tbe_task, single_op); | ParseArgTable(tbe_task, single_op); | ||||
single_op.tasks_.emplace_back(tbe_task); | single_op.tasks_.emplace_back(tbe_task); | ||||
single_op.op_name_.emplace_back(tbe_op_name); | |||||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | ||||
GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
OpTask *task = nullptr; | OpTask *task = nullptr; | ||||
@@ -258,7 +262,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
string aicpu_op_name = op_list_[context.op_index()]->GetName(); | |||||
single_op.tasks_.emplace_back(task); | single_op.tasks_.emplace_back(task); | ||||
single_op.op_name_.emplace_back(aicpu_op_name); | |||||
} else { | } else { | ||||
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | ||||
return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | ||||
@@ -273,7 +279,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
string op_name = op_list_[task_def.kernel_ex().op_index()]->GetName(); | |||||
single_op.tasks_.emplace_back(aicpu_task); | single_op.tasks_.emplace_back(aicpu_task); | ||||
single_op.op_name_.emplace_back(op_name); | |||||
} else { | } else { | ||||
// skip | // skip | ||||
GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
@@ -393,6 +401,8 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
GELOGD("Building TBE task"); | GELOGD("Building TBE task"); | ||||
TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | ||||
string te_op_name = op_list_[context.op_index()]->GetName(); | |||||
single_op.op_name_ = te_op_name; | |||||
single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | ||||
GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
@@ -400,6 +410,8 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | ||||
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | ||||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | ||||
string aicpu_op_name = op_list_[context.op_index()]->GetName(); | |||||
single_op.op_name_ = aicpu_op_name; | |||||
single_op.op_task_.reset(task); | single_op.op_task_.reset(task); | ||||
} else { | } else { | ||||
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | ||||
@@ -446,6 +458,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
const TaskDef ©_task_def = tasks[i]; | const TaskDef ©_task_def = tasks[i]; | ||||
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | ||||
} | } | ||||
string op_name = op_list_[task_def.kernel_ex().op_index()]->GetName(); | |||||
single_op.op_name_ = op_name; | |||||
single_op.op_task_.reset(aicpu_task); | single_op.op_task_.reset(aicpu_task); | ||||
} else { | } else { | ||||
// skip | // skip | ||||
@@ -458,6 +472,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) { | Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) { | ||||
single_op.num_inputs_ = data_ops_.size(); | single_op.num_inputs_ = data_ops_.size(); | ||||
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
single_op.model_name_ = model_name_; | |||||
single_op.model_id_ = model_id_; | |||||
ParseOpModelParams(model_helper_, model_params_); | ParseOpModelParams(model_helper_, model_params_); | ||||
return BuildTaskListForDynamicOp(single_op); | return BuildTaskListForDynamicOp(single_op); | ||||
} | } | ||||
@@ -77,6 +77,7 @@ class SingleOpModel { | |||||
void ParseArgTable(TbeOpTask *task, SingleOp &op); | void ParseArgTable(TbeOpTask *task, SingleOp &op); | ||||
std::string model_name_; | std::string model_name_; | ||||
uint32_t model_id_ = 0; | |||||
const void *ori_model_data_; | const void *ori_model_data_; | ||||
uint32_t ori_model_size_; | uint32_t ori_model_size_; | ||||