Report per-task profiling info for SingleOp and DynamicSingleOp execution.

What the hunks below do:
  * GeExecutor::Initialize(): the job_id handed to ProfilingManager::Init()
    changes from "" to "1".
  * SingleOp: new members op_name_ / block_dim_ (one entry per task),
    model_name_ and model_id_. ExecuteAsync() calls ProfilingTaskInfo(index)
    after every successful LaunchKernel(); a non-SUCCESS status from it is
    returned to the caller through GE_CHK_STATUS_RET_NOLOG.
  * DynamicSingleOp: new members op_name_, block_dim_ (default 1),
    model_name_ and model_id_. ExecuteAsync() calls ProfilingTaskInfo() after
    a successful TBE or AICPU/AICPUCC launch.
  * Both ProfilingTaskInfo() variants return SUCCESS immediately when
    ProfilingManager::ProfilingModelExecuteOn() is false. Otherwise they read
    task_id/stream_id via rtGetTaskIdAndStreamID(), fill one TaskDescInfo and
    pass it to ProfilingManager::ReportProfilingData() together with an empty
    compute-graph list and !IsAclApiMode().
  * SingleOpModel: records ge_model->GetModelId() in LoadAllNodes() and copies
    model name/id, op names and block dims into the ops it builds. Tasks built
    from kernel_ex() use kDefaultBlockDim (1).

Fixes relative to the previous revision of this patch:
  * SingleOp::ProfilingTaskInfo(): the out-of-range log now uses %u for the
    uint32_t index and %zu for the size_t container sizes (was %d for all
    three, a mismatched printf conversion).
  * Restored text lost in transit: "&copy_task_def" (had been turned into a
    copyright sign) and the template argument lists of the added declarations.

NOTE(review): template arguments and indentation of unchanged context lines
(vector<GeTensorDesc>, vector<DataBuffer>, vector<void *>,
std::vector<uintptr_t>, std::vector<OpTask *>, std::unique_ptr<OpTask>,
static_cast<int>) and the element type ComputeGraphDescInfo were reconstructed,
not copied from the tree - confirm against the target revision before applying.

diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc
index d03a8d7b..fedd13b7 100755
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -283,7 +283,7 @@ Status GeExecutor::Initialize() {
   // Start profiling
   Options profiling_options;
   profiling_options.device_id = 0;
-  profiling_options.job_id = "";
+  profiling_options.job_id = "1";
   ProfilingManager::Instance().Init(profiling_options);
 
   isInit_ = true;
diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index 371d7110..71a36bc8 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -17,6 +17,7 @@
 #include "single_op/single_op.h"
 
 #include "common/fmk_types.h"
+#include "common/ge_types.h"
 #include "common/math/math_util.h"
 #include "common/profiling/profiling_manager.h"
 #include "framework/common/debug/ge_log.h"
@@ -164,16 +165,54 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
     return ret;
   }
 
+  uint32_t index = 0;
   for (auto &task : tasks_) {
     ret = task->LaunchKernel(stream_);
     if (ret != SUCCESS) {
       return ret;
     }
+    GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(index));
+    index++;
   }
 
   return ret;
 }
 
+Status SingleOp::ProfilingTaskInfo(uint32_t index) {
+  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
+    return SUCCESS;
+  }
+  if (op_name_.size() <= index || block_dim_.size() <= index) {
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "index[%u] is out of range of op_name_ size[%zu] or block_dim_ size[%zu].",
+           index, op_name_.size(), block_dim_.size());
+    return ACL_ERROR_GE_PARAM_INVALID;
+  }
+  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name_[index].c_str(), model_name_.c_str());
+  std::vector<TaskDescInfo> task_desc_info;
+  uint32_t task_id = 0;
+  uint32_t stream_id = 0;
+  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
+    return ACL_ERROR_GE_PARAM_INVALID;
+  }
+
+  TaskDescInfo tmp_task_desc_info;
+  tmp_task_desc_info.model_name = model_name_;
+  tmp_task_desc_info.op_name = op_name_[index];
+  tmp_task_desc_info.block_dim = block_dim_[index];
+  tmp_task_desc_info.task_id = task_id;
+  tmp_task_desc_info.stream_id = stream_id;
+  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name_[index].c_str(), task_id, stream_id);
+  task_desc_info.emplace_back(tmp_task_desc_info);
+
+  std::vector<ComputeGraphDescInfo> compute_graph_info;
+
+  auto &profiling_manager = ProfilingManager::Instance();
+  profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info,
+                                        !profiling_manager.IsAclApiMode());
+  return SUCCESS;
+}
+
 void SingleOp::SetStream(rtStream_t stream) {
   stream_ = stream;
 }
@@ -263,6 +302,36 @@ Status DynamicSingleOp::ExecuteTbeTask(const vector<GeTensorDesc> &input_desc,
   return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_);
 }
 
+Status DynamicSingleOp::ProfilingTaskInfo() {
+  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
+    return SUCCESS;
+  }
+  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name_.c_str(), model_name_.c_str());
+  std::vector<TaskDescInfo> task_desc_info;
+  uint32_t task_id = 0;
+  uint32_t stream_id = 0;
+  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
+    return ACL_ERROR_GE_PARAM_INVALID;
+  }
+
+  TaskDescInfo tmp_task_desc_info;
+  tmp_task_desc_info.model_name = model_name_;
+  tmp_task_desc_info.op_name = op_name_;
+  tmp_task_desc_info.block_dim = block_dim_;
+  tmp_task_desc_info.task_id = task_id;
+  tmp_task_desc_info.stream_id = stream_id;
+  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name_.c_str(), task_id, stream_id);
+  task_desc_info.emplace_back(tmp_task_desc_info);
+
+  std::vector<ComputeGraphDescInfo> compute_graph_info;
+
+  auto &profiling_manager = ProfilingManager::Instance();
+  profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info,
+                                        !profiling_manager.IsAclApiMode());
+  return SUCCESS;
+}
+
 Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                      const vector<DataBuffer> &input_buffers,
                                      vector<GeTensorDesc> &output_desc,
@@ -281,9 +350,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
   }
 
   if (op_task_->GetOpTaskType() == OP_TASK_TBE) {
-    return ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
+    auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
+    if (ret == SUCCESS) {
+      GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo());
+    }
+    return ret;
   } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) {
-    return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
+    auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
+    if (aicpu_ret == SUCCESS) {
+      GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo());
+    }
+    return aicpu_ret;
   } else {
     GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID,
            "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u",
diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h
index 14ef8ce1..a5f5e33c 100755
--- a/ge/single_op/single_op.h
+++ b/ge/single_op/single_op.h
@@ -42,6 +42,7 @@ class SingleOp {
   Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
   Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
   Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
+  Status ProfilingTaskInfo(uint32_t index);
 
   friend class SingleOpModel;
   std::mutex *stream_mutex_;
@@ -53,7 +54,11 @@ class SingleOp {
   std::vector<uintptr_t> args_;
 
   std::vector<OpTask *> tasks_;
+  std::vector<std::string> op_name_;
+  std::vector<uint32_t> block_dim_;
   std::vector<std::vector<uintptr_t *>> arg_table_;
+  std::string model_name_;
+  uint32_t model_id_ = 0;
 };
 
 class DynamicSingleOp {
@@ -79,6 +84,7 @@ class DynamicSingleOp {
                         const vector<void *> &inputs,
                         vector<GeTensorDesc> &output_desc,
                         vector<void *> &outputs);
+  Status ProfilingTaskInfo();
 
   std::unique_ptr<OpTask> op_task_;
   uintptr_t resource_id_ = 0;
@@ -86,6 +92,10 @@ class DynamicSingleOp {
   rtStream_t stream_ = nullptr;
   size_t num_inputs_ = 0;
   size_t num_outputs_ = 0;
+  std::string model_name_;
+  std::string op_name_;
+  uint32_t model_id_ = 0;
+  uint32_t block_dim_ = 1;
 };
 }  // namespace ge
 #endif  // GE_SINGLE_OP_SINGLE_OP_H_
diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index 49968f4f..fde3e7a0 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -41,6 +41,7 @@ using std::vector;
 namespace ge {
 namespace {
 const size_t kDataOutputNum = 1;
+const uint32_t kDefaultBlockDim = 1;
 }  // namespace
 SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
     : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}
@@ -157,6 +158,7 @@ Status SingleOpModel::LoadAllNodes() {
   auto ge_model = model_helper_.GetGeModel();
   GE_CHECK_NOTNULL(ge_model);
   Graph graph = ge_model->GetGraph();
+  model_id_ = ge_model->GetModelId();
   auto compute_graph = GraphUtils::GetComputeGraph(graph);
   if (compute_graph == nullptr) {
     GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str());
@@ -222,6 +224,8 @@ Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) {
   }
 
   single_op.args_.resize(arg_index);
+  single_op.model_name_ = model_name_;
+  single_op.model_id_ = model_id_;
   return SUCCESS;
 }
 
@@ -245,10 +249,12 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
         if (ret != SUCCESS) {
           return ret;
         }
-
+        string tbe_op_name = op_list_[context.op_index()]->GetName();
         single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
         ParseArgTable(tbe_task, single_op);
         single_op.tasks_.emplace_back(tbe_task);
+        single_op.op_name_.emplace_back(tbe_op_name);
+        single_op.block_dim_.emplace_back(kernel_def.block_dim());
       } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
         GELOGD("Building AICPU_CC task");
         OpTask *task = nullptr;
@@ -258,7 +264,10 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
         if (ret != SUCCESS) {
           return ret;
         }
+        string aicpu_op_name = op_list_[context.op_index()]->GetName();
         single_op.tasks_.emplace_back(task);
+        single_op.op_name_.emplace_back(aicpu_op_name);
+        single_op.block_dim_.emplace_back(kernel_def.block_dim());
       } else {
         GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
         return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
@@ -273,7 +282,10 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
       if (ret != SUCCESS) {
         return ret;
       }
+      string op_name = op_list_[task_def.kernel_ex().op_index()]->GetName();
       single_op.tasks_.emplace_back(aicpu_task);
+      single_op.op_name_.emplace_back(op_name);
+      single_op.block_dim_.emplace_back(kDefaultBlockDim);
     } else {
       // skip
       GELOGD("Skip task type: %d", static_cast<int>(task_type));
@@ -393,6 +405,9 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
     GELOGD("Building TBE task");
     TbeOpTask *tbe_task = nullptr;
     GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
+    string te_op_name = op_list_[context.op_index()]->GetName();
+    single_op.op_name_ = te_op_name;
+    single_op.block_dim_ = kernel_def.block_dim();
     single_op.op_task_.reset(tbe_task);
   } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
     GELOGD("Building AICPU_CC task");
@@ -400,6 +415,9 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
     uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
     GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id);
     GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id));
+    string aicpu_op_name = op_list_[context.op_index()]->GetName();
+    single_op.op_name_ = aicpu_op_name;
+    single_op.block_dim_ = kernel_def.block_dim();
     single_op.op_task_.reset(task);
   } else {
     GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
@@ -446,6 +464,9 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
         const TaskDef &copy_task_def = tasks[i];
         GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
       }
+      string op_name = op_list_[task_def.kernel_ex().op_index()]->GetName();
+      single_op.op_name_ = op_name;
+      single_op.block_dim_ = kDefaultBlockDim;
       single_op.op_task_.reset(aicpu_task);
     } else {
       // skip
@@ -458,6 +479,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
 Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) {
   single_op.num_inputs_ = data_ops_.size();
   single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
+  single_op.model_name_ = model_name_;
+  single_op.model_id_ = model_id_;
   ParseOpModelParams(model_helper_, model_params_);
   return BuildTaskListForDynamicOp(single_op);
 }
diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h
index 50aeb7ab..5f1c842a 100755
--- a/ge/single_op/single_op_model.h
+++ b/ge/single_op/single_op_model.h
@@ -77,6 +77,7 @@ class SingleOpModel {
   void ParseArgTable(TbeOpTask *task, SingleOp &op);
 
   std::string model_name_;
+  uint32_t model_id_ = 0;
   const void *ori_model_data_;
   uint32_t ori_model_size_;
 