From e06d338f581d78674170996299c2418d1f57e447 Mon Sep 17 00:00:00 2001 From: guopeian Date: Tue, 20 Jul 2021 12:02:51 +0800 Subject: [PATCH] fix --- ge/single_op/single_op_model.cc | 15 ++++-- ge/single_op/single_op_model.h | 2 +- ge/single_op/task/aicpu_kernel_task_builder.cc | 5 +- ge/single_op/task/op_task.cc | 71 ++++++++++++-------------- ge/single_op/task/op_task.h | 5 -- 5 files changed, 48 insertions(+), 50 deletions(-) diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index ca07d2ae..a126d8cd 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -333,7 +333,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s single_op.tasks_.emplace_back(tbe_task); } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); - OpTask *task = nullptr; + AiCpuCCTask *task = nullptr; uint64_t singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id); GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id)); @@ -489,7 +489,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC return SUCCESS; } -Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id) { +Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, AicpuCCTask **task, uint64_t kernel_id) { const auto &context = kernel_def.context(); auto iter = op_list_.find(context.op_index()); if (iter == op_list_.end()) { @@ -611,10 +611,19 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, } else if (lib_name == kEngineNameAiCpu) { const auto &task_def = task_defs[0]; GELOGD("Building AICPU_CC task"); - OpTask *task = nullptr; + AicpuCCTask *task = nullptr; uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build dynamic singleOp CCTask, 
kernel_id = %lu", dynamic_singleop_kernel_id); GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); + if (task->GetUnknownType() == DEPEND_COMPUTE) { + if (task_defs.size() < kNumTaskWithMemCpyTask) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); + REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); + return ACL_ERROR_GE_PARAM_INVALID; + } + const TaskDef &copy_task_def = task_defs[1]; + GE_CHK_STATUS_RET_NOLOG(task->SetMemCopyTask(copy_task_def.kernel())); + } task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(task); } else if (lib_name == kEngineNameAiCpuTf) { diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index b1cd161c..4057682d 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -71,7 +71,7 @@ class SingleOpModel { Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id); - Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); + Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, AicpuCCTask **task, uint64_t kernel_id); static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param); void ParseArgTable(OpTask *task, SingleOp &op); diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 2f0856bf..3099d8b6 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -102,11 +102,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons return ret; } GE_CHK_STATUS_RET(task.SetInputConst(), "[Set][InputConst] failed."); + 
GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "[Init][SummaryAndCopy] failed."); - if (task.GetUnknownType() == DEPEND_COMPUTE) { - GELOGE(FAILED, "[Get][UnknownType] is depend compute, it's not supported now."); - return FAILED; - } auto aicpu_param_head = reinterpret_cast(task.args_.get()); if (task.ext_info_addr_dev_ != nullptr) { aicpu_param_head->extInfoLength = kernel_ext_info.size(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 83cb0529..4dd09c43 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -567,6 +567,16 @@ AiCpuBaseTask::~AiCpuBaseTask() { if (rt_event_ != nullptr) { (void)rtEventDestroy(rt_event_); } + FreeHbm(copy_input_release_flag_dev_); + FreeHbm(copy_input_data_size_dev_); + FreeHbm(copy_input_src_dev_); + FreeHbm(copy_input_dst_dev_); + for (auto summary : output_summary_) { + FreeHbm(summary); + } + for (auto out_shape : out_shape_hbm_) { + FreeHbm(out_shape); + } } Status AiCpuBaseTask::UpdateEventIdForBlockingAicpuOp() { @@ -878,17 +888,7 @@ AiCpuTask::~AiCpuTask() { FreeHbm(workspace_addr_); FreeHbm(copy_workspace_buf_); FreeHbm(copy_ioaddr_dev_); - FreeHbm(copy_input_release_flag_dev_); - FreeHbm(copy_input_data_size_dev_); - FreeHbm(copy_input_src_dev_); - FreeHbm(copy_input_dst_dev_); FreeHbm(copy_task_args_buf_); - for (auto summary : output_summary_) { - FreeHbm(summary); - } - for (auto out_shape : out_shape_hbm_) { - FreeHbm(out_shape); - } } Status AiCpuTask::LaunchKernel(rtStream_t stream) { @@ -926,7 +926,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status AiCpuTask::PrepareCopyInputs(vector &outputs) { +Status AiCpuBaseTask::PrepareCopyInputs(vector &outputs) { std::vector copy_input_release_flag; std::vector copy_input_data_size; std::vector copy_input_src; @@ -967,7 +967,7 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs) { return SUCCESS; } -Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { +Status 
AiCpuBaseTask::ReadResultSummaryAndPrepareMemory() { for (size_t i = 0; i < num_outputs_; ++i) { auto &result_summary = output_summary_host_[i]; @@ -984,6 +984,19 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { return SUCCESS; } +Status AiCpuCCTask::CopyDataToHbm(vector &outputs, + rtStream_t stream) { + GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); + + auto ret = rtCpuKernelLaunchWithFlag(static_cast(memcpy_so_name_.data()), + static_cast(memcpy_kernel_name_.data()), + block_dim_, memcpy_args_.get(), static_cast(memcpy_args_size_), + nullptr, stream, RT_KERNEL_DEFAULT); + GE_CHK_RT_RET(ret); + GE_CHK_RT_RET(rtStreamSynchronize(stream)); + return SUCCESS; +} + Status AiCpuTask::CopyDataToHbm(vector &outputs, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); @@ -994,7 +1007,7 @@ Status AiCpuTask::CopyDataToHbm(vector &outputs, return SUCCESS; } -Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc) { +Status AiCpuBaseTask::UpdateShapeByHbmBuffer(vector &output_desc) { for (size_t i = 0; i < num_outputs_; ++i) { const auto &result_summary = output_summary_host_[i]; std::vector shape_dims; @@ -1023,9 +1036,9 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc) { } -Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, - vector &outputs, - rtStream_t stream) { +Status AiCpuBaseTask::UpdateShapeAndDataByResultSummary(vector &output_desc, + vector &outputs, + rtStream_t stream) { if (num_outputs_ == 0) { GELOGI("Output num is 0, there is no need to update the output and size."); return SUCCESS; @@ -1123,11 +1136,11 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { return SUCCESS; } -Status AiCpuTask::LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, - rtStream_t stream) { +Status AiCpuBaseTask::LaunchKernel(const std::vector &input_desc, + const std::vector &input_buffers, + 
std::vector &output_desc, + std::vector &output_buffers, + rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); if (unknown_type_ == DEPEND_COMPUTE) { std::vector summary_buffers; @@ -1209,22 +1222,6 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, - rtStream_t stream) { - GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); - GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); - GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); - if (unknown_type_ == DEPEND_SHAPE_RANGE) { - GE_CHK_RT_RET(rtStreamSynchronize(stream)); - GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); - } - - return SUCCESS; -} - void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = io_addr_; arg_count = io_addr_num_; diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index adf51dba..8890c9d7 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -78,11 +78,6 @@ class TbeOpTask : public OpTask { public: ~TbeOpTask() override; Status LaunchKernel(rtStream_t stream) override; - Status LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, - rtStream_t stream) override; void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func);