|
|
@@ -567,6 +567,16 @@ AiCpuBaseTask::~AiCpuBaseTask() { |
|
|
|
if (rt_event_ != nullptr) { |
|
|
|
(void)rtEventDestroy(rt_event_); |
|
|
|
} |
|
|
|
FreeHbm(copy_input_release_flag_dev_); |
|
|
|
FreeHbm(copy_input_data_size_dev_); |
|
|
|
FreeHbm(copy_input_src_dev_); |
|
|
|
FreeHbm(copy_input_dst_dev_); |
|
|
|
for (auto summary : output_summary_) { |
|
|
|
FreeHbm(summary); |
|
|
|
} |
|
|
|
for (auto out_shape : out_shape_hbm_) { |
|
|
|
FreeHbm(out_shape); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuBaseTask::UpdateEventIdForBlockingAicpuOp() { |
|
|
@@ -878,17 +888,7 @@ AiCpuTask::~AiCpuTask() { |
|
|
|
FreeHbm(workspace_addr_); |
|
|
|
FreeHbm(copy_workspace_buf_); |
|
|
|
FreeHbm(copy_ioaddr_dev_); |
|
|
|
FreeHbm(copy_input_release_flag_dev_); |
|
|
|
FreeHbm(copy_input_data_size_dev_); |
|
|
|
FreeHbm(copy_input_src_dev_); |
|
|
|
FreeHbm(copy_input_dst_dev_); |
|
|
|
FreeHbm(copy_task_args_buf_); |
|
|
|
for (auto summary : output_summary_) { |
|
|
|
FreeHbm(summary); |
|
|
|
} |
|
|
|
for (auto out_shape : out_shape_hbm_) { |
|
|
|
FreeHbm(out_shape); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuTask::LaunchKernel(rtStream_t stream) { |
|
|
@@ -926,7 +926,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuTask::PrepareCopyInputs(vector<DataBuffer> &outputs) { |
|
|
|
Status AiCpuBaseTask::PrepareCopyInputs(vector<DataBuffer> &outputs) { |
|
|
|
std::vector<uint64_t> copy_input_release_flag; |
|
|
|
std::vector<uint64_t> copy_input_data_size; |
|
|
|
std::vector<uint64_t> copy_input_src; |
|
|
@@ -967,7 +967,7 @@ Status AiCpuTask::PrepareCopyInputs(vector<DataBuffer> &outputs) { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { |
|
|
|
Status AiCpuBaseTask::ReadResultSummaryAndPrepareMemory() { |
|
|
|
for (size_t i = 0; i < num_outputs_; ++i) { |
|
|
|
auto &result_summary = output_summary_host_[i]; |
|
|
|
|
|
|
@@ -984,6 +984,19 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
// Runs the memcpy kernel for this task on `stream` and waits for it to finish. |
// Steps visible in this block: |
//   1. PrepareCopyInputs(outputs) is called with the caller's output buffers. |
//   2. rtCpuKernelLaunchWithFlag is called with memcpy_so_name_, memcpy_kernel_name_, |
//      block_dim_, memcpy_args_ and memcpy_args_size_, using flag RT_KERNEL_DEFAULT. |
//   3. rtStreamSynchronize(stream) is called before returning. |
// Returns SUCCESS when execution reaches the end of the function. |
// NOTE(review): GE_CHK_STATUS_RET_NOLOG and GE_CHK_RT_RET are defined outside this view; |
// presumably each returns early with an error status when its argument reports failure - confirm. |
Status AiCpuCCTask::CopyDataToHbm(vector<DataBuffer> &outputs, |
|
|
|
rtStream_t stream) { |
|
|
|
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); |
|
|
|
|
|
|
|
// The so name and kernel name are passed as untyped pointers to their underlying data. |
// NOTE(review): the nullptr argument sits between the args size and the stream; its meaning |
// is defined by the runtime API and is not visible here - confirm against the runtime header. |
auto ret = rtCpuKernelLaunchWithFlag(static_cast<const void *>(memcpy_so_name_.data()), |
|
|
|
static_cast<const void *>(memcpy_kernel_name_.data()), |
|
|
|
block_dim_, memcpy_args_.get(), static_cast<uint32_t>(memcpy_args_size_), |
|
|
|
nullptr, stream, RT_KERNEL_DEFAULT); |
|
|
|
GE_CHK_RT_RET(ret); |
|
|
|
// Wait on the stream before reporting success to the caller. |
GE_CHK_RT_RET(rtStreamSynchronize(stream)); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuTask::CopyDataToHbm(vector<DataBuffer> &outputs, |
|
|
|
rtStream_t stream) { |
|
|
|
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); |
|
|
@@ -994,7 +1007,7 @@ Status AiCpuTask::CopyDataToHbm(vector<DataBuffer> &outputs, |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { |
|
|
|
Status AiCpuBaseTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { |
|
|
|
for (size_t i = 0; i < num_outputs_; ++i) { |
|
|
|
const auto &result_summary = output_summary_host_[i]; |
|
|
|
std::vector<int64_t> shape_dims; |
|
|
@@ -1023,9 +1036,9 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, |
|
|
|
vector<DataBuffer> &outputs, |
|
|
|
rtStream_t stream) { |
|
|
|
Status AiCpuBaseTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, |
|
|
|
vector<DataBuffer> &outputs, |
|
|
|
rtStream_t stream) { |
|
|
|
if (num_outputs_ == 0) { |
|
|
|
GELOGI("Output num is 0, there is no need to update the output and size."); |
|
|
|
return SUCCESS; |
|
|
@@ -1123,11 +1136,11 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
const std::vector<DataBuffer> &input_buffers, |
|
|
|
std::vector<GeTensorDesc> &output_desc, |
|
|
|
std::vector<DataBuffer> &output_buffers, |
|
|
|
rtStream_t stream) { |
|
|
|
Status AiCpuBaseTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
const std::vector<DataBuffer> &input_buffers, |
|
|
|
std::vector<GeTensorDesc> &output_desc, |
|
|
|
std::vector<DataBuffer> &output_buffers, |
|
|
|
rtStream_t stream) { |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); |
|
|
|
if (unknown_type_ == DEPEND_COMPUTE) { |
|
|
|
std::vector<DataBuffer> summary_buffers; |
|
|
@@ -1209,22 +1222,6 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
// Launches this task with explicit input/output descriptors and buffers on `stream`. |
// Steps visible in this block, in order: |
//   1. UpdateExtInfo(input_desc, output_desc, stream) |
//   2. UpdateIoAddr(input_buffers, output_buffers) |
//   3. LaunchKernel(stream) (the single-argument overload) |
//   4. Only when unknown_type_ == DEPEND_SHAPE_RANGE: rtStreamSynchronize(stream), then |
//      UpdateOutputShape(output_desc). |
// Returns SUCCESS when execution reaches the end of the function. |
// NOTE(review): GE_CHK_STATUS_RET_NOLOG and GE_CHK_RT_RET are defined outside this view; |
// presumably each returns early with an error status when its argument reports failure - confirm. |
Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
const std::vector<DataBuffer> &input_buffers, |
|
|
|
std::vector<GeTensorDesc> &output_desc, |
|
|
|
std::vector<DataBuffer> &output_buffers, |
|
|
|
rtStream_t stream) { |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); |
|
|
|
// The stream is synchronized before UpdateOutputShape is called; this branch is taken |
// only for DEPEND_SHAPE_RANGE. |
// NOTE(review): presumably the output shape is only known after the kernel has run - confirm. |
if (unknown_type_ == DEPEND_SHAPE_RANGE) { |
|
|
|
GE_CHK_RT_RET(rtStreamSynchronize(stream)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); |
|
|
|
} |
|
|
|
|
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { |
|
|
|
arg_base = io_addr_; |
|
|
|
arg_count = io_addr_num_; |
|
|
|