|
|
@@ -1164,6 +1164,91 @@ Status AiCpuBaseTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// Prepares the buffers needed by the summary/copy stage of an AICPU CC task.
///
/// Returns SUCCESS without allocating anything unless unknown_type_ is DEPEND_COMPUTE and
/// num_outputs_ is non-zero. Otherwise it:
///   1. resizes output_summary_ to num_outputs_ and rtMalloc's one
///      aicpu::FWKAdapter::ResultSummary-sized buffer per output,
///   2. resizes output_summary_host_ to num_outputs_,
///   3. rtMalloc's the four copy-input buffers (release flag, data size, src, dst), each
///      num_outputs_ * kCopyNum * sizeof(uint64_t) bytes,
///   4. appends the addresses of those four buffers to copy_io_addr_, in that order.
///
/// @return SUCCESS on success; otherwise the status produced by GE_CHK_RT_RET for the
///         failing rtMalloc call.
Status AiCpuCCTask::InitForSummaryAndCopy() {
  if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) {
    GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_);
    return SUCCESS;
  }

  // One result-summary buffer per output.
  // NOTE(review): buffers allocated before a failing rtMalloc are not released here;
  // presumably the owning task frees them on destruction -- confirm.
  output_summary_.resize(num_outputs_);
  constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary);
  for (size_t i = 0; i < num_outputs_; ++i) {
    GE_CHK_RT_RET(rtMalloc(&output_summary_[i], result_summary_size, RT_MEMORY_HBM));
  }
  output_summary_host_.resize(num_outputs_);

  // All four copy-input buffers share the same length.
  const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t);

  GE_CHK_RT_RET(rtMalloc(&copy_input_release_flag_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_data_size_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_src_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_dst_dev_, copy_input_buf_len, RT_MEMORY_HBM));

  // The order below is the order in which SetMemCopyTask copies the addresses into the
  // io-address area of the memcpy kernel args.
  copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_));
  copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_));
  copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_));
  copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_));
  return SUCCESS;
}
|
|
|
|
|
|
|
/// Builds the host-side argument buffer for the MemCopy kernel from its task definition.
///
/// Records the kernel's args size, so name and kernel name, copies the serialized args
/// into a freshly allocated memcpy_args_ buffer, and then overwrites the io-address area
/// that follows the aicpu::AicpuParamHead with the addresses stored in copy_io_addr_.
///
/// @param kernel_def  Kernel definition of the MemCopy task; supplies args, args_size,
///                    so_name and kernel_name.
/// @return SUCCESS on success;
///         FAILED if args.size() disagrees with args_size, if args_size is smaller than
///         the param head, or if the host buffer cannot be allocated;
///         INTERNAL_ERROR if copying the args or the io addresses fails, or if the param
///         head requests more io addresses than copy_io_addr_ holds.
Status AiCpuCCTask::SetMemCopyTask(const domi::KernelDef &kernel_def) {
  auto &memcpy_args = kernel_def.args();
  memcpy_args_size_ = kernel_def.args_size();
  memcpy_so_name_ = kernel_def.so_name();
  memcpy_kernel_name_ = kernel_def.kernel_name();
  if (memcpy_args.size() != memcpy_args_size_) {
    REPORT_INNER_ERROR("E19999", "MemCopy task def args.size=%zu, but args_size=%u not equal.",
                       memcpy_args.size(), memcpy_args_size_);
    GELOGE(FAILED, "[Check][Size]MemCopy task def args.size=%zu, but args_size=%u not equal.",
           memcpy_args.size(), memcpy_args_size_);
    return FAILED;
  }
  // The buffer is reinterpreted as an AicpuParamHead below, so it must be at least that big.
  if (memcpy_args_size_ < sizeof(aicpu::AicpuParamHead)) {
    REPORT_INNER_ERROR("E19999",
                       "Task def args_size=%u is less than aicpu param head len=%zu.",
                       memcpy_args_size_, sizeof(aicpu::AicpuParamHead));
    GELOGE(FAILED,
           "[Check][Size] Task def args_size=%u is less than aicpu param head len=%zu.",
           memcpy_args_size_, sizeof(aicpu::AicpuParamHead));
    return FAILED;
  }

  memcpy_args_.reset(new(std::nothrow) uint8_t[memcpy_args_size_]());
  if (memcpy_args_ == nullptr) {
    REPORT_INNER_ERROR("E19999", "new memory failed for Node[MemCopy], task_size[%u].",
                       memcpy_args_size_);
    GELOGE(FAILED, "[Malloc][Memory] failed for Node[MemCopy], task_size[%u].",
           memcpy_args_size_);
    return FAILED;
  }

  errno_t sec_ret = memcpy_s(memcpy_args_.get(), memcpy_args_size_, memcpy_args.c_str(), memcpy_args.size());
  if (sec_ret != EOK) {
    REPORT_INNER_ERROR("E19999",
                       "memcpy_s argc_ failed for Node[MemCopy], ret: %d", sec_ret);
    GELOGE(INTERNAL_ERROR,
           "[Update][args] failed for Node[MemCopy], ret: %d", sec_ret);
    // Return a Status code rather than leaking the raw errno_t to callers.
    return INTERNAL_ERROR;
  }

  auto memcpy_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(memcpy_args_.get());
  uint32_t memcpy_io_num = memcpy_param_head->ioAddrNum;
  // ioAddrNum comes from the task def; never read more addresses than were prepared.
  if (memcpy_io_num > copy_io_addr_.size()) {
    REPORT_INNER_ERROR("E19999", "Node[MemCopy] io nums=%u exceeds prepared copy io addr count=%zu.",
                       memcpy_io_num, copy_io_addr_.size());
    GELOGE(INTERNAL_ERROR, "[Check][Size]Node[MemCopy] io nums=%u exceeds prepared copy io addr count=%zu.",
           memcpy_io_num, copy_io_addr_.size());
    return INTERNAL_ERROR;
  }

  auto memcpy_io_addr = memcpy_args_.get() + sizeof(aicpu::AicpuParamHead);
  // if has input and output, need copy to ioaddr
  int cpy_ret = memcpy_s(memcpy_io_addr, memcpy_args_size_ - sizeof(aicpu::AicpuParamHead),
                         copy_io_addr_.data(), sizeof(uint64_t) * memcpy_io_num);
  if (cpy_ret != 0) {
    REPORT_INNER_ERROR("E19999", "Node[MemCopy] memcpy io addr to AicpuParamHead failed,"
                       "ret=%d, args_size=%u, io nums=%u.",
                       cpy_ret, memcpy_args_size_, memcpy_io_num);
    GELOGE(INTERNAL_ERROR, "[Update][io_addr]Node[MemCopy] memcpy io addr to AicpuParamHead failed,"
           "ret=%d, args_size=%u, io nums=%u.",
           cpy_ret, memcpy_args_size_, memcpy_io_num);
    return INTERNAL_ERROR;
  }
  GELOGD("Set memcpy task for node[MemCopy] successfully.");
  return SUCCESS;
}
|
|
|
|
|
|
|
Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { |
|
|
|
// aicpu do not have workspace, for now |
|
|
|
return DoUpdateArgTable(param, false); |
|
|
|