|
|
@@ -1137,18 +1137,10 @@ Status AiCpuCCTask::InitForSummaryAndCopy() { |
|
|
|
GE_CHK_RT_RET(rtMalloc(&copy_input_src_dev_, copy_input_buf_len, RT_MEMORY_HBM)); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&copy_input_dst_dev_, copy_input_buf_len, RT_MEMORY_HBM)); |
|
|
|
|
|
|
|
std::vector<uint64_t> copy_io_addr; |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_)); |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_)); |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_)); |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_)); |
|
|
|
|
|
|
|
const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size(); |
|
|
|
|
|
|
|
GE_CHK_RT_RET(rtMalloc(&copy_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM)); |
|
|
|
|
|
|
|
GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size, |
|
|
|
copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_)); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_)); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_)); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_)); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
@@ -1192,7 +1184,7 @@ Status AiCpuCCTask::SetMemCopyTask(const domi::KernelDef &kernel_def) { |
|
|
|
auto memcpy_io_addr = memcpy_args_.get() + sizeof(aicpu::AicpuParamHead); |
|
|
|
// if has input and output, need copy to ioaddr |
|
|
|
int cpy_ret = memcpy_s(memcpy_io_addr, memcpy_args_size_ - sizeof(aicpu::AicpuParamHead), |
|
|
|
&copy_ioaddr_dev_, sizeof(uint64_t) * memcpy_io_num); |
|
|
|
&copy_io_addr[0], sizeof(uint64_t) * memcpy_io_num); |
|
|
|
GE_IF_BOOL_EXEC(cpy_ret != 0, |
|
|
|
REPORT_INNER_ERROR("E19999", "Node[Memcpoy] memcpy io addr to AicpuParamHead failed," |
|
|
|
"ret=%d, args_size=%u, io nums=%u.", |
|
|
|