|
|
@@ -783,22 +783,10 @@ Status AicpuNodeTask::InitForDependComputeTask() { |
|
|
|
"[Alloc][TensorBuffer] failed for Node[%s] to copy task input dst, size=%zu", |
|
|
|
node_name_.c_str(), copy_input_buf_len); |
|
|
|
|
|
|
|
std::vector<uint64_t> copy_io_addr; |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_->GetData())); |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_->GetData())); |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_->GetData())); |
|
|
|
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_->GetData())); |
|
|
|
|
|
|
|
// mem copy op has 4 inputs and 0 output. |
|
|
|
const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size(); |
|
|
|
|
|
|
|
// can be allocated in init, since it can be reused |
|
|
|
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_), |
|
|
|
"[Alloc][TensorBuffer] failed for Node[%s] to copy task ioaddr, size=%zu", |
|
|
|
node_name_.c_str(), copy_io_addr_size); |
|
|
|
|
|
|
|
GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, |
|
|
|
&copy_io_addr[0], copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_->GetData())); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_->GetData())); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_->GetData())); |
|
|
|
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_->GetData())); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
@@ -973,7 +961,7 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { |
|
|
|
auto kernel_type = static_cast<ccKernelType>(kcontext.kernel_type()); |
|
|
|
uint32_t flag = RT_KERNEL_DEFAULT; |
|
|
|
if (kernel_type == ccKernelType::CUST_AI_CPU) { |
|
|
|
flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); |
|
|
|
flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); |
|
|
|
} |
|
|
|
auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()), |
|
|
|
reinterpret_cast<const void *>(kernel_name.c_str()), |
|
|
|