Browse Source

fix

pull/1985/head
guopeian 3 years ago
parent
commit
1bbd6aba11
1 changed file with 5 additions and 17 deletions
  1. +5
    -17
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc

+ 5
- 17
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -783,22 +783,10 @@ Status AicpuNodeTask::InitForDependComputeTask() {
"[Alloc][TensorBuffer] failed for Node[%s] to copy task input dst, size=%zu",
node_name_.c_str(), copy_input_buf_len);

std::vector<uint64_t> copy_io_addr;
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_->GetData()));
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_->GetData()));
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_->GetData()));
copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_->GetData()));

// mem copy op has 4 inputs and 0 output.
const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size();

// can alloc in init, it can reuse
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_),
"[Alloc][TensorBuffer] failed for Node[%s] to copy task ioaddr, size=%zu",
node_name_.c_str(), copy_io_addr_size);

GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size,
&copy_io_addr[0], copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE));
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_->GetData()));
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_->GetData()));
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_->GetData()));
copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_->GetData()));
return SUCCESS;
}

@@ -973,7 +961,7 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
auto kernel_type = static_cast<ccKernelType>(kcontext.kernel_type());
uint32_t flag = RT_KERNEL_DEFAULT;
if (kernel_type == ccKernelType::CUST_AI_CPU) {
flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
}
auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
reinterpret_cast<const void *>(kernel_name.c_str()),


Loading…
Cancel
Save