|
|
@@ -20,12 +20,9 @@ |
|
|
|
|
|
|
|
// File-local constants shared by the CPU queue-schedule tasks below.
namespace {
// Core dimension argument handed to rtCpuKernelLaunch.
constexpr uint32_t kCoreDim = 1;

// CPU kernel names; the Distribute() methods pass these as the kernel-name
// argument of rtCpuKernelLaunch. The strings are runtime identifiers and are
// reproduced verbatim.
constexpr const char *kCpuTaskModelEnqueue = "modelEnqueue";
constexpr const char *kCpuTaskModelPrepare = "modelPrepare";
constexpr const char *kCpuTaskWaitEndGraph = "modelWaitEndGraph";
constexpr const char *kCpuTaskPrepareOutput = "bufferPrepareOutput";
constexpr const char *kCpuTaskModelDequeue = "modelDequeue";
constexpr const char *kCpuTaskModelRepeat = "modelRepeat";
// NOTE(review): "zeroCpy" (not "zeroCopy") is kept exactly as found; presumably it
// matches the kernel name expected on the device side - confirm before changing.
constexpr const char *kCpuTaskZeroCopy = "zeroCpy";
constexpr const char *kCpuTaskModelPostpare = "modelPostpare";
} // namespace
|
|
|
|
|
|
|
namespace ge { |
|
|
@@ -42,261 +39,214 @@ CpuTaskInfo::~CpuTaskInfo() { |
|
|
|
} |
|
|
|
args_ = nullptr; |
|
|
|
} |
|
|
|
/// |
|
|
|
/// @ingroup ge |
|
|
|
/// @brief definiteness queue schedule, bind input queue to task. |
|
|
|
/// @param [in] queue_id: input queue id from user. |
|
|
|
/// @param [out] in_mbuf: input mbuf addr for input data. |
|
|
|
/// @return: 0 for success / others for failed |
|
|
|
/// |
|
|
|
Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { |
|
|
|
if ((args_ != nullptr) || (args_size_ > 0)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
|
|
|
|
Status CpuTaskModelPrepare::GenerateCpuAddr(const map<uint32_t, ZeroCopyOffset> &node_addrs, void *&data_list_addr, |
|
|
|
void *&index_list_addr, uint32_t &num) { |
|
|
|
vector<uint64_t> addrs_list; |
|
|
|
vector<uint32_t> index_list; |
|
|
|
for (const auto &addrs : node_addrs) { |
|
|
|
const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); |
|
|
|
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "[Check][Param] not set outside_addrs"); |
|
|
|
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; |
|
|
|
for (const auto &virtual_args_addr : virtual_args_addrs) { |
|
|
|
num += virtual_args_addr.second.size(); |
|
|
|
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { |
|
|
|
index_list.emplace_back(addrs.first); |
|
|
|
addrs_list.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
args_size_ = sizeof(MbufQueueInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save in_mbuf. |
|
|
|
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&data_list_addr, addrs_list.size() * sizeof(uint64_t), RT_MEMORY_HBM)); |
|
|
|
rtError_t status = rtMemcpy(data_list_addr, addrs_list.size() * sizeof(uint64_t), addrs_list.data(), |
|
|
|
addrs_list.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", addrs_list.size() * sizeof(uint64_t), |
|
|
|
status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", addrs_list.size() * sizeof(uint64_t), status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo); |
|
|
|
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) |
|
|
|
|
|
|
|
MbufQueueInfo queue_info; |
|
|
|
queue_info.queue_id = queue_id; |
|
|
|
queue_info.in_mbuf = in_mbuf; // Placeholder, input mbuf addr will save to this place. |
|
|
|
status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&index_list_addr, index_list.size() * sizeof(uint32_t), RT_MEMORY_HBM)); |
|
|
|
status = rtMemcpy(index_list_addr, index_list.size() * sizeof(uint32_t), index_list.data(), |
|
|
|
index_list.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", index_list.size() * sizeof(uint32_t), |
|
|
|
status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", index_list.size() * sizeof(uint32_t), status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
|
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status CpuTaskModelDequeue::Distribute() { |
|
|
|
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
Status CpuTaskModelPrepare::GenerateOutSizeAddr(const map<uint32_t, ZeroCopyOffset> &outside_addrs, |
|
|
|
void *&output_size_list_addr) { |
|
|
|
vector<uint32_t> output_sizes; |
|
|
|
for (const auto &addrs : outside_addrs) { |
|
|
|
if (addrs.second.GetDataInfo().empty()) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Index:%u out_data_info is empty, check invalid", addrs.first); |
|
|
|
GELOGE(INTERNAL_ERROR, "[Check][Param] Index:%u out_data_info is empty, check invalid", addrs.first); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
} |
|
|
|
uint32_t data_size = static_cast<uint32_t>(addrs.second.GetDataInfo().at(0).first); |
|
|
|
output_sizes.push_back(data_size); |
|
|
|
} |
|
|
|
|
|
|
|
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&output_size_list_addr, output_sizes.size() * sizeof(uint32_t), RT_MEMORY_HBM)); |
|
|
|
rtError_t status = rtMemcpy(output_size_list_addr, output_sizes.size() * sizeof(uint32_t), output_sizes.data(), |
|
|
|
output_sizes.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", output_sizes.size() * sizeof(uint32_t), |
|
|
|
status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", output_sizes.size() * sizeof(uint32_t), status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Cpu kernel launch model dequeue task success."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// |
|
|
|
/// @ingroup ge |
|
|
|
/// @brief definiteness queue schedule, zero copy. |
|
|
|
/// @param [in] mbuf_list: input/output mbuf addr list for input/output data. |
|
|
|
/// @param [in] outside_addrs: model input/output memory addr |
|
|
|
/// @return: 0 for success / others for failed |
|
|
|
/// |
|
|
|
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) { |
|
|
|
Status CpuTaskModelPrepare::Init(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids, |
|
|
|
const map<uint32_t, ZeroCopyOffset> &inside_addrs, |
|
|
|
const map<uint32_t, ZeroCopyOffset> &outside_addrs, uintptr_t &out_mbuf) { |
|
|
|
if ((args_ != nullptr) || (args_size_ > 0)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
args_size_ = sizeof(AddrMapInfo); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); |
|
|
|
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) |
|
|
|
|
|
|
|
AddrMapInfo addr_map_info; |
|
|
|
// init src_addrs/dst_addrs |
|
|
|
vector<uint64_t> src_addrs; |
|
|
|
vector<uint64_t> dst_addrs; |
|
|
|
for (const auto &addrs : outside_addrs) { |
|
|
|
const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); |
|
|
|
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "[Check][Param] not set outside_addrs"); |
|
|
|
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; |
|
|
|
for (const auto &virtual_args_addr : virtual_args_addrs) { |
|
|
|
addr_map_info.addr_num += virtual_args_addr.second.size(); |
|
|
|
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { |
|
|
|
src_addrs.emplace_back(mbuf_list.at(addrs.first)); |
|
|
|
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); |
|
|
|
|
|
|
|
// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs |
|
|
|
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); |
|
|
|
rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), |
|
|
|
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", |
|
|
|
src_addrs.size() * sizeof(uint64_t), status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", |
|
|
|
src_addrs.size() * sizeof(uint64_t), status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status);) |
|
|
|
|
|
|
|
GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); |
|
|
|
status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(), |
|
|
|
dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", |
|
|
|
dst_addrs.size() * sizeof(uint64_t), status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", |
|
|
|
dst_addrs.size() * sizeof(uint64_t), status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status);) |
|
|
|
|
|
|
|
// src_addr_list is init to src_addr, which is the point to src_addrs |
|
|
|
if (!src_addrs.empty() && !dst_addrs.empty()) { |
|
|
|
addr_map_info.src_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(src_addr_)); |
|
|
|
addr_map_info.dst_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(dst_addr_)); |
|
|
|
GELOGI("src_addr_list is %lu, dst_addr_list is %lu", addr_map_info.src_addr_list, addr_map_info.dst_addr_list); |
|
|
|
} |
|
|
|
|
|
|
|
status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status);) |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status CpuTaskZeroCopy::Distribute() { |
|
|
|
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&mbufptr_list_, output_queue_ids.size() * sizeof(uint64_t), RT_MEMORY_HBM)); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&queue_id_list_addr_, input_queue_ids.size() * sizeof(uint32_t), RT_MEMORY_HBM)); |
|
|
|
rtError_t status = rtMemcpy(queue_id_list_addr_, input_queue_ids.size() * sizeof(uint32_t), input_queue_ids.data(), |
|
|
|
input_queue_ids.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", input_queue_ids.size() * sizeof(uint32_t), |
|
|
|
status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", input_queue_ids.size() * sizeof(uint32_t), status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Cpu kernel launch zero copy task success."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
CpuTaskZeroCopy::~CpuTaskZeroCopy() { |
|
|
|
if (src_addr_ == nullptr && dst_addr_ == nullptr) { |
|
|
|
return; |
|
|
|
} |
|
|
|
if (src_addr_ != nullptr) { |
|
|
|
rtError_t status = rtFree(src_addr_); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
GELOGW("Call rt free failed, status: 0x%x", status); |
|
|
|
} |
|
|
|
} |
|
|
|
if (dst_addr_ != nullptr) { |
|
|
|
rtError_t status = rtFree(dst_addr_); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
GELOGW("Call rt free failed, status: 0x%x", status); |
|
|
|
} |
|
|
|
uint32_t input_addr_num = 0; |
|
|
|
uint32_t output_addr_num = 0; |
|
|
|
if (GenerateCpuAddr(inside_addrs, input_list_addr_, input_index_list_addr_, input_addr_num) != SUCCESS) { |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
src_addr_ = nullptr; |
|
|
|
dst_addr_ = nullptr; |
|
|
|
} |
|
|
|
/// |
|
|
|
/// @ingroup ge |
|
|
|
/// @brief definiteness queue schedule, bind output queue to task. |
|
|
|
/// @param [in] addr: NetOutput Op input tensor address. |
|
|
|
/// @param [in] size: NetOutput Op input tensor size. |
|
|
|
/// @param [in] in_mbuf: input mbuf addr for input data. |
|
|
|
/// @param [out] out_mbuf: output mbuf addr for output data. |
|
|
|
/// @return: 0 for success / others for failed |
|
|
|
/// |
|
|
|
Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mbuf, uintptr_t &out_mbuf) { |
|
|
|
if ((args_ != nullptr) || (args_size_ > 0)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); |
|
|
|
if (GenerateCpuAddr(outside_addrs, output_list_addr_, output_index_list_addr_, output_addr_num) != SUCCESS) { |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
args_size_ = sizeof(PrepareOutputInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save out_mbuf. |
|
|
|
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
if (GenerateOutSizeAddr(outside_addrs, output_size_list_addr_) != SUCCESS) { |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo); |
|
|
|
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) |
|
|
|
|
|
|
|
// Get NetOutput Input address and bind to queue. |
|
|
|
PrepareOutputInfo prepare; |
|
|
|
prepare.data_size = size; |
|
|
|
prepare.data_addr = addr; |
|
|
|
prepare.in_mbuf = in_mbuf; |
|
|
|
prepare.out_mbuf = out_mbuf; // Placeholder, output mbuf addr will save to this place. |
|
|
|
status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
AicpuPareInfo aicpu_info; |
|
|
|
aicpu_info.aicpu_info_size = sizeof(AicpuPareInfo); |
|
|
|
aicpu_info.input_addr_num = input_addr_num; |
|
|
|
aicpu_info.input_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_list_addr_)); |
|
|
|
aicpu_info.input_index_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_index_list_addr_)); |
|
|
|
aicpu_info.output_addr_num = output_addr_num; |
|
|
|
aicpu_info.output_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_list_addr_)); |
|
|
|
aicpu_info.output_index_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_index_list_addr_)); |
|
|
|
aicpu_info.output_num = outside_addrs.size(); |
|
|
|
aicpu_info.output_size_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_size_list_addr_)); |
|
|
|
aicpu_info.in_queue_num = input_queue_ids.size(); |
|
|
|
aicpu_info.in_queueid_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(queue_id_list_addr_)); |
|
|
|
aicpu_info.out_queue_num = output_queue_ids.size(); |
|
|
|
aicpu_info.mbufptr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(mbufptr_list_)); |
|
|
|
|
|
|
|
args_size_ = sizeof(AicpuPareInfo); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); |
|
|
|
status = rtMemcpy(args_, args_size_, &aicpu_info, sizeof(AicpuPareInfo), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
out_mbuf = reinterpret_cast<uintptr_t>(mbufptr_list_); |
|
|
|
|
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status CpuTaskPrepareOutput::Distribute() { |
|
|
|
Status CpuTaskModelPrepare::Distribute() { |
|
|
|
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", args_size_); |
|
|
|
REPORT_INNER_ERROR("E19999", |
|
|
|
"Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", |
|
|
|
args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_); |
|
|
|
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelPrepare, kCoreDim, args_, args_size_, nullptr, stream_); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Cpu kernel launch prepare output task success."); |
|
|
|
GELOGI("Cpu kernel launch model prepare task success."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// |
|
|
|
/// @ingroup ge |
|
|
|
/// @brief definiteness queue schedule, bind output queue to task. |
|
|
|
/// @param [in] queue_id: output queue id from user. |
|
|
|
/// @param [in] out_mbuf: mbuf for output data. |
|
|
|
/// @return: 0 for success / others for failed |
|
|
|
/// |
|
|
|
Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { |
|
|
|
///
/// @ingroup ge
/// @brief Release the buffers held by the model-prepare task.
///        Every non-null member buffer is passed to rtFree (wrapped in GE_CHK_RT);
///        afterwards all seven members are reset to nullptr.
///
CpuTaskModelPrepare::~CpuTaskModelPrepare() {
  // Members are visited in the same order the original hand-written stanzas used.
  void **const owned_buffers[] = {&input_list_addr_,        &input_index_list_addr_, &output_list_addr_,
                                  &output_index_list_addr_, &output_size_list_addr_, &queue_id_list_addr_,
                                  &mbufptr_list_};

  // First pass: free whatever was allocated.
  for (void **const buffer : owned_buffers) {
    if (*buffer != nullptr) {
      GE_CHK_RT(rtFree(*buffer));
    }
  }

  // Second pass: clear the members only after every free has been attempted,
  // which keeps the free-all-then-reset-all sequence unchanged.
  for (void **const buffer : owned_buffers) {
    *buffer = nullptr;
  }
}
|
|
|
|
|
|
|
Status CpuTaskModelPostpare::Init(uint32_t model_id, const vector<uint32_t> &output_queue_ids, uintptr_t out_mbuf) { |
|
|
|
if ((args_ != nullptr) || (args_size_ > 0)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
// Get NetOutput Input address and bind to queue. |
|
|
|
args_size_ = sizeof(MbufQueueInfo); |
|
|
|
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&queue_id_list_addr_, output_queue_ids.size() * sizeof(uint32_t), RT_MEMORY_HBM)); |
|
|
|
rtError_t status = rtMemcpy(queue_id_list_addr_, output_queue_ids.size() * sizeof(uint32_t), output_queue_ids.data(), |
|
|
|
output_queue_ids.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", output_queue_ids.size() * sizeof(uint32_t), |
|
|
|
status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", output_queue_ids.size() * sizeof(uint32_t), |
|
|
|
status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) |
|
|
|
|
|
|
|
MbufQueueInfo queue_info; |
|
|
|
queue_info.queue_id = queue_id; |
|
|
|
queue_info.in_mbuf = out_mbuf; |
|
|
|
status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
AicpuPareInfo aicpu_info; |
|
|
|
aicpu_info.aicpu_info_size = sizeof(AicpuPareInfo); |
|
|
|
aicpu_info.model_id = model_id; |
|
|
|
aicpu_info.out_queue_num = output_queue_ids.size(); |
|
|
|
aicpu_info.out_queueid_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(queue_id_list_addr_)); |
|
|
|
aicpu_info.mbufptr_list = static_cast<uint64_t>(out_mbuf); |
|
|
|
|
|
|
|
args_size_ = sizeof(AicpuPareInfo); |
|
|
|
GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); |
|
|
|
status = rtMemcpy(args_, args_size_, &aicpu_info, sizeof(AicpuPareInfo), RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
@@ -306,25 +256,35 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status CpuTaskModelEnqueue::Distribute() { |
|
|
|
Status CpuTaskModelPostpare::Distribute() { |
|
|
|
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_ is 0 or stream_ is nullptr, arg_size:%u," |
|
|
|
"check invalid", args_size_); |
|
|
|
REPORT_INNER_ERROR("E19999", |
|
|
|
"Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", |
|
|
|
args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_); |
|
|
|
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelPostpare, kCoreDim, args_, args_size_, nullptr, stream_); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("Cpu kernel launch model enqueue task success."); |
|
|
|
GELOGI("Cpu kernel launch model postpare task success."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
///
/// @ingroup ge
/// @brief Release the queue-id list buffer held by the model-postpare task.
///        A non-null queue_id_list_addr_ is passed to rtFree (wrapped in GE_CHK_RT);
///        the member is then reset to nullptr.
///
CpuTaskModelPostpare::~CpuTaskModelPostpare() {
  if (queue_id_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(queue_id_list_addr_));
  }

  queue_id_list_addr_ = nullptr;
}
|
|
|
|
|
|
|
/// |
|
|
|
/// @ingroup ge |
|
|
|
/// @brief definiteness queue schedule, active entry stream. |
|
|
@@ -394,8 +354,10 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) { |
|
|
|
|
|
|
|
Status CpuTaskWaitEndGraph::Distribute() { |
|
|
|
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", args_size_); |
|
|
|
REPORT_INNER_ERROR("E19999", |
|
|
|
"Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr," |
|
|
|
"check invalid", |
|
|
|
args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
@@ -410,55 +372,4 @@ Status CpuTaskWaitEndGraph::Distribute() { |
|
|
|
GELOGI("Cpu kernel launch wait end task success."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// |
|
|
|
/// @ingroup ge |
|
|
|
/// @brief definiteness queue schedule, repeat run model. |
|
|
|
/// @param [in] model_id: model id for repeat run. |
|
|
|
/// @return: 0 for success / others for failed |
|
|
|
/// |
|
|
|
Status CpuTaskModelRepeat::Init(uint32_t model_id) { |
|
|
|
if ((args_ != nullptr) || (args_size_ > 0)) { |
|
|
|
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_); |
|
|
|
GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_); |
|
|
|
return FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
args_size_ = sizeof(model_id); |
|
|
|
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) |
|
|
|
|
|
|
|
status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); |
|
|
|
if (status != RT_ERROR_NONE) { |
|
|
|
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status); |
|
|
|
return RT_ERROR_TO_GE_STATUS(status); |
|
|
|
} |
|
|
|
|
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
///
/// @ingroup ge
/// @brief Queue schedule: launch the repeat-run-model CPU kernel.
///        Calls rtCpuKernelLaunch with kernel name kCpuTaskModelRepeat, kCoreDim,
///        and the args_/args_size_ buffer on stream_.
/// @return: SUCCESS on success;
///          FAILED if args_ is nullptr, args_size_ is 0 or stream_ is nullptr;
///          RT_ERROR_TO_GE_STATUS(status) if rtCpuKernelLaunch fails.
///
Status CpuTaskModelRepeat::Distribute() {
  // The task needs both an argument buffer and a stream before it can be launched.
  if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
                       "check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
    return FAILED;
  }

  rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
    GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  GELOGI("Cpu kernel launch repeat task success.");
  return SUCCESS;
}
|
|
|
} // namespace ge |