From 35ed34f4f9e218be0e9b674079edbbaa77d351ac Mon Sep 17 00:00:00 2001 From: liudingyan Date: Thu, 22 Jul 2021 16:22:18 +0800 Subject: [PATCH] test cv case base on ffts plus task --- ge/generator/ge_generator.cc | 1 - ge/graph/build/memory/graph_mem_assigner.cc | 2 +- ge/graph/build/memory/memory_assigner.cc | 5 +- ge/graph/load/model_manager/davinci_model.cc | 52 ++ ge/graph/load/model_manager/davinci_model.h | 5 + .../model_manager/task_info/ffts_plus_task_info.cc | 33 +- .../model_manager/task_info/ffts_plus_task_info.h | 6 + .../model_manager/task_info/kernel_ex_task_info.cc | 109 +++- .../model_manager/task_info/kernel_ex_task_info.h | 8 + .../model_manager/task_info/kernel_task_info.cc | 106 +++- .../model_manager/task_info/kernel_task_info.h | 8 + ge/graph/manager/graph_manager.cc | 4 +- ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc | 30 + ge/hybrid/node_executor/aicpu/aicpu_ext_info.h | 5 + .../node_executor/aicpu/aicpu_node_executor.cc | 121 ++++ .../node_executor/aicpu/aicpu_node_executor.h | 10 +- ge/offline/single_op_parser.cc | 3 +- ge/single_op/task/op_task.cc | 117 ++++ ge/single_op/task/op_task.h | 7 + metadef | 2 +- tests/depends/runtime/src/runtime_stub.cc | 105 +++- tests/depends/runtime/src/runtime_stub.h | 70 +++ tests/ut/ge/CMakeLists.txt | 4 + .../ge/graph/build/graph_mem_assigner_unittest.cc | 90 +++ .../ge/graph/load/ffts_plus_task_info_unittest.cc | 697 +++++++++++++++++++++ .../ge/graph/load/kernel_ex_task_info_unittest.cc | 141 ++++- .../ut/ge/graph/load/kernel_task_info_unittest.cc | 140 ++++- tests/ut/ge/graph_ir/ge_ir_build_unittest.cc | 4 +- .../aicpu/aicpu_node_executor_unittest.cc | 227 ++++++- tests/ut/ge/single_op/single_op_task_unittest.cc | 131 +++- third_party/fwkacllib/inc/cce/fwk_adpt_struct.h | 16 + third_party/fwkacllib/inc/runtime/config.h | 8 + third_party/fwkacllib/inc/runtime/dev.h | 12 + third_party/fwkacllib/inc/runtime/rt_ffts.h | 2 +- 34 files changed, 2243 insertions(+), 38 deletions(-) create mode 100644 
tests/depends/runtime/src/runtime_stub.h create mode 100644 tests/ut/ge/graph/build/graph_mem_assigner_unittest.cc create mode 100644 tests/ut/ge/graph/load/ffts_plus_task_info_unittest.cc diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 1a80a3e0..7c5cb330 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -1158,7 +1158,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "build graph failed, graph id:%u, ret:%d", graph_id, ret); GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "[Build][Graph] fail, graph id: %u", graph_id); - ret = GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } RtContextUtil::GetInstance().DestroyRtContexts(session_id); diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f8878383..542b6215 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -275,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map({"size", "item", "maxsize"}), std::vector({std::to_string(total_mem_offset), "featuremap", std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())})); - return ge::FAILED; + return ACL_ERROR_GE_MEMORY_ALLOCATION; } return SUCCESS; } diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 6e49827f..41171164 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -29,9 +29,10 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &m } // Reassign memory for special nodes - if (graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset) != ge::SUCCESS) { + Status ret = graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset); + if (ret != ge::SUCCESS) { GELOGE(ge::FAILED, "[ReAssign][Memory] failed, graph:%s", compute_graph_->GetName().c_str()); - return 
ge::FAILED; + return ret; } // Assign memory (block and offset) for zero copy nodes diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 1141712c..b97f53a0 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -235,6 +235,12 @@ DavinciModel::~DavinciModel() { GE_LOGW_IF(rtEventDestroy(event_list_[i]) != RT_ERROR_NONE, "Destroy event failed, index: %zu", i); } + for (const auto &it : stream_2_event_) { + if (rtEventDestroy(it.second) != RT_ERROR_NONE) { + GELOGW("Destroy event failed"); + } + } + FreeWeightsMem(); FreeFeatureMapMem(); @@ -4660,4 +4666,50 @@ Status DavinciModel::GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size) total_useful_size = runtime_param_.mem_size - runtime_param_.zero_copy_size; return SUCCESS; } + +Status DavinciModel::GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id) { + GELOGI("Get event id for aicpu blocking op:%s", op_desc->GetName().c_str()); + auto it = stream_2_event_.find(stream); + if (it != stream_2_event_.end()) { + auto rt_ret = rtGetEventID(it->second, &event_id); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + } else { + rtEvent_t rt_event = nullptr; + auto rt_ret = rtEventCreateWithFlag(&rt_event, RT_EVENT_WITH_FLAG); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for op:%s(%s), ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret); + GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for op:%s(%s), ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), 
rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + rt_ret = rtGetEventID(rt_event, &event_id); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + stream_2_event_.emplace(stream, rt_event); + } + return SUCCESS; +} + +Status DavinciModel::GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event) { + auto it = stream_2_event_.find(stream); + if (it == stream_2_event_.end()) { + REPORT_INNER_ERROR("E19999", "Get event failed"); + GELOGE(FAILED, "[Get][Event] Get event failed"); + return FAILED; + } + rt_event = it->second; + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 668c4b31..fd46d92c 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -583,6 +583,9 @@ class DavinciModel { Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback); Status GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt); + // for blocking aicpu op + Status GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event); + Status GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id); private: // memory address of weights @@ -1111,6 +1114,8 @@ class DavinciModel { // op name to attrs mapping std::map>> op_name_to_attrs_; + + std::map stream_2_event_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/model_manager/task_info/ffts_plus_task_info.cc b/ge/graph/load/model_manager/task_info/ffts_plus_task_info.cc index 73dbd788..c17a3203 100644 --- 
a/ge/graph/load/model_manager/task_info/ffts_plus_task_info.cc +++ b/ge/graph/load/model_manager/task_info/ffts_plus_task_info.cc @@ -30,6 +30,7 @@ constexpr uint32_t kTailAicCtxIndex = 1; constexpr uint32_t kNonTailAivCtxIndex = 2; constexpr uint32_t kTailAivCtxIndex = 3; constexpr uint32_t kMixAicAivCtxPcNum = 4; +constexpr uint32_t kModeInArgsFirstField = 1; } namespace ge { FftsPlusTaskInfo::~FftsPlusTaskInfo() { @@ -50,7 +51,7 @@ Status FftsPlusTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (args_size_ != 0) { GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); } - + SetAdditionalDatatoCtx(ffts_plus_task_def); std::vector sqe_buffer(sizeof(rtFftsPlusSqe_t)); auto ffts_plus_sqe = reinterpret_cast(sqe_buffer.data()); InitFftsPlusSqe(ffts_plus_task_def.ffts_plus_sqe(), ffts_plus_sqe); @@ -124,6 +125,10 @@ Status FftsPlusTaskInfo::InitFftsPlusCtx(const domi::FftsPlusTaskDef &task_def, break; } default: + if (ctx_def.hardware_ctx_type() == RT_HW_CTX_TYPE_AIC || + ctx_def.hardware_ctx_type() == RT_HW_CTX_TYPE_AIV) { + GE_CHK_STATUS_RET_NOLOG(UpdateMixAicAivCtxParam(ctx_def.mix_aic_aiv_ctx(), i)); + } GE_CHK_STATUS_RET_NOLOG(InitHardWareCtx(ctx_def, cur_ctx)); break; } @@ -954,6 +959,32 @@ Status FftsPlusTaskInfo::Distribute() { return SUCCESS; } +void FftsPlusTaskInfo::SetAdditionalDatatoCtx(const domi::FftsPlusTaskDef &task_def) { + for (int i = 0; i < task_def.additional_data_size(); ++i) { + const domi::AdditionalDataDef &additionaldata = task_def.additional_data(i); + const uint32_t &data_type = additionaldata.data_type(); + for (int j = 0; j < additionaldata.context_id_size(); ++j) { + ctx_additional_data_[additionaldata.context_id(j)].emplace(data_type); + } + } +} + +Status FftsPlusTaskInfo::UpdateMixAicAivCtxParam(const domi::FftsPlusMixAicAivCtxDef &ctx_def, size_t ctx_idx) { + if (ctx_additional_data_.count(ctx_idx) == 0) { + GELOGD("ctx idx:%zu not in ctx additional data"); + return SUCCESS; + } + if 
(ctx_additional_data_[ctx_idx].count(kModeInArgsFirstField) == 0) { + GELOGD("ctx idx:%zu need not to save mode in args first field"); + return SUCCESS; + } + if (rtApp_addr_ == 0) { + GE_CHK_RT_RET(rtGetC2cCtrlAddr(&rtApp_addr_, &rtApp_data_len_)); + } + GE_CHK_RT_RET(rtMemcpy(reinterpret_cast(ctx_def.aiv_task_param_ptr()), rtApp_data_len_, + reinterpret_cast(rtApp_addr_), rtApp_data_len_, RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} // task_addr = {0,200,700,1000,2000, 3500} // task_addr_offset = {20,40,2,100,200} template diff --git a/ge/graph/load/model_manager/task_info/ffts_plus_task_info.h b/ge/graph/load/model_manager/task_info/ffts_plus_task_info.h index 84123686..f55dfe9d 100644 --- a/ge/graph/load/model_manager/task_info/ffts_plus_task_info.h +++ b/ge/graph/load/model_manager/task_info/ffts_plus_task_info.h @@ -58,6 +58,9 @@ class FftsPlusTaskInfo : public TaskInfo { Status InitCaseSwitchCtx(const domi::FftsPlusCaseSwitchCtxDef &ctx_def, rtFftsPlusCaseSwitchCtx_t *&ctx); Status InitCaseDefaultCtx(const domi::FftsPlusCaseDefaultCtxDef &ctx_def, rtFftsPlusCaseDefCtx_t *&ctx); + void SetAdditionalDatatoCtx(const domi::FftsPlusTaskDef &task_def); + Status UpdateMixAicAivCtxParam(const domi::FftsPlusMixAicAivCtxDef &ctx_def, size_t ctx_idx); + template Status InitIoAddrs(const RuntimeParam &rts_param, const T &aic_aiv_def, uint32_t thread_id, uint32_t addr_count); @@ -66,6 +69,9 @@ class FftsPlusTaskInfo : public TaskInfo { std::vector io_addrs_; void *args_{nullptr}; // runtime args memory uint32_t args_size_{0}; // runtime args memory length + std::map> ctx_additional_data_; + uint64_t rtApp_addr_{0}; + uint32_t rtApp_data_len_{0}; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 1a6ab542..fe9cd0cc 100644 --- 
a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -26,8 +26,8 @@ #include "external/graph/attr_value.h" #include "graph/load/model_manager/davinci_model.h" #include "graph/load/model_manager/model_manager.h" -#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" #include "framework/common/debug/log.h" +#include "runtime/rt.h" namespace { const char *const kAicpuAllshape = "_AllShape"; @@ -43,7 +43,7 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe UnknowShapeOpType unknown_type = static_cast(unknown_shape_type_val); uint32_t num_inputs = op_desc->GetInputsSize(); uint32_t num_outputs = op_desc->GetOutputsSize(); - std::unique_ptr ext_handle( + std::shared_ptr ext_handle( new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(), num_inputs, num_outputs, @@ -76,6 +76,16 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe } } } + + AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); + GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); + + if (UpdateEventIdForAicpuBlockingOp(op_desc, ext_handle) != SUCCESS) { + GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); + return FAILED; + } + auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", ext_info.size(), rt_ret); @@ -448,6 +458,101 @@ Status KernelExTaskInfo::Distribute() { stream_id_ = stream_id; GELOGI("KernelExTaskInfo Distribute Success. 
task id: %u, stream id: %u", task_id_, stream_id_); + if (is_blocking_aicpu_op_) { + if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) { + GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); + return FAILED; + } + } + return SUCCESS; +} + +Status KernelExTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) { + int32_t device_id = 0; + auto rt_ret = rtGetDevice(&device_id); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + int32_t value = 0; + rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) { + REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d", + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); + GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d", + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); + return FAILED; + } + is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? 
true : false); + return SUCCESS; +} + +Status KernelExTaskInfo::UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc, + std::shared_ptr &ext_handle) { + if (is_blocking_aicpu_op_) { + bool is_support = false; + if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { + GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); + return FAILED; + } + if (!is_support) { + GELOGD("Device not support blocking aicpu op process"); + return SUCCESS; + } + uint32_t event_id = 0; + if (davinci_model_->GetEventIdForBlockingAicpuOp(op_desc, stream_, event_id) != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Get event id failed for op:%s(%s).", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); + GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); + return FAILED; + } + if (ext_handle->UpdateEventId(event_id) != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Update event id failed for op:%s(%s).", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); + GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); + return FAILED; + } + GELOGI("Update event_id=%u success", event_id); + } + return SUCCESS; +} + +Status KernelExTaskInfo::DistributeWaitTaskForAicpuBlockingOp() { + bool is_support = false; + if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { + GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); + return FAILED; + } + if (!is_support) { + GELOGD("Device not support blocking aicpu op process."); + return SUCCESS; + } + GELOGD("Distribute wait task begin"); + rtEvent_t rt_event = nullptr; + if (davinci_model_->GetEventByStream(stream_, rt_event) != SUCCESS) { + GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed"); + return FAILED; + } + 
auto rt_ret = rtStreamWaitEvent(stream_, rt_event); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + rt_ret = rtEventReset(rt_event, stream_); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } return SUCCESS; } diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index 7d07eb7f..eb411576 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -19,6 +19,7 @@ #include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" +#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" namespace ge { class KernelExTaskInfo : public TaskInfo { @@ -65,6 +66,12 @@ class KernelExTaskInfo : public TaskInfo { void InitDumpArgs(void *addr, const OpDescPtr &op_desc); Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); + // for blocking aicpu op + Status DistributeWaitTaskForAicpuBlockingOp(); + Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); + Status UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc, + std::shared_ptr &ext_handle); + uint32_t task_id_; uint32_t stream_id_; uint32_t dump_flag_; @@ -79,6 +86,7 @@ class KernelExTaskInfo : public TaskInfo { uint32_t args_offset_ = 0; int64_t fixed_addr_offset_ = 0; int32_t topic_type_flag_ = -1; + bool is_blocking_aicpu_op_ = false; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index 
019a0a8b..6bbfe58e 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -28,11 +28,10 @@ #include "graph/load/model_manager/davinci_model.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/model_utils.h" -#include "runtime/kernel.h" +#include "runtime/rt.h" #include "graph/load/model_manager/task_info/super_kernel/super_kernel.h" #include "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h" #include "cce/aicpu_engine_struct.h" -#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" #include "framework/common/debug/log.h" namespace { @@ -474,6 +473,12 @@ Status KernelTaskInfo::Distribute() { } // set for task_id_ UpdateTaskId(); + if (is_blocking_aicpu_op_) { + if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) { + GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); + return FAILED; + } + } GELOGD( "KernelTaskInfo Distribute Success. 
sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " "blockdim:%d stream:%p", @@ -482,6 +487,91 @@ Status KernelTaskInfo::Distribute() { return SUCCESS; } +Status KernelTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) { + int32_t device_id = 0; + auto rt_ret = rtGetDevice(&device_id); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + int32_t value = 0; + rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) { + REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d", + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); + GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d", + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); + return FAILED; + } + is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? 
true : false); + return SUCCESS; +} + +Status KernelTaskInfo::UpdateEventIdForAicpuBlockingOp(std::shared_ptr &ext_handle) { + if (is_blocking_aicpu_op_) { + bool is_support = false; + if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { + GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); + return FAILED; + } + if (!is_support) { + GELOGD("Device not support blocking aicpu op process"); + return SUCCESS; + } + uint32_t event_id = 0; + if (davinci_model_->GetEventIdForBlockingAicpuOp(op_desc_, stream_, event_id) != SUCCESS) { + GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str()); + return FAILED; + } + if (ext_handle->UpdateEventId(event_id) != SUCCESS) { + GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str()); + return FAILED; + } + GELOGI("Update event_id=%u success", event_id); + } + return SUCCESS; +} + +Status KernelTaskInfo::DistributeWaitTaskForAicpuBlockingOp() { + bool is_support = false; + if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { + GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); + return FAILED; + } + if (!is_support) { + GELOGD("device not support blocking aicpu op process."); + return SUCCESS; + } + GELOGD("Distribute wait task begin"); + rtEvent_t rt_event = nullptr; + if (davinci_model_->GetEventByStream(stream_, rt_event) != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Call GetEventByStream failed"); + GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed"); + return FAILED; + } + auto rt_ret = rtStreamWaitEvent(stream_, rt_event); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", 
rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + rt_ret = rtEventReset(rt_event, stream_); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + return SUCCESS; +} + void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) { const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); vector input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); @@ -1109,7 +1199,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { UnknowShapeOpType unknown_type = static_cast(unknown_shape_type_val); uint32_t num_inputs = op_desc_->GetInputsSize(); uint32_t num_outputs = op_desc_->GetOutputsSize(); - std::unique_ptr ext_handle( + std::shared_ptr ext_handle( new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), num_inputs, num_outputs, @@ -1145,6 +1235,16 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { j, op_desc_->GetName().c_str()); } } + + AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); + GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_); + + if (UpdateEventIdForAicpuBlockingOp(ext_handle) != SUCCESS) { + GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); + return FAILED; + } + auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X", diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h index d9dd30bb..59a91aee 100644 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.h +++ 
b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -24,6 +24,8 @@ #include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" +#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" + namespace ge { class KernelTaskInfo : public TaskInfo { public: @@ -148,6 +150,11 @@ class KernelTaskInfo : public TaskInfo { bool DoubleCallSKTSaveCheck(); void SetArgs(); + // for blocking aicpu op + Status DistributeWaitTaskForAicpuBlockingOp(); + Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); + Status UpdateEventIdForAicpuBlockingOp(std::shared_ptr &ext_handle); + void *stub_func_; void *args_; void *sm_desc_; @@ -187,6 +194,7 @@ class KernelTaskInfo : public TaskInfo { uint32_t skt_dump_flag_ = RT_KERNEL_DEFAULT; void *superkernel_device_args_addr_ = nullptr; void *superkernel_dev_nav_table_ = nullptr; + bool is_blocking_aicpu_op_ = false; struct AICPUCustomInfo { void *input_descs = nullptr; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 39439f33..bafc60ce 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -1389,8 +1389,8 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vectorSetRunFlag(false); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_PRERUN_FAILED, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id); - return GE_GRAPH_PRERUN_FAILED; + GELOGE(ret, "[Call][StartForRunGraph] failed! 
graph_id:%u.", graph_id); + return ret; } GELOGI("[BuildGraph] build graph success, graph_id=%u.", graph_id); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc index c607a43e..6e8841b9 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -81,6 +81,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { case aicpu::FWKAdapter::FWK_ADPT_EXT_TOPIC_TYPE: GE_CHK_STATUS_RET(ParseExtTopicType(aicpu_ext_info), "[Parse][ExtTopicType] failed."); break; + case aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT: + GE_CHK_STATUS_RET(ParseExtAsyncWait(aicpu_ext_info), "[Parse][ExtAsyncWait] failed."); + break; default: GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); @@ -101,6 +104,22 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { return SUCCESS; } +Status AicpuExtInfoHandler::ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info) { + if (aicpu_ext_info->infoLen != sizeof(AsyncWaitInfo)) { + REPORT_INNER_ERROR("E19999", + "Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.", + node_name_.c_str(), sizeof(AsyncWaitInfo), aicpu_ext_info->infoLen); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "[Check][DataLen]Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.", + node_name_.c_str(), sizeof(AsyncWaitInfo), aicpu_ext_info->infoLen); + return ACL_ERROR_GE_PARAM_INVALID; + } + + async_wait_ = reinterpret_cast(aicpu_ext_info->infoMsg); + GELOGI("Node[%s] parse async wait info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); + return SUCCESS; +} + Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(int32_t), REPORT_INNER_ERROR("E19999", "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", @@ -280,6 +299,17 @@ Status 
AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kern return SUCCESS; } +Status AicpuExtInfoHandler::UpdateEventId(uint32_t event_id) { + if (async_wait_ == nullptr) { + REPORT_INNER_ERROR("E19999", "async_wait_ is nullptr."); + GELOGE(FAILED, "[Check][async_wait_] async_wait_ is nullptr."); + return FAILED; + } + async_wait_->waitType = 1; + async_wait_->waitId = event_id; + return SUCCESS; +} + Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) { if (session_info_ == nullptr) { GELOGD("There is no session info in ext_info, no need update."); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h index 46fb7c05..80e3bb92 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h @@ -27,6 +27,7 @@ namespace ge { namespace hybrid { using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; +using AsyncWaitInfo = aicpu::FWKAdapter::AsyncWait; using AicpuSessionInfo = SessionInfo; class AicpuExtInfoHandler { @@ -59,6 +60,8 @@ class AicpuExtInfoHandler { Status UpdateExecuteMode(bool flag); + Status UpdateEventId(uint32_t event_id); + Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); bool IsNeedRefreshIOAddr(); @@ -73,6 +76,7 @@ class AicpuExtInfoHandler { Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); Status ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info); Status ParseExtTopicType(AicpuExtInfo *aicpu_ext_info); + Status ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info); static Status UpdateShapeAndType(const GeShape &shape, DataType data_type, @@ -90,6 +94,7 @@ class AicpuExtInfoHandler { const uint32_t output_num_; UnknowShapeOpType unknown_type_; AicpuSessionInfo *session_info_ = nullptr; + AsyncWaitInfo *async_wait_ = nullptr; uint64_t *bit_map_ = nullptr; uint32_t *update_addr_ = nullptr; int32_t topic_type_flag_ = -1; 
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index cf20303c..f309ebd0 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -22,6 +22,7 @@ #include "graph/utils/node_utils.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" +#include "runtime/rt.h" namespace ge { namespace hybrid { @@ -33,6 +34,12 @@ const char *const kAicpuAllshape = "_AllShape"; REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor); REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor); +AicpuNodeTaskBase::~AicpuNodeTaskBase() { + if (rt_event_ != nullptr) { + (void)rtEventDestroy(rt_event_); + } +} + Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr &tensor_buffer) { auto allocator = NpuMemoryAllocator::GetAllocator(); GE_CHECK_NOTNULL(allocator); @@ -64,6 +71,13 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_ GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), "[Update][SessionInfoSessionId] failed, session_id:%ld.", session_id); + if (is_blocking_aicpu_op_) { + if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) { + GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed"); + return FAILED; + } + } + // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), "[Invoke][AllocTensorBuffer]Node[%s] alloc kernel_ext_info buf failed, size=%zu", @@ -230,6 +244,96 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionnum_outputs == 0)) { GELOGD("Node[%s] type[%s] unknown_type is %d, output num is %d.", @@ -325,6 +429,9 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { // init ext info uint64_t ext_session_id = 
model.GetSessionId(); + const OpDescPtr op_desc = node_item_->GetOpDesc(); + AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); + GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "[Init][ExtInfo] failed for Node[%s].", node_name_.c_str()); GE_CHK_STATUS_RET(InitForDependComputeTask(), "[Init][DependComputeTask] failed for Node[%s].", node_name_.c_str()); @@ -642,6 +749,12 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { kernel_buf_->GetSize(), flag, context.GetStream())); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); GELOGD("Node[%s] launch end.", node_name_.c_str()); + if (is_blocking_aicpu_op_) { + if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) { + GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); + return FAILED; + } + } if (need_sync_) { GELOGD("[%s] Task needs sync", node_name_.c_str()); GE_CHK_STATUS_RET_NOLOG(context.Synchronize()); @@ -760,6 +873,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { return FAILED;); uint64_t ext_session_id = model.GetSessionId(); + AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); + GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "[Init][ExtInfo] failed for Node[%s].", node_name.c_str()); @@ -826,6 +941,12 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { args_.get(), args_size_, nullptr, context.GetStream(), flag); GE_CHK_RT_RET(rt_ret); + if (is_blocking_aicpu_op_) { + if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) { + GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call 
DistributeWaitTaskForAicpuBlockingOp failed"); + return FAILED; + } + } GELOGD("Node[%s] launch task end.", node_name_.c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 14bc8fcc..3911e090 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -35,7 +35,7 @@ class AicpuNodeTaskBase : public NodeTask { node_item->num_outputs, node_item->shape_inference_type) {} - ~AicpuNodeTaskBase() override = default; + ~AicpuNodeTaskBase() override; using NodeTask::Init; @@ -61,6 +61,10 @@ class AicpuNodeTaskBase : public NodeTask { static Status AllocTensorBuffer(size_t size, std::unique_ptr &tensor_buffer); + Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); + Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); + Status UpdateEventIdForBlockingAicpuOp(); + protected: const NodeItem *node_item_; // just reference. 
@@ -78,6 +82,10 @@ class AicpuNodeTaskBase : public NodeTask { // ext info addr, device mem std::unique_ptr ext_info_addr_dev_; + + // for blocking aicpu op + bool is_blocking_aicpu_op_ = false; + rtEvent_t rt_event_ = nullptr; }; class AicpuTfNodeTask : public AicpuNodeTaskBase { diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc index 6bc5cb3d..aeb73116 100644 --- a/ge/offline/single_op_parser.cc +++ b/ge/offline/single_op_parser.cc @@ -89,7 +89,8 @@ map kDataTypeDict = { {"float", DT_FLOAT}, {"float32", DT_FLOAT}, {"double", DT_DOUBLE}, - {"complex64", DT_COMPLEX64} + {"complex64", DT_COMPLEX64}, + {"complex128", DT_COMPLEX128} }; map kFormatDict = { diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index dbc90ac5..83cb0529 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -564,6 +564,41 @@ AiCpuBaseTask::~AiCpuBaseTask() { if (ext_info_addr_dev_ != nullptr) { (void)rtFree(ext_info_addr_dev_); } + if (rt_event_ != nullptr) { + (void)rtEventDestroy(rt_event_); + } +} + +Status AiCpuBaseTask::UpdateEventIdForBlockingAicpuOp() { + bool is_support = false; + if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { + GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); + return FAILED; + } + if (!is_support) { + GELOGD("Device not support blocking aicpu op process"); + return SUCCESS; + } + uint32_t event_id = 0; + auto rt_ret = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + rt_ret = rtGetEventID(rt_event_, &event_id); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, 
"[Call][rtGetEventID] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + if (aicpu_ext_handle_->UpdateEventId(event_id) != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Update event id=%u failed.", event_id); + GELOGE(FAILED, "[Update][EventId] Update event id=%u failed", event_id); + return FAILED; + } + GELOGI("Update event_id=%u success", event_id); + return SUCCESS; } Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id) { @@ -577,6 +612,9 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint GELOGD("Get unknown_type is %d.", unknown_shape_type_val); unknown_type_ = static_cast(unknown_shape_type_val); + AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); + GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_); + aicpu_ext_handle_.reset(new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), num_inputs_, num_outputs_, @@ -595,6 +633,13 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), "[Update][SessionInfo] failed."); + if (is_blocking_aicpu_op_) { + if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) { + GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed"); + return FAILED; + } + } + GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), @@ -770,6 +815,63 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto return SUCCESS; } +Status AiCpuBaseTask::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) { + int32_t device_id = 0; + auto rt_ret = rtGetDevice(&device_id); + if (rt_ret != RT_ERROR_NONE) { + 
REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + int32_t value = 0; + rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) { + REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d", + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); + GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d", + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); + return FAILED; + } + is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? 
true : false); + return SUCCESS; +} + +Status AiCpuBaseTask::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) { + bool is_support = false; + if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { + GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); + return FAILED; + } + if (!is_support) { + GELOGD("Device not support blocking aicpu op process."); + return SUCCESS; + } + GELOGI("Distribute queue task begin"); + if (rt_event_ == nullptr) { + REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr"); + GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr"); + return FAILED; + } + auto rt_ret = rtStreamWaitEvent(stream, rt_event_); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + rt_ret = rtEventReset(rt_event_, stream); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + return SUCCESS; +} + AiCpuTask::~AiCpuTask() { FreeHbm(args_); FreeHbm(io_addr_); @@ -813,6 +915,14 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Done launch kernel successfully. 
task = %s", this->op_type_.c_str()); + + if (is_blocking_aicpu_op_) { + if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { + GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); + return FAILED; + } + } + return SUCCESS; } @@ -1089,6 +1199,13 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Invoke rtCpuKernelLaunch succeeded"); + + if (is_blocking_aicpu_op_) { + if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { + GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); + return FAILED; + } + } return SUCCESS; } diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 132672b0..adf51dba 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -178,6 +178,10 @@ class AiCpuBaseTask : public OpTask { rtStream_t stream); Status UpdateOutputShape(vector &output_desc); Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); + // for blocking aicpu op + Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); + Status UpdateEventIdForBlockingAicpuOp(); + Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); protected: size_t num_inputs_ = 0; @@ -186,6 +190,9 @@ class AiCpuBaseTask : public OpTask { std::unique_ptr aicpu_ext_handle_; void *ext_info_addr_dev_ = nullptr; vector input_is_const_; + // for blocking aicpu op + bool is_blocking_aicpu_op_ = false; + rtEvent_t rt_event_ = nullptr; }; class AiCpuTask : public AiCpuBaseTask { diff --git a/metadef b/metadef index 8f2c4395..a725349b 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 8f2c4395c346af026c470b47a7c52f2ab5b51f90 +Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 
5b6ab796..eb5f558e 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -16,12 +16,94 @@ #include #include +#include "runtime_stub.h" +#include "runtime/rt.h" + +#define ADD_STUB_RETURN_VALUE(FUNC, TYPE) std::vector g_Stub_##FUNC##_RETURN + +#define GET_STUB_RETURN_VALUE(FUNC, TYPE, DEFAULT) ({ \ + TYPE result = DEFAULT; \ + if (!g_Stub_##FUNC##_RETURN.empty()) { \ + result = g_Stub_##FUNC##_RETURN.back(); \ + g_Stub_##FUNC##_RETURN.pop_back(); \ + } \ + result; \ +}) + +#define DEL_STUB_RETURN_VALUE(FUNC, TYPE) \ +do { \ + extern std::vector g_Stub_##FUNC##_RETURN; \ + g_Stub_##FUNC##_RETURN.clear(); \ +} while (0) + + +#define ADD_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) std::vector g_Stub_##FUNC##_OUT_##NAME + +#define GET_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, DEFAULT) ({ \ + TYPE value; \ + if (!g_Stub_##FUNC##_OUT_##NAME.empty()) { \ + value = g_Stub_##FUNC##_OUT_##NAME.back(); \ + g_Stub_##FUNC##_OUT_##NAME.pop_back(); \ + } else { \ + value = DEFAULT; \ + } \ + value; \ +}) + +#define DEL_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) \ +do { \ + extern std::vector g_Stub_##FUNC##_OUT_##NAME; \ + g_Stub_##FUNC##_OUT_##NAME.clear(); \ +} while (0) #ifdef __cplusplus extern "C" { #endif #define EVENT_LENTH 10 +void rtStubTearDown() { + DEL_STUB_RETURN_VALUE(rtGetDevice, rtError_t); + DEL_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); + DEL_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); + DEL_STUB_RETURN_VALUE(rtEventReset, rtError_t); + DEL_STUB_RETURN_VALUE(rtEventCreate, rtError_t); + DEL_STUB_RETURN_VALUE(rtGetEventID, rtError_t); +} + +ADD_STUB_RETURN_VALUE(rtGetDevice, rtError_t); +rtError_t rtGetDevice(int32_t *device) { + return GET_STUB_RETURN_VALUE(rtGetDevice, rtError_t, RT_ERROR_NONE); +} + +ADD_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); +ADD_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value); +rtError_t rtGetDeviceCapability(int32_t device, int32_t moduleType, int32_t featureType, 
int32_t *value) { + *value = GET_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT); + return GET_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); +} + +ADD_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); +rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { + return GET_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, RT_ERROR_NONE); +} + +ADD_STUB_RETURN_VALUE(rtEventReset, rtError_t); +rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { + return GET_STUB_RETURN_VALUE(rtEventReset, rtError_t, RT_ERROR_NONE); +} + +ADD_STUB_RETURN_VALUE(rtEventCreate, rtError_t); +rtError_t rtEventCreate(rtEvent_t *event) { + *event = new int[EVENT_LENTH]; + return GET_STUB_RETURN_VALUE(rtEventCreate, rtError_t, RT_ERROR_NONE); +} + +ADD_STUB_RETURN_VALUE(rtGetEventID, rtError_t); +rtError_t rtGetEventID(rtEvent_t event, uint32_t *event_id) { + *event_id = 0; + return GET_STUB_RETURN_VALUE(rtGetEventID, rtError_t, RT_ERROR_NONE); +} + rtError_t rtCtxSetCurrent(rtContext_t ctx) { return RT_ERROR_NONE; } rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { @@ -42,11 +124,6 @@ rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event) { return RT_ERROR_NONE; } -rtError_t rtEventCreate(rtEvent_t *event) { - *event = new int[EVENT_LENTH]; - return RT_ERROR_NONE; -} - rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag) { return rtEventCreate(event); } @@ -112,8 +189,6 @@ rtError_t rtMemcpyAsync(void *dst, uint64_t dest_max, const void *src, uint64_t return RT_ERROR_NONE; } -rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { return RT_ERROR_NONE; } - rtError_t rtSetTSDevice(uint32_t tsId) { return RT_ERROR_NONE; } @@ -347,10 +422,6 @@ rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *value_ptr, rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream) { return RT_ERROR_NONE; } -rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) 
{ return RT_ERROR_NONE; } - -rtError_t rtGetDevice(int32_t *device) { return RT_ERROR_NONE; } - rtError_t rtDatadumpInfoLoad(const void *dump_info, uint32_t length) { return RT_ERROR_NONE; } rtError_t rtKernelLaunchWithFlag(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, @@ -489,6 +560,18 @@ rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t return RT_ERROR_NONE; } +rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len) { + return RT_ERROR_NONE; +} + +rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream) { + return RT_ERROR_NONE; +} + +rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt) { + return RT_ERROR_NONE; +} + #ifdef __cplusplus } #endif diff --git a/tests/depends/runtime/src/runtime_stub.h b/tests/depends/runtime/src/runtime_stub.h new file mode 100644 index 00000000..b693b9ea --- /dev/null +++ b/tests/depends/runtime/src/runtime_stub.h @@ -0,0 +1,70 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INC_LLT_RUNTIME_STUB_H +#define __INC_LLT_RUNTIME_STUB_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif +void rtStubTearDown(); + +#define RTS_STUB_SETUP() \ +do { \ + rtStubTearDown(); \ +} while (0) + +#define RTS_STUB_TEARDOWN() \ +do { \ + rtStubTearDown(); \ +} while (0) + +#define RTS_STUB_RETURN_VALUE(FUNC, TYPE, VALUE) \ +do { \ + g_Stub_##FUNC##_RETURN.emplace(g_Stub_##FUNC##_RETURN.begin(), VALUE); \ +} while (0) + +#define RTS_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, VALUE) \ +do { \ + g_Stub_##FUNC##_OUT_##NAME.emplace(g_Stub_##FUNC##_OUT_##NAME.begin(), VALUE); \ +} while (0) + + +#define RTS_STUB_RETURN_EXTERN(FUNC, TYPE) extern std::vector g_Stub_##FUNC##_RETURN; +#define RTS_STUB_OUTBOUND_EXTERN(FUNC, TYPE, NAME) extern std::vector g_Stub_##FUNC##_OUT_##NAME; + +RTS_STUB_RETURN_EXTERN(rtGetDevice, rtError_t); +RTS_STUB_OUTBOUND_EXTERN(rtGetDevice, int32_t, device) + +RTS_STUB_RETURN_EXTERN(rtGetDeviceCapability, rtError_t); +RTS_STUB_OUTBOUND_EXTERN(rtGetDeviceCapability, int32_t, value); + +RTS_STUB_RETURN_EXTERN(rtStreamWaitEvent, rtError_t); + +RTS_STUB_RETURN_EXTERN(rtEventReset, rtError_t); + +RTS_STUB_RETURN_EXTERN(rtEventCreate, rtError_t); +RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, rtEvent_t, event); + +RTS_STUB_RETURN_EXTERN(rtGetEventID, rtError_t); +RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, uint32_t, event_id); + +#ifdef __cplusplus +} +#endif +#endif // __INC_LLT_RUNTIME_STUB_H diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index a0790cf2..c92f2161 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -244,6 +244,7 @@ set(GRAPH_DAVINCI_MODEL_SRC_FILES "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_plus_task_info.cc" 
"${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -527,6 +528,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/kernel_ex_task_info_unittest.cc" "graph/load/kernel_task_info_unittest.cc" "graph/load/ffts_task_info_unittest.cc" + "graph/load/ffts_plus_task_info_unittest.cc" "graph/load/memcpy_addr_async_task_info_unittest.cc" "graph/load/memcpy_async_task_info_unittest.cc" "graph/load/cpu_queue_schedule_unittest.cc" @@ -670,6 +672,7 @@ set(MULTI_PARTS_TEST_FILES "graph/build/stream_allocator_unittest.cc" "graph/build/model_builder_unittest.cc" "graph/build/mem_assigner_unittest.cc" + "graph/build/graph_mem_assigner_unittest.cc" "graph/build/task_generator_unittest.cc" "graph/build/buffer_pool_mem_assigner_unittest.cc" "graph/execute/graph_execute_unittest.cc" @@ -935,6 +938,7 @@ target_link_libraries(ge_single_op PRIVATE ascend_protobuf json c_sec + runtime_stub ) # ut binary diff --git a/tests/ut/ge/graph/build/graph_mem_assigner_unittest.cc b/tests/ut/ge/graph/build/graph_mem_assigner_unittest.cc new file mode 100644 index 00000000..703ac3b4 --- /dev/null +++ b/tests/ut/ge/graph/build/graph_mem_assigner_unittest.cc @@ -0,0 +1,90 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" +#include "../passes/graph_builder_utils.h" + +#define protected public +#define private public +#include "graph/build/memory/binary_block_mem_assigner.h" +#include "graph/build/memory/graph_mem_assigner.h" +#include "graph/build/memory/hybrid_mem_assigner.h" +#include "graph/build/memory/max_block_mem_assigner.h" +#include "graph/manager/graph_var_manager.h" +#include "graph/manager/graph_mem_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestGraphMemAssigner : public testing::Test { + public: + ge::ComputeGraphPtr BuildGraphWithVar(int64_t session_id) { + // init + MemManager::Instance().Initialize(std::vector({RT_MEMORY_HBM})); + VarManager::Instance(session_id)->Init(0, 0, 0, 0); + ge::ut::GraphBuilder builder("graph"); + auto var_input = builder.AddNode("var", "Variable", 1, 1); + auto const_input = builder.AddNode("const", "Const", 1, 1); + auto assign = builder.AddNode("assgin", "Assign", 2, 1); + // add link + builder.AddDataEdge(var_input, 0, assign, 0); + builder.AddDataEdge(const_input, 0, assign, 1); + // set offset + var_input->GetOpDesc()->SetOutputOffset({10000}); + const_input->GetOpDesc()->SetOutputOffset({1000}); + assign->GetOpDesc()->SetInputOffset({10100, 1000}); + assign->GetOpDesc()->SetOutputOffset({10100}); + // set inner offset + int64_t inner_offset = 100; + ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableInputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); + ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableOutputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); + // add var addr + 
VarManager::Instance(session_id)->var_resource_->var_offset_map_.emplace(10000, RT_MEMORY_HBM); + + return builder.GetGraph(); + } + +protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestGraphMemAssigner, graph_memory_assign_fail_case) { + ge::ComputeGraphPtr compute_graph = make_shared(""); + GraphMemoryAssigner graph_mem_assigner(compute_graph); + MemoryOffset mem_offset(2, 10000); + graph_mem_assigner.memory_offset_.insert({2, mem_offset}); + VarManager::Instance(0)->graph_mem_max_size_ = 0; + + map mem_type_to_offset = {}; + Status ret = graph_mem_assigner.ReAssignMemory(false, mem_type_to_offset); + EXPECT_EQ(ret, ACL_ERROR_GE_MEMORY_ALLOCATION); +} + diff --git a/tests/ut/ge/graph/load/ffts_plus_task_info_unittest.cc b/tests/ut/ge/graph/load/ffts_plus_task_info_unittest.cc new file mode 100644 index 00000000..6d18fd97 --- /dev/null +++ b/tests/ut/ge/graph/load/ffts_plus_task_info_unittest.cc @@ -0,0 +1,697 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#define private public +#define protected public + +#include "graph/load/model_manager/task_info/ffts_plus_task_info.h" +#include "cce/aicpu_engine_struct.h" +#include "common/ge/ge_util.h" +#include "common/properties_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/fmk_error_codes.h" +#include "graph/attr_value.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" + +namespace ge { +extern OpDescPtr CreateOpDesc(string name, string type); + +extern Status DavinciModel::GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt) { + addr = 0x1245; + pref_cnt = 3; + return SUCCESS; +} +class UtestFftsPlusTaskInfo : public testing::Test { +protected: + void SetUp() {} + + void TearDown() {} + +public: + void InitTaskSQEInfo(domi::FftsPlusTaskDef *task_def) { + domi::FftsPlusSqeDef *sqedef = task_def->mutable_ffts_plus_sqe(); + //header + domi::StarsSqeHeaderDef *headerdef = sqedef->mutable_sqe_header(); + headerdef->set_l1_lock(1); + headerdef->set_l1_unlock(1); + headerdef->set_block_dim(1); + //sqe + sqedef->set_pmg(1); + sqedef->set_ns(1); + sqedef->set_part_id(1); + sqedef->set_qos(1); + + sqedef->set_total_context_num(2); + sqedef->set_ready_context_num(1); + sqedef->set_preload_context_num(1); + + sqedef->set_dsplit_unit(1); + sqedef->set_prefetch_ost_num(1); + sqedef->set_cmaint_ost_num(1); + + sqedef->set_aic_prefetch_lower(1); + sqedef->set_aic_prefetch_upper(1); + sqedef->set_aiv_prefetch_lower(1); + sqedef->set_aiv_prefetch_upper(1); + } + + void InitTaskAdditionalDataInfo(domi::FftsPlusTaskDef *task_def) { + domi::AdditionalDataDef *additionaldata = task_def->add_additional_data(); + additionaldata->set_data_type(1); + additionaldata->add_context_id(0); + additionaldata->add_context_id(1); + additionaldata->add_context_id(2); + domi::AdditionalDataDef *additionaldata1 = task_def->add_additional_data(); + 
additionaldata1->set_data_type(2); + additionaldata1->add_context_id(0); + additionaldata1->add_context_id(3); + additionaldata1->add_context_id(5); + } + + void InitAicAivCtx(domi::FftsPlusAicAivCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // 16 bits, len = 26 + } + ctxdef->set_stat(1); + ctxdef->set_schem(1); + ctxdef->set_atm(1); + ctxdef->set_prefetch_enable_bitmap(1); + ctxdef->set_prefetch_once_bitmap(1); + + ctxdef->set_thread_id(2); + ctxdef->set_thread_dim(1); + + ctxdef->set_non_tail_block_dim(6); + ctxdef->set_tail_block_dim(5); + + ctxdef->set_task_param_ptr_base(0x235689); + ctxdef->set_task_param_ptr_offset(32); + // task_addr = {0,200,700,1000,2000, 3500} + // task_addr_offset = {20,40,2,100,200} + ctxdef->add_task_addr(0); + ctxdef->add_task_addr(200); + ctxdef->add_task_addr(700); + ctxdef->add_task_addr(1000); + ctxdef->add_task_addr(2000); + ctxdef->add_task_addr(3500); + + ctxdef->add_task_addr_offset(20); + ctxdef->add_task_addr_offset(40); + ctxdef->add_task_addr_offset(2); + ctxdef->add_task_addr_offset(100); + ctxdef->add_task_addr_offset(200); + + ctxdef->set_input_output_count(3); + + ctxdef->add_kernel_name("aictest"); + for (int j = 1; j < 4; ++j) { + ctxdef->add_src_slot(1); // len = 4, context ID for source data which is out of subgraph + } + } + + void InitMixAicAivCtx(domi::FftsPlusMixAicAivCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // len = 26 + } + ctxdef->set_stat(1); + ctxdef->set_schem(1); + ctxdef->set_atm(1); + ctxdef->set_prefetch_enable_bitmap(1); + ctxdef->set_prefetch_once_bitmap(1); + + ctxdef->set_non_tail_block_ratio_n(1); + ctxdef->set_tail_block_ratio_n(1); + + 
ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(1); + + ctxdef->set_non_tail_block_dim(1); + ctxdef->set_tail_block_dim(1); + + ctxdef->set_aic_task_param_ptr(1); + ctxdef->set_aic_task_param_ptr_offset(1); + + ctxdef->set_aiv_task_param_ptr(0x147852); + ctxdef->set_aiv_task_param_ptr_offset(32); + + ctxdef->add_kernel_name("mixaic"); + + // task_addr = {0,200,700,1000,2000, 3500} + // task_addr_offset = {20,40,2,100,200} + ctxdef->add_task_addr(0); + ctxdef->add_task_addr(200); + ctxdef->add_task_addr(700); + ctxdef->add_task_addr(1000); + ctxdef->add_task_addr(2000); + ctxdef->add_task_addr(3500); + + ctxdef->add_task_addr_offset(20); + ctxdef->add_task_addr_offset(40); + ctxdef->add_task_addr_offset(2); + ctxdef->add_task_addr_offset(100); + ctxdef->add_task_addr_offset(200); + + ctxdef->set_input_output_count(1); + for (int j = 1; j < 4; ++j) { + ctxdef->add_src_slot(1); // len = 4, context ID for source data which is out of subgraph + } + } + + void InitSdmaCtx(domi::FftsPlusSdmaCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // len = 26 + } + ctxdef->set_sat(1); + ctxdef->set_atm(1); + + ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(1); + + ctxdef->set_sdma_sqe_header(1); + + ctxdef->set_src_stream_id(1); + ctxdef->set_src_sub_stream_id(1); + ctxdef->set_dst_stream_id(1); + ctxdef->set_dst_sub_stream_id(1); + + ctxdef->set_src_addr_base(0x457878); + ctxdef->set_src_addr_offset(32); + ctxdef->set_dst_addr_base(0x126547); + ctxdef->set_dst_addr_offset(32); + + ctxdef->set_non_tail_data_len(1); + ctxdef->set_tail_data_len(1); + } + + void InitNotifyCtx(domi::FftsPlusNotifyCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // 
len = 26 + } + ctxdef->set_atm(1); + + ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(1); + + ctxdef->set_notify_id_base(1); + } + + void InitWriteValueCtx(domi::FftsPlusWriteValueCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // len = 26 + } + ctxdef->set_atm(1); + + ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(1); + + ctxdef->set_aw_size(1); + ctxdef->set_snoop(1); + ctxdef->set_aw_cache(1); + ctxdef->set_aw_prot(1); + ctxdef->set_va(1); + + ctxdef->set_write_addr_base(0x147852); + ctxdef->set_write_addr_offset(32); + for (int j = 1; j < 4; ++j) { + ctxdef->add_write_value(1); + } + } + + void InitAicpuCtxCtx(domi::FftsPlusAicpuCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int j = 1; j < RT_CTX_SUCCESSOR_NUM; ++j) { + ctxdef->add_successor_context_id(1); // len = 26 + } + ctxdef->set_atm(1); + + ctxdef->set_sqe_index(1); + ctxdef->set_kernel_type(1); + ctxdef->set_bm(1); + ctxdef->set_topic_type(1); + ctxdef->set_qos(1); + + ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(1); + + ctxdef->set_non_tail_block_dim(1); + ctxdef->set_tail_block_dim(1); + for (int i = 1; i < 9; ++i) { + ctxdef->add_user_data(1); // len = 9 + } + ctxdef->set_sub_topic_id(1); + ctxdef->set_topic_id(1); + ctxdef->set_group_id(1); + ctxdef->set_user_data_len(64); + + ctxdef->set_task_param_offset(32); + } + + void InitDataCtx(domi::FftsPlusDataCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_cnt_init(1); + ctxdef->set_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // len = 26 + } + ctxdef->set_atm(1); + + ctxdef->set_orig_consumer_counter(1); + ctxdef->set_run_consumer_counter(1); + + ctxdef->set_thread_id(1); + 
ctxdef->set_thread_dim(1); + + ctxdef->set_addr_base(0x125478); + ctxdef->set_addr_offset(32); + + ctxdef->set_non_tail_num_outter(1); + ctxdef->set_non_tail_num_inner(1); + ctxdef->set_non_tail_len_inner(1); + ctxdef->set_non_tail_stride_outter(1); + ctxdef->set_non_tail_stride_inner(1); + + ctxdef->set_tail_num_outter(1); + ctxdef->set_tail_num_inner(1); + ctxdef->set_tail_len_inner(1); + ctxdef->set_tail_stride_outter(1); + ctxdef->set_tail_stride_inner(1); + } + + void InitAtStartCtx(domi::FftsPlusAtStartCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(1); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(i); // len = 26 + } + ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(1); + + ctxdef->set_thread_id_init(1); + ctxdef->set_thread_window_size(1); + } + + void InitAtEndCtx(domi::FftsPlusAtEndCtxDef *ctxdef) { + ctxdef->set_at_start_slot_num(12); + ctxdef->set_out_label_slot_num(12); + ctxdef->set_aten(1); + + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCC_AT_START_SLOT_NUM; ++i) { + ctxdef->add_succ_at_start_slot(i); // len = 12 + ctxdef->add_succ_out_label_slot(1); // len = 12 + } + + ctxdef->set_thread_id(1); + } + + void InitLabelCtx(domi::FftsPlusLabelCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(1); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // len = 26 + } + } + + void InitCaseSwitchCtx(domi::FftsPlusCaseSwitchCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(32); + ctxdef->set_start_label_id(32); + ctxdef->set_label_list_len(32); + ctxdef->set_pred_cnt_init(32); + ctxdef->set_pred_cnt(32); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(1); // len = 26 + } + ctxdef->set_atm(32); + + ctxdef->set_thread_id(32); + ctxdef->set_thread_dim(32); + + 
ctxdef->set_ar_size(32); + ctxdef->set_snoop(32); + ctxdef->set_ar_cache(32); + ctxdef->set_ar_prot(32); + ctxdef->set_va(32); + + ctxdef->set_load_addr0_base(0x123456); + ctxdef->set_ld0_en(32); + ctxdef->set_load_addr0_offset(32); + + ctxdef->set_load_addr1_base(0x12457); + ctxdef->set_ld1_en(32); + ctxdef->set_load_addr1_offset(32); + } + + void InitCaseDefaultCtx(domi::FftsPlusCaseDefaultCtxDef *ctxdef) { + ctxdef->set_successor_num(26); + ctxdef->set_aten(32); + ctxdef->set_start_label_id(1); + ctxdef->set_label_list_len(32); + ctxdef->set_pred_cnt_init(1); + ctxdef->set_pred_cnt(32); + for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { + ctxdef->add_successor_list(2); // len = 26 + } + } + + void InitCondSwitchCtx(domi::FftsPlusCondSwitchCtxDef *ctxdef) { + ctxdef->set_true_successor_num(12); + ctxdef->set_false_successor_num(14); + ctxdef->set_aten(32); + + ctxdef->set_condition(32); + ctxdef->set_pred_cnt_init(32); + ctxdef->set_pred_cnt(32); + + for (int i = 1; i < RT_CTX_FALSE_SUCCESSOR_NUM; ++i) { + if (i < RT_CTX_TRUE_SUCCESSOR_NUM) { + ctxdef->add_true_successor_list(1); // len = 12 + } + ctxdef->add_false_successor_list(1); // len = 14 + } + ctxdef->set_atm(32); + + ctxdef->set_thread_id(1); + ctxdef->set_thread_dim(32); + + ctxdef->set_ar_size(32); + ctxdef->set_snoop(32); + ctxdef->set_ar_cache(32); + ctxdef->set_ar_prot(32); + ctxdef->set_va(32); + + ctxdef->set_load_addr0_base(0x142545); + ctxdef->set_ld0_en(32); + ctxdef->set_load_addr0_offset(32); + + ctxdef->set_load_addr1_base(0x365451); + ctxdef->set_ld1_en(64); + ctxdef->set_load_addr1_offset(32); + + ctxdef->set_cmp_value_1(1); + ctxdef->set_cmp_value_2(1); + } +}; + +// test FftsPlusTaskInfo Init software ctx +TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_software_ctx) { + DavinciModel davinci_model(0, nullptr); + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + davinci_model.stream_list_ = { stream }; + domi::TaskDef task_def; + task_def.set_stream_id(0); + + 
domi::FftsPlusTaskDef *ffts_plus_task_def = task_def.mutable_ffts_plus_task(); + FftsPlusTaskInfo ffts_plus_task_info; + // init failed when model without op_desc + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), PARAM_INVALID); + + davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); + ffts_plus_task_def->set_op_index(0); + ffts_plus_task_def->set_addr_size(2); + + rtFftsPlusTaskInfo_t sub_task_info; + ffts_plus_task_info.ffts_plus_task_info_ = sub_task_info; + ffts_plus_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + + InitTaskSQEInfo(ffts_plus_task_def); + InitTaskAdditionalDataInfo(ffts_plus_task_def); + + domi::FftsPlusCtxDef *fftsplusstartctx = ffts_plus_task_def->add_ffts_plus_ctx(); + fftsplusstartctx->set_op_index(0); + fftsplusstartctx->set_hardware_ctx_type(0); + fftsplusstartctx->set_software_ctx_type(static_cast(RT_SOFT_CTX_TYPE_AT_START)); + domi::FftsPlusAtStartCtxDef *startctxdef = fftsplusstartctx->mutable_at_start_ctx(); + InitAtStartCtx(startctxdef); + + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); + startctxdef->add_successor_list(1); + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *fftsplusendctx = ffts_plus_task_def->add_ffts_plus_ctx(); + fftsplusendctx->set_op_index(0); + fftsplusendctx->set_hardware_ctx_type(0); + fftsplusendctx->set_software_ctx_type(static_cast(RT_SOFT_CTX_TYPE_AT_END)); + domi::FftsPlusAtEndCtxDef *endctxdef = fftsplusendctx->mutable_at_end_ctx(); + InitAtEndCtx(endctxdef); + + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); + endctxdef->add_succ_at_start_slot(1); + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); + endctxdef->add_succ_out_label_slot(1); + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *fftspluslabelctx = ffts_plus_task_def->add_ffts_plus_ctx(); + fftspluslabelctx->set_op_index(0); + 
fftspluslabelctx->set_hardware_ctx_type(0); + fftspluslabelctx->set_software_ctx_type(static_cast(RT_SOFT_CTX_TYPE_LABEL)); + domi::FftsPlusLabelCtxDef *labelctxdef = fftspluslabelctx->mutable_label_ctx(); + InitLabelCtx(labelctxdef); + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); + labelctxdef->add_successor_list(1); + EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsPlusTaskInfo Init hardware ctx +TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_hardware_ctx) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsPlusTaskInfo task_info; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + davinci_model.stream_list_ = { stream }; + + task_def.set_stream_id(0); + + domi::FftsPlusTaskDef *ffts_plus_task_def = task_def.mutable_ffts_plus_task(); + rtFftsPlusTaskInfo_t sub_task_info; + task_info.ffts_plus_task_info_ = sub_task_info; + + davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); + davinci_model.InitTbeHandleWithFfts(davinci_model.op_list_[0]); + + ffts_plus_task_def->set_op_index(0); + ffts_plus_task_def->set_addr_size(2); + InitTaskSQEInfo(ffts_plus_task_def); + InitTaskAdditionalDataInfo(ffts_plus_task_def); + + domi::FftsPlusCtxDef *notifyctx = ffts_plus_task_def->add_ffts_plus_ctx(); + notifyctx->set_op_index(0); + notifyctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_NOTIFY_WAIT)); + notifyctx->set_software_ctx_type(0); + domi::FftsPlusNotifyCtxDef *notifydef = notifyctx->mutable_notify_ctx(); + InitNotifyCtx(notifydef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + notifydef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *sdmactx = ffts_plus_task_def->add_ffts_plus_ctx(); + sdmactx->set_op_index(0); + sdmactx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_SDMA)); + sdmactx->set_software_ctx_type(0); + domi::FftsPlusSdmaCtxDef *smdadef = 
sdmactx->mutable_sdma_ctx(); + InitSdmaCtx(smdadef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + smdadef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *writevalctx = ffts_plus_task_def->add_ffts_plus_ctx(); + writevalctx->set_op_index(0); + writevalctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_WRITE_VALUE)); + writevalctx->set_software_ctx_type(0); + domi::FftsPlusWriteValueCtxDef *writedef = writevalctx->mutable_write_value_ctx(); + InitWriteValueCtx(writedef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + writedef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + writedef->add_write_value(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *aicpuctx = ffts_plus_task_def->add_ffts_plus_ctx(); + aicpuctx->set_op_index(0); + aicpuctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_AICPU)); + aicpuctx->set_software_ctx_type(0); + domi::FftsPlusAicpuCtxDef *aicpudef = aicpuctx->mutable_aicpu_ctx(); + InitAicpuCtxCtx(aicpudef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + aicpudef->add_successor_context_id(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + aicpudef->add_user_data(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *datactx = ffts_plus_task_def->add_ffts_plus_ctx(); + datactx->set_op_index(0); + datactx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_FLUSH_DATA)); + datactx->set_software_ctx_type(0); + domi::FftsPlusDataCtxDef *datadef = datactx->mutable_data_ctx(); + InitDataCtx(datadef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + datadef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *caseswitchctx = ffts_plus_task_def->add_ffts_plus_ctx(); + caseswitchctx->set_op_index(0); + 
caseswitchctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_LOAD)); + caseswitchctx->set_software_ctx_type(static_cast(RT_SOFT_CTX_TYPE_CASE_SWITCH)); + domi::FftsPlusCaseSwitchCtxDef *caseswitchdef = caseswitchctx->mutable_case_switch_ctx(); + InitCaseSwitchCtx(caseswitchdef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + caseswitchdef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *candswitchctx = ffts_plus_task_def->add_ffts_plus_ctx(); + candswitchctx->set_op_index(0); + candswitchctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_LOAD)); + candswitchctx->set_software_ctx_type(static_cast(RT_SOFT_CTX_TYPE_COND_SWITCH)); + domi::FftsPlusCondSwitchCtxDef *candswitchdef = candswitchctx->mutable_cond_switch_ctx(); + InitCondSwitchCtx(candswitchdef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + candswitchdef->add_true_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + candswitchdef->add_false_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); + + domi::FftsPlusCtxDef *aicaivctx = ffts_plus_task_def->add_ffts_plus_ctx(); + aicaivctx->set_op_index(0); + aicaivctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_AIV)); + aicaivctx->set_software_ctx_type(0); + domi::FftsPlusAicAivCtxDef *aicaivdef = aicaivctx->mutable_aic_aiv_ctx(); + InitAicAivCtx(aicaivdef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + aicaivdef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); + aicaivdef->add_kernel_name("aivtest"); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); + aicaivdef->add_src_slot(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); + + domi::FftsPlusCtxDef *mixaicaivctx = ffts_plus_task_def->add_ffts_plus_ctx(); + mixaicaivctx->set_op_index(0); + 
mixaicaivctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_MIX_AIC)); + mixaicaivctx->set_software_ctx_type(0); + domi::FftsPlusMixAicAivCtxDef *mixctxdef = mixaicaivctx->mutable_mix_aic_aiv_ctx(); + InitMixAicAivCtx(mixctxdef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); + mixctxdef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); + mixctxdef->add_kernel_name("mixaiv"); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); + mixctxdef->add_src_slot(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); +} + +// test FftsPlusTaskInfo Init hardware ctx +TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_hardware_ctx_ex) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsPlusTaskInfo task_info; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + davinci_model.stream_list_ = { stream }; + + task_def.set_stream_id(0); + + domi::FftsPlusTaskDef *ffts_plus_task_def = task_def.mutable_ffts_plus_task(); + rtFftsPlusTaskInfo_t sub_task_info; + task_info.ffts_plus_task_info_ = sub_task_info; + + davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); + ffts_plus_task_def->set_op_index(0); + ffts_plus_task_def->set_addr_size(2); + InitTaskSQEInfo(ffts_plus_task_def); + InitTaskAdditionalDataInfo(ffts_plus_task_def); + + domi::FftsPlusCtxDef *casesdefaultctx = ffts_plus_task_def->add_ffts_plus_ctx(); + casesdefaultctx->set_op_index(0); + casesdefaultctx->set_hardware_ctx_type(static_cast(RT_HW_CTX_TYPE_LOAD)); + casesdefaultctx->set_software_ctx_type(static_cast(RT_SOFT_CTX_TYPE_CASE_SWITCH)); + domi::FftsPlusCaseDefaultCtxDef *casesdefaultdef = casesdefaultctx->mutable_case_default_ctx(); + InitCaseDefaultCtx(casesdefaultdef); + + EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); + casesdefaultdef->add_successor_list(1); + EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); +} +// test 
FftsPlusTaskInfo UpdateArgs +TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_update_args) { + DavinciModel davinci_model(0, nullptr); + FftsPlusTaskInfo task_info; + task_info.davinci_model_ = &davinci_model; + task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(task_info.UpdateArgs(), SUCCESS); +} + +// test FftsPlusTaskInfo CalculateArgs +TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_calculate_args) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsPlusTaskInfo task_info; + EXPECT_EQ(task_info.CalculateArgs(task_def, &davinci_model), SUCCESS); +} + +// test FftsPlusTaskInfo Distribute +TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_distribute) { + DavinciModel davinci_model(0, nullptr); + FftsPlusTaskInfo task_info; + rtFftsPlusTaskInfo_t sub_task_info; + task_info.ffts_plus_task_info_ = sub_task_info; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + task_info.stream_ = stream; + EXPECT_EQ(task_info.Distribute(), SUCCESS); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 327dd248..86569789 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -23,15 +23,20 @@ #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "cce/aicpu_engine_struct.h" +#include "tests/depends/runtime/src/runtime_stub.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); class UtestKernelExTaskInfo : public testing::Test { protected: - void SetUp() {} + void SetUp() { + RTS_STUB_SETUP(); + } - void TearDown() {} + void TearDown() { + RTS_STUB_TEARDOWN(); + } }; // test kernel_ex_task_Release @@ -209,4 +214,136 @@ TEST_F(UtestKernelExTaskInfo, parse_topic_type_failed_2) { KernelExTaskInfo kernel_ex_task_info; 
EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); } + +TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::TaskDef task_def; + domi::KernelExDef kernel_ex_def; + kernel_ex_def.set_kernel_ext_info(buf, len); + kernel_ex_def.set_kernel_ext_info_size(len); + domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); + *kernel_ex_def_tmp = kernel_ex_def; + + const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + + KernelExTaskInfo kernel_ex_task_info; + kernel_ex_task_info.op_desc_ = op_desc; + DavinciModel davinci_model(0, nullptr); + kernel_ex_task_info.davinci_model_ = &davinci_model; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); + EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); + kernel_ex_task_info.op_desc_ = op_desc; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); + EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); +} + +TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_01) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = 
aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::TaskDef task_def; + domi::KernelExDef kernel_ex_def; + kernel_ex_def.set_kernel_ext_info(buf, len); + kernel_ex_def.set_kernel_ext_info_size(len); + domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); + *kernel_ex_def_tmp = kernel_ex_def; + + const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); + + KernelExTaskInfo kernel_ex_task_info; + kernel_ex_task_info.op_desc_ = op_desc; + DavinciModel davinci_model(0, nullptr); + kernel_ex_task_info.davinci_model_ = &davinci_model; + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); + + kernel_ex_task_info.is_blocking_aicpu_op_ = true; + EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); +} + +TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_02) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::TaskDef task_def; + domi::KernelExDef kernel_ex_def; + kernel_ex_def.set_kernel_ext_info(buf, len); + kernel_ex_def.set_kernel_ext_info_size(len); + domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); + *kernel_ex_def_tmp = kernel_ex_def; + + const 
OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + KernelExTaskInfo kernel_ex_task_info; + kernel_ex_task_info.op_desc_ = op_desc; + DavinciModel davinci_model(0, nullptr); + kernel_ex_task_info.davinci_model_ = &davinci_model; + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); + + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); + RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); + EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); + + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); + RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); + EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + 
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); +} + } // namespace ge diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index 0c8da4b5..45ae7853 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -22,15 +22,20 @@ #include "graph/load/model_manager/davinci_model.h" #include "graph/load/model_manager/task_info/kernel_task_info.h" #include "graph/load/model_manager/task_info/hccl_task_info.h" +#include "tests/depends/runtime/src/runtime_stub.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); class UtestKernelTaskInfo : public testing::Test { protected: - void SetUp() {} + void SetUp() { + RTS_STUB_SETUP(); + } - void TearDown() {} + void TearDown() { + RTS_STUB_TEARDOWN(); + } }; // test KernelTaskInfo Init. @@ -1240,4 +1245,135 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_super_kernel_info) { EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); } +TEST_F(UtestKernelTaskInfo, blocking_aicpu_op) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::TaskDef task_def; + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + + const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); + 
op_desc->SetId(0); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + DavinciModel davinci_model(0, nullptr); + davinci_model.op_list_.emplace(0, op_desc); + + KernelTaskInfo kernel_task_info; + kernel_task_info.op_desc_ = op_desc; + kernel_task_info.davinci_model_ = &davinci_model; + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); + EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); + kernel_task_info.op_desc_ = op_desc; + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); + EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); +} + +TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_01) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + + const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); + op_desc->SetId(0); + DavinciModel davinci_model(0, nullptr); + davinci_model.op_list_.emplace(0, op_desc); + + KernelTaskInfo kernel_task_info; + kernel_task_info.davinci_model_ = &davinci_model; + kernel_task_info.op_desc_ = op_desc; + + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); + + kernel_task_info.is_blocking_aicpu_op_ = true; + EXPECT_EQ(kernel_task_info.Distribute(), FAILED); +} + +TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_02) { + int len = 
sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + + const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + op_desc->SetId(0); + DavinciModel davinci_model(0, nullptr); + davinci_model.op_list_.emplace(0, op_desc); + + KernelTaskInfo kernel_task_info; + kernel_task_info.davinci_model_ = &davinci_model; + kernel_task_info.op_desc_ = op_desc; + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + EXPECT_EQ(kernel_task_info.Distribute(), FAILED); + + 
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); + RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); + EXPECT_EQ(kernel_task_info.Distribute(), FAILED); + + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); + RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); + EXPECT_EQ(kernel_task_info.Distribute(), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); +} + } // namespace ge diff --git a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc index 60f33ed3..500dbc2a 100644 --- a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc +++ b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc @@ -367,7 +367,7 @@ TEST(UtestIrBuild, check_data_op_attr_index_valid) { }; ModelBufferData model; graphStatus ret = aclgrphBuildModel(graph, build_options, model); - EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); + EXPECT_EQ(ret, ge::FAILED); } // set attr index invalid, when not set input shape range @@ -377,7 +377,7 @@ TEST(UtestIrBuild, check_data_attr_index_succ_no_input_range) { const map build_options; ModelBufferData model; graphStatus ret = aclgrphBuildModel(graph, build_options, model); - EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); + EXPECT_EQ(ret, ge::FAILED); } TEST(UtestIrBuild, check_modify_mixlist_param) { diff --git a/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc index 
b225949b..034b3f47 100644 --- a/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc +++ b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc @@ -27,7 +27,7 @@ #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" #undef protected #undef private - +#include "tests/depends/runtime/src/runtime_stub.h" using namespace std; using namespace testing; @@ -43,8 +43,12 @@ using namespace hybrid; class UtestAicpuNodeExecutor : public testing::Test { protected: - void SetUp() {} - void TearDown() {} + void SetUp() { + RTS_STUB_SETUP(); + } + void TearDown() { + RTS_STUB_TEARDOWN(); + } }; static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { @@ -164,5 +168,222 @@ TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { } +TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); + ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + node_item->input_start = 0; + node_item->output_start = 0; + node_item->is_dynamic = true; + node_item->shape_inference_type = DEPEND_SHAPE_RANGE; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 1; + graph_item.total_outputs_ = 1; + + GraphExecutionContext graph_execution_context; + SubgraphContext subgraph_context(&graph_item, &graph_execution_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_execution_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, 
nullptr); + + uint64_t value_0 = 512; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, 0, in_tensor0); + + TensorValue out_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + domi::TaskDef task_def; + + AicpuTaskStruct args; + args.head.length = sizeof(args); + args.head.ioAddrNum = 2; + + kernel_def.set_args(reinterpret_cast(&args), args.head.length); + kernel_def.set_args_size(args.head.length); + domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); + *kernel_def_tmp = kernel_def; + + AicpuNodeTask aicpu_node_task(node_item, task_def); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); + + node_item->shape_inference_type = DEPEND_COMPUTE; + domi::KernelExDef kernel_ex_def; + kernel_ex_def.set_kernel_ext_info(buf, len); + kernel_ex_def.set_kernel_ext_info_size(len); + kernel_ex_def.set_args(reinterpret_cast(&args), args.head.length); + kernel_ex_def.set_args_size(args.head.length); + domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); + *kernel_ex_def_tmp = kernel_ex_def; + hybrid_model.task_defs_[node] = std::vector({task_def, task_def}); + + AicpuTfNodeTask 
aicpu_tf_node_task(node_item, task_def); + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); + ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); +} + +TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task_fail) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); + ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + node_item->input_start = 0; + node_item->output_start = 0; + node_item->is_dynamic = true; + node_item->shape_inference_type = DEPEND_SHAPE_RANGE; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 1; + graph_item.total_outputs_ = 1; + + GraphExecutionContext graph_execution_context; + SubgraphContext subgraph_context(&graph_item, &graph_execution_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_execution_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + uint64_t value_0 = 512; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, 0, in_tensor0); + + TensorValue out_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += 
sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + domi::TaskDef task_def; + + AicpuTaskStruct args; + args.head.length = sizeof(args); + args.head.ioAddrNum = 2; + + kernel_def.set_args(reinterpret_cast(&args), args.head.length); + kernel_def.set_args_size(args.head.length); + domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); + *kernel_def_tmp = kernel_def; + + AicpuNodeTask aicpu_node_task(node_item, task_def); + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); + + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); + RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); + + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); + RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + 
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); + + node_item->shape_inference_type = DEPEND_COMPUTE; + domi::KernelExDef kernel_ex_def; + kernel_ex_def.set_kernel_ext_info(buf, len); + kernel_ex_def.set_kernel_ext_info_size(len); + kernel_ex_def.set_args(reinterpret_cast(&args), args.head.length); + kernel_ex_def.set_args_size(args.head.length); + domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); + *kernel_ex_def_tmp = kernel_ex_def; + hybrid_model.task_defs_[node] = std::vector({task_def, task_def}); + + AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); + + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); + RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); + + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); + RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, 
int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc index 8964df74..52091856 100644 --- a/tests/ut/ge/single_op/single_op_task_unittest.cc +++ b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -19,6 +19,7 @@ #include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" +#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" #include "runtime/rt.h" #define protected public @@ -30,6 +31,7 @@ #include "external/register/op_tiling_registry.h" #undef private #undef protected +#include "tests/depends/runtime/src/runtime_stub.h" using namespace std; using namespace testing; @@ -38,9 +40,13 @@ using namespace optiling; class UtestSingleOpTask : public testing::Test { protected: - void SetUp() {} + void SetUp() { + RTS_STUB_SETUP(); + } - void TearDown() {} + void TearDown() { + RTS_STUB_TEARDOWN(); + } }; TEST_F(UtestSingleOpTask, test_build_kernel_task) { @@ -237,3 +243,124 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) { ASSERT_EQ(ret, PARAM_INVALID); } } + +TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_01) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + 
async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + + auto op_desc = make_shared("deque", "Deque"); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + AiCpuCCTask aicpu_task; + aicpu_task.SetOpDesc(op_desc); + rtStream_t stream; + ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE); + + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); + ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); +} + +TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_02) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + + auto op_desc = make_shared("deque", "Deque"); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + AiCpuTask aicpu_task; + aicpu_task.SetOpDesc(op_desc); + rtStream_t stream; + ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE); + + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); + ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); +} + +TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) { + int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); + vector aicpu_ext_info(len, 0); + char *buf = 
aicpu_ext_info.data(); + int offset = 0; + hybrid::AicpuExtInfo *ext_info = reinterpret_cast(buf + offset); + ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; + ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); + offset += sizeof(hybrid::AicpuExtInfo); + hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast(buf + offset); + async_wait_info->waitType = 0; + async_wait_info->waitId = 0; + async_wait_info->timeOut = 0; + async_wait_info->reserved = 0; + + domi::KernelDef kernel_def; + kernel_def.set_kernel_ext_info(buf, len); + kernel_def.set_kernel_ext_info_size(len); + + auto op_desc = make_shared("deque", "Deque"); + ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); + AiCpuTask aicpu_task; + aicpu_task.SetOpDesc(op_desc); + rtStream_t stream; + ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE); + + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); + ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED); + + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); + RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); + 
ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED); + + ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); + RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); + ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED); + + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); + RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); + RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); + EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); +} diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index df57c82e..5733d68f 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -62,6 +62,7 @@ enum FWKTaskExtInfoType { FWK_ADPT_EXT_SESSION_INFO, FWK_ADPT_EXT_BITMAP, FWK_ADPT_EXT_TOPIC_TYPE, + FWK_ADPT_EXT_ASYNCWAIT, FWK_ADPT_EXT_INVALID }; @@ -80,6 +81,12 @@ enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_INPUT_OUTPUT }; +enum FWKExtWaitType { + FWK_ADPT_WAIT_TYPE_NULL = 0, + FWK_ADPT_WAIT_TYPE_EVENT, + FWK_ADPT_WAIT_TYPE_INVALID +}; + #pragma pack(push, 1) // API Parameter Structure struct StrFWKKernel { @@ -133,6 +140,15 @@ struct ResultSummary { uint64_t raw_data_size; // size of raw data }; #pragma pack(pop) + +#pragma pack(push, 1) +struct AsyncWait { + uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait + uint32_t waitId; // wait id, GE refresh + uint32_t timeOut; // reserved + uint64_t reserved; +}; +#pragma pack(pop) } // end namespace FWKAdapter } // namespace aicpu diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c1327c45..a244c793 100644 --- 
a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -52,6 +52,14 @@ typedef enum tagRtAicpuScheType { SCHEDULE_HARDWARE, /* HWTS Schedule */ } rtAicpuScheType; +typedef enum tagRtDeviceCapabilityType { + RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule + RT_SCHEDULE_SOFTWARE_OPT, + RT_SCHEDULE_HARDWARE, // HWTS Schedule + RT_AICPU_BLOCKING_OP_NOT_SUPPORT, + RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation +} rtDeviceCapabilityType; + typedef enum tagRtVersion { VER_BEGIN = 0, VER_NA = VER_BEGIN, diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 2cf6712f..18d837eb 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -65,6 +65,7 @@ typedef enum tagRtFeatureType { typedef enum tagRtDeviceFeatureType { FEATURE_TYPE_SCHE, + FEATURE_TYPE_BLOCKING_OPERATOR, FEATURE_TYPE_END, } rtDeviceFeatureType_t; @@ -78,6 +79,17 @@ typedef enum tagMemoryInfo { MEMORY_INFO_RSV } rtMemoryInfo_t; +typedef enum tagRtDeviceModuleType { + RT_MODULE_TYPE_SYSTEM = 0, + RT_MODULE_TYPE_AICPU, + RT_MODULE_TYPE_CCPU, + RT_MODULE_TYPE_DCPU, + RT_MODULE_TYPE_AICORE, + RT_MODULE_TYPE_TSCPU, + RT_MODULE_TYPE_PCIE, + RT_MODULE_TYPE_VECTOR_CORE +} tagRtDeviceModuleType_t; + /** * @ingroup dvrt_dev * @brief get total device number. diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h index 720da7cd..9ff639e2 100755 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -178,7 +178,7 @@ typedef struct tagFftsTaskInfo { } rtFftsTaskInfo_t; RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); - +RTS_API rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif