@@ -86,6 +86,24 @@ class AicpuNodeTaskBase : public NodeTask {
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
Status UpdateEventIdForBlockingAicpuOp();
virtual Status CopyDataToHbm(TaskContext &context,
                             const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) = 0;
///
/// read result summary and prepare copy task memory.
/// @param context task context
/// @param out_shape_hbm if scalar, TensorBuffer->data is null, size=0
/// @return SUCCESS:success other:failed
///
Status ReadResultSummaryAndPrepareMemory(TaskContext &context,
                                         std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
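///
/// update output shapes with the real shape data copied back into the hbm buffers.
/// @param context task context
/// @param out_shape_hbm buffers holding the real output shapes on device
/// @return SUCCESS:success other:failed
///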
Status UpdateShapeByHbmBuffer(TaskContext &context,
                              const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
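///
/// fill the copy task inputs (release flag, data size, src and dst addresses)
/// from the result summaries and the allocated output/shape buffers.
/// @param context task context
/// @param out_shape_hbm buffers that will receive the output shape data
/// @return SUCCESS:success other:failed
///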
Status PrepareCopyInputs(const TaskContext &context,
                         const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
protected:
const NodeItem *node_item_;
// just reference.
@@ -114,6 +132,14 @@ class AicpuNodeTaskBase : public NodeTask {
// for blocking aicpu op
bool is_blocking_aicpu_op_ = false;
rtEvent_t rt_event_ = nullptr;
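// result summary buffers (device and host copies) and the input buffers of the
// copy task, used when outputs are DEPEND_COMPUTE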
std::vector<std::unique_ptr<TensorBuffer>> output_summary_;
std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;
std::unique_ptr<TensorBuffer> copy_input_release_flag_dev_;
std::unique_ptr<TensorBuffer> copy_input_data_size_dev_;
std::unique_ptr<TensorBuffer> copy_input_src_dev_;
std::unique_ptr<TensorBuffer> copy_input_dst_dev_;
};
class AicpuTfNodeTask : public AicpuNodeTaskBase {
@@ -178,20 +178,36 @@ class AiCpuBaseTask : public OpTask {
                        rtStream_t stream);
Status UpdateOutputShape(vector<GeTensorDesc> &output_desc);
Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc);
// for blocking aicpu op
Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream);
Status UpdateEventIdForBlockingAicpuOp();
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
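// for DEPEND_COMPUTE ops: read result summaries, copy data back and refresh output shapes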
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
                                         vector<DataBuffer> &outputs,
                                         rtStream_t stream);
Status ReadResultSummaryAndPrepareMemory();
Status PrepareCopyInputs(vector<DataBuffer> &outputs);
Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);
virtual Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) = 0;
protected:
size_t num_inputs_ = 0;
@@ -213,6 +229,16 @@ class AiCpuBaseTask : public OpTask {
// for blocking aicpu op
bool is_blocking_aicpu_op_ = false;
rtEvent_t rt_event_ = nullptr;
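// result summary buffers and copy task inputs for DEPEND_COMPUTE outputs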
std::vector<void *> output_summary_;
std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;
void *copy_input_release_flag_dev_ = nullptr;
void *copy_input_data_size_dev_ = nullptr;
void *copy_input_src_dev_ = nullptr;
void *copy_input_dst_dev_ = nullptr;
vector<void *> out_shape_hbm_;
};
class AiCpuTask : public AiCpuBaseTask {
@@ -394,3 +394,33 @@ TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}
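// Exercises the DEPEND_COMPUTE launch path: ext info marks the op as depend-compute,
// the summary/copy buffers are initialized and LaunchKernel is expected to succeed.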
TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) {
AiCpuCCTask task;
rtStream_t stream = nullptr;
task.num_inputs_ = 2;
task.num_outputs_ = 1;
task.input_is_const_ = {true, false};
constexpr int total_addr = 3;
uint32_t *addrs[total_addr] = {nullptr, nullptr, nullptr};
task.io_addr_ = reinterpret_cast<uintptr_t *>(addrs);
task.io_addr_num_ = total_addr;
// Build the DEPEND_COMPUTE ext info directly in a heap buffer sized for the header
// plus its int32_t payload, so the payload write stays in bounds.
size_t ext_size = sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t);
char *ext_mem = reinterpret_cast<char *>(malloc(ext_size));
ASSERT_NE(ext_mem, nullptr);
auto aicpu_ext_info = reinterpret_cast<ge::hybrid::AicpuExtInfo *>(ext_mem);
aicpu_ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
aicpu_ext_info->infoLen = sizeof(int32_t);
int32_t type = ge::DEPEND_COMPUTE;
memcpy_s(aicpu_ext_info->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
std::string ext_info_str(ext_mem, ext_size);
free(ext_mem);
vector<DataBuffer> inputs(2, DataBuffer());
vector<DataBuffer> outputs(1, DataBuffer());
vector<GeTensorDesc> inputs_desc(2, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
vector<GeTensorDesc> outputs_desc(1, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
ASSERT_EQ(task.SetExtInfoAndType(ext_info_str, 0), SUCCESS);
task.unknown_type_ = ge::DEPEND_COMPUTE;
ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS);
ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS);
}