From 52d24b76c31b97be9b70df3de6461d84fd53273d Mon Sep 17 00:00:00 2001
From: guopeian
Date: Tue, 20 Jul 2021 10:22:27 +0800
Subject: [PATCH] fix

---
 .../node_executor/aicpu/aicpu_node_executor.h    | 26 +++++++++++++++++++
 ge/single_op/task/op_task.h                      | 26 +++++++++++++++++++
 tests/ut/ge/single_op/single_op_task_unittest.cc | 30 ++++++++++++++++++++++
 3 files changed, 82 insertions(+)

diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
index 8a0e7fb2..9ccc9273 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
@@ -86,6 +86,24 @@ class AicpuNodeTaskBase : public NodeTask {
   Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
 
   Status UpdateEventIdForBlockingAicpuOp();
 
+  virtual Status CopyDataToHbm(TaskContext &context,
+                               const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) = 0;
+
+  ///
+  /// read result summary and prepare copy task memory.
+  /// @param context task context
+  /// @param out_shape_hbm if scalar, TensorBuffer->data is null, size=0
+  /// @return SUCCESS:success other:failed
+  ///
+  Status ReadResultSummaryAndPrepareMemory(TaskContext &context,
+                                           std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
+
+  Status UpdateShapeByHbmBuffer(TaskContext &context,
+                                const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
+
+  Status PrepareCopyInputs(const TaskContext &context,
+                           const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
+
 protected:
   const NodeItem *node_item_; // just reference.
@@ -114,6 +132,14 @@ class AicpuNodeTaskBase : public NodeTask {
   // for blocking aicpu op
   bool is_blocking_aicpu_op_ = false;
   rtEvent_t rt_event_ = nullptr;
+
+  std::vector<std::unique_ptr<TensorBuffer>> output_summary_;
+  std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;
+
+  std::unique_ptr<TensorBuffer> copy_input_release_flag_dev_;
+  std::unique_ptr<TensorBuffer> copy_input_data_size_dev_;
+  std::unique_ptr<TensorBuffer> copy_input_src_dev_;
+  std::unique_ptr<TensorBuffer> copy_input_dst_dev_;
 };
 
 class AicpuTfNodeTask : public AicpuNodeTaskBase {
diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h
index 6fc96f55..694eec3a 100644
--- a/ge/single_op/task/op_task.h
+++ b/ge/single_op/task/op_task.h
@@ -178,16 +178,21 @@ class AiCpuBaseTask : public OpTask {
                            rtStream_t stream);
   Status UpdateOutputShape(vector<GeTensorDesc> &output_desc);
   Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc);
-  Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
-                                           vector<DataBuffer> &outputs,
-                                           rtStream_t stream);
-  Status ReadResultSummaryAndPrepareMemory();
+
   // for blocking aicpu op
   Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream);
   Status UpdateEventIdForBlockingAicpuOp();
   Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
-  Status PrepareCopyInputs(vector<DataBuffer> &outputs);
-  Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);
+
+  Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
+                                           vector<DataBuffer> &outputs,
+                                           rtStream_t stream);
+  Status ReadResultSummaryAndPrepareMemory();
+
+  Status PrepareCopyInputs(vector<DataBuffer> &outputs);
+
+  Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);
+
   virtual Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) = 0;
 protected:
   size_t num_inputs_ = 0;
@@ -213,6 +229,16 @@ class AiCpuBaseTask : public OpTask {
   // for blocking aicpu op
   bool is_blocking_aicpu_op_ = false;
   rtEvent_t rt_event_ = nullptr;
+
+  std::vector<void *> output_summary_;
+  std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;
+
+  void *copy_input_release_flag_dev_ = nullptr;
+  void *copy_input_data_size_dev_ = nullptr;
+  void *copy_input_src_dev_ = nullptr;
+  void *copy_input_dst_dev_ = nullptr;
+
+  vector<void *> out_shape_hbm_;
 };
 
 class AiCpuTask : public AiCpuBaseTask {
diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc
index 08d0a33d..dd1e1b3e 100644
--- a/tests/ut/ge/single_op/single_op_task_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_task_unittest.cc
@@ -394,3 +394,34 @@ TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
   RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
   EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
 }
+
+TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) {
+  AiCpuCCTask task;
+  rtStream_t stream = nullptr;
+  task.num_inputs_ = 2;
+  task.num_outputs_ = 1;
+  task.input_is_const_ = {true, false};
+  int total_addr = 3;
+  uint32_t* addrs[total_addr] = {nullptr, nullptr, nullptr};
+  task.io_addr_ = reinterpret_cast<uintptr_t>(addrs);
+  task.io_addr_num_ = total_addr;
+  ge::hybrid::AicpuExtInfo aicpu_ext_info;
+  aicpu_ext_info.infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
+  aicpu_ext_info.infoLen = sizeof(int32_t);
+  int32_t type = ge::DEPEND_COMPUTE;
+  memcpy_s(aicpu_ext_info.infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
+  char *ext_mem = (char*)malloc(sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t));
+  memcpy_s(ext_mem, sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t), &aicpu_ext_info,
+           sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t));
+  std::string ext_info_str(ext_mem, sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t));
+  free(ext_mem);
+  vector<DataBuffer> inputs(2, DataBuffer());
+  vector<DataBuffer> outputs(1, DataBuffer());
+  vector<GeTensorDesc> inputs_desc(2, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
+  vector<GeTensorDesc> outputs_desc(1, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
+  ASSERT_EQ(task.SetExtInfoAndType(ext_info_str, 0), SUCCESS);
+  task.unknown_type_ = ge::DEPEND_COMPUTE;
+  task.num_outputs_ = 1;
+  ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS);
+  ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS);
+}