
fix

pull/2049/head
guopeian 3 years ago
commit 52d24b76c3
3 changed files with 82 additions and 0 deletions
1. ge/hybrid/node_executor/aicpu/aicpu_node_executor.h (+26 -0)
2. ge/single_op/task/op_task.h (+26 -0)
3. tests/ut/ge/single_op/single_op_task_unittest.cc (+30 -0)

ge/hybrid/node_executor/aicpu/aicpu_node_executor.h (+26 -0)

@@ -86,6 +86,24 @@ class AicpuNodeTaskBase : public NodeTask {
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
Status UpdateEventIdForBlockingAicpuOp();

virtual Status CopyDataToHbm(TaskContext &context,
                             const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) = 0;

///
/// read result summary and prepare copy task memory.
/// @param context task context
/// @param out_shape_hbm if scalar, TensorBuffer->data is null, size=0
/// @return SUCCESS:success other:failed
///
Status ReadResultSummaryAndPrepareMemory(TaskContext &context,
                                         std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);

Status UpdateShapeByHbmBuffer(TaskContext &context,
                              const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);

Status PrepareCopyInputs(const TaskContext &context,
                         const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);

protected:
const NodeItem *node_item_;
// just reference.
@@ -114,6 +132,14 @@ class AicpuNodeTaskBase : public NodeTask {
// for blocking aicpu op
bool is_blocking_aicpu_op_ = false;
rtEvent_t rt_event_ = nullptr;

std::vector<std::unique_ptr<TensorBuffer>> output_summary_;
std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;

std::unique_ptr<TensorBuffer> copy_input_release_flag_dev_;
std::unique_ptr<TensorBuffer> copy_input_data_size_dev_;
std::unique_ptr<TensorBuffer> copy_input_src_dev_;
std::unique_ptr<TensorBuffer> copy_input_dst_dev_;
};

class AicpuTfNodeTask : public AicpuNodeTaskBase {


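For context only: a minimal sketch (not part of this commit) of how the methods declared above could be chained for a DEPEND_COMPUTE output in the hybrid executor. The orchestrating function name, the GE_CHK_STATUS_RET error handling, and the call order are assumptions inferred from the declarations, not the repository's actual implementation.

// Hypothetical orchestration sketch; the real call sites live in
// ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc and may differ.
Status AicpuNodeTaskBase::HandleDependComputeOutputs(TaskContext &context) {  // assumed name
  std::vector<std::unique_ptr<TensorBuffer>> out_shape_hbm;
  // 1. Read each output's ResultSummary back from the device and allocate
  //    HBM buffers large enough to hold the reported shape data.
  GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(context, out_shape_hbm),
                    "Read result summary failed.");
  // 2. Fill the copy_input_* device buffers (release flag, data size,
  //    src/dst addresses) that the copy task consumes.
  GE_CHK_STATUS_RET(PrepareCopyInputs(context, out_shape_hbm),
                    "Prepare copy inputs failed.");
  // 3. Launch the copy task to move the shape data into the HBM buffers.
  GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm),
                    "Copy data to hbm failed.");
  // 4. Parse the copied shape data and refresh the output tensor descs.
  return UpdateShapeByHbmBuffer(context, out_shape_hbm);
}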
ge/single_op/task/op_task.h (+26 -0)

@@ -178,20 +178,36 @@ class AiCpuBaseTask : public OpTask {
rtStream_t stream);
Status UpdateOutputShape(vector<GeTensorDesc> &output_desc);
Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc);
// for blocking aicpu op
Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream);
Status UpdateEventIdForBlockingAicpuOp();
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);

Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
                                         vector<DataBuffer> &outputs,
                                         rtStream_t stream);
Status ReadResultSummaryAndPrepareMemory();

Status PrepareCopyInputs(vector<DataBuffer> &outputs);

Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);

virtual Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) = 0;
protected:
size_t num_inputs_ = 0;
@@ -213,6 +229,16 @@ class AiCpuBaseTask : public OpTask {
// for blocking aicpu op
bool is_blocking_aicpu_op_ = false;
rtEvent_t rt_event_ = nullptr;
std::vector<void *> output_summary_;
std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;

void *copy_input_release_flag_dev_ = nullptr;
void *copy_input_data_size_dev_ = nullptr;
void *copy_input_src_dev_ = nullptr;
void *copy_input_dst_dev_ = nullptr;

vector<void *> out_shape_hbm_;
};

class AiCpuTask : public AiCpuBaseTask {


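For illustration only: a hedged sketch of what the single-op PrepareCopyInputs declared above could do with the new members. The kReleaseFlag constant, the rtMemcpy-based upload, and the exact buffer layout are assumptions inferred from the member names, not the commit's implementation.

// Hypothetical sketch; op_task.cc may implement this differently.
Status AiCpuBaseTask::PrepareCopyInputs(vector<DataBuffer> &outputs) {
  const uint64_t kReleaseFlag = 1U;  // assumed: tells the copy kernel to release raw_data_ptr
  std::vector<uint64_t> release_flag;
  std::vector<uint64_t> data_size;
  std::vector<uint64_t> src_addr;
  std::vector<uint64_t> dst_addr;
  for (size_t i = 0U; i < num_outputs_; ++i) {
    // One ResultSummary per output, read back from the device beforehand.
    const auto &summary = output_summary_host_[i];
    release_flag.emplace_back(kReleaseFlag);
    data_size.emplace_back(summary.raw_data_size);
    src_addr.emplace_back(summary.raw_data_ptr);
    dst_addr.emplace_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(outputs[i].data)));
  }
  // Upload the four host arrays into the copy_input_*_dev_ buffers that the
  // copy task takes as its inputs.
  const uint64_t buf_len = num_outputs_ * sizeof(uint64_t);
  GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_, buf_len, release_flag.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_, buf_len, data_size.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_, buf_len, src_addr.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_, buf_len, dst_addr.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}

A subsequent copy task can then read these arrays and move each output's raw data from the kernel-allocated buffer into the caller's DataBuffer.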
tests/ut/ge/single_op/single_op_task_unittest.cc (+30 -0)

@@ -394,3 +394,33 @@ TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}

TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) {
  AiCpuCCTask task;
  rtStream_t stream = nullptr;
  task.num_inputs_ = 2;
  task.num_outputs_ = 1;
  task.input_is_const_ = {true, false};
  constexpr int kTotalAddr = 3;
  uintptr_t addrs[kTotalAddr] = {0U, 0U, 0U};
  task.io_addr_ = addrs;
  task.io_addr_num_ = kTotalAddr;

  // Build ext info that carries the unknown-shape type DEPEND_COMPUTE in a
  // buffer large enough for the trailing infoMsg payload.
  const size_t ext_len = sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t);
  std::vector<char> ext_mem(ext_len, 0);
  auto *aicpu_ext_info = reinterpret_cast<ge::hybrid::AicpuExtInfo *>(ext_mem.data());
  aicpu_ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
  aicpu_ext_info->infoLen = sizeof(int32_t);
  int32_t type = ge::DEPEND_COMPUTE;
  memcpy_s(aicpu_ext_info->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
  std::string ext_info_str(ext_mem.data(), ext_len);

  vector<DataBuffer> inputs(2, DataBuffer());
  vector<DataBuffer> outputs(1, DataBuffer());
  vector<GeTensorDesc> inputs_desc(2, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
  vector<GeTensorDesc> outputs_desc(1, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
  ASSERT_EQ(task.SetExtInfoAndType(ext_info_str, 0), SUCCESS);
  task.unknown_type_ = ge::DEPEND_COMPUTE;
  ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS);
  ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS);
}
