From 07ab8cb6b6e2c2db4598f7e046739f4bdb8ab6eb Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Wed, 18 Nov 2020 12:02:16 +0800
Subject: [PATCH] feat(dnn): add param_pack for rocm

GitOrigin-RevId: 2180504c714a52974f9d96fe49597415a9f1c8fe
---
 dnn/src/rocm/handle.cpp                    |   2 +
 dnn/src/rocm/param_pack/opr_impl.cpp       |  65 ++++++++++
 dnn/src/rocm/param_pack/opr_impl.h         |  35 ++++++
 dnn/src/rocm/param_pack/param_pack.cpp.hip |  67 +++++++++++
 dnn/src/rocm/param_pack/param_pack.h.hip   |  30 ++++++
 dnn/test/rocm/param_pack.cpp               | 156 +++++++++++++++++++++++++++++
 6 files changed, 355 insertions(+)
 create mode 100644 dnn/src/rocm/param_pack/opr_impl.cpp
 create mode 100644 dnn/src/rocm/param_pack/opr_impl.h
 create mode 100644 dnn/src/rocm/param_pack/param_pack.cpp.hip
 create mode 100644 dnn/src/rocm/param_pack/param_pack.h.hip
 create mode 100644 dnn/test/rocm/param_pack.cpp

diff --git a/dnn/src/rocm/handle.cpp b/dnn/src/rocm/handle.cpp
index b26e7a00..3b5b8a9e 100644
--- a/dnn/src/rocm/handle.cpp
+++ b/dnn/src/rocm/handle.cpp
@@ -36,6 +36,7 @@
 #include "src/rocm/argmxx/opr_impl.h"
 #include "src/rocm/sleep/opr_impl.h"
 #include "src/rocm/batch_normalization/opr_impl.h"
+#include "src/rocm/param_pack/opr_impl.h"
 
 #include <miopen/version.h>
 #include <cstring>
@@ -174,6 +175,7 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(ArgminForward);
 MEGDNN_SPECIALIZE_CREATE_OPERATOR(SleepForward);
 MEGDNN_SPECIALIZE_CREATE_OPERATOR(BNForward);
 MEGDNN_SPECIALIZE_CREATE_OPERATOR(BNBackward);
+MEGDNN_SPECIALIZE_CREATE_OPERATOR(ParamPackConcat);
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpragmas"
diff --git a/dnn/src/rocm/param_pack/opr_impl.cpp b/dnn/src/rocm/param_pack/opr_impl.cpp
new file mode 100644
index 00000000..373f7d72
--- /dev/null
+++ b/dnn/src/rocm/param_pack/opr_impl.cpp
@@ -0,0 +1,65 @@
+/**
+ * \file dnn/src/rocm/param_pack/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include "hcc_detail/hcc_defs_prologue.h"
+#include "src/rocm/param_pack/opr_impl.h"
+#include "src/rocm/param_pack/param_pack.h.hip"
+#include "src/rocm/utils.h"
+
+namespace megdnn {
+namespace rocm {
+
+size_t ParamPackConcatImpl::get_workspace_in_bytes(const TensorShapeArray& srcs,
+                                                   const TensorShape&,
+                                                   const TensorShape&) {
+    // one device-side pointer slot per source tensor
+    return sizeof(size_t) * srcs.size();
+}
+
+template <typename T>
+void ParamPackConcatImpl::exec_internal(_megdnn_tensor_in srcs,
+                                        _megdnn_tensor_in offsets,
+                                        _megdnn_tensor_out dst,
+                                        _megdnn_workspace workspace) {
+    size_t inp_size = srcs.layout.shape[0],
+           out_size = dst.layout.total_nr_elems();
+    auto stream = hip_stream(this->handle());
+
+    // srcs holds host-side pointers to the individual tensors; they are
+    // copied into workspace so the kernel can read them from device memory
+    auto src_cpu = static_cast<const T**>(srcs.raw_ptr);
+    megdnn_assert_internal(src_cpu);
+    auto src_gpu = reinterpret_cast<const T**>(workspace.raw_ptr);
+
+    auto offsets_gpu = offsets.ptr<int32_t>();
+
+    hip_check(hipMemcpyAsync(src_gpu, src_cpu, sizeof(const T*) * inp_size,
+                             hipMemcpyHostToDevice, stream));
+
+    param_pack::concat_proxy<T>(src_gpu, dst.ptr<T>(), inp_size, out_size,
+                                offsets_gpu, stream);
+}
+
+void ParamPackConcatImpl::exec(_megdnn_tensor_in srcs,
+                               _megdnn_tensor_in offsets,
+                               _megdnn_tensor_out dst,
+                               _megdnn_workspace workspace) {
+    check_exec(dst.layout, offsets.layout, srcs.layout);
+#define cb(DType)                                                \
+    if (dst.layout.dtype == DType()) {                           \
+        using ctype = typename DTypeTrait<DType>::ctype;         \
+        exec_internal<ctype>(srcs, offsets, dst, workspace);     \
+        return;                                                  \
+    }
+    MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
+    megdnn_throw("bad type");
+#undef cb
+}
+
+} // namespace rocm
+} // namespace megdnn
diff --git a/dnn/src/rocm/param_pack/opr_impl.h b/dnn/src/rocm/param_pack/opr_impl.h
new file mode 100644
index 00000000..4e66b69f
--- /dev/null
+++ b/dnn/src/rocm/param_pack/opr_impl.h
@@ -0,0 +1,35 @@
+/**
+ * \file dnn/src/rocm/param_pack/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#pragma once
+#include "megdnn/oprs.h"
+
+namespace megdnn {
+namespace rocm {
+
+class ParamPackConcatImpl final : public ParamPackConcat {
+public:
+    using ParamPackConcat::ParamPackConcat;
+    void exec(_megdnn_tensor_in srcs, _megdnn_tensor_in table,
+              _megdnn_tensor_out dst, _megdnn_workspace workspace) override;
+
+    size_t get_workspace_in_bytes(const TensorShapeArray& srcs,
+                                  const TensorShape& table,
+                                  const TensorShape& dst) override;
+
+private:
+    template <typename T>
+    void exec_internal(_megdnn_tensor_in srcs, _megdnn_tensor_in table,
+                       _megdnn_tensor_out dst, _megdnn_workspace workspace);
+};
+
+} // namespace rocm
+} // namespace megdnn
diff --git a/dnn/src/rocm/param_pack/param_pack.cpp.hip b/dnn/src/rocm/param_pack/param_pack.cpp.hip
new file mode 100644
index 00000000..7567a14b
--- /dev/null
+++ b/dnn/src/rocm/param_pack/param_pack.cpp.hip
@@ -0,0 +1,67 @@
+/**
+ * \file dnn/src/rocm/param_pack/param_pack.cpp.hip
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include "hcc_detail/hcc_defs_prologue.h"
+#include "src/rocm/param_pack/param_pack.h.hip"
+#include "megdnn/dtype.h"
+#include "src/rocm/utils.h.hip"
+
+namespace megdnn {
+namespace rocm {
+namespace param_pack {
+
+template <typename T>
+__global__ void concat_kernel(const T** srcs, T* dst,
+                              const int32_t* offsets,
+                              size_t srcs_size,
+                              size_t total_size) {
+    size_t addr = threadIdx.x + blockIdx.x * blockDim.x;
+    if (addr < total_size) {
+        size_t l = 0, r = srcs_size - 1, mid;
+        while (l < r) {
+            mid = (l + r) >> 1;
+            if (offsets[(mid << 1) + 1] > addr) {
+                r = mid;
+            } else {
+                l = mid + 1;
+            }
+        }
+        if (addr < offsets[l << 1])
+            dst[addr] = 0;
+        else
+            dst[addr] = srcs[l][addr - offsets[l << 1]];
+    }
+}
+
+template <typename T>
+void concat_proxy(const T** srcs, T* dst, size_t srcs_size, size_t total_size,
+                  const int32_t* offsets,
+                  hipStream_t stream) {
+    size_t NR_BLOCKS = DIVUP(total_size, NR_THREADS);
+    hipLaunchKernelGGL(concat_kernel<T>, NR_BLOCKS, NR_THREADS, 0, stream,
+                       srcs, dst, offsets, srcs_size, total_size);
+    after_kernel_launch();
+}
+
+#define INST(T)                                                      \
+    template void concat_proxy<T>(const T**, T*, size_t, size_t,     \
+                                  const int32_t*,                    \
+                                  hipStream_t);
+#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
+MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
+#undef cb
+#undef INST
+
+} // namespace param_pack
+} // namespace rocm
+} // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/rocm/param_pack/param_pack.h.hip b/dnn/src/rocm/param_pack/param_pack.h.hip
new file mode 100644
index 00000000..511f454e
--- /dev/null
+++ b/dnn/src/rocm/param_pack/param_pack.h.hip
@@ -0,0 +1,30 @@
+/**
+ * \file dnn/src/rocm/param_pack/param_pack.h.hip
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#pragma once
+
+#include "hip_header.h"
+#include <stdint.h>
+#include <stddef.h>
+
+namespace megdnn {
+namespace rocm {
+namespace param_pack {
+
+template <typename T>
+void concat_proxy(const T** srcs, T* dst, size_t srcs_size, size_t total_size,
+                  const int32_t* offsets, hipStream_t stream);
+
+} // namespace param_pack
+} // namespace rocm
+} // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/test/rocm/param_pack.cpp b/dnn/test/rocm/param_pack.cpp
new file mode 100644
index 00000000..b3bbfbe6
--- /dev/null
+++ b/dnn/test/rocm/param_pack.cpp
@@ -0,0 +1,156 @@
+/**
+ * \file dnn/test/rocm/param_pack.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "hcc_detail/hcc_defs_prologue.h"
+#include "test/rocm/fixture.h"
+
+#include "test/common/checker.h"
+#include "test/common/utils.h"
+
+using namespace megdnn;
+using namespace test;
+
+namespace {
+
+template <typename T>
+std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
+                                    size_t alignment) {
+    size_t dtype_size = sizeof(T);
+    if (alignment < dtype_size)
+        alignment = dtype_size;
+    // work in units of elements, not bytes
+    alignment /= dtype_size;
+
+    auto get_aligned = [alignment](size_t v) {
+        auto mod = v & (alignment - 1);
+        return v + ((alignment - mod) & (alignment - 1));
+    };
+
+    // offsets[2i] / offsets[2i+1]: aligned begin / end of tensor i
+    std::vector<int32_t> offsets(shapes.size() << 1);
+    size_t offset = 0;
+    for (size_t i = 0; i < shapes.size(); i++) {
+        offset = get_aligned(offset);
+        offsets[i << 1] = offset;
+        offset += shapes[i].total_nr_elems();
+        offsets[(i << 1) + 1] = offset;
+    }
+    return offsets;
+}
+
+template <typename T>
+std::vector<T> create_pack(size_t pack_size,
+                           const std::vector<int32_t>& offsets,
+                           const std::vector<std::vector<T>>& ptr) {
+    megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
+    // zero-filled so alignment gaps stay zero, matching the kernel
+    std::vector<T> data(pack_size, 0);
+    for (size_t i = 0; i * 2 < offsets.size(); ++i) {
+        size_t begin = offsets[i * 2], end = offsets[i * 2 + 1];
+        for (size_t j = 0; j < end - begin; j++)
+            data[begin + j] = ptr[i][j];
+    }
+    return data;
+}
+
+template <typename T>
+std::vector<std::vector<T>> create_params(size_t nr_params,
+                                          const TensorShapeArray& shapes) {
+    std::vector<std::vector<T>> params;
+    for (size_t i = 0; i < nr_params; ++i) {
+        std::vector<T> expected_data;
+        for (size_t x = 0; x < shapes[i].total_nr_elems(); ++x) {
+            expected_data.push_back(rand());
+        }
+        params.push_back(std::move(expected_data));
+    }
+    return params;
+}
+
+template <typename T>
+T* create_device_data(Handle* handle, const T* data, size_t size) {
+    T* data_device =
+            static_cast<T*>(test::megdnn_malloc(handle, size * sizeof(T)));
+    if (data)
+        test::megdnn_memcpy_H2D(handle, data_device, data, size * sizeof(T));
+    return data_device;
+}
+
+template <typename T>
+void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
+                            DType type) {
+    auto concat = handle->create_operator<ParamPackConcat>();
+    size_t nr_params = shapes.size();
+
+    std::vector<T*> param_ptrs;
+    std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
+    for (size_t i = 0; i < nr_params; ++i) {
+        param_ptrs.push_back(create_device_data<T>(handle, params[i].data(),
+                                                   shapes[i].total_nr_elems()));
+    }
+    std::vector<int32_t> offsets =
+            create_offsets<T>(shapes, handle->alignment_requirement());
+    size_t pack_size = offsets.back();
+    int32_t* offsets_gpu = create_device_data<int32_t>(handle, offsets.data(),
+                                                       offsets.size());
+
+    std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
+    T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());
+
+    TensorLayout dst_layout({pack_size}, type);
+    TensorND dst_tensor(pack_gpu, dst_layout);
+
+    TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
+    TensorND offsets_tensor(offsets_gpu, offsets_layout);
+
+    test::WorkspaceWrapper workspace(
+            handle, concat->get_workspace_in_bytes(shapes, offsets_layout,
+                                                   {pack_size}));
+    TensorND src_tensor(param_ptrs.data(),
+                        TensorLayout({nr_params}, dtype::Int32()));
+
+    concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());
+
+    // check
+    T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
+    test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu,
+                            sizeof(T) * pack_size);
+    for (size_t i = 0; i < pack_size; ++i) {
+        ASSERT_EQ(actual_pack[i], expected_pack[i]);
+    }
+    free(actual_pack);
+    test::megdnn_free(handle, pack_gpu);
+    test::megdnn_free(handle, offsets_gpu);
+    for (auto ptr : param_ptrs) {
+        test::megdnn_free(handle, ptr);
+    }
+}
+
+} // namespace
+
+TEST_F(ROCM, PARAM_PACK) {
+    SmallVector<TensorShapeArray> shapes_vec;
+    shapes_vec.push_back({{1}});
+    shapes_vec.push_back({{129}, {21}});
+    shapes_vec.push_back({{15}, {21}, {34}});
+    shapes_vec.push_back({{1, 2}, {3, 5}, {5, 8}, {7, 11}, {9, 14}});
+    shapes_vec.push_back({{1, 2},
+                          {3, 5},
+                          {1},
+                          {3, 3, 3, 4},
+                          {71},
+                          {9, 14},
+                          {111, 111, 111},
+                          {128, 128, 128}});
+    for (auto shapes : shapes_vec) {
+        test_param_pack_concat<int32_t>(handle_rocm(), shapes, dtype::Int32());
+        test_param_pack_concat<int16_t>(handle_rocm(), shapes, dtype::Int16());
+        test_param_pack_concat<float>(handle_rocm(), shapes, dtype::Float32());
+    }
+}
+
+// vim: syntax=cpp.doxygen