|
- /**
- * \file dnn/src/cuda/param_pack/opr_impl.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
- #include "src/cuda/param_pack/opr_impl.h"
- #include "src/cuda/param_pack/param_pack.cuh"
- #include "src/cuda/utils.h"
-
- namespace megdnn {
- namespace cuda {
-
- /**
-  * \brief Workspace size, in bytes, required by ParamPackConcatImpl::exec.
-  *
-  * exec_internal() copies `sizeof(const T*) * srcs.layout.shape[0]` bytes of
-  * pointers into the workspace, so the buffer is sized by pointer width rather
-  * than by `size_t`, keeping this query consistent with what is written.
-  * NOTE(review): presumably srcs.size() equals the srcs.layout.shape[0] seen
-  * by exec_internal() -- confirm against the caller.
-  *
-  * The two unnamed shape parameters are ignored.
-  *
-  * \param srcs shapes of the source tensors; only the element count is used
-  * \return bytes needed to hold one object pointer per entry of \p srcs
-  */
- size_t ParamPackConcatImpl::get_workspace_in_bytes(const TensorShapeArray& srcs,
-                                                    const TensorShape&,
-                                                    const TensorShape&) {
-     // The workspace stores raw pointers, so use the pointer width.
-     return sizeof(void*) * srcs.size();
- }
-
- /**
-  * \brief Concat implementation for a concrete element type \p T.
-  *
-  * Copies the pointer table found at srcs.raw_ptr into the workspace with a
-  * host-to-device cudaMemcpyAsync on the handle's stream, then calls
-  * param_pack::concat_proxy on that same stream.
-  *
-  * \param srcs      table of `const T*` entries; srcs.layout.shape[0] is the
-  *                  number of entries
-  * \param offsets   tensor read as int32_t and forwarded to the proxy
-  * \param dst       output tensor; its total element count is forwarded
-  * \param workspace receives the copied pointer table
-  */
- template <typename T>
- void ParamPackConcatImpl::exec_internal(_megdnn_tensor_in srcs,
-                                         _megdnn_tensor_in offsets,
-                                         _megdnn_tensor_out dst,
-                                         _megdnn_workspace workspace) {
-     size_t nr_srcs = srcs.layout.shape[0];
-     size_t nr_dst_elems = dst.layout.total_nr_elems();
-     auto stream = cuda_stream(this->handle());
-
-     // Source of the copy below (host side of cudaMemcpyHostToDevice).
-     const T** host_table = static_cast<const T**>(srcs.raw_ptr);
-     megdnn_assert_internal(host_table);
-     // Destination of the copy below: the workspace buffer.
-     const T** device_table = reinterpret_cast<const T**>(workspace.raw_ptr);
-
-     // Fetched before enqueueing the copy, matching the original ordering.
-     auto offsets_ptr = offsets.ptr<int32_t>();
-
-     const size_t table_bytes = sizeof(const T*) * nr_srcs;
-     cuda_check(cudaMemcpyAsync(device_table, host_table, table_bytes,
-                                cudaMemcpyHostToDevice, stream));
-
-     param_pack::concat_proxy<T>(device_table, dst.ptr<T>(), nr_srcs,
-                                 nr_dst_elems, offsets_ptr, stream);
- }
-
- /**
-  * \brief Entry point of the concat operator.
-  *
-  * Runs check_exec on the layouts, then selects the exec_internal
-  * instantiation whose ctype corresponds to dst's dtype. If no dtype listed by
-  * MEGDNN_FOREACH_COMPUTING_DTYPE matches, megdnn_throw("bad type") is reached.
-  */
- void ParamPackConcatImpl::exec(_megdnn_tensor_in srcs,
-                                _megdnn_tensor_in offsets,
-                                _megdnn_tensor_out dst,
-                                _megdnn_workspace workspace) {
-     check_exec(dst.layout, offsets.layout, srcs.layout);
- // One guarded call per candidate dtype; the first match returns.
- #define DISPATCH_CONCAT(DType)                                               \
-     if (dst.layout.dtype == DType()) {                                       \
-         exec_internal<typename DTypeTrait<DType>::ctype>(srcs, offsets, dst, \
-                                                          workspace);         \
-         return;                                                              \
-     }
-     MEGDNN_FOREACH_COMPUTING_DTYPE(DISPATCH_CONCAT)
- #undef DISPATCH_CONCAT
-     // No candidate dtype matched.
-     megdnn_throw("bad type");
- }
-
- /**
-  * \brief Workspace size, in bytes, required by ParamPackSplitImpl::exec.
-  *
-  * exec_internal() copies `sizeof(T*) * dsts.layout.shape[0]` bytes of
-  * pointers into the workspace, so the buffer is sized by pointer width rather
-  * than by `size_t`, keeping this query consistent with what is written.
-  * NOTE(review): presumably dsts.size() equals the dsts.layout.shape[0] seen
-  * by exec_internal() -- confirm against the caller.
-  *
-  * The two unnamed shape parameters are ignored.
-  *
-  * \param dsts shapes of the destination tensors; only the count is used
-  * \return bytes needed to hold one object pointer per entry of \p dsts
-  */
- size_t ParamPackSplitImpl::get_workspace_in_bytes(
-         const TensorShape&, const TensorShape&, const TensorShapeArray& dsts) {
-     // The workspace stores raw pointers, so use the pointer width.
-     return sizeof(void*) * dsts.size();
- }
-
- /**
-  * \brief Split implementation for a concrete element type \p T.
-  *
-  * Copies the pointer table found at dsts.raw_ptr into the workspace with a
-  * host-to-device cudaMemcpyAsync on the handle's stream, then calls
-  * param_pack::split_proxy on that same stream.
-  *
-  * \param src       input tensor; its total element count is forwarded
-  * \param table     Int32 tensor; the second table pointer passed to the
-  *                  proxy starts src.total_nr_elems() entries after the first
-  * \param dsts      1-D table of `T*` entries; dsts.layout.shape[0] is the
-  *                  number of entries
-  * \param workspace receives the copied pointer table
-  */
- template <typename T>
- void ParamPackSplitImpl::exec_internal(_megdnn_tensor_in src,
-                                        _megdnn_tensor_in table,
-                                        _megdnn_tensor_out dsts,
-                                        _megdnn_workspace workspace) {
-     // The table is read through int32_t pointers below.
-     megdnn_assert(table.layout.dtype == dtype::Int32());
-     // dsts is treated as a flat list of pointers, so it must be 1-D.
-     megdnn_assert(dsts.layout.ndim == 1);
-
-     size_t nr_dsts = dsts.layout.shape[0];
-     size_t nr_src_elems = src.layout.total_nr_elems();
-
-     auto stream = cuda_stream(this->handle());
-
-     size_t table_bytes = sizeof(T*) * nr_dsts;
-     // Source of the copy below (host side of cudaMemcpyHostToDevice).
-     T** host_table = static_cast<T**>(dsts.raw_ptr);
-     megdnn_assert_internal(host_table);
-     // Destination of the copy below: the workspace buffer.
-     T** device_table = reinterpret_cast<T**>(workspace.raw_ptr);
-
-     // Two views into the same table, nr_src_elems entries apart.
-     auto outer_table = table.ptr<int32_t>();
-     auto inner_table = outer_table + nr_src_elems;
-
-     cuda_check(cudaMemcpyAsync(device_table, host_table, table_bytes,
-                                cudaMemcpyHostToDevice, stream));
-
-     param_pack::split_proxy<T>(src.ptr<T>(), device_table, nr_src_elems,
-                                outer_table, inner_table, stream);
- }
-
- /**
-  * \brief Entry point of the split operator.
-  *
-  * Runs check_exec on the layouts, then selects the exec_internal
-  * instantiation whose ctype corresponds to src's dtype. If no dtype listed by
-  * MEGDNN_FOREACH_COMPUTING_DTYPE matches, megdnn_throw("bad type") is reached.
-  */
- void ParamPackSplitImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in table,
-                               _megdnn_tensor_out dsts,
-                               _megdnn_workspace workspace) {
-     check_exec(src.layout, table.layout, dsts.layout);
- // One guarded call per candidate dtype; the first match returns.
- #define DISPATCH_SPLIT(DType)                                              \
-     if (src.layout.dtype == DType()) {                                     \
-         exec_internal<typename DTypeTrait<DType>::ctype>(src, table, dsts, \
-                                                          workspace);       \
-         return;                                                            \
-     }
-     MEGDNN_FOREACH_COMPUTING_DTYPE(DISPATCH_SPLIT)
- #undef DISPATCH_SPLIT
-     // No candidate dtype matched.
-     megdnn_throw("bad type");
- }
-
- } // namespace cuda
- } // namespace megdnn
|