From eab6afab47141e5e78eed66d28e2ad76c43a83a7 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 9 Jun 2021 18:55:40 +0800 Subject: [PATCH] feat(mgb): add padding opr for megbrain GitOrigin-RevId: 490e0c5d5add99d54d9c9e3095147016b3c191c7 --- dnn/include/megdnn/oprs/general.h | 47 +++ dnn/scripts/opr_param_defs.py | 26 ++ dnn/src/common/handle_impl.h | 4 +- dnn/src/common/opr_trait.h | 2 + dnn/src/common/padding.cpp | 144 +++++++++ dnn/src/cuda/handle_create.cpp | 1 + dnn/src/cuda/padding/opr_impl.cpp | 71 +++++ dnn/src/cuda/padding/opr_impl.h | 35 +++ dnn/src/cuda/padding/padding.cu | 284 ++++++++++++++++++ dnn/src/cuda/padding/padding.cuh | 36 +++ dnn/src/cuda/utils.cuh | 2 +- dnn/src/naive/argmxx/opr_impl.cpp | 2 +- dnn/src/naive/handle.cpp | 1 + dnn/src/naive/padding/opr_impl.cpp | 281 ++++++++++++++++++ dnn/src/naive/padding/opr_impl.h | 30 ++ dnn/test/common/padding.h | 441 ++++++++++++++++++++++++++++ dnn/test/cuda/padding.cpp | 219 ++++++++++++++ dnn/test/naive/padding.cpp | 132 +++++++++ src/opr/impl/misc.oprdecl | 5 + src/opr/impl/misc.sereg.h | 2 +- src/opr/impl/tensor_manip.cpp | 26 ++ src/opr/impl/tensor_manip.sereg.h | 33 +++ src/opr/include/megbrain/opr/tensor_manip.h | 20 ++ src/opr/test/dnn/padding.cpp | 68 +++++ src/opr/test/tensor_manip.cpp | 44 +++ src/serialization/impl/schema.fbs | 1 + 26 files changed, 1953 insertions(+), 4 deletions(-) create mode 100644 dnn/src/common/padding.cpp create mode 100644 dnn/src/cuda/padding/opr_impl.cpp create mode 100644 dnn/src/cuda/padding/opr_impl.h create mode 100644 dnn/src/cuda/padding/padding.cu create mode 100644 dnn/src/cuda/padding/padding.cuh create mode 100644 dnn/src/naive/padding/opr_impl.cpp create mode 100644 dnn/src/naive/padding/opr_impl.h create mode 100644 dnn/test/common/padding.h create mode 100644 dnn/test/cuda/padding.cpp create mode 100644 dnn/test/naive/padding.cpp create mode 100644 src/opr/test/dnn/padding.cpp diff --git a/dnn/include/megdnn/oprs/general.h 
b/dnn/include/megdnn/oprs/general.h index 2f9abccd..20a9f1bf 100644 --- a/dnn/include/megdnn/oprs/general.h +++ b/dnn/include/megdnn/oprs/general.h @@ -1353,6 +1353,53 @@ public: protected: void check_exec(const TensorLayout& dst, size_t workspace_in_bytes); }; + +/*! + * \brief standard padding operator + * Inputs must have the same dtype, and the output tensor shape must greater or equal than + * input tensor in every dimensions, the extra space will be fulled with m which default to + * be 0. + */ + +class PaddingBase: public OperatorBase { + DEF_OPR_PARAM(Padding); + DEF_OPR_IMPL(PaddingBase, OperatorBase, 1, 1); +public: + using Mode = Param::PaddingMode; +protected: + SmallVector get_offsets(); + void check_exec(const TensorLayout& src, const TensorLayout& dst); +}; + +class PaddingForward: public PaddingBase { + DEF_OPR_IMPL(PaddingForward, PaddingBase, 1, 1); +public: + virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0; + void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, + _megdnn_workspace) { + return exec(src, dst); + } + virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0; + void deduce_layout(const TensorLayout &src, TensorLayout &dst); +protected: + void forward_check_exec(const TensorLayout& src, const TensorLayout& dst); +}; + +using Padding = PaddingForward; + +class PaddingBackward: public PaddingBase { + DEF_OPR_IMPL(PaddingBackward, PaddingBase, 1, 1); +public: + virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0; + void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, + _megdnn_workspace) { + return exec(src, dst); + } + virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0; +protected: + void backward_check_exec(const TensorLayout& src, const TensorLayout& dst); +}; + } // namespace megdnn #include "megdnn/internal/opr_header_epilogue.h" diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py index 
e8c66167..83fafb66 100755 --- a/dnn/scripts/opr_param_defs.py +++ b/dnn/scripts/opr_param_defs.py @@ -1171,3 +1171,29 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o add_fields('int32', 'qmax', '2147483647') ) pdef('Fill').add_fields('float32', 'value', '0') + + +PADDING_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), + Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'), + Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii')] +(pdef('Padding'). + add_fields('uint32', Doc('front_offset_dim0','offset in dim 0'), 0). + add_fields('uint32', Doc('front_offset_dim1','offset in dim 1'), 0). + add_fields('uint32', Doc('front_offset_dim2','offset in dim 2'), 0). + add_fields('uint32', Doc('front_offset_dim3','offset in dim 3'), 0). + add_fields('uint32', Doc('front_offset_dim4','offset in dim 4'), 0). + add_fields('uint32', Doc('front_offset_dim5','offset in dim 5'), 0). + add_fields('uint32', Doc('front_offset_dim6','offset in dim 6'), 0). + add_fields('uint32', Doc('back_offset_dim0', 'back offset in dim0'), 0). + add_fields('uint32', Doc('back_offset_dim1', 'back offset in dim1'), 0). + add_fields('uint32', Doc('back_offset_dim2', 'back offset in dim2'), 0). + add_fields('uint32', Doc('back_offset_dim3', 'back offset in dim3'), 0). + add_fields('uint32', Doc('back_offset_dim4', 'back offset in dim4'), 0). + add_fields('uint32', Doc('back_offset_dim5', 'back offset in dim5'), 0). + add_fields('uint32', Doc('back_offset_dim6', 'back offset in dim6'), 0). + add_fields('float32', Doc('padding_val','param of padding opr'), 0). 
+ add_enum('PaddingMode', *PADDING_MODES, + name_field='padding_mode', default=2, + member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] + ) +) diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 4e393752..b2b58b45 100644 --- a/dnn/src/common/handle_impl.h +++ b/dnn/src/common/handle_impl.h @@ -217,7 +217,9 @@ private: cb(CheckHasInf) \ cb(LSQForward) \ cb(LSQBackward) \ - cb(Fill) + cb(Fill) \ + cb(PaddingForward) \ + cb(PaddingBackward) /*! * \brief specialize HandleImpl::create_operator for a single opr type; diff --git a/dnn/src/common/opr_trait.h b/dnn/src/common/opr_trait.h index 1417bdce..92e109ee 100644 --- a/dnn/src/common/opr_trait.h +++ b/dnn/src/common/opr_trait.h @@ -27,6 +27,8 @@ struct OprTrait {}; static const bool can_deduce_layout = CanDeduceLayout; \ } +DEF(Padding, 2, false, true); +DEF(PaddingBackward, 2, false, false); DEF(ConvolutionForward, 3, true, true); DEF(Convolution3DForward, 3, true, true); DEF(ConvolutionBackwardData, 3, true, false); diff --git a/dnn/src/common/padding.cpp b/dnn/src/common/padding.cpp new file mode 100644 index 00000000..eda3e74f --- /dev/null +++ b/dnn/src/common/padding.cpp @@ -0,0 +1,144 @@ +/** + * \file dnn/src/common/padding.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
+ */ + +#include "megdnn/oprs.h" +#include "megdnn/oprs/general.h" +#include "megdnn/thin/small_vector.h" +#include "src/common/opr_param_defs_enumv.cuh" +#include "src/common/utils.h" + +namespace megdnn { + +using padding_param = megdnn::param_enumv::Padding; + +void PaddingForward::forward_check_exec(const TensorLayout& src, + const TensorLayout& dst) { + check_exec(src, dst); + megdnn_assert(src.dtype.enumv() != DTypeEnum::Bool && + src.dtype.enumv() != DTypeEnum::IntB1 && + src.dtype.enumv() != DTypeEnum::IntB2 && + src.dtype.enumv() != DTypeEnum::IntB4, + "unsupported %s dtype for forward padding opr", + src.dtype.name()); +} + +void PaddingForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) { + SmallVector offsets(get_offsets()); + TensorShape dst_shape; + switch (src.ndim) { + case 1: + dst_shape = {src.shape[0] + offsets[0] + offsets[1]}; + break; + case 2: + dst_shape = {src.shape[0] + offsets[0] + offsets[1], + src.shape[1] + offsets[2] + offsets[3]}; + break; + case 3: + dst_shape = {src.shape[0] + offsets[0] + offsets[1], + src.shape[1] + offsets[2] + offsets[3], + src.shape[2] + offsets[4] + offsets[5]}; + break; + case 4: + dst_shape = {src.shape[0] + offsets[0] + offsets[1], + src.shape[1] + offsets[2] + offsets[3], + src.shape[2] + offsets[4] + offsets[5], + src.shape[3] + offsets[6] + offsets[7]}; + break; + case 5: + dst_shape = {src.shape[0] + offsets[0] + offsets[1], + src.shape[1] + offsets[2] + offsets[3], + src.shape[2] + offsets[4] + offsets[5], + src.shape[3] + offsets[6] + offsets[7], + src.shape[4] + offsets[8] + offsets[9]}; + break; + case 6: + dst_shape = {src.shape[0] + offsets[0] + offsets[1], + src.shape[1] + offsets[2] + offsets[3], + src.shape[2] + offsets[4] + offsets[5], + src.shape[3] + offsets[6] + offsets[7], + src.shape[4] + offsets[8] + offsets[9], + src.shape[5] + offsets[10] + offsets[11]}; + break; + case 7: + dst_shape = {src.shape[0] + offsets[0] + offsets[1], + src.shape[1] + offsets[2] + offsets[3], + 
src.shape[2] + offsets[4] + offsets[5], + src.shape[3] + offsets[6] + offsets[7], + src.shape[4] + offsets[8] + offsets[9], + src.shape[5] + offsets[10] + offsets[11], + src.shape[6] + offsets[12] + offsets[13]}; + break; + default: + megdnn_assert(false, "invalid tensor ndim %zu", src.ndim); + break; + } + dst = TensorLayout(dst_shape, src.dtype); +} + +void PaddingBackward::backward_check_exec(const TensorLayout& src, + const TensorLayout& dst) { + check_exec(dst, src); + megdnn_assert(src.dtype.enumv() == + DTypeEnum::Float32 DNN_INC_FLOAT16( + || src.dtype.enumv() == DTypeEnum::Float16 || + src.dtype.enumv() == DTypeEnum::BFloat16), + "unsupported %s dtype for forward padding opr", + src.dtype.name()); +} + +SmallVector PaddingBase::get_offsets() { + SmallVector offsets = { + param().front_offset_dim0, param().back_offset_dim0, + param().front_offset_dim1, param().back_offset_dim1, + param().front_offset_dim2, param().back_offset_dim2, + param().front_offset_dim3, param().back_offset_dim3, + param().front_offset_dim4, param().back_offset_dim4, + param().front_offset_dim5, param().back_offset_dim5, + param().front_offset_dim6, param().back_offset_dim6}; + return offsets; +} + +void PaddingBase::check_exec(const TensorLayout& src, const TensorLayout& dst) { + SmallVector offsets(get_offsets()); + // make sure the src and dst tensor not empty + megdnn_assert(src.ndim != 0 && dst.ndim != 0); + // make sure src and dst is same dtype + megdnn_assert_eq_dtype(src, dst); + // make sure src and dst is same ndim + megdnn_assert(src.ndim == dst.ndim, "the src.ndim = %zu the dst.ndim = %zu", + src.ndim, dst.ndim); + // make sure in every dimension dst is equal or greater than src + for (size_t i = 0; i < src.ndim; ++i) { + megdnn_assert(dst.shape[i] == + src.shape[i] + offsets[i * 2] + offsets[i * 2 + 1]); + } + // check the padding mode is valid + megdnn_assert(static_cast(param().padding_mode) == + padding_param::PaddingMode::REFLECT || + 
static_cast(param().padding_mode) == + padding_param::PaddingMode::REPLICATE || + static_cast(param().padding_mode) == + padding_param::PaddingMode::CONSTANT, + "unsupported padding mode"); + // additional check for reflect padding, make sure the reflected index is + // valid + if (static_cast(param().padding_mode) == + padding_param::PaddingMode::REFLECT) { + for (size_t i = 0; i < src.ndim; ++i) { + megdnn_assert(offsets[i * 2] < src.shape[i] && + dst.shape[i] - offsets[i * 2] - src.shape[i] < + src.shape[i]); + } + } +} + +} // namespace megdnn diff --git a/dnn/src/cuda/handle_create.cpp b/dnn/src/cuda/handle_create.cpp index f9ab2233..2e816a98 100644 --- a/dnn/src/cuda/handle_create.cpp +++ b/dnn/src/cuda/handle_create.cpp @@ -12,6 +12,7 @@ #include "src/common/handle_impl.h" +#include "src/cuda/padding/opr_impl.h" #include "src/cuda/adaptive_pooling/opr_impl.h" #include "src/cuda/add_update/opr_impl.h" #include "src/cuda/argmxx/opr_impl.h" diff --git a/dnn/src/cuda/padding/opr_impl.cpp b/dnn/src/cuda/padding/opr_impl.cpp new file mode 100644 index 00000000..9c2c9528 --- /dev/null +++ b/dnn/src/cuda/padding/opr_impl.cpp @@ -0,0 +1,71 @@ +/** + * \file dnn/src/cuda/padding/opr_impl.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
+ */ +#include "src/cuda/padding/opr_impl.h" +#include "src/common/utils.h" +#include "src/cuda/handle.h" +#include "src/cuda/padding/padding.cuh" +#include "src/cuda/utils.h" + +namespace megdnn { +namespace cuda { + +void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { + forward_check_exec(src.layout, dst.layout); + SmallVector offsets(get_offsets()); + // SmallVector can not be used as argument in cu file + size_t param_offsets[MEGDNN_MAX_NDIM * 2] = { + offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], + offsets[5], offsets[6], offsets[7], offsets[8], offsets[9], + offsets[10], offsets[11], offsets[12], offsets[13]}; + auto stream = cuda_stream(this->handle()); +#define cb(DType) \ + if (src.layout.dtype.enumv() == DTypeTrait::enumv) { \ + using ctype = typename DTypeTrait::ctype; \ + padding::padding_forward_proxy(src, dst, param_offsets, \ + uint32_t(param().padding_mode), \ + param().padding_val, stream); \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE(cb) +#undef cb +} + +void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { + backward_check_exec(src.layout, dst.layout); + SmallVector offsets(get_offsets()); + // SmallVector can not be used as argument in cu file + size_t param_offsets[MEGDNN_MAX_NDIM * 2] = { + offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], + offsets[5], offsets[6], offsets[7], offsets[8], offsets[9], + offsets[10], offsets[11], offsets[12], offsets[13]}; + auto stream = cuda_stream(this->handle()); +#define cb(DType) \ + if (src.layout.dtype.enumv() == DTypeTrait::enumv) { \ + using ctype = typename DTypeTrait::ctype; \ + padding::padding_backward_proxy(src, dst, param_offsets, \ + uint32_t(param().padding_mode), \ + stream); \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) +#undef cb +} + +size_t PaddingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, + const TensorLayout& dst) { + return 0; +} + +size_t PaddingBackwardImpl::get_workspace_in_bytes(const 
TensorLayout& src, + const TensorLayout& dst) { + return 0; +} +} // namespace cuda +} // namespace megdnn \ No newline at end of file diff --git a/dnn/src/cuda/padding/opr_impl.h b/dnn/src/cuda/padding/opr_impl.h new file mode 100644 index 00000000..9cd495ac --- /dev/null +++ b/dnn/src/cuda/padding/opr_impl.h @@ -0,0 +1,35 @@ +/** + * \file dnn/src/cuda/padding/opr_impl.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + */ +#pragma once +#include "megdnn/oprs.h" + +namespace megdnn { +namespace cuda { +class PaddingForwardImpl : public PaddingForward { + using PaddingForward::PaddingForward; + +public: + void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; + size_t get_workspace_in_bytes(const TensorLayout& src, + const TensorLayout& dst) override; +}; + +class PaddingBackwardImpl : public PaddingBackward { + using PaddingBackward::PaddingBackward; + +public: + void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; + size_t get_workspace_in_bytes(const TensorLayout& src, + const TensorLayout& dst) override; +}; +} // namespace cuda +} // namespace megdnn \ No newline at end of file diff --git a/dnn/src/cuda/padding/padding.cu b/dnn/src/cuda/padding/padding.cu new file mode 100644 index 00000000..9c7b3074 --- /dev/null +++ b/dnn/src/cuda/padding/padding.cu @@ -0,0 +1,284 @@ +/** + * \file dnn/src/cuda/padding/padding.cu + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + */ +#include +#include +#include +#include "megdnn/basic_types.h" +#include "padding.cuh" +#include "src/cuda/int_fastdiv.cuh" +#include "src/cuda/query_blocksize.cuh" + +namespace megdnn { +namespace cuda { +namespace padding { + +struct ShapeParams { + size_t src_shape[MEGDNN_MAX_NDIM]; + size_t dst_shape[MEGDNN_MAX_NDIM]; + Uint32Fastdiv src_stride[MEGDNN_MAX_NDIM]; + Uint32Fastdiv dst_stride[MEGDNN_MAX_NDIM]; + size_t offsets[MEGDNN_MAX_NDIM * 2]; +}; + +template +__global__ void paddingConst_kernel(const size_t ndim, + const size_t total_out_nr, + const T* const src, T* const dst, + ShapeParams params, + const float_t padding_val) { + KERN_FOR(out_index, total_out_nr) { + bool in_src_valid_area = true; + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + Uint32Fastdiv dst_stride = params.dst_stride[dim], src_stride = params.src_stride[dim]; + size_t src_shape = params.src_shape[dim]; + size_t offset = params.offsets[dim*2]; + + size_t dim_index = out_index_tmp / dst_stride; + in_src_valid_area &= (dim_index >= offset && dim_index < offset+src_shape); + if(!in_src_valid_area) break; + out_index_tmp -= dim_index * dst_stride.divisor(); + in_index += (dim_index - offset)*src_stride.divisor(); + /* + size_t dim_index = out_index_tmp / params.dst_stride[dim]; + out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); + in_src_valid_area &= (dim_index >= params.offsets[dim * 2] && + dim_index < params.offsets[dim * 2] + + params.src_shape[dim]); + in_index += (dim_index - params.offsets[dim * 2]) * + params.src_stride[dim].divisor(); + */ + } + dst[out_index] = in_src_valid_area ? 
src[in_index] : padding_val; + } +} + +template +__global__ void paddingReplicate_kernel(const size_t ndim, + const size_t total_out_nr, + const T* const src, T* const dst, + ShapeParams params, const float_t) { + KERN_FOR(out_index, total_out_nr) { + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = out_index_tmp / params.dst_stride[dim]; + out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); + dim_index = (size_t)llmin( + (long long)params.src_shape[dim] - 1, + llmax((long long)dim_index - + (long long)params.offsets[dim * 2], + (long long)0)); + in_index += dim_index * params.src_stride[dim].divisor(); + } + dst[out_index] = src[in_index]; + } +} + +template +__global__ void paddingReflect_kernel(const size_t ndim, + const size_t total_out_nr, + const T* const src, T* const dst, + ShapeParams params, const float_t) { + KERN_FOR(out_index, total_out_nr) { + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + long long dim_index = out_index_tmp / params.dst_stride[dim]; + out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); + dim_index -= (long long)params.offsets[dim * 2]; + dim_index = llmax(dim_index, -dim_index); + dim_index = llmin(dim_index, 2 * (long long)params.src_shape[dim] - + dim_index - 2); + in_index += size_t(dim_index) * + (size_t)params.src_stride[dim].divisor(); + } + dst[out_index] = src[in_index]; + } +} + +template +__global__ void paddingConstBackward_kernel(const size_t ndim, + const size_t total_in_nr, + const T* const src, T* const dst, + ShapeParams params) { + KERN_FOR(in_index, total_in_nr) { + bool in_dst_valid_area = true; + size_t out_index = 0; + size_t in_index_tmp = in_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = in_index_tmp / params.src_stride[dim]; + in_index_tmp -= dim_index * params.src_stride[dim].divisor(); + in_dst_valid_area &= (dim_index 
>= params.offsets[dim * 2] && + dim_index < params.offsets[dim * 2] + + params.dst_shape[dim]); + out_index += (dim_index - params.offsets[dim * 2]) * + params.dst_stride[dim].divisor(); + } + if (in_dst_valid_area) { + dst[out_index] = src[in_index]; + } + } +} + +template +__global__ void paddingReplicateBackward_kernel(const size_t ndim, + const size_t total_in_nr, + const T* const src, + T* const dst, + ShapeParams params) { + KERN_FOR(in_index, total_in_nr) { + size_t out_index = 0; + size_t in_index_tmp = in_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = in_index_tmp / params.src_stride[dim]; + in_index_tmp -= dim_index * params.src_stride[dim].divisor(); + dim_index = (size_t)llmin( + (long long)params.dst_shape[dim] - 1, + llmax((long long)dim_index - + (long long)params.offsets[dim * 2], + (long long)0)); + out_index += dim_index * params.dst_stride[dim].divisor(); + } + atomic_add(&dst[out_index], src[in_index]); + } +} + +template +__global__ void paddingReflectBackward_kernel(const size_t ndim, + const size_t total_in_nr, + const T* const src, T* const dst, + ShapeParams params) { + KERN_FOR(in_index, total_in_nr) { + size_t out_index = 0; + size_t in_index_tmp = in_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + long long dim_index = in_index_tmp / params.src_stride[dim]; + in_index_tmp -= dim_index * params.src_stride[dim].divisor(); + dim_index -= (long long)params.offsets[dim * 2]; + dim_index = llmax(dim_index, -dim_index); + dim_index = llmin(dim_index, 2 * (long long)params.dst_shape[dim] - + dim_index - 2); + out_index += size_t(dim_index) * + (size_t)params.dst_stride[dim].divisor(); + } + atomic_add(&dst[out_index], src[in_index]); + } +} + +template +void padding_forward_proxy(const TensorND& src, const TensorND& dst, + size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, + const float_t padding_val, cudaStream_t stream) { + ShapeParams params; + for (size_t i = 0; i < src.layout.ndim; ++i) { + 
params.src_shape[i] = src.layout.shape[i]; + params.dst_shape[i] = dst.layout.shape[i]; + params.src_stride[i] = src.layout.stride[i]; + params.dst_stride[i] = dst.layout.stride[i]; + params.offsets[i * 2] = offsets[i * 2]; + params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; + } + + void (*fwd_kern)(const size_t, const size_t, const T* const, T* const, + ShapeParams, const float_t); + switch (mode) { + case param_enumv::Padding::PaddingMode::CONSTANT: + fwd_kern = paddingConst_kernel; + break; + case param_enumv::Padding::PaddingMode::REPLICATE: + fwd_kern = paddingReplicate_kernel; + break; + case param_enumv::Padding::PaddingMode::REFLECT: + fwd_kern = paddingReflect_kernel; + break; + default: + megdnn_assert(false, "invalid padding mode"); + } + + size_t total_nr = dst.layout.total_nr_elems(); + + uint32_t nr_threads = query_blocksize_for_kernel(fwd_kern); + dim3 threads(nr_threads); + dim3 blocks(DIVUP(total_nr, nr_threads)); + fwd_kern<<>>(src.layout.ndim, total_nr, + src.ptr(), dst.ptr(), params, + padding_val); + after_kernel_launch(); +} + +template +void padding_backward_proxy(const TensorND& src, const TensorND& dst, + size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, + cudaStream_t stream) { + ShapeParams params; + + for (size_t i = 0; i < src.layout.ndim; ++i) { + params.src_shape[i] = src.layout.shape[i]; + params.dst_shape[i] = dst.layout.shape[i]; + params.src_stride[i] = src.layout.stride[i]; + params.dst_stride[i] = dst.layout.stride[i]; + params.offsets[i * 2] = offsets[i * 2]; + params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; + } + + cudaMemset(dst.raw_ptr, 0, dst.layout.access_bytes()); + + void (*bwd_kern)(const size_t, const size_t, const T* const, T* const, + ShapeParams); + + switch (mode) { + case param_enumv::Padding::PaddingMode::CONSTANT: + bwd_kern = paddingConstBackward_kernel; + break; + case param_enumv::Padding::PaddingMode::REPLICATE: + bwd_kern = paddingReplicateBackward_kernel; + break; + case 
param_enumv::Padding::PaddingMode::REFLECT: + bwd_kern = paddingReflectBackward_kernel; + break; + default: + megdnn_assert(false, "invalid padding mode"); + } + size_t total_nr = src.layout.total_nr_elems(); + uint32_t nr_threads = query_blocksize_for_kernel(bwd_kern); + dim3 threads(nr_threads); + dim3 blocks(DIVUP(total_nr, nr_threads)); + bwd_kern<<>>( + src.layout.ndim, total_nr, src.ptr(), dst.ptr(), params); + after_kernel_launch(); +} + +#define INST(T) \ + template void padding_forward_proxy( \ + const TensorND& src, const TensorND& dst, \ + size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \ + const float_t padding_val, cudaStream_t stream); +#define cb(DType) INST(typename DTypeTrait::ctype) +MEGDNN_FOREACH_COMPUTING_DTYPE(cb) +#undef cb +#undef INST + +#define INST(T) \ + template void padding_backward_proxy( \ + const TensorND& src, const TensorND& dst, \ + size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \ + cudaStream_t stream); +#define cb(DType) INST(typename DTypeTrait::ctype) +MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) +#undef cb +#undef INST + +} // namespace padding +} // namespace cuda +} // namespace megdnn \ No newline at end of file diff --git a/dnn/src/cuda/padding/padding.cuh b/dnn/src/cuda/padding/padding.cuh new file mode 100644 index 00000000..17629d49 --- /dev/null +++ b/dnn/src/cuda/padding/padding.cuh @@ -0,0 +1,36 @@ +/** + * \file dnn/src/cuda/padding/padding.cuh + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
+ */ +#pragma once +#include "cuda_runtime.h" +#include "megdnn/basic_types.h" +#include "src/common/opr_param_defs_enumv.cuh" +#include "src/cuda/utils.cuh" + +namespace megdnn { +namespace cuda { +namespace padding { + +template +void padding_forward_proxy(const TensorND& src, const TensorND& dst, + size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, + const float_t padding_val, cudaStream_t stream); + +template +void padding_backward_proxy(const TensorND& src, const TensorND& dst, + size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, + cudaStream_t stream); + +} // namespace padding +} // namespace cuda +} // namespace megdnn + +// vim: syntax=cuda.doxygen \ No newline at end of file diff --git a/dnn/src/cuda/utils.cuh b/dnn/src/cuda/utils.cuh index 5e624409..e3674ac0 100644 --- a/dnn/src/cuda/utils.cuh +++ b/dnn/src/cuda/utils.cuh @@ -78,7 +78,7 @@ #if MEGDNN_THREADS_512 #define NR_THREADS 512 #define NR_THREADS_X 32 -#define NR_THREADS_Y 16 +#define NR_THREADS_Y 16 #else #define NR_THREADS 1024 #define NR_THREADS_X 32 diff --git a/dnn/src/naive/argmxx/opr_impl.cpp b/dnn/src/naive/argmxx/opr_impl.cpp index 7d003333..519d71fc 100644 --- a/dnn/src/naive/argmxx/opr_impl.cpp +++ b/dnn/src/naive/argmxx/opr_impl.cpp @@ -16,7 +16,7 @@ #include -namespace megdnn { +namespace { using namespace megdnn; diff --git a/dnn/src/naive/handle.cpp b/dnn/src/naive/handle.cpp index 6c76dc33..b492fd93 100644 --- a/dnn/src/naive/handle.cpp +++ b/dnn/src/naive/handle.cpp @@ -14,6 +14,7 @@ #include "src/common/handle_impl.h" +#include "src/naive/padding/opr_impl.h" #include "src/naive/adaptive_pooling/opr_impl.h" #include "src/naive/add_update/opr_impl.h" #include "src/naive/argmxx/opr_impl.h" diff --git a/dnn/src/naive/padding/opr_impl.cpp b/dnn/src/naive/padding/opr_impl.cpp new file mode 100644 index 00000000..e3bfa4aa --- /dev/null +++ b/dnn/src/naive/padding/opr_impl.cpp @@ -0,0 +1,281 @@ +/** + * \file dnn/src/naive/padding/opr_impl.cpp + * MegEngine is Licensed under the Apache 
License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + */ +#include "src/naive/padding/opr_impl.h" +#include +#include +#include "src/common/utils.h" +#include "src/naive/handle.h" +namespace megdnn { +namespace naive { + +struct ShapeParams { + size_t src_shape[MEGDNN_MAX_NDIM]; + size_t dst_shape[MEGDNN_MAX_NDIM]; + ptrdiff_t src_stride[MEGDNN_MAX_NDIM]; + ptrdiff_t dst_stride[MEGDNN_MAX_NDIM]; + size_t offsets[MEGDNN_MAX_NDIM * 2]; +}; + +template +void exec_const_internal(const size_t ndim, const size_t total_out_nr, + const T* const src, T* const dst, ShapeParams params, + const T padding_val) MEGDNN_NOEXCEPT { + rep(out_index, total_out_nr) { + bool in_src_valid_area = true; + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = out_index_tmp / params.dst_stride[dim]; + out_index_tmp -= dim_index * params.dst_stride[dim]; + in_src_valid_area &= (dim_index >= params.offsets[dim * 2] && + dim_index < params.offsets[dim * 2] + + params.src_shape[dim]); + in_index += (dim_index - params.offsets[dim * 2]) * + params.src_stride[dim]; + } + + if (in_src_valid_area) { + dst[out_index] = src[in_index]; + } else { + dst[out_index] = padding_val; + } + } +} + +template +void exec_replicate_internal(const size_t ndim, const size_t total_out_nr, + const T* const src, T* const dst, + ShapeParams params) MEGDNN_NOEXCEPT { + rep(out_index, total_out_nr) { + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = out_index_tmp / params.dst_stride[dim]; + out_index_tmp -= dim_index * params.dst_stride[dim]; + dim_index = (size_t)std::min( + (long 
long)params.src_shape[dim] - 1, + std::max((long long)dim_index - + (long long)params.offsets[dim * 2], + (long long)0)); + in_index += dim_index * params.src_stride[dim]; + } + dst[out_index] = src[in_index]; + } +} + +template +void exec_reflect_internal(const size_t ndim, const size_t total_out_nr, + const T* const src, T* const dst, + ShapeParams params) MEGDNN_NOEXCEPT { + rep(out_index, total_out_nr) { + size_t in_index = 0; + size_t out_index_tmp = out_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + long long dim_index = out_index_tmp / params.dst_stride[dim]; + out_index_tmp -= dim_index * params.dst_stride[dim]; + dim_index -= (long long)params.offsets[dim * 2]; + dim_index = std::max(dim_index, -dim_index); + dim_index = + std::min(dim_index, 2 * (long long)params.src_shape[dim] - + dim_index - 2); + in_index += size_t(dim_index) * (size_t)params.src_stride[dim]; + } + dst[out_index] = src[in_index]; + } +} + +template +void backward_exec_const_internal(const size_t ndim, const size_t total_in_nr, + const T* const src, T* const dst, + ShapeParams params) MEGDNN_NOEXCEPT { + rep(in_index, total_in_nr) { + bool in_dst_valid_area = true; + size_t out_index = 0; + size_t in_index_tmp = in_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + size_t dim_index = in_index_tmp / params.src_stride[dim]; + in_index_tmp -= dim_index * params.src_stride[dim]; + in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] && + dim_index < params.offsets[dim * 2] + + params.dst_shape[dim]); + out_index += (dim_index - params.offsets[dim * 2]) * + params.dst_stride[dim]; + } + if (in_dst_valid_area) { + dst[out_index] = src[in_index]; + } + } +} + +template +void backward_exec_replicate_internal(const size_t ndim, + const size_t total_in_nr, + const T* const src, T* const dst, + ShapeParams params) MEGDNN_NOEXCEPT { + rep(in_index, total_in_nr) { + size_t out_index = 0; + size_t in_index_tmp = in_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + 
size_t dim_index = in_index_tmp / params.src_stride[dim]; + in_index_tmp -= dim_index * params.src_stride[dim]; + dim_index = (size_t)std::min( + (long long)params.dst_shape[dim] - 1, + std::max((long long)dim_index - + (long long)params.offsets[dim * 2], + (long long)0)); + out_index += dim_index * params.dst_stride[dim]; + } + dst[out_index] += src[in_index]; + } +} + +template +void backward_exec_reflect_internal(const size_t ndim, const size_t total_in_nr, + const T* const src, T* const dst, + ShapeParams params) MEGDNN_NOEXCEPT { + rep(in_index, total_in_nr) { + size_t out_index = 0; + size_t in_index_tmp = in_index; + for (size_t dim = 0; dim <= ndim - 1; ++dim) { + long long dim_index = in_index_tmp / params.src_stride[dim]; + in_index_tmp -= dim_index * params.src_stride[dim]; + dim_index -= (long long)params.offsets[dim * 2]; + dim_index = std::max(dim_index, -dim_index); + dim_index = + std::min(dim_index, 2 * (long long)params.dst_shape[dim] - + dim_index - 2); + out_index += size_t(dim_index) * (size_t)params.dst_stride[dim]; + } + dst[out_index] += src[in_index]; + } +} + +void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { + forward_check_exec(src.layout, dst.layout); + SmallVector offsets(get_offsets()); + ShapeParams params; + for (size_t i = 0; i < src.layout.ndim; ++i) { + params.src_shape[i] = src.layout.shape[i]; + params.dst_shape[i] = dst.layout.shape[i]; + params.src_stride[i] = src.layout.stride[i]; + params.dst_stride[i] = dst.layout.stride[i]; + params.offsets[i * 2] = offsets[i * 2]; + params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; + } + + size_t n = dst.layout.total_nr_elems(); + switch (param().padding_mode) { + case param::Padding::PaddingMode::CONSTANT: +#define cb(DType) \ + if (src.layout.dtype == DType()) { \ + using T = typename DTypeTrait::ctype; \ + MEGDNN_DISPATCH_CPU_KERN_OPR(exec_const_internal( \ + src.layout.ndim, n, src.ptr(), dst.ptr(), params, \ + T(param().padding_val))); \ + return; \ + } + 
MEGDNN_FOREACH_COMPUTING_DTYPE(cb) +#undef cb + break; + case param::Padding::PaddingMode::REPLICATE: +#define cb(DType) \ + if (src.layout.dtype == DType()) { \ + using T = typename DTypeTrait::ctype; \ + MEGDNN_DISPATCH_CPU_KERN_OPR(exec_replicate_internal( \ + src.layout.ndim, n, src.ptr(), dst.ptr(), params)); \ + return; \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE(cb) +#undef cb + break; + case param::Padding::PaddingMode::REFLECT: +#define cb(DType) \ + if (src.layout.dtype == DType()) { \ + using T = typename DTypeTrait::ctype; \ + MEGDNN_DISPATCH_CPU_KERN_OPR(exec_reflect_internal( \ + src.layout.ndim, n, src.ptr(), dst.ptr(), params)); \ + return; \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE(cb) +#undef cb + break; + default: + megdnn_assert(false, "unsupported padding mode!"); + } +} + +void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { + backward_check_exec(src.layout, dst.layout); + SmallVector offsets(get_offsets()); + ShapeParams params; + for (size_t i = 0; i < src.layout.ndim; ++i) { + params.src_shape[i] = src.layout.shape[i]; + params.dst_shape[i] = dst.layout.shape[i]; + params.src_stride[i] = src.layout.stride[i]; + params.dst_stride[i] = dst.layout.stride[i]; + params.offsets[i * 2] = offsets[i * 2]; + params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; + } + size_t n = src.layout.total_nr_elems(); + + memset(dst.raw_ptr, 0, dst.layout.access_bytes()); + + switch (param().padding_mode) { + case param::Padding::PaddingMode::CONSTANT: +#define cb(DType) \ + if (src.layout.dtype == DType()) { \ + using T = typename DTypeTrait::ctype; \ + MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_const_internal( \ + src.layout.ndim, n, src.ptr(), dst.ptr(), params)); \ + return; \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) +#undef cb + break; + case param::Padding::PaddingMode::REPLICATE: +#define cb(DType) \ + if (src.layout.dtype == DType()) { \ + using T = typename DTypeTrait::ctype; \ + 
MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_replicate_internal( \ + src.layout.ndim, n, src.ptr(), dst.ptr(), params)); \ + return; \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) +#undef cb + break; + case param::Padding::PaddingMode::REFLECT: +#define cb(DType) \ + if (src.layout.dtype == DType()) { \ + using T = typename DTypeTrait::ctype; \ + MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_reflect_internal( \ + src.layout.ndim, n, src.ptr(), dst.ptr(), params)); \ + return; \ + } + MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) +#undef cb + break; + default: + megdnn_assert(false, "unsupported padding mode!"); + } +} + +size_t PaddingForwardImpl::get_workspace_in_bytes( + const TensorLayout& /* src */, const TensorLayout& /* dst */) { + return 0; +} + +size_t PaddingBackwardImpl::get_workspace_in_bytes( + const TensorLayout& /* src */, const TensorLayout& /* dst */) { + return 0; +} +} // namespace naive +} // namespace megdnn \ No newline at end of file diff --git a/dnn/src/naive/padding/opr_impl.h b/dnn/src/naive/padding/opr_impl.h new file mode 100644 index 00000000..f3659c3f --- /dev/null +++ b/dnn/src/naive/padding/opr_impl.h @@ -0,0 +1,30 @@ +/** + * \file dnn/src/naive/padding/opr_impl.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ +#pragma once +#include "megdnn/oprs.h" + +namespace megdnn{ +namespace naive{ + class PaddingForwardImpl: public PaddingForward{ + using PaddingForward::PaddingForward; + public: + void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; + size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override; + }; + + class PaddingBackwardImpl: public PaddingBackward{ + using PaddingBackward::PaddingBackward; + public: + void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; + size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override; + }; +} +} \ No newline at end of file diff --git a/dnn/test/common/padding.h b/dnn/test/common/padding.h new file mode 100644 index 00000000..88444fd4 --- /dev/null +++ b/dnn/test/common/padding.h @@ -0,0 +1,441 @@ +/** + * \file dnn/test/common/padding.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
+ */ +#pragma once +#include +#include +#include "megdnn/basic_types.h" +#include "megdnn/opr_param_defs.h" + +namespace megdnn { +namespace test { +namespace padding { + +struct TestArg { + param::Padding param; + TensorShape src; + TensorShape dst; + TestArg(param::Padding _param, TensorShape _src, TensorShape _dst) + : param(_param), src(_src), dst(_dst) {} +}; + +inline std::vector get_args() { + size_t src_shape_dim0 = 5; + size_t src_shape_dim1 = 5; + size_t src_shape_dim2 = 5; + size_t src_shape_dim3 = 5; + size_t src_shape_dim4 = 5; + size_t src_shape_dim5 = 5; + size_t src_shape_dim6 = 5; + + size_t dst_shape_dim0 = 8; + size_t dst_shape_dim1 = 8; + size_t dst_shape_dim2 = 8; + size_t dst_shape_dim3 = 8; + size_t dst_shape_dim4 = 8; + size_t dst_shape_dim5 = 8; + size_t dst_shape_dim6 = 8; + + std::vector args; + + param::Padding cur_param; + + cur_param.front_offset_dim0 = 0; + cur_param.front_offset_dim1 = 0; + cur_param.front_offset_dim2 = 0; + cur_param.front_offset_dim3 = 0; + cur_param.front_offset_dim4 = 0; + cur_param.front_offset_dim5 = 0; + cur_param.front_offset_dim6 = 0; + cur_param.back_offset_dim0 = 0; + cur_param.back_offset_dim1 = 0; + cur_param.back_offset_dim2 = 0; + cur_param.back_offset_dim3 = 0; + cur_param.back_offset_dim4 = 0; + cur_param.back_offset_dim5 = 0; + cur_param.back_offset_dim6 = 0; + + cur_param.padding_val = 2; + + cur_param.front_offset_dim0 = 1; + cur_param.back_offset_dim0 = 2; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0}, + TensorShape{dst_shape_dim0}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back(cur_param, TensorShape{src_shape_dim0}, + TensorShape{dst_shape_dim0}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0}, + TensorShape{dst_shape_dim0}); + + cur_param.front_offset_dim1 = 2; + cur_param.back_offset_dim1 = 1; 
+ + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, + TensorShape{dst_shape_dim0, dst_shape_dim1}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, + TensorShape{dst_shape_dim0, dst_shape_dim1}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, + TensorShape{dst_shape_dim0, dst_shape_dim1}); + + cur_param.front_offset_dim2 = 1; + cur_param.back_offset_dim2 = 2; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); + + cur_param.front_offset_dim3 = 0; + cur_param.back_offset_dim3 = 3; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back(cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, + src_shape_dim2, src_shape_dim3}, + TensorShape{dst_shape_dim0, dst_shape_dim1, + dst_shape_dim2, dst_shape_dim3}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back(cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, + src_shape_dim2, src_shape_dim3}, + TensorShape{dst_shape_dim0, dst_shape_dim1, + dst_shape_dim2, dst_shape_dim3}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + 
args.emplace_back(cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, + src_shape_dim2, src_shape_dim3}, + TensorShape{dst_shape_dim0, dst_shape_dim1, + dst_shape_dim2, dst_shape_dim3}); + + cur_param.front_offset_dim4 = 3; + cur_param.back_offset_dim4 = 0; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4}); + + cur_param.front_offset_dim5 = 1; + cur_param.back_offset_dim5 = 2; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + 
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); + + cur_param.front_offset_dim6 = 0; + cur_param.front_offset_dim6 = 3; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5, + src_shape_dim6}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, + dst_shape_dim6}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5, + src_shape_dim6}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, + dst_shape_dim6}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5, + src_shape_dim6}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, + dst_shape_dim6}); + + return args; + +} + +inline std::vector get_args_backward() { + size_t src_shape_dim0 = 8; + size_t src_shape_dim1 = 8; + size_t src_shape_dim2 = 8; + size_t src_shape_dim3 = 8; + size_t src_shape_dim4 = 8; + size_t src_shape_dim5 = 8; + size_t src_shape_dim6 = 8; + + size_t dst_shape_dim0 = 5; + size_t dst_shape_dim1 = 5; + size_t dst_shape_dim2 = 5; + size_t dst_shape_dim3 = 5; + size_t dst_shape_dim4 = 5; + size_t dst_shape_dim5 = 5; + size_t dst_shape_dim6 = 5; + + std::vector args; + + param::Padding cur_param; + + cur_param.front_offset_dim0 = 0; + cur_param.front_offset_dim1 = 0; + 
cur_param.front_offset_dim2 = 0; + cur_param.front_offset_dim3 = 0; + cur_param.front_offset_dim4 = 0; + cur_param.front_offset_dim5 = 0; + cur_param.front_offset_dim6 = 0; + cur_param.back_offset_dim0 = 0; + cur_param.back_offset_dim1 = 0; + cur_param.back_offset_dim2 = 0; + cur_param.back_offset_dim3 = 0; + cur_param.back_offset_dim4 = 0; + cur_param.back_offset_dim5 = 0; + cur_param.back_offset_dim6 = 0; + + cur_param.padding_val = 2; + + cur_param.front_offset_dim0 = 1; + cur_param.back_offset_dim0 = 2; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0}, + TensorShape{dst_shape_dim0}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back(cur_param, TensorShape{src_shape_dim0}, + TensorShape{dst_shape_dim0}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0}, + TensorShape{dst_shape_dim0}); + + + cur_param.front_offset_dim1 = 2; + cur_param.back_offset_dim1 = 1; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, + TensorShape{dst_shape_dim0, dst_shape_dim1}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, + TensorShape{dst_shape_dim0, dst_shape_dim1}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, + TensorShape{dst_shape_dim0, dst_shape_dim1}); + + cur_param.front_offset_dim2 = 1; + cur_param.back_offset_dim2 = 2; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); + + cur_param.padding_mode = 
param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); + + cur_param.front_offset_dim3 = 0; + cur_param.back_offset_dim3 = 3; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back(cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, + src_shape_dim2, src_shape_dim3}, + TensorShape{dst_shape_dim0, dst_shape_dim1, + dst_shape_dim2, dst_shape_dim3}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back(cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, + src_shape_dim2, src_shape_dim3}, + TensorShape{dst_shape_dim0, dst_shape_dim1, + dst_shape_dim2, dst_shape_dim3}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back(cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, + src_shape_dim2, src_shape_dim3}, + TensorShape{dst_shape_dim0, dst_shape_dim1, + dst_shape_dim2, dst_shape_dim3}); + + cur_param.front_offset_dim4 = 3; + cur_param.back_offset_dim4 =0; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4}); + + cur_param.padding_mode = 
param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4}); + + cur_param.front_offset_dim5 = 1; + cur_param.back_offset_dim5 = 2; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); + + cur_param.front_offset_dim6 = 0; + cur_param.back_offset_dim6 = 3; + + cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5, + src_shape_dim6}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, + dst_shape_dim6}); + + cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5, + src_shape_dim6}, + 
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, + dst_shape_dim6}); + + cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; + args.emplace_back( + cur_param, + TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, + src_shape_dim3, src_shape_dim4, src_shape_dim5, + src_shape_dim6}, + TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, + dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, + dst_shape_dim6}); + + return args; + +} + +} // namespace padding +} // namespace test +} // namespace megdnn \ No newline at end of file diff --git a/dnn/test/cuda/padding.cpp b/dnn/test/cuda/padding.cpp new file mode 100644 index 00000000..9aaa5fdc --- /dev/null +++ b/dnn/test/cuda/padding.cpp @@ -0,0 +1,219 @@ +/** + * \file dnn/test/cuda/padding.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
+ */ + +#include "test/cuda/benchmark.h" +#include "test/common/padding.h" +#include "megdnn/oprs.h" +#include "test/common/checker.h" +#include "test/common/rng.h" +#include "test/cuda/fixture.h" + +using namespace megdnn; +using namespace test; + +TEST_F(CUDA, PADDING) { + std::vector args = padding::get_args(); + Checker checker(handle_cuda()); + UniformIntNonZeroRNG rng(1, 9); + for (auto&& arg : args) { + checker.set_param(arg.param) + .set_rng(0, &rng) + .set_dtype(0, dtype::Int8()) + .set_dtype(1, dtype::Int8()) + .execs({arg.src, arg.dst}); + } +} + +TEST_F(CUDA, PADDING_BACKWARD) { + std::vector args = padding::get_args_backward(); + Checker checker(handle_cuda()); + UniformFloatRNG rng(1, 9); + for (auto&& arg : args) { + checker.set_param(arg.param) + .set_rng(0, &rng) + .set_dtype(0, dtype::Float32()) + .set_dtype(1, dtype::Float32()) + .execs({arg.src, arg.dst}); + } +} + +TEST_F(CUDA, PADDING_REFLECT) { + Checker checker(handle_cuda(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REFLECT; + param.front_offset_dim0 = 2; + param.front_offset_dim1 = 0; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 3; + param.back_offset_dim1 = 0; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({5}, dtype::Int8(), {1, 2, 3, 4, 5}), {}}, + Testcase{{}, + TensorValue({10}, dtype::Int8(), + {3, 2, 1, 2, 3, 4, 5, 4, 3, 2})}); +} + +TEST_F(CUDA, PADDING_REFLECT2) { + Checker checker(handle_cuda(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REFLECT; + param.front_offset_dim0 = 1; + param.front_offset_dim1 = 2; + param.front_offset_dim2 = 0; + 
param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 1; + param.back_offset_dim1 = 2; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({3, 3}, dtype::Int8(), + {3, 5, 1, 3, 6, 1, 4, 7, 9}), + {}}, + Testcase{{}, + TensorValue({5, 7}, dtype::Int8(), + {1, 6, 3, 6, 1, 6, 3, 1, 5, 3, 5, 1, + 5, 3, 1, 6, 3, 6, 1, 6, 3, 9, 7, 4, + 7, 9, 7, 4, 1, 6, 3, 6, 1, 6, 3})}); +} + +TEST_F(CUDA, PADDING_REPLICATE) { + Checker checker(handle_cuda(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REPLICATE; + param.front_offset_dim0 = 1; + param.front_offset_dim1 = 0; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 2; + param.back_offset_dim1 = 0; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({9}, dtype::Int8(), + {1, 2, 3, 4, 5, 6, 7, 8, 9}), + {}}, + Testcase{{}, + TensorValue({12}, dtype::Int8(), + {1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9})}); +} + +TEST_F(CUDA, PADDING_REPLICATE2) { + Checker checker(handle_cuda(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REPLICATE; + param.front_offset_dim0 = 2; + param.front_offset_dim1 = 1; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 0; + param.back_offset_dim1 = 3; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + 
param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({3, 3}, dtype::Int8(), + {1, 2, 3, 4, 5, 6, 7, 8, 9}), + {}}, + Testcase{{}, + TensorValue({5, 7}, dtype::Int8(), + {1, 1, 2, 3, 3, 3, 3, 1, 1, 2, 3, 3, + 3, 3, 1, 1, 2, 3, 3, 3, 3, 4, 4, 5, + 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9})}); +} + +// #if MEGDNN_WITH_BENCHMARK + +TEST_F(CUDA, BENCHMARK_PADDING_CONSTANT) { + using Param = Padding::Param; + + auto run = [&](const TensorShapeArray& shapes, Param param) { + CUBenchmarker benchmarker(handle_cuda()); + benchmarker.set_param(param); + benchmarker.set_dtype(0, dtype::Int8()) + .set_dtype(1, dtype::Int8()); + + for (auto&& shape : shapes) { + double memaccess = + double(TensorLayout(shape, dtype::Int8()) + .span() + .dist_byte()) * + 2e-6; + auto time_ms = benchmarker.execs({shape, {}}); + printf("execute %s, time %.4f ms, %.4f GB/s\n", + shape.to_string().c_str(), time_ms, memaccess / time_ms); + } + }; + + printf("mode -> constant; dtype -> int8\n"); + { + TensorShapeArray shapes = { + {16, 3, 736, 1280}, + }; + Param param; + param.padding_mode = param::Padding::PaddingMode::CONSTANT; + param.front_offset_dim1 = 1; + run(shapes, param); + } + + printf("mode -> replicate; dtype -> int8\n"); + { + TensorShapeArray shapes = { + {16, 3, 736, 1280}, + }; + Param param; + param.padding_mode = param::Padding::PaddingMode::REPLICATE; + param.front_offset_dim1 = 1; + run(shapes, param); + } + printf("mode -> reflect; dtype -> int8\n"); + { + TensorShapeArray shapes = { + {16, 3, 736, 1280}, + }; + Param param; + param.padding_mode = param::Padding::PaddingMode::REFLECT; + param.front_offset_dim1 = 1; + run(shapes, param); + } +} + +// #endif \ No newline at end of file diff --git a/dnn/test/naive/padding.cpp b/dnn/test/naive/padding.cpp new file mode 100644 index 00000000..dbc46804 --- /dev/null +++ b/dnn/test/naive/padding.cpp @@ -0,0 +1,132 @@ +/** + * \file 
dnn/test/naive/padding.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +#include "test/common/padding.h" +#include "megdnn/dtype.h" +#include "megdnn/oprs.h" +#include "test/common/checker.h" +#include "test/naive/fixture.h" + +namespace megdnn{ +namespace test{ + +TEST_F(NAIVE, PADDING) { + std::vector args = padding::get_args(); + Checker checker(handle()); + for(auto&& arg : args){ + checker.set_param(arg.param) + .set_dtype(0, dtype::Float32()) + .set_dtype(1, dtype::Float32()) + .execs({arg.src, arg.dst}); + } +} + +TEST_F(NAIVE, PADDING_CONSTANT) { + Checker checker(handle(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::CONSTANT; + param.front_offset_dim0 = 2; + param.front_offset_dim1 = 1; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 2; + param.back_offset_dim1 = 3; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({1,1}, dtype::Float32(), {1}), {}}, + Testcase{{},TensorValue({5,5}, dtype::Float32(), {10,10,10,10,10,10,10,10,10,10,10,1,10,10,10,10,10,10,10,10,10,10,10,10,10})} + ); +} + +TEST_F(NAIVE, PADDING_REFLECT) { + Checker checker(handle(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REFLECT; + param.front_offset_dim0 = 2; + param.front_offset_dim1 = 0; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 
0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 3; + param.back_offset_dim1 = 0; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({5}, dtype::Float32(), {1,2,3,4,5}), {}}, + Testcase{{},TensorValue({10}, dtype::Float32(), {3,2,1,2,3,4,5,4,3,2})} + ); +} + +TEST_F(NAIVE, PADDING_REPLICATE) { + Checker checker(handle(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REPLICATE; + param.front_offset_dim0 = 1; + param.front_offset_dim1 = 0; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 2; + param.back_offset_dim1 = 0; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({9}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}}, + Testcase{{},TensorValue({12}, dtype::Float32(), {1,1,2,3,4,5,6,7,8,9,9,9})} + ); +} + +TEST_F(NAIVE, PADDING_REPLICATE2) { + Checker checker(handle(), false); + param::Padding param; + param.padding_val = 10; + param.padding_mode = param::Padding::PaddingMode::REPLICATE; + param.front_offset_dim0 = 2; + param.front_offset_dim1 = 1; + param.front_offset_dim2 = 0; + param.front_offset_dim3 = 0; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 0; + param.back_offset_dim1 = 3; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + checker.set_param(param).exect( + Testcase{TensorValue({3,3}, dtype::Float32(), 
{1,2,3,4,5,6,7,8,9}), {}}, + Testcase{{},TensorValue({5,7}, dtype::Float32(), {1,1,2,3,3,3,3,1,1,2,3,3,3,3,1,1,2,3,3,3,3,4,4,5,6,6,6,6,7,7,8,9,9,9,9})} + ); +} + +} +} \ No newline at end of file diff --git a/src/opr/impl/misc.oprdecl b/src/opr/impl/misc.oprdecl index ee3681fd..0e7d5201 100644 --- a/src/opr/impl/misc.oprdecl +++ b/src/opr/impl/misc.oprdecl @@ -67,4 +67,9 @@ decl_opr('NvOf', inputs=['src'], params='NvOf', desc='opr Implements NVIDIA Optical Flow SDK.') +decl_opr('Padding', + inputs=['src'], + params='Padding', + desc='tensor padding opr.') + # vim: ft=python diff --git a/src/opr/impl/misc.sereg.h b/src/opr/impl/misc.sereg.h index 9da06d1b..f85c231a 100644 --- a/src/opr/impl/misc.sereg.h +++ b/src/opr/impl/misc.sereg.h @@ -68,7 +68,7 @@ namespace opr { //! current cumsum version using CumsumV1 = opr::Cumsum; MGB_SEREG_OPR(CumsumV1, 1); - + #if MGB_CUDA MGB_SEREG_OPR(NvOf, 1); #endif diff --git a/src/opr/impl/tensor_manip.cpp b/src/opr/impl/tensor_manip.cpp index 03e453bc..846ce5a9 100644 --- a/src/opr/impl/tensor_manip.cpp +++ b/src/opr/impl/tensor_manip.cpp @@ -1608,4 +1608,30 @@ void RelayoutFormat::init_output_format() { // f}}} // + +/* f{{{ ======================= PaddingForward ======================= */ + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingForward); +MEGDNN_OPR_INIT1(PaddingForward, "padding") + +#if MGB_ENABLE_GRAD +MGB_IMPL_OPR_GRAD(PaddingForward) { + mgb_assert(opr.input().size() == 1); + if (wrt_idx == 0) { + SymbolVar grad = PaddingBackward::make(out_grad[0], opr.input(0), opr.param()); + return grad.node(); + } else + return InvalidGrad::make(opr, wrt_idx); +} +#endif + +// f}}} + +/* f{{{ ======================= PaddingBackward ======================= */ + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingBackward); +MEGDNN_OPR_INIT2(PaddingBackward, "padding_backward", 1, false); + +// f}}} + // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/impl/tensor_manip.sereg.h b/src/opr/impl/tensor_manip.sereg.h index 
db3c8d4f..083effbf 100644 --- a/src/opr/impl/tensor_manip.sereg.h +++ b/src/opr/impl/tensor_manip.sereg.h @@ -24,6 +24,35 @@ MGB_SEREG_MODIFY_SUBTENSOR_OPR(IncrSubtensor); namespace mgb { namespace serialization { + + template<> + struct OprMaker<opr::Padding, 1> { + using Opr = opr::Padding; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph, const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if(inputs.size() == 1) { + return Opr::make(inputs[0], param, config).node()->owner_opr(); + }else{ + return nullptr; + } + } + }; + + template<> + struct OprMaker<opr::PaddingBackward, 2> { + using Opr = opr::PaddingBackward; + using Param = Opr::Param; + static cg::OperatorNodeBase* make(const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph, const OperatorNodeConfig& config) { + MGB_MARK_USED_VAR(graph); + if(inputs.size() == 2) { + return Opr::make(inputs[0], inputs[1], param, config).node()->owner_opr(); + }else{ + return nullptr; + } + } + }; + template<> struct OprMaker: public OprMakerVariadic{}; @@ -185,6 +214,10 @@ namespace opr { using RelayoutFormatV1 = opr::RelayoutFormat; MGB_SEREG_OPR(RelayoutFormatV1, 1); + + MGB_SEREG_OPR(Padding, 1); + + MGB_SEREG_OPR(PaddingBackward, 2); } // namespace opr } // namespace mgb diff --git a/src/opr/include/megbrain/opr/tensor_manip.h b/src/opr/include/megbrain/opr/tensor_manip.h index dd3713e3..fb12276b 100644 --- a/src/opr/include/megbrain/opr/tensor_manip.h +++ b/src/opr/include/megbrain/opr/tensor_manip.h @@ -635,6 +635,26 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat, const OperatorNodeConfig &config = {}); void init_output_format() override final; }; + +/*! 
+ * \brief padding the src tensor to dst tensor + */ +MGB_DEFINE_OPR_CLASS(PaddingForward, intl::MegDNNOprWrapperFwd) // { + public: + PaddingForward(VarNode* src, const Param& param, const OperatorNodeConfig& config); + static SymbolVar make(SymbolVar src, const Param& param = {}, const OperatorNodeConfig &config = {}); +}; +using Padding = PaddingForward; + +/*! + * \brief padding backward + */ +MGB_DEFINE_OPR_CLASS(PaddingBackward, intl::MegDNNOprWrapperBwd) // { + public: + PaddingBackward(VarNode* src, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); + static SymbolVar make(SymbolVar src, SymbolVar in_for_shape, const Param &param = {}, const OperatorNodeConfig &config = {}); +}; + } // opr } // mgb diff --git a/src/opr/test/dnn/padding.cpp b/src/opr/test/dnn/padding.cpp new file mode 100644 index 00000000..ddf8e613 --- /dev/null +++ b/src/opr/test/dnn/padding.cpp @@ -0,0 +1,68 @@ +#include "./legacy_checker.h" +#include "megbrain/comp_node_env.h" + +#include "megbrain/gopt/inference.h" +#include "megbrain/opr/basic_arith.h" +#include "megbrain/opr/dnn/convolution.h" +#include "megbrain/opr/tensor_manip.h" +#include "megbrain/serialization/serializer.h" +#include "megbrain/test/autocheck.h" +#include "megbrain/test/helper.h" +#include "megbrain/test/megdnn_helper.h" +#include "megdnn/dtype.h" +#include "megdnn/oprs/base.h" + +#include + +#include +#include +#include + +using namespace std; +using namespace mgb; + +namespace { + +TEST(TestOprDNN, PaddingForwardSerialization) { + using namespace serialization; + + auto fname = output_file("PaddingForwardTest"); + auto dump = [&]() { + opr::Padding::Param param; + param.padding_mode = megdnn::param::Padding::PaddingMode(0); + param.front_offset_dim0 = 3; + param.front_offset_dim1 = 3; + param.front_offset_dim2 = 3; + param.front_offset_dim3 = 3; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 0; + 
param.back_offset_dim1 = 0; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + param.padding_val = 0; + + auto cn = CompNode::load("xpu"); + auto graph = ComputingGraph::make(); + HostTensorND inp_host{cn, {32, 4, 24, 24}, dtype::Float32()}; + auto inp = opr::ImmutableTensor::make(*graph, inp_host); + auto opr = opr::PaddingForward::make(inp, param, {}); + auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str())); + auto rst = dumper->dump({opr}); + ASSERT_EQ(rst.outputs.size(), 1u); + }; + + auto load = [&]() { + auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str())); + auto rst = loader->load(); + ASSERT_EQ(rst.output_var_list.size(), 1u); + }; + + dump(); + load(); +} +} // namespace \ No newline at end of file diff --git a/src/opr/test/tensor_manip.cpp b/src/opr/test/tensor_manip.cpp index b2dd152c..0acf4264 100644 --- a/src/opr/test/tensor_manip.cpp +++ b/src/opr/test/tensor_manip.cpp @@ -10,6 +10,7 @@ */ #include "megbrain/test/helper.h" +#include "megbrain/test/megdnn_helper.h" #include "megbrain/test/autocheck.h" #include "megbrain/opr/tensor_manip.h" #include "megbrain/opr/tensor_gen.h" @@ -2162,4 +2163,47 @@ TEST(TestParamPack, Split) { test_param_pack_split<3>({{2, 9}, {123}, {5, 3}}); } +TEST(TestTensorManip, Padding_random) { + opr::Padding::Param param; + param.padding_mode = megdnn::param::Padding::PaddingMode(0); + param.front_offset_dim0 = 3; + param.front_offset_dim1 = 3; + param.front_offset_dim2 = 3; + param.front_offset_dim3 = 3; + param.front_offset_dim4 = 0; + param.front_offset_dim5 = 0; + param.front_offset_dim6 = 0; + param.back_offset_dim0 = 0; + param.back_offset_dim1 = 0; + param.back_offset_dim2 = 0; + param.back_offset_dim3 = 0; + param.back_offset_dim4 = 0; + param.back_offset_dim5 = 0; + param.back_offset_dim6 = 0; + param.padding_val = 0; + + using Checker = AutoOprChecker<1,1>; + + auto make_graph = [&](const 
Checker::SymInpArray& inputs) -> Checker::SymOutArray { + return {opr::Padding::make(inputs[0], param)}; + }; + auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) { + auto opr = megdnn_naive_handle()->create_operator<megdnn::Padding>(); + TensorShape out_shp{inp[0]->as_megdnn().layout.shape[0]+param.front_offset_dim0+param.back_offset_dim0, + inp[0]->as_megdnn().layout.shape[1]+param.front_offset_dim1+param.back_offset_dim1, + inp[0]->as_megdnn().layout.shape[2]+param.front_offset_dim2+param.back_offset_dim2, + inp[0]->as_megdnn().layout.shape[3]+param.front_offset_dim3+param.back_offset_dim3}; + opr->param() = param; + dest[0].resize(out_shp); + opr->exec(inp[0]->as_megdnn(), dest[0].as_megdnn(), {}); + }; + + Checker::RunOptions opt; + opt.numdiff_max_err = 2e-3; + Checker(make_graph, fwd, CompNode::load("xpu0")) + .run({TensorShape{5, 5, 5, 5}}, opt) + .run({TensorShape{4, 3, 4, 5}}, opt) + .run({TensorShape{5, 4, 4, 5}}, opt); +} + // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/serialization/impl/schema.fbs b/src/serialization/impl/schema.fbs index 22e43d92..1923337c 100644 --- a/src/serialization/impl/schema.fbs +++ b/src/serialization/impl/schema.fbs @@ -113,6 +113,7 @@ union OperatorParam { param.PermutationRNG = 79, param.BetaRNG = 80, param.SlidingWindowTranspose = 81, + param.Padding = 82, } table Operator {