From 43098fb8f1495409cbafe90e2e64c41654c26ab8 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 29 Jun 2021 14:19:59 +0800
Subject: [PATCH] feat(mge): add SlidingWindowTranspose opr

BREAKING CHANGE:

GitOrigin-RevId: 54d726d2fee21f55811a482594ab81fc9fed1e3e
---
 dnn/include/megdnn/oprs/nn.h                        |  49 ++++++-
 dnn/scripts/opr_param_defs.py                       |   4 +
 dnn/src/common/handle_impl.h                        |   2 +
 dnn/src/common/opr_trait.h                          |   2 +
 dnn/src/common/sliding_window_transpose.cpp         |  75 +++++++++++
 dnn/src/cuda/handle_create.cpp                      |   1 +
 dnn/src/cuda/sliding_window_transpose/opr_impl.cpp  |  76 +++++++++++
 dnn/src/cuda/sliding_window_transpose/opr_impl.h    |  45 +++++++
 .../sliding_window_transpose.cu                     | 133 +++++++++++++++++++
 .../sliding_window_transpose.cuh                    |  33 +++++
 dnn/src/naive/handle.cpp                            |   1 +
 .../naive/sliding_window_transpose/opr_impl.cpp     | 141 +++++++++++++++++++++
 dnn/src/naive/sliding_window_transpose/opr_impl.h   |  51 ++++++++
 dnn/test/common/sliding_window_transpose.h          |  86 +++++++++++++
 dnn/test/cuda/sliding_window_transpose.cpp          |  96 ++++++++++++++
 dnn/test/naive/sliding_window_transpose.cpp         |  61 +++++++++
 imperative/python/megengine/functional/nn.py        |  55 ++++++++
 imperative/python/megengine/module/__init__.py      |   2 +-
 .../python/megengine/module/sliding_window.py       |  86 ++++++++++++-
 .../python/test/unit/functional/test_functional.py  |  36 ++++++
 imperative/src/impl/ops/specializations.cpp         |  17 ++-
 src/core/include/megbrain/ir/ops.td                 |   2 +
 src/opr/impl/dnn/dnn.sereg.h                        |   5 +
 src/opr/impl/dnn/sliding_window_transpose.cpp       |  36 ++++++
 .../megbrain/opr/dnn/sliding_window_transpose.h     |  49 +++++++
 src/opr/test/dnn/sliding_window_transpose.cpp       |  64 ++++++++++
 src/serialization/impl/schema.fbs                   |   1 +
 27 files changed, 1205 insertions(+), 4 deletions(-)
 create mode 100644 dnn/src/common/sliding_window_transpose.cpp
 create mode 100644 dnn/src/cuda/sliding_window_transpose/opr_impl.cpp
 create mode 100644 dnn/src/cuda/sliding_window_transpose/opr_impl.h
 create mode 100644 dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cu
 create mode 100644 dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cuh
 create mode 100644 dnn/src/naive/sliding_window_transpose/opr_impl.cpp
 create mode 100644 dnn/src/naive/sliding_window_transpose/opr_impl.h
 create mode 100644 dnn/test/common/sliding_window_transpose.h
 create mode 100644 dnn/test/cuda/sliding_window_transpose.cpp
 create mode 100644 dnn/test/naive/sliding_window_transpose.cpp
 create mode 100644 src/opr/impl/dnn/sliding_window_transpose.cpp
 create mode 100644 src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h
 create mode 100644 src/opr/test/dnn/sliding_window_transpose.cpp

diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h
index c4126ff1..56b10206 100644
--- a/dnn/include/megdnn/oprs/nn.h
+++ b/dnn/include/megdnn/oprs/nn.h
@@ -666,7 +666,7 @@ public:
      * http://deeplearning.net/software/theano/library/tensor/nnet/neighbours.html
      *
      * \f$ dst_{n, c, oh, ow, wh, ww} = src_{n, c, ih+wh, iw+fw}\f$,
-     * where \f$ ih=-pad_h+oh*stride_h, iw=-pad_w+ow*stride_w\f$.
+     * where \f$ ih=-pad_h+oh*stride_h+(wh-1)*(dilation_h-1), iw=-pad_w+ow*stride_w+(ww-1)*(dilation_w-1)\f$.
      */
     virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
                       _megdnn_workspace workspace) = 0;
@@ -698,6 +698,53 @@ protected:
                     size_t workspace_in_bytes);
 };
 
+class SlidingWindowTransposeBase : public OperatorBase {
+    DEF_OPR_IMPL_CTOR(SlidingWindowTransposeBase, OperatorBase);
+    DEF_OPR_PARAM(SlidingWindowTranspose);
+
+protected:
+    void deduce_layout_fwd(const TensorLayout& src, TensorLayout& dst);
+    void check_layout_fwd(const TensorLayout& src, const TensorLayout& dst);
+};
+
+class SlidingWindowTransposeForward : public SlidingWindowTransposeBase {
+    DEF_OPR_IMPL(SlidingWindowTransposeForward, SlidingWindowTransposeBase, 1, 1);
+
+public:
+    /**
+     * \param[in] src (N, C, IH, IW, window_h, window_w)
+     * \param[out] dst (N, C, OH, OW)
+     */
+    virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
+                      _megdnn_workspace workspace) = 0;
+    virtual size_t get_workspace_in_bytes(const TensorLayout& src,
+                                          const TensorLayout& dst) = 0;
+    void deduce_layout(const TensorLayout& src, TensorLayout& dst);
+
+protected:
+    void check_exec(const TensorLayout& src, const TensorLayout& dst,
+                    size_t workspace_in_bytes);
+};
+using SlidingWindowTranspose = SlidingWindowTransposeForward;
+
+class SlidingWindowTransposeBackward : public SlidingWindowTransposeBase {
+    DEF_OPR_IMPL(SlidingWindowTransposeBackward, SlidingWindowTransposeBase, 1, 1);
+
+public:
+    /**
+     * \param[in] diff the backpropagated gradient wrt. dst
+     * \param[out] grad the backpropagated gradient wrt. src
+     */
+    virtual void exec(_megdnn_tensor_in diff, _megdnn_tensor_out grad,
+                      _megdnn_workspace workspace) = 0;
+    virtual size_t get_workspace_in_bytes(const TensorLayout& diff,
+                                          const TensorLayout& grad) = 0;
+
+protected:
+    void check_exec(const TensorLayout& diff, const TensorLayout& grad,
+                    size_t workspace_in_bytes);
+};
+
 /**
  * \brief base class for Pooling
  */
diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py
index 3a81e0ca..cb9ec7d5 100755
--- a/dnn/scripts/opr_param_defs.py
+++ b/dnn/scripts/opr_param_defs.py
@@ -224,6 +224,10 @@ pdef('Axis').add_fields('int32', 'axis', 0)
  add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1,
             'dilate_h', 1, 'dilate_w', 1, 'window_h', 3, 'window_w', 3))
 
+(pdef('SlidingWindowTranspose').
+ add_fields('uint32', 'out_h', 0, 'out_w', 0, 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1,
+            'dilate_h', 1, 'dilate_w', 1, 'window_h', 3, 'window_w', 3))
+
 (pdef('Pooling', version=0, is_legacy=True).
     add_enum(
         'Mode',
diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h
index e2cc6121..34cf79c0 100644
--- a/dnn/src/common/handle_impl.h
+++ b/dnn/src/common/handle_impl.h
@@ -104,6 +104,8 @@ private:
     cb(ConvBiasForward) \
     cb(Images2NeibsForward) \
     cb(Images2NeibsBackward) \
+    cb(SlidingWindowTransposeForward) \
+    cb(SlidingWindowTransposeBackward) \
     cb(ElemwiseForward) \
     cb(ElemwiseMultiType) \
     cb(AddUpdateForward) \
diff --git a/dnn/src/common/opr_trait.h b/dnn/src/common/opr_trait.h
index cb181582..95201062 100644
--- a/dnn/src/common/opr_trait.h
+++ b/dnn/src/common/opr_trait.h
@@ -39,6 +39,8 @@ DEF(SeparableConvForward, 4, true, true);
 DEF(SeparableFilterForward, 4, true, true);
 DEF(Images2NeibsForward, 2, true, true);
 DEF(Images2NeibsBackward, 2, true, false);
+DEF(SlidingWindowTransposeForward, 2, true, true);
+DEF(SlidingWindowTransposeBackward, 2, true, false);
 DEF(PoolingForward, 2, true, true);
 DEF(PoolingBackward, 4, true, false);
 DEF(AdaptivePoolingForward, 2, true, false);
diff --git a/dnn/src/common/sliding_window_transpose.cpp b/dnn/src/common/sliding_window_transpose.cpp
new file mode 100644
index 00000000..6801df09
--- /dev/null
+++ b/dnn/src/common/sliding_window_transpose.cpp
@@ -0,0 +1,75 @@
+/**
+ * \file dnn/src/common/sliding_window_transpose.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "megdnn/oprs.h"
+
+#include "src/common/utils.h"
+
+namespace megdnn {
+
+void SlidingWindowTransposeBase::deduce_layout_fwd(const TensorLayout &src,
+                                                   TensorLayout &dst)
+{
+    auto errmsg = [&]() {
+        return megdnn_layout_msg(src) + ", " +
+               "out_h=" + std::to_string(param().out_h) + ", " +
+               "out_w=" + std::to_string(param().out_w) + ", " +
+               "pad_h=" + std::to_string(param().pad_h) + ", " +
+               "pad_w=" + std::to_string(param().pad_w) + ", " +
+               "stride_h=" + std::to_string(param().stride_h) + ", " +
+               "stride_w=" + std::to_string(param().stride_w) + ", " +
+               "window_h=" + std::to_string(param().window_h) + ", " +
+               "window_w=" + std::to_string(param().window_w);
+    };
+    MEGDNN_MARK_USED_VAR(errmsg);
+    megdnn_assert_contiguous(src);
+    megdnn_assert(src.ndim == 6_z, "%s", errmsg().c_str());
+    size_t n = src[0], ic = src[1];
+    size_t oh = this->param().out_h;
+    size_t ow = this->param().out_w;
+
+    dst = TensorLayout(TensorShape({n, ic, oh, ow}), src.dtype);
+}
+
+void SlidingWindowTransposeBase::check_layout_fwd(const TensorLayout &src,
+                                                  const TensorLayout &dst)
+{
+    TensorLayout dst_expected;
+    deduce_layout_fwd(src, dst_expected);
+    megdnn_assert_eq_layout(dst_expected, dst);
+}
+
+void SlidingWindowTransposeForward::deduce_layout(const TensorLayout &src,
+                                                  TensorLayout &dst)
+{
+    deduce_layout_fwd(src, dst);
+}
+
+void SlidingWindowTransposeForward::check_exec(const TensorLayout &src,
+                                               const TensorLayout &dst,
+                                               size_t workspace_in_bytes)
+{
+    check_layout_fwd(src, dst);
+    auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst);
+    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
+}
+
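+// Note: the backward check below reuses the forward layout relation with the
+// roles swapped: `grad` (the gradient wrt. src) carries the 6-D window layout
+// and `diff` (the gradient wrt. dst) the 4-D output layout.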
+void SlidingWindowTransposeBackward::check_exec(const TensorLayout &diff,
+                                                const TensorLayout &grad,
+                                                size_t workspace_in_bytes)
+{
+    check_layout_fwd(grad, diff);
+    auto required_workspace_in_bytes =
+            get_workspace_in_bytes(grad, diff);
+    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
+}
+
+} // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/handle_create.cpp b/dnn/src/cuda/handle_create.cpp
index a0316ff2..83409e06 100644
--- a/dnn/src/cuda/handle_create.cpp
+++ b/dnn/src/cuda/handle_create.cpp
@@ -71,6 +71,7 @@
 #include "src/cuda/separable_conv/opr_impl.h"
 #include "src/cuda/separable_filter/opr_impl.h"
 #include "src/cuda/sleep/opr_impl.h"
+#include "src/cuda/sliding_window_transpose/opr_impl.h"
 #include "src/cuda/split/opr_impl.h"
 #include "src/cuda/svd/opr_impl.h"
 #include "src/cuda/tensor_remap/opr_impl.h"
diff --git a/dnn/src/cuda/sliding_window_transpose/opr_impl.cpp b/dnn/src/cuda/sliding_window_transpose/opr_impl.cpp
new file mode 100644
index 00000000..e8c5af9c
--- /dev/null
+++ b/dnn/src/cuda/sliding_window_transpose/opr_impl.cpp
@@ -0,0 +1,76 @@
+/**
+ * \file dnn/src/cuda/sliding_window_transpose/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "src/cuda/sliding_window_transpose/opr_impl.h"
+
+#include "src/cuda/utils.h"
+#include "src/cuda/sliding_window_transpose/sliding_window_transpose.cuh"
+
+namespace megdnn {
+namespace cuda {
+
+void SlidingWindowTransposeForwardImpl::exec(_megdnn_tensor_in src,
+                                             _megdnn_tensor_out dst,
+                                             _megdnn_workspace workspace)
+{
+    check_exec(src.layout, dst.layout, workspace.size);
+    auto stream = cuda_stream(handle());
+    int N = src.layout[0], C = src.layout[1],
+        OH = src.layout[2], OW = src.layout[3];
+    int IH = dst.layout[2], IW = dst.layout[3];
+    int ph = param().pad_h, pw = param().pad_w;
+    int sh = param().stride_h, sw = param().stride_w;
+    int dh = param().dilate_h, dw = param().dilate_w;
+    int wh = param().window_h, ww = param().window_w;
+#define cb(DType) \
+    if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
+        using T = typename DTypeTrait<DType>::ctype; \
+        sliding_window_transpose::forward(src.ptr<T>(), dst.ptr<T>(), \
+                                          N, C, IH, IW, OH, OW, \
+                                          ph, pw, sh, sw, dh, dw, wh, ww, \
+                                          stream); \
+        return; \
+    }
+    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
+#undef cb
+    megdnn_assert_internal(0);
+}
+
+void SlidingWindowTransposeBackwardImpl::exec(_megdnn_tensor_in diff,
+                                              _megdnn_tensor_out grad,
+                                              _megdnn_workspace workspace)
+{
+    check_exec(diff.layout, grad.layout, workspace.size);
+    auto stream = cuda_stream(handle());
+    int N = grad.layout[0], C = grad.layout[1],
+        OH = grad.layout[2], OW = grad.layout[3];
+    int IH = diff.layout[2], IW = diff.layout[3];
+    int ph = param().pad_h, pw = param().pad_w;
+    int sh = param().stride_h, sw = param().stride_w;
+    int dh = param().dilate_h, dw = param().dilate_w;
+    int wh = param().window_h, ww = param().window_w;
+#define cb(DType) \
+    if (diff.layout.dtype == DType()) { \
+        using T = typename DTypeTrait<DType>::ctype; \
+        sliding_window_transpose::backward(diff.ptr<T>(), grad.ptr<T>(), \
+                                           N, C, IH, IW, OH, OW, \
+                                           ph, pw, sh, sw, dh, dw, wh, ww, \
+                                           stream); \
+        return; \
+    }
+    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
+#undef cb
+    megdnn_assert_internal(0);
+}
+
+} // namespace cuda
+} // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/sliding_window_transpose/opr_impl.h b/dnn/src/cuda/sliding_window_transpose/opr_impl.h
new file mode 100644
index 00000000..37ffb0aa
--- /dev/null
+++ b/dnn/src/cuda/sliding_window_transpose/opr_impl.h
@@ -0,0 +1,45 @@
+/**
+ * \file dnn/src/cuda/sliding_window_transpose/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#pragma once
+#include "megdnn/oprs.h"
+
+#include
+
+namespace megdnn {
+namespace cuda {
+
+class SlidingWindowTransposeForwardImpl: public SlidingWindowTransposeForward {
+    public:
+        using SlidingWindowTransposeForward::SlidingWindowTransposeForward;
+        void exec(_megdnn_tensor_in src,
+                  _megdnn_tensor_out dst,
+                  _megdnn_workspace workspace) override;
+        size_t get_workspace_in_bytes(const TensorLayout &,
+                                      const TensorLayout &) override {
+            return 0;
+        }
+};
+
+class SlidingWindowTransposeBackwardImpl: public SlidingWindowTransposeBackward {
+    public:
+        using SlidingWindowTransposeBackward::SlidingWindowTransposeBackward;
+        void exec(_megdnn_tensor_in diff,
+                  _megdnn_tensor_out grad,
+                  _megdnn_workspace workspace) override;
+        size_t get_workspace_in_bytes(const TensorLayout &,
+                                      const TensorLayout &) override {
+            return 0;
+        }
+};
+
+} // namespace cuda
+} // namespace megdnn
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cu b/dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cu
new file mode 100644
index 00000000..ed4f4b95
--- /dev/null
+++ b/dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cu
@@ -0,0 +1,133 @@
+/**
+ * \file dnn/src/cuda/sliding_window_transpose/kernel.cu
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */ +#include "src/cuda/sliding_window_transpose/sliding_window_transpose.cuh" + +#include "megdnn/dtype.h" +#include "src/cuda/utils.cuh" +#include + +namespace megdnn { +namespace cuda { +namespace sliding_window_transpose { + + +template +__global__ void forward_kernel(const T *src, T *dst, + int N, int C, int IH, int IW, int OH, int OW, + int ph, int pw, int sh, int sw, int dh, int dw, int WH, int WW) +{ + int id = threadIdx.x + blockIdx.x * blockDim.x; + if (id < N*C*IH*IW) { + int nc = id / (IH*IW); + int ih = id % (IH*IW) / IW; + int iw = id % (IH*IW) % IW; + dst[nc*IH*IW + ih*IW + iw] = 0.0f; + int oh_max = min((ih+ph) / sh, OH-1); + int oh_min = max((ih+ph-(WH-1)*dh+sh-1) / sh, 0); + int ow_max = min((iw+pw) / sw, OW-1); + int ow_min = max((iw+pw-(WW-1)*dw+sw-1) / sw, 0); + for (int oh = oh_min; oh <= oh_max; ++oh) + for (int ow = ow_min; ow <= ow_max; ++ow) + { + if ((ih+ph - sh*oh)%dh==0 && (iw+pw - sw*ow)%dw==0){ + int wh = ih+ph - sh*oh - (ih+ph - sh*oh)/dh * (dh-1); + int ww = iw+pw - sw*ow - (iw+pw - sw*ow)/dw * (dw-1); + dst[nc*IH*IW + ih*IW + iw] += + src[nc*OH*OW*WH*WW + oh*OW*WH*WW + ow*WH*WW + + wh*WW + ww]; + } + } + } +} + +template +void forward(const T* src, T* dst, int N, int C, int IH, int IW, int OH, int OW, + int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww, + cudaStream_t stream) { + int threads = NR_THREADS; + int blocks = DIVUP(N*C*IH*IW, threads); + forward_kernel<<>>(src, dst, + N, C, IH, IW, OH, OW, + ph, pw, sh, sw, dh, dw, wh, ww); + after_kernel_launch(); +} + + +#define grid_y_max 512 + +template +__global__ void backward_kernel(const T *diff, T *grad, + int N, int C, int IH, int IW, int OH, int OW, + int ph, int pw, int sh, int sw, int dh, int dw, int WH, int WW) +{ + int NC = N * C; + int WP = WH*WW; + for (int wp = threadIdx.x; wp < WP; wp += blockDim.x) { + int nc = blockIdx.y; + while (nc < NC) { + int wh = wp / WW; + int ww = wp % WW; + int op = threadIdx.y + blockIdx.x * blockDim.y; + if (op < OH * OW) { + int oh = op / OW; + int ow = op % OW; + int ih = -ph + sh * oh + wh* dh; + int iw = -pw + sw * ow + ww* dw; + int dst_pos = nc * OH * OW * WH * WW + op * WH * WW + wp; + int src_pos = nc * IH * IW + ih * IW + iw; + grad[dst_pos] = (ih >= 0 && ih < IH && iw >= 0 && iw < IW) + ? 
+                grad[dst_pos] = (ih >= 0 && ih < IH && iw >= 0 && iw < IW)
+                                        ? diff[src_pos]
+                                        : 0.0f;
+            }
+            nc += grid_y_max;
+        }
+    }
+}
+
+template <typename T>
+void backward(const T *diff, T *grad,
+              int N, int C, int IH, int IW, int OH, int OW,
+              int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
+              cudaStream_t stream)
+{
+    int spatial_size = OH * OW;
+    int kernel_size = wh * ww;
+    int tx = min(NR_THREADS, kernel_size);
+    int ty = NR_THREADS / tx;
+    megdnn_assert(ty > 0);
+    int bx = DIVUP(spatial_size, ty);
+    int by = N * C;
+
+    backward_kernel<<<dim3(bx, min(by, grid_y_max)), dim3(tx, ty), 0,
+                      stream>>>(diff, grad, N, C, IH, IW, OH, OW, ph, pw, sh, sw, dh, dw,
+                                wh, ww);
+    after_kernel_launch();
+}
+
+#undef grid_y_max
+
+#define INST(T) \
+    template void forward(const T *, T *, int, int, int, int, int, int, \
+                          int, int, int, int, int, int, int, int, \
+                          cudaStream_t); \
+    template void backward(const T *, T *, int, int, int, int, int, int, \
+                           int, int, int, int, int, int, int, int, \
+                           cudaStream_t);
+#define cb(DType) \
+    INST(DTypeTrait<DType>::ctype)
+
+MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
+
+} // namespace sliding_window_transpose
+} // namespace cuda
+} // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cuh b/dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cuh
new file mode 100644
index 00000000..761c398b
--- /dev/null
+++ b/dnn/src/cuda/sliding_window_transpose/sliding_window_transpose.cuh
@@ -0,0 +1,33 @@
+/**
+ * \file dnn/src/cuda/sliding_window_transpose/kernel.cuh
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#pragma once
+#include <cuda_runtime_api.h>
+
+namespace megdnn {
+namespace cuda {
+namespace sliding_window_transpose {
+
+template <typename T>
+void forward(const T *src, T *dst,
+             int N, int C, int IH, int IW, int OH, int OW,
+             int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
+             cudaStream_t stream);
+
+template <typename T>
+void backward(const T *diff, T *grad,
+              int N, int C, int IH, int IW, int OH, int OW,
+              int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
+              cudaStream_t stream);
+
+} // namespace sliding_window_transpose
+} // namespace cuda
+} // namespace megdnn
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/naive/handle.cpp b/dnn/src/naive/handle.cpp
index 29560a64..2d6bd32d 100644
--- a/dnn/src/naive/handle.cpp
+++ b/dnn/src/naive/handle.cpp
@@ -73,6 +73,7 @@
 #include "src/naive/separable_conv/opr_impl.h"
 #include "src/naive/separable_filter/opr_impl.h"
 #include "src/naive/sleep/opr_impl.h"
+#include "src/naive/sliding_window_transpose/opr_impl.h"
 #include "src/naive/split/opr_impl.h"
 #include "src/naive/svd/opr_impl.h"
 #include "src/naive/tensor_remap/opr_impl.h"
diff --git a/dnn/src/naive/sliding_window_transpose/opr_impl.cpp b/dnn/src/naive/sliding_window_transpose/opr_impl.cpp
new file mode 100644
index 00000000..d0c768e7
--- /dev/null
+++ b/dnn/src/naive/sliding_window_transpose/opr_impl.cpp
@@ -0,0 +1,141 @@
+/**
+ * \file dnn/src/naive/sliding_window_transpose/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "src/naive/sliding_window_transpose/opr_impl.h"
+
+#include "src/common/utils.h"
+#include "src/naive/handle.h"
+
+#include <cstring>
+
+namespace megdnn {
+namespace naive {
+
+template <typename T>
+void SlidingWindowTransposeForwardImpl::exec_internal(_megdnn_tensor_in src,
+                                                      _megdnn_tensor_out dst)
+{
+    int N = dst.layout.shape[0], C = dst.layout.shape[1],
+        IH = dst.layout.shape[2], IW = dst.layout.shape[3];
+    auto sptr = src.ptr<T>();
+    auto dptr = dst.ptr<T>();
+    size_t idx = 0;
+    int window_h = static_cast<int>(param().window_h);
+    int window_w = static_cast<int>(param().window_w);
+    int pad_h = static_cast<int>(param().pad_h);
+    int pad_w = static_cast<int>(param().pad_w);
+    int stride_h = static_cast<int>(param().stride_h);
+    int stride_w = static_cast<int>(param().stride_w);
+    int dilate_h = static_cast<int>(param().dilate_h);
+    int dilate_w = static_cast<int>(param().dilate_w);
+    int equ_window_h = dilate_h * (window_h-1) + 1;
+    int equ_window_w = dilate_w * (window_w-1) + 1;
+    memset(dptr, 0, sizeof(T) * N*C*IH*IW);
+    for (int n = 0; n < N; ++n)
+    for (int c = 0; c < C; ++c)
+    {
+        int ih = -pad_h;
+        for (; ih+equ_window_h <= IH+pad_h; ih += stride_h) {
+            int iw = -pad_w;
+            for (; iw+equ_window_w <= IW+pad_w; iw += stride_w) {
+                for (int kh = 0; kh < window_h; ++kh)
+                for (int kw = 0; kw < window_w; ++kw)
+                {
+                    int ih2 = ih+dilate_h*kh, iw2 = iw+dilate_w*kw;
+                    if (ih2 >= 0 && ih2 < IH && iw2 >= 0 && iw2 < IW) {
+                        dptr[n*C*IH*IW + c*IH*IW + ih2*IW + iw2] +=
+                            sptr[idx*window_h*window_w + kh*window_w + kw];
+                    }
+                }
+                ++idx;
+            }
+        }
+    }
+}
+
+void SlidingWindowTransposeForwardImpl::exec(_megdnn_tensor_in src,
+                                             _megdnn_tensor_out dst,
+                                             _megdnn_workspace workspace)
+{
+    check_exec(src.layout, dst.layout, workspace.size);
+#define cb(DType) \
+    if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
+        MEGDNN_DISPATCH_CPU_KERN_OPR( \
+            exec_internal<typename DTypeTrait<DType>::ctype>(src, dst); \
+        ); \
+        return; \
+    }
+    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
+#undef cb
+    megdnn_assert_internal(0);
+}
+
+template <typename T>
+void SlidingWindowTransposeBackwardImpl::exec_internal(_megdnn_tensor_in diff,
+                                                       _megdnn_tensor_out grad)
+{
+    int N = diff.layout.shape[0], C = diff.layout.shape[1],
+        IH = diff.layout.shape[2], IW = diff.layout.shape[3];
+    auto sptr = grad.ptr<T>();
+    auto dptr = diff.ptr<T>();
+    size_t idx = 0;
+    int window_h = static_cast<int>(param().window_h);
+    int window_w = static_cast<int>(param().window_w);
+    int pad_h = static_cast<int>(param().pad_h);
+    int pad_w = static_cast<int>(param().pad_w);
+    int stride_h = static_cast<int>(param().stride_h);
+    int stride_w = static_cast<int>(param().stride_w);
+    int dilate_h = static_cast<int>(param().dilate_h);
+    int dilate_w = static_cast<int>(param().dilate_w);
+    int equ_window_h = dilate_h * (window_h-1) + 1;
+    int equ_window_w = dilate_w * (window_w-1) + 1;
+    for (int n = 0; n < N; ++n)
+    for (int c = 0; c < C; ++c)
+    {
+        int ih = -pad_h;
+        for (; ih+equ_window_h <= IH+pad_h; ih += stride_h) {
+            int iw = -pad_w;
+            for (; iw+equ_window_w <= IW+pad_w; iw += stride_w) {
+                for (int kh = 0; kh < window_h; ++kh)
+                for (int kw = 0; kw < window_w; ++kw)
+                {
+                    int ih2 = ih+dilate_h*kh, iw2 = iw+dilate_w*kw;
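+                    // Backward of the scatter-add is a gather: each window
+                    // element copies the dst-gradient pixel it was summed
+                    // into, or zero if that pixel lies in the padding.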
+                    sptr[idx*window_h*window_w + kh*window_w + kw] =
+                        ih2 >= 0 && ih2 < IH &&
+                        iw2 >= 0 && iw2 < IW ?
+                        dptr[n*C*IH*IW + c*IH*IW + ih2*IW + iw2] : 0.0f;
+                }
+                ++idx;
+            }
+        }
+    }
+}
+
+void SlidingWindowTransposeBackwardImpl::exec(_megdnn_tensor_in diff,
+                                              _megdnn_tensor_out grad,
+                                              _megdnn_workspace workspace)
+{
+    check_exec(diff.layout, grad.layout, workspace.size);
+#define cb(DType) \
+    if (diff.layout.dtype == DType()) { \
+        MEGDNN_DISPATCH_CPU_KERN_OPR( \
+            exec_internal<typename DTypeTrait<DType>::ctype>(diff, grad); \
+        ); \
+        return; \
+    }
+    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
+#undef cb
+    megdnn_assert_internal(0);
+}
+
+} // namespace naive
+} // namespace megdnn
+// vim: syntax=cpp.doxygen
+
diff --git a/dnn/src/naive/sliding_window_transpose/opr_impl.h b/dnn/src/naive/sliding_window_transpose/opr_impl.h
new file mode 100644
index 00000000..558e1692
--- /dev/null
+++ b/dnn/src/naive/sliding_window_transpose/opr_impl.h
@@ -0,0 +1,51 @@
+/**
+ * \file dnn/src/naive/sliding_window_transpose/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#pragma once
+#include "megdnn/oprs.h"
+
+namespace megdnn {
+namespace naive {
+
+class SlidingWindowTransposeForwardImpl: public SlidingWindowTransposeForward {
+    public:
+        using SlidingWindowTransposeForward::SlidingWindowTransposeForward;
+        void exec(_megdnn_tensor_in src,
+                  _megdnn_tensor_out dst,
+                  _megdnn_workspace workspace) override;
+        size_t get_workspace_in_bytes(const TensorLayout &,
+                                      const TensorLayout &) override {
+            return 0;
+        }
+    private:
+        template <typename T>
+        void exec_internal(_megdnn_tensor_in src,
+                           _megdnn_tensor_out dst);
+};
+
+class SlidingWindowTransposeBackwardImpl: public SlidingWindowTransposeBackward {
+    public:
+        using SlidingWindowTransposeBackward::SlidingWindowTransposeBackward;
+        void exec(_megdnn_tensor_in diff,
+                  _megdnn_tensor_out grad,
+                  _megdnn_workspace workspace) override;
+        size_t get_workspace_in_bytes(const TensorLayout &,
+                                      const TensorLayout &) override {
+            return 0;
+        }
+    private:
+        template <typename T>
+        void exec_internal(_megdnn_tensor_in diff,
+                           _megdnn_tensor_out grad);
+};
+
+} // namespace naive
+} // namespace megdnn
+// vim: syntax=cpp.doxygen
diff --git a/dnn/test/common/sliding_window_transpose.h b/dnn/test/common/sliding_window_transpose.h
new file mode 100644
index 00000000..40af800b
--- /dev/null
+++ b/dnn/test/common/sliding_window_transpose.h
@@ -0,0 +1,86 @@
+/**
+ * \file dnn/test/common/sliding_window_transpose.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#pragma once
+#include "megdnn/opr_param_defs.h"
+#include "megdnn/basic_types.h"
+#include <vector>
+
+namespace megdnn {
+namespace test {
+namespace sliding_window_transpose {
+
+struct TestArg {
+    param::SlidingWindowTranspose param;
+    TensorShape ishape;
+    TestArg(param::SlidingWindowTranspose param, TensorShape ishape)
+            : param(param), ishape(ishape) {}
+};
+
+inline std::vector<TestArg> get_args() {
+    std::vector<TestArg> args;
+    // clang-format off
+    for (uint32_t ih : {25, 96})
+    for (uint32_t iw : {26, 128})
+    for (uint32_t ph : {0, 1})
+    for (uint32_t pw : {0, 1})
+    for (uint32_t sh : {1, 2})
+    for (uint32_t sw : {1, 2})
+    for (uint32_t dh : {1, 2})
+    for (uint32_t dw : {1, 2})
+    for (uint32_t wh : {3, 4})
+    for (uint32_t ww : {3, 4}) {
+        unsigned long int oh = (ih + 2 * ph - dh * (wh-1)-1) / sh + 1;
+        unsigned long int ow = (iw + 2 * pw - dw * (ww-1)-1) / sw + 1;
+        args.emplace_back(param::SlidingWindowTranspose{ih, iw, ph, pw, sh, sw, dh, dw, wh, ww},
+                          TensorShape{2, 3, oh, ow, wh, ww});
+    }
+    // clang-format on
+    // large window case
+    args.emplace_back(param::SlidingWindowTranspose{96, 128, 0, 0, 1, 1, 1, 1, 32, 64},
+                      TensorShape{2, 3, 65, 65, 32, 64});
+    // large size
+    args.emplace_back(param::SlidingWindowTranspose{28, 24, 0, 0, 1, 1, 1, 1, 1, 1},
+                      TensorShape{128, 128, 28, 24, 1, 1});
+
+    return args;
+}
+
+inline std::vector<TestArg> get_benchmark_args() {
+    std::vector<TestArg> args;
+    // clang-format off
+    for (uint32_t ph : {0, 1})
+    for (uint32_t pw : {0, 1})
+    for (uint32_t sh : {1, 2})
+    for (uint32_t sw : {1, 2})
+    for (uint32_t dh : {1, 2})
+    for (uint32_t dw : {1, 2})
+    for (uint32_t wh : {3, 4})
+    for (uint32_t ww : {3, 4})
+    for (uint32_t b : {1, 64})
+    for (uint32_t c : {64, 128})
+    for (uint32_t hw : {64, 128}) {
+        unsigned long int o_hw = (hw + 2 * ph - dh * (wh-1)-1) / sh + 1;
+        args.emplace_back(param::SlidingWindowTranspose{hw, hw, ph, pw, sh, sw, dh, dw, wh, ww},
+                          TensorShape{b, c, o_hw, o_hw, wh, ww});
+    }
+    // clang-format on
+    // large size
+    args.emplace_back(param::SlidingWindowTranspose{28, 24, 0, 0, 1, 1, 1, 1, 1, 1},
+                      TensorShape{1024, 128, 28, 24, 1, 1});
+
+    return args;
+}
+
+} // namespace sliding_window_transpose
+} // namespace test
+} // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/test/cuda/sliding_window_transpose.cpp b/dnn/test/cuda/sliding_window_transpose.cpp
new file mode 100644
index 00000000..9fce1642
--- /dev/null
+++ b/dnn/test/cuda/sliding_window_transpose.cpp
@@ -0,0 +1,96 @@
+/**
+ * \file dnn/test/cuda/sliding_window_transpose.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */ +#include "test/cuda/fixture.h" + +#include "test/common/checker.h" +#include "test/common/sliding_window_transpose.h" +#include "test/common/rng.h" +#include "test/cuda/benchmark.h" + +namespace megdnn { +namespace test { + +TEST_F(CUDA, SLIDINGWINDOWTRANSPOSE_FORWARD) +{ + UniformFloatRNG rng(0, 1); + auto args = sliding_window_transpose::get_args(); + for (auto &&arg: args) { + Checker checker(handle_cuda()); + checker.set_rng(0, &rng); + checker.set_epsilon(1e-2); + TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32()); + TensorLayout olayout; + { + auto opr = handle_cuda()->create_operator(); + opr->param() = arg.param; + opr->deduce_layout(ilayout, olayout); + } + auto set_dtype = [&checker](DType dtype) + { + checker.set_dtype(0, dtype). + set_dtype(1, dtype); + }; + set_dtype(dtype::Float32()); + checker.set_param(arg.param).exec(TensorShapeArray{ + ilayout, olayout}); + set_dtype(dtype::Float16()); + checker.set_param(arg.param).exec(TensorShapeArray{ + ilayout, olayout}); + } +} + +#if MEGDNN_WITH_BENCHMARK +TEST_F(CUDA, BENCHMARK_SLIDINGWINDOWTRANSPOSE_FORWARD) +{ + auto args = sliding_window_transpose::get_benchmark_args(); + for (auto &&arg: args) { + CUBenchmarker bencher(handle_cuda()); + bencher.set_param(arg.param).set_dtype(0, dtype::Float32()). + exec(TensorShapeArray{ + arg.ishape, {}}); + } +} +#endif + +TEST_F(CUDA, SLIDINGWINDOWTRANSPOSE_BACKWARD) +{ + UniformFloatRNG rng(0, 1); + auto args = sliding_window_transpose::get_args(); + for (auto &&arg: args) { + Checker checker(handle_cuda()); + // checker.set_epsilon(1e-2); + checker.set_rng(0, &rng); + TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32()); + TensorLayout olayout; + { + auto opr = handle_cuda()->create_operator(); + opr->param() = arg.param; + opr->deduce_layout(ilayout, olayout); + } + auto set_dtype = [&checker](DType dtype) + { + checker.set_dtype(0, dtype). + set_dtype(1, dtype); + }; + set_dtype(dtype::Float32()); + checker.set_param(arg.param).exec(TensorShapeArray{ + olayout, ilayout}); + set_dtype(dtype::Float16()); + checker.set_param(arg.param).exec(TensorShapeArray{ + olayout, ilayout}); + } +} + +} // namespace test +} // namespace megdnn + +// vim: syntax=cpp.doxygen + diff --git a/dnn/test/naive/sliding_window_transpose.cpp b/dnn/test/naive/sliding_window_transpose.cpp new file mode 100644 index 00000000..63d5b7b3 --- /dev/null +++ b/dnn/test/naive/sliding_window_transpose.cpp @@ -0,0 +1,61 @@ +/** + * \file dnn/test/naive/sliding_window_transpose.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ +#include "test/naive/fixture.h" + +#include "megdnn/oprs/nn.h" +#include "test/common/checker.h" + +using namespace megdnn; +using namespace test; + +TEST_F(NAIVE, SlidingWindowTranspose_FORWARD) { + Checker checker(handle(), /* check_dispatch */false); + + SlidingWindowTranspose::Param param(3,3,0,0,1,1,1,1,2,2); + checker.set_param(param).exect( + Testcase{TensorValue({1, 1, 2, 2, 2, 2}, dtype::Uint8(), + {0,1,3,4, + 1,2,4,5, + 3,4,6,7, + 4,5,7,8}), {}}, + Testcase{{}, + TensorValue({1, 1, 3, 3}, dtype::Uint8(), + {0,2,2, + 6,16,10, + 6,14,8})}); + + param.out_h = 6; + param.out_w = 7; + param.pad_h = 1; + param.pad_w = 1; + param.stride_h = 2; + param.stride_w = 2; + param.dilate_h = 2; + param.dilate_w = 2; + param.window_h = 3; + param.window_w = 3; + checker.set_param(param).exect( + Testcase{TensorValue({1, 1, 2, 3, 3, 3}, dtype::Uint8(), + {0,0,0,0,8,10,0,22,24, + 0,0,0,8,10,12,22,24,26, + 0,0,0,10,12,0,24,26,0, + 0,8,10,0,22,24,0,36,38, + 8,10,12,22,24,26,36,38,40, + 10,12,0,24,26,0,38,40,0}), {}}, + Testcase{{}, + TensorValue({1, 1, 6, 7}, dtype::Uint8(), + {0,0,0,0,0,0,0, + 0,32,0,60,0,48,0, + 0,0,0,0,0,0,0, + 0,88,0,144,0,104,0, + 0,0,0,0,0,0,0, + 0,72,0,114,0,80,0})}); +} diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py index ce58ac65..663d7fa5 100644 --- a/imperative/python/megengine/functional/nn.py +++ b/imperative/python/megengine/functional/nn.py @@ -71,6 +71,7 @@ __all__ = [ "resize", "sigmoid", "sliding_window", + "sliding_window_transpose", "softmax", "softplus", "sync_batch_norm", @@ -1396,6 +1397,60 @@ def sliding_window( return output +def sliding_window_transpose( + inp: Tensor, + output_size: Union[int, Tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], + padding: Union[int, Tuple[int, int]] = 0, + stride: Union[int, Tuple[int, int]] = 1, + dilation: Union[int, Tuple[int, int]] = 1, +) -> Tensor: + """ + Sum over the sliding windows on the corresponding input location. + + Refer to :class:`~.SlidingWindowTranspose` for more information. + + :param inp: input tensor. + :param output_size: shape of output tensor. + :param kernel_size: size of the window. + :param padding: implicit zero padding added on both sides of input. Default: 0 + :param stride: stride of the window. Default: 1 + :param dilation: dilation of the window. Default: 1 + :return: output tensor. 
+ """ + output_h, output_w = _pair_nonzero(output_size) + padding_h, padding_w = _pair(padding) + stride_h, stride_w = _pair_nonzero(stride) + dilation_h, dilation_w = _pair_nonzero(dilation) + window_h, window_w = _pair_nonzero(kernel_size) + + expected_h = ( + output_h + 2 * padding_h - dilation_h * (window_h - 1) - 1 + ) // stride_h + 1 + expected_w = ( + output_w + 2 * padding_w - dilation_w * (window_w - 1) - 1 + ) // stride_w + 1 + assert inp.ndim == 6, "the input dimension of sliding_window_transpose should be 6" + assert ( + inp.shape[2] == expected_h and inp.shape[3] == expected_w + ), "the input shape and output size do not match" + + op = builtin.SlidingWindowTranspose( + out_h=output_h, + out_w=output_w, + pad_h=padding_h, + pad_w=padding_w, + stride_h=stride_h, + stride_w=stride_w, + dilate_h=dilation_h, + dilate_w=dilation_w, + window_h=window_h, + window_w=window_w, + ) + (output,) = apply(op, inp) + return output + + interpolate = deprecated_func("1.3", "megengine.functional.vision", "interpolate", True) roi_pooling = deprecated_func("1.3", "megengine.functional.vision", "roi_pooling", True) roi_align = deprecated_func("1.3", "megengine.functional.vision", "roi_align", True) diff --git a/imperative/python/megengine/module/__init__.py b/imperative/python/megengine/module/__init__.py index b178fe2a..bacb61bb 100644 --- a/imperative/python/megengine/module/__init__.py +++ b/imperative/python/megengine/module/__init__.py @@ -34,4 +34,4 @@ from .normalization import GroupNorm, InstanceNorm, LayerNorm from .pooling import AvgPool2d, MaxPool2d from .quant_dequant import DequantStub, QuantStub from .sequential import Sequential -from .sliding_window import SlidingWindow +from .sliding_window import SlidingWindow, SlidingWindowTranspose diff --git a/imperative/python/megengine/module/sliding_window.py b/imperative/python/megengine/module/sliding_window.py index c7da29b3..eefe6612 100644 --- a/imperative/python/megengine/module/sliding_window.py +++ b/imperative/python/megengine/module/sliding_window.py @@ -8,7 +8,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. from typing import Tuple, Union -from ..functional import sliding_window +from ..functional import sliding_window, sliding_window_transpose from .module import Module @@ -86,3 +86,87 @@ class SlidingWindow(Module): return sliding_window( inp, self.kernel_size, self.padding, self.stride, self.dilation ) + + +class SlidingWindowTranspose(Module): + r""" + Opposite opration of SlidingWindow, sum over the sliding windows on the + corresponding input location. Given an input of the size + :math:`(N, C, IH, IW, window_h, window_w)` and :attr:`output_size`, the + output shape would be :math:`(N, C, output\_size_{h}, output\_size_{w})` and the + arguments must satisfy + + .. math:: + \text{IH} = \lfloor \frac{\text{output_size}_{h} + 2 * \text{padding}_{h} - + \text{dilation}_{h} * (\text{kernel_size}_{h} - 1) - 1}{\text{stride}_{h}} + 1 \rfloor + + .. math:: + \text{IW} = \lfloor \frac{\text{output_size}_{w} + 2 * \text{padding}_{w} - + \text{dilation}_{w} * (\text{kernel_size}_{w} - 1) - 1}{\text{stride}_{w}} + 1 \rfloor + + For each output location, we have: + + .. math:: + \text{out}_{n, c, oh, ow} = \sum_{n,c,oh,ow=location(n, c, ih, iw, wh, ww)}\text{src}_{n, c, ih, iw, wh, ww} + + .. 
+    .. math::
+        \text{location}(n, c, ih, iw, wh, ww) &= (n, c, oh+wh, ow+ww) \\
+        \text{where } & oh=-pad_h+ih \times stride_h + (wh-1) \times (dilation_h-1) \\
+        & ow=-pad_w+iw \times stride_w + (ww-1) \times (dilation_w-1)
+
+    :param output_size: the size of the output tensor.
+    :param kernel_size: the size of the window.
+    :param padding: implicit zero padding to be added on both sides. Default: 0
+    :param stride: the stride of the window. Default: 1
+    :param dilation: the dilation of the window. Default: 1
+
+    Example:
+
+    .. testcode::
+
+        from megengine import tensor
+        import megengine.module as M
+        import numpy as np
+
+        inp = tensor(np.arange(20).reshape(1,1,4,5))
+        unfold = M.SlidingWindow(kernel_size=3, padding=0, stride=1, dilation=1)
+        fold = M.SlidingWindowTranspose((4,5), kernel_size=3, padding=0, stride=1, dilation=1)
+        out = fold(unfold(inp))
+        print(out.numpy())
+
+    Outputs:
+
+    .. testoutput::
+
+        [[[[ 0  2  6  6  4]
+           [10 24 42 32 18]
+           [20 44 72 52 28]
+           [15 32 51 36 19]]]]
+
+    """
+
+    def __init__(
+        self,
+        output_size: Union[int, Tuple[int, int]],
+        kernel_size: Union[int, Tuple[int, int]],
+        padding: Union[int, Tuple[int, int]] = 0,
+        stride: Union[int, Tuple[int, int]] = 1,
+        dilation: Union[int, Tuple[int, int]] = 1,
+        **kwargs
+    ):
+        super(SlidingWindowTranspose, self).__init__(**kwargs)
+        self.output_size = output_size
+        self.kernel_size = kernel_size
+        self.padding = padding
+        self.stride = stride
+        self.dilation = dilation
+
+    def forward(self, inp):
+        return sliding_window_transpose(
+            inp,
+            self.output_size,
+            self.kernel_size,
+            self.padding,
+            self.stride,
+            self.dilation,
+        )
diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py
index 28f2254a..61f2adc2 100644
--- a/imperative/python/test/unit/functional/test_functional.py
+++ b/imperative/python/test/unit/functional/test_functional.py
@@ -953,3 +953,39 @@ def test_sliding_window():
         tensor(inp), (wh, ww), padding=(ph, pw), stride=(sh, sw), dilation=(dh, dw)
     )
     np.testing.assert_equal(gt_out, out.numpy())
+
+
+def test_sliding_window_transpose():
+    N, C, H, W = 2, 3, 7, 8
+    ph, pw = 1, 2
+    sh, sw = 2, 1
+    wh, ww = 3, 2
+    dh, dw = 1, 3
+    s = lambda i, p, s, d, w: (i + p * 2 - (w - 1) * d - 1) // s + 1
+    inp = np.random.normal(
+        size=(N, C, s(H, ph, sh, dh, wh), s(W, pw, sw, dw, ww), wh, ww)
+    ).astype(np.float32)
+    gt_out = np.zeros((N, C, H, W), dtype=np.float32)
+
+    for n, c in itertools.product(*map(range, inp.shape[:2])):
+        oh = 0
+        for ih in range(-ph, H + ph - dh * (wh - 1), sh):
+            ow = 0
+            for iw in range(-pw, W + pw - dw * (ww - 1), sw):
+                for kh, kw in itertools.product(*map(range, inp.shape[-2:])):
+                    ih2 = ih + dh * kh
+                    iw2 = iw + dw * kw
+                    if ih2 >= 0 and ih2 < H and iw2 >= 0 and iw2 < W:
+                        gt_out[n, c, ih2, iw2] += inp[n, c, oh, ow, kh, kw]
+                ow += 1
+            oh += 1
+
+    out = F.sliding_window_transpose(
+        tensor(inp),
+        (H, W),
+        (wh, ww),
+        padding=(ph, pw),
+        stride=(sh, sw),
+        dilation=(dh, dw),
+    )
+    np.testing.assert_equal(gt_out, out.numpy())
diff --git a/imperative/src/impl/ops/specializations.cpp b/imperative/src/impl/ops/specializations.cpp
index cc6bb192..abe07a00 100644
--- a/imperative/src/impl/ops/specializations.cpp
+++ b/imperative/src/impl/ops/specializations.cpp
@@ -35,6 +35,8 @@
 #include "megbrain/opr/tensor_gen.h"
 #include "megbrain/opr/tensor_manip.h"
 #include "megbrain/opr/utility.h"
+#include "megbrain/opr/dnn/images2neibs.h"
+#include "megbrain/opr/dnn/sliding_window_transpose.h"
 
"../op_trait.h" @@ -658,4 +660,17 @@ OP_TRAIT_REG(LSQ, LSQ).apply_on_var_node(apply_on_var_node).fallback(); } // namespace lsq } // namespace -} // namespace mgb::imperative +namespace { namespace sliding_window_transpose { +auto apply_on_var_node( + const OpDef& def, + const VarNodeArray& inputs) { + auto&& op = static_cast(def); + OperatorNodeConfig config{op.make_name()}; + return opr::SlidingWindowTranspose::make(inputs[0], op.param(), config); +} +OP_TRAIT_REG(SlidingWindowTranspose, SlidingWindowTranspose) + .apply_on_var_node(apply_on_var_node) + .fallback(); +}} // sliding_window_transpose + +} // namespace mgb::imperative diff --git a/src/core/include/megbrain/ir/ops.td b/src/core/include/megbrain/ir/ops.td index 2aeb01fa..a74af609 100644 --- a/src/core/include/megbrain/ir/ops.td +++ b/src/core/include/megbrain/ir/ops.td @@ -81,6 +81,8 @@ def BatchConvBias : MgbHashableOp<"BatchConvBias", [BatchConvBiasParam, Executio def Images2Neibs : MgbHashableOp<"Images2Neibs", [Images2NeibsParam]>; +def SlidingWindowTranspose : MgbHashableOp<"SlidingWindowTranspose", [SlidingWindowTransposeParam]>; + def BatchNorm : MgbHashableOp<"BatchNorm", [BNParam]>; def ROIAlign: MgbHashableOp<"ROIAlign", [ROIAlignParam]>; diff --git a/src/opr/impl/dnn/dnn.sereg.h b/src/opr/impl/dnn/dnn.sereg.h index 6e9e70fc..396446cb 100644 --- a/src/opr/impl/dnn/dnn.sereg.h +++ b/src/opr/impl/dnn/dnn.sereg.h @@ -16,6 +16,8 @@ #include "megbrain/opr/dnn/correlation.h" #include "megbrain/opr/dnn/fake_quant.h" #include "megbrain/opr/dnn/images2neibs.h" +#include "megbrain/opr/dnn/sliding_window_transpose.h" +#include "megbrain/opr/dnn/adaptive_pooling.h" #include "megbrain/opr/dnn/local.h" #include "megbrain/opr/dnn/lrn.h" #include "megbrain/opr/dnn/lsq.h" @@ -531,6 +533,9 @@ MGB_SEREG_OPR(ConvolutionBackwardFilterV2, 0); MGB_SEREG_OPR(Images2Neibs, 1); MGB_SEREG_OPR(Images2NeibsBackward, 2); +MGB_SEREG_OPR(SlidingWindowTranspose, 1); +MGB_SEREG_OPR(SlidingWindowTransposeBackward, 2); + using LocalV2 = Local; using LocalBackwardDataV2 = LocalBackwardData; using LocalBackwardFilterV2 = LocalBackwardFilter; diff --git a/src/opr/impl/dnn/sliding_window_transpose.cpp b/src/opr/impl/dnn/sliding_window_transpose.cpp new file mode 100644 index 00000000..a53b8b95 --- /dev/null +++ b/src/opr/impl/dnn/sliding_window_transpose.cpp @@ -0,0 +1,36 @@ +/** + * \file src/opr/impl/dnn/sliding_window_transpose.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +#include "megbrain/opr/dnn/sliding_window_transpose.h" +#include "megbrain/graph/grad_impl.h" + +#include "../internal/megdnn_opr_wrapper.inl" + +using namespace mgb; +using namespace opr; + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(SlidingWindowTransposeForward); +MEGDNN_OPR_INIT1(SlidingWindowTransposeForward, "sliding_window_transpose") + +#if MGB_ENABLE_GRAD +MGB_IMPL_OPR_GRAD(SlidingWindowTransposeForward) { + mgb_assert(wrt_idx == 0 && out_grad.size() == 2 && !out_grad[1]); + return SlidingWindowTransposeBackward::make( + out_grad[0], opr.input(0), opr.param()).node(); +} +#endif + +MGB_DYN_TYPE_OBJ_FINAL_IMPL(SlidingWindowTransposeBackward); +MEGDNN_OPR_INIT2(SlidingWindowTransposeBackward, "sliding_window_transpose_grad", 1, false); + + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} + diff --git a/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h b/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h new file mode 100644 index 00000000..b44db80c --- /dev/null +++ b/src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h @@ -0,0 +1,49 @@ +/** + * \file src/opr/include/megbrain/opr/dnn/sliding_window_transpose.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +#pragma once + +#include "megbrain/opr/internal/megdnn_opr_wrapper.h" +#include "megdnn/oprs.h" + +namespace mgb { +namespace opr { + +MGB_DEFINE_OPR_CLASS(SlidingWindowTransposeForward, + intl::MegDNNOprWrapperFwd) // { + + public: + SlidingWindowTransposeForward(VarNode *src, + const Param ¶m, + const OperatorNodeConfig &config); + + static SymbolVar make(SymbolVar src, + const Param ¶m = {}, + const OperatorNodeConfig &config = {}); +}; +using SlidingWindowTranspose = SlidingWindowTransposeForward; + +MGB_DEFINE_OPR_CLASS(SlidingWindowTransposeBackward, + intl::MegDNNOprWrapperBwd) // { + + public: + SlidingWindowTransposeBackward(VarNode *diff, VarNode *src_for_shape, + const Param ¶m, + const OperatorNodeConfig &config); + + static SymbolVar make(SymbolVar diff, SymbolVar src_for_shape, + const Param ¶m = {}, + const OperatorNodeConfig &config = {}); +}; + +} // namespace opr +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} \ No newline at end of file diff --git a/src/opr/test/dnn/sliding_window_transpose.cpp b/src/opr/test/dnn/sliding_window_transpose.cpp new file mode 100644 index 00000000..bac0f225 --- /dev/null +++ b/src/opr/test/dnn/sliding_window_transpose.cpp @@ -0,0 +1,64 @@ +/** + * \file src/opr/test/dnn/sliding_window_transpose.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +#include "megbrain/test/helper.h" +#include "megbrain/test/autocheck.h" +#include "megbrain/test/megdnn_helper.h" +#include "megbrain/opr/dnn/sliding_window_transpose.h" + +#include "megdnn/oprs.h" + +using namespace mgb; + +TEST(TestOprDNN, SlidingWindowTranspose) { + using Checker = AutoOprChecker<1, 1>; + + opr::SlidingWindowTranspose::Param param; + param.pad_h = 1; + param.pad_w = 2; + param.stride_w = 2; + param.window_h = 4; + param.dilate_h = 2; + unsigned long ih = 16, iw = 15; + unsigned long oh = (ih + 2 * param.pad_h - param.dilate_h * (param.window_h-1)-1) / param.stride_h + 1; + unsigned long ow = (iw + 2 * param.pad_w - param.dilate_w * (param.window_w-1)-1) / param.stride_w + 1; + param.out_h = ih; + param.out_w = iw; + + auto make_graph = [&](const Checker::SymInpArray &inputs) -> + Checker::SymOutArray { + return {opr::SlidingWindowTranspose::make(inputs[0], param)}; + }; + + auto fwd = [&](Checker::NumOutArray &dest, Checker::NumInpArray inp) { + auto opr = megdnn_naive_handle()-> + create_operator(); + opr->param() = param; + TensorLayout dest_layout; + opr->deduce_layout(inp[0]->layout(), dest_layout); + std::vector workspace( + opr->get_workspace_in_bytes(inp[0]->layout(), dest_layout)); + dest[0].dtype(dtype::Float32()). + comp_node(inp[0]->comp_node()).resize(dest_layout); + opr->exec(inp[0]->as_megdnn(), dest[0].as_megdnn(), + {workspace.data(), workspace.size()}); + }; + Checker::RunOptions opt; + opt.numdiff_eps = 1; + Checker checker{make_graph, fwd}; + + checker. + run({TensorShape{2, 3, oh, ow, param.window_h, param.window_w}}, opt). + run({TensorShape{4, 5, oh, ow, param.window_h, param.window_w}}, opt). + run({TensorShape{3, 2, oh, ow, param.window_h, param.window_w}}, opt); +} + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/serialization/impl/schema.fbs b/src/serialization/impl/schema.fbs index 5a31493a..22e43d92 100644 --- a/src/serialization/impl/schema.fbs +++ b/src/serialization/impl/schema.fbs @@ -112,6 +112,7 @@ union OperatorParam { param.PoissonRNG = 78, param.PermutationRNG = 79, param.BetaRNG = 80, + param.SlidingWindowTranspose = 81, } table Operator {