Browse Source

feat(mgb): add padding opr for megbrain

GitOrigin-RevId: 490e0c5d5a
tags/v1.6.0-rc1
Megvii Engine Team 4 years ago
parent
commit
eab6afab47
26 changed files with 1953 additions and 4 deletions
  1. +47
    -0
      dnn/include/megdnn/oprs/general.h
  2. +26
    -0
      dnn/scripts/opr_param_defs.py
  3. +3
    -1
      dnn/src/common/handle_impl.h
  4. +2
    -0
      dnn/src/common/opr_trait.h
  5. +144
    -0
      dnn/src/common/padding.cpp
  6. +1
    -0
      dnn/src/cuda/handle_create.cpp
  7. +71
    -0
      dnn/src/cuda/padding/opr_impl.cpp
  8. +35
    -0
      dnn/src/cuda/padding/opr_impl.h
  9. +284
    -0
      dnn/src/cuda/padding/padding.cu
  10. +36
    -0
      dnn/src/cuda/padding/padding.cuh
  11. +1
    -1
      dnn/src/cuda/utils.cuh
  12. +1
    -1
      dnn/src/naive/argmxx/opr_impl.cpp
  13. +1
    -0
      dnn/src/naive/handle.cpp
  14. +281
    -0
      dnn/src/naive/padding/opr_impl.cpp
  15. +30
    -0
      dnn/src/naive/padding/opr_impl.h
  16. +441
    -0
      dnn/test/common/padding.h
  17. +219
    -0
      dnn/test/cuda/padding.cpp
  18. +132
    -0
      dnn/test/naive/padding.cpp
  19. +5
    -0
      src/opr/impl/misc.oprdecl
  20. +1
    -1
      src/opr/impl/misc.sereg.h
  21. +26
    -0
      src/opr/impl/tensor_manip.cpp
  22. +33
    -0
      src/opr/impl/tensor_manip.sereg.h
  23. +20
    -0
      src/opr/include/megbrain/opr/tensor_manip.h
  24. +68
    -0
      src/opr/test/dnn/padding.cpp
  25. +44
    -0
      src/opr/test/tensor_manip.cpp
  26. +1
    -0
      src/serialization/impl/schema.fbs

+ 47
- 0
dnn/include/megdnn/oprs/general.h View File

@@ -1353,6 +1353,53 @@ public:
protected:
void check_exec(const TensorLayout& dst, size_t workspace_in_bytes);
};

/*!
* \brief standard padding operator
* Inputs must have the same dtype, and the output tensor shape must be greater
* than or equal to the input tensor shape in every dimension; the extra space
* is filled with a constant value which defaults to 0.
*/

//! Common base for padding forward/backward: holds the Padding param
//! (per-dim front/back offsets, fill value, mode) and shared checks.
class PaddingBase: public OperatorBase {
    DEF_OPR_PARAM(Padding);
    DEF_OPR_IMPL(PaddingBase, OperatorBase, 1, 1);
public:
    using Mode = Param::PaddingMode;
protected:
    //! flatten the param's per-dim offsets into {front0, back0, front1, ...}
    SmallVector<size_t> get_offsets();
    //! validate dtype/ndim/shape-offset consistency between src and dst
    void check_exec(const TensorLayout& src, const TensorLayout& dst);
};

//! Forward padding: dst[i] = src[i - front_offset] inside the copied region,
//! outside it is filled according to the padding mode.
class PaddingForward: public PaddingBase {
    DEF_OPR_IMPL(PaddingForward, PaddingBase, 1, 1);
public:
    virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0;
    //! workspace-taking overload; the workspace is unused (see
    //! get_workspace_in_bytes, implementations return 0)
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
              _megdnn_workspace) {
        return exec(src, dst);
    }
    virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0;
    //! dst shape = src shape + front + back offsets per dimension
    void deduce_layout(const TensorLayout &src, TensorLayout &dst);
protected:
    void forward_check_exec(const TensorLayout& src, const TensorLayout& dst);
};

using Padding = PaddingForward;

//! Backward padding: src is the gradient w.r.t. the padded output,
//! dst receives the gradient w.r.t. the original (smaller) input.
class PaddingBackward: public PaddingBase {
    DEF_OPR_IMPL(PaddingBackward, PaddingBase, 1, 1);
public:
    virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0;
    //! workspace-taking overload; the workspace is unused
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
              _megdnn_workspace) {
        return exec(src, dst);
    }
    virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0;
protected:
    void backward_check_exec(const TensorLayout& src, const TensorLayout& dst);
};

} // namespace megdnn

#include "megdnn/internal/opr_header_epilogue.h"


+ 26
- 0
dnn/scripts/opr_param_defs.py View File

@@ -1171,3 +1171,29 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o
add_fields('int32', 'qmax', '2147483647')
)
pdef('Fill').add_fields('float32', 'value', '0')


# Padding modes, illustrated as <left pad>|<original data>|<right pad>:
#   REPLICATE repeats the edge element, REFLECT mirrors without repeating
#   the edge, CONSTANT fills with padding_val.
PADDING_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'),
                 Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'),
                 Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii')]
# Padding opr param: front/back offsets for up to 7 dimensions (unused dims
# default to 0), the constant fill value, and the mode (default 2 = CONSTANT).
(pdef('Padding').
 add_fields('uint32', Doc('front_offset_dim0','offset in dim 0'), 0).
 add_fields('uint32', Doc('front_offset_dim1','offset in dim 1'), 0).
 add_fields('uint32', Doc('front_offset_dim2','offset in dim 2'), 0).
 add_fields('uint32', Doc('front_offset_dim3','offset in dim 3'), 0).
 add_fields('uint32', Doc('front_offset_dim4','offset in dim 4'), 0).
 add_fields('uint32', Doc('front_offset_dim5','offset in dim 5'), 0).
 add_fields('uint32', Doc('front_offset_dim6','offset in dim 6'), 0).
 add_fields('uint32', Doc('back_offset_dim0', 'back offset in dim0'), 0).
 add_fields('uint32', Doc('back_offset_dim1', 'back offset in dim1'), 0).
 add_fields('uint32', Doc('back_offset_dim2', 'back offset in dim2'), 0).
 add_fields('uint32', Doc('back_offset_dim3', 'back offset in dim3'), 0).
 add_fields('uint32', Doc('back_offset_dim4', 'back offset in dim4'), 0).
 add_fields('uint32', Doc('back_offset_dim5', 'back offset in dim5'), 0).
 add_fields('uint32', Doc('back_offset_dim6', 'back offset in dim6'), 0).
 add_fields('float32', Doc('padding_val','param of padding opr'), 0).
 add_enum('PaddingMode', *PADDING_MODES,
          name_field='padding_mode', default=2,
          member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES]
          )
 )

+ 3
- 1
dnn/src/common/handle_impl.h View File

@@ -217,7 +217,9 @@ private:
cb(CheckHasInf) \
cb(LSQForward) \
cb(LSQBackward) \
cb(Fill)
cb(Fill) \
cb(PaddingForward) \
cb(PaddingBackward)

/*!
* \brief specialize HandleImpl::create_operator for a single opr type;


+ 2
- 0
dnn/src/common/opr_trait.h View File

@@ -27,6 +27,8 @@ struct OprTrait {};
static const bool can_deduce_layout = CanDeduceLayout; \
}

DEF(Padding, 2, false, true);
DEF(PaddingBackward, 2, false, false);
DEF(ConvolutionForward, 3, true, true);
DEF(Convolution3DForward, 3, true, true);
DEF(ConvolutionBackwardData, 3, true, false);


+ 144
- 0
dnn/src/common/padding.cpp View File

@@ -0,0 +1,144 @@
/**
* \file dnn/src/common/padding.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "megdnn/oprs.h"
#include "megdnn/oprs/general.h"
#include "megdnn/thin/small_vector.h"
#include "src/common/opr_param_defs_enumv.cuh"
#include "src/common/utils.h"

namespace megdnn {

using padding_param = megdnn::param_enumv::Padding;

/**
 * Validate src/dst for forward padding: run the common shape/offset checks,
 * then reject dtypes the forward kernels cannot handle (Bool and the
 * sub-byte IntB types).
 */
void PaddingForward::forward_check_exec(const TensorLayout& src,
                                        const TensorLayout& dst) {
    check_exec(src, dst);
    auto dtype_enum = src.dtype.enumv();
    bool unsupported_dtype =
            dtype_enum == DTypeEnum::Bool || dtype_enum == DTypeEnum::IntB1 ||
            dtype_enum == DTypeEnum::IntB2 || dtype_enum == DTypeEnum::IntB4;
    megdnn_assert(!unsupported_dtype,
                  "unsupported %s dtype for forward padding opr",
                  src.dtype.name());
}

void PaddingForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) {
SmallVector<size_t> offsets(get_offsets());
TensorShape dst_shape;
switch (src.ndim) {
case 1:
dst_shape = {src.shape[0] + offsets[0] + offsets[1]};
break;
case 2:
dst_shape = {src.shape[0] + offsets[0] + offsets[1],
src.shape[1] + offsets[2] + offsets[3]};
break;
case 3:
dst_shape = {src.shape[0] + offsets[0] + offsets[1],
src.shape[1] + offsets[2] + offsets[3],
src.shape[2] + offsets[4] + offsets[5]};
break;
case 4:
dst_shape = {src.shape[0] + offsets[0] + offsets[1],
src.shape[1] + offsets[2] + offsets[3],
src.shape[2] + offsets[4] + offsets[5],
src.shape[3] + offsets[6] + offsets[7]};
break;
case 5:
dst_shape = {src.shape[0] + offsets[0] + offsets[1],
src.shape[1] + offsets[2] + offsets[3],
src.shape[2] + offsets[4] + offsets[5],
src.shape[3] + offsets[6] + offsets[7],
src.shape[4] + offsets[8] + offsets[9]};
break;
case 6:
dst_shape = {src.shape[0] + offsets[0] + offsets[1],
src.shape[1] + offsets[2] + offsets[3],
src.shape[2] + offsets[4] + offsets[5],
src.shape[3] + offsets[6] + offsets[7],
src.shape[4] + offsets[8] + offsets[9],
src.shape[5] + offsets[10] + offsets[11]};
break;
case 7:
dst_shape = {src.shape[0] + offsets[0] + offsets[1],
src.shape[1] + offsets[2] + offsets[3],
src.shape[2] + offsets[4] + offsets[5],
src.shape[3] + offsets[6] + offsets[7],
src.shape[4] + offsets[8] + offsets[9],
src.shape[5] + offsets[10] + offsets[11],
src.shape[6] + offsets[12] + offsets[13]};
break;
default:
megdnn_assert(false, "invalid tensor ndim %zu", src.ndim);
break;
}
dst = TensorLayout(dst_shape, src.dtype);
}

/**
 * Validate src/dst for backward padding. Here \p src is the gradient of the
 * padded output and \p dst the gradient of the original input, so the common
 * shape check runs with the arguments swapped. Only floating dtypes are
 * supported because backward accumulates gradients.
 */
void PaddingBackward::backward_check_exec(const TensorLayout& src,
                                          const TensorLayout& dst) {
    check_exec(dst, src);
    megdnn_assert(src.dtype.enumv() ==
                          DTypeEnum::Float32 DNN_INC_FLOAT16(
                                  || src.dtype.enumv() == DTypeEnum::Float16 ||
                                  src.dtype.enumv() == DTypeEnum::BFloat16),
                  // fix: this is the backward checker, not forward
                  "unsupported %s dtype for backward padding opr",
                  src.dtype.name());
}

/**
 * Flatten the param's per-dimension padding offsets into a vector laid out as
 * {front_dim0, back_dim0, front_dim1, back_dim1, ..., front_dim6, back_dim6}.
 */
SmallVector<size_t> PaddingBase::get_offsets() {
    const auto& p = param();
    SmallVector<size_t> offsets;
    offsets.push_back(p.front_offset_dim0);
    offsets.push_back(p.back_offset_dim0);
    offsets.push_back(p.front_offset_dim1);
    offsets.push_back(p.back_offset_dim1);
    offsets.push_back(p.front_offset_dim2);
    offsets.push_back(p.back_offset_dim2);
    offsets.push_back(p.front_offset_dim3);
    offsets.push_back(p.back_offset_dim3);
    offsets.push_back(p.front_offset_dim4);
    offsets.push_back(p.back_offset_dim4);
    offsets.push_back(p.front_offset_dim5);
    offsets.push_back(p.back_offset_dim5);
    offsets.push_back(p.front_offset_dim6);
    offsets.push_back(p.back_offset_dim6);
    return offsets;
}

/**
 * Shared validation for forward/backward padding:
 * same dtype, same ndim, dst.shape[i] == src.shape[i] + front_i + back_i,
 * a known padding mode, and (for REFLECT) offsets small enough that the
 * mirrored index stays inside src.
 */
void PaddingBase::check_exec(const TensorLayout& src, const TensorLayout& dst) {
    SmallVector<size_t> offsets(get_offsets());
    // make sure the src and dst tensors are not empty
    megdnn_assert(src.ndim != 0 && dst.ndim != 0);
    // make sure src and dst have the same dtype
    megdnn_assert_eq_dtype(src, dst);
    // make sure src and dst have the same ndim
    megdnn_assert(src.ndim == dst.ndim, "the src.ndim = %zu the dst.ndim = %zu",
                  src.ndim, dst.ndim);
    // every dst dimension must equal src plus its front/back offsets
    for (size_t i = 0; i < src.ndim; ++i) {
        megdnn_assert(dst.shape[i] ==
                      src.shape[i] + offsets[i * 2] + offsets[i * 2 + 1]);
    }
    // check the padding mode is valid
    megdnn_assert(static_cast<uint32_t>(param().padding_mode) ==
                                  padding_param::PaddingMode::REFLECT ||
                          static_cast<uint32_t>(param().padding_mode) ==
                                  padding_param::PaddingMode::REPLICATE ||
                          static_cast<uint32_t>(param().padding_mode) ==
                                  padding_param::PaddingMode::CONSTANT,
                  "unsupported padding mode");
    // additional check for reflect padding: both the front and the back
    // padding must be strictly smaller than src so the reflected index
    // (which mirrors around the first/last element) is valid
    if (static_cast<uint32_t>(param().padding_mode) ==
        padding_param::PaddingMode::REFLECT) {
        for (size_t i = 0; i < src.ndim; ++i) {
            megdnn_assert(offsets[i * 2] < src.shape[i] &&
                          dst.shape[i] - offsets[i * 2] - src.shape[i] <
                                  src.shape[i]);
        }
    }
}

} // namespace megdnn

+ 1
- 0
dnn/src/cuda/handle_create.cpp View File

@@ -12,6 +12,7 @@

#include "src/common/handle_impl.h"

#include "src/cuda/padding/opr_impl.h"
#include "src/cuda/adaptive_pooling/opr_impl.h"
#include "src/cuda/add_update/opr_impl.h"
#include "src/cuda/argmxx/opr_impl.h"


+ 71
- 0
dnn/src/cuda/padding/opr_impl.cpp View File

@@ -0,0 +1,71 @@
/**
* \file dnn/src/cuda/padding/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/padding/opr_impl.h"
#include "src/common/utils.h"
#include "src/cuda/handle.h"
#include "src/cuda/padding/padding.cuh"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

/**
 * CUDA forward padding entry point: validate layouts, flatten the offsets,
 * then dispatch the typed proxy kernel on this handle's stream.
 */
void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    forward_check_exec(src.layout, dst.layout);
    SmallVector<size_t> offsets(get_offsets());
    // SmallVector cannot be passed as an argument into a .cu file, so copy
    // the per-dim (front, back) offsets into a plain C array
    size_t param_offsets[MEGDNN_MAX_NDIM * 2] = {
            offsets[0], offsets[1], offsets[2], offsets[3], offsets[4],
            offsets[5], offsets[6], offsets[7], offsets[8], offsets[9],
            offsets[10], offsets[11], offsets[12], offsets[13]};
    auto stream = cuda_stream(this->handle());
    // runtime dtype dispatch: pick the proxy instantiation whose ctype
    // matches src's dtype
#define cb(DType)                                                           \
    if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) {             \
        using ctype = typename DTypeTrait<DType>::ctype;                    \
        padding::padding_forward_proxy<ctype>(src, dst, param_offsets,      \
                                              uint32_t(param().padding_mode), \
                                              param().padding_val, stream); \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
}

/**
 * CUDA backward padding entry point: src holds the gradient of the padded
 * output; the typed proxy scatters/accumulates it into dst. Only float
 * dtypes are dispatched (see backward_check_exec).
 */
void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    backward_check_exec(src.layout, dst.layout);
    SmallVector<size_t> offsets(get_offsets());
    // SmallVector cannot be passed as an argument into a .cu file, so copy
    // the per-dim (front, back) offsets into a plain C array
    size_t param_offsets[MEGDNN_MAX_NDIM * 2] = {
            offsets[0], offsets[1], offsets[2], offsets[3], offsets[4],
            offsets[5], offsets[6], offsets[7], offsets[8], offsets[9],
            offsets[10], offsets[11], offsets[12], offsets[13]};
    auto stream = cuda_stream(this->handle());
#define cb(DType)                                                          \
    if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) {            \
        using ctype = typename DTypeTrait<DType>::ctype;                   \
        padding::padding_backward_proxy<ctype>(src, dst, param_offsets,    \
                                               uint32_t(param().padding_mode), \
                                               stream);                    \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}

// Forward padding writes dst directly from src; no workspace is needed.
// Parameter names are commented out to silence -Wunused-parameter, matching
// the naive implementation's style.
size_t PaddingForwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}

// Backward padding accumulates into dst in place; no workspace is needed.
// Parameter names are commented out to silence -Wunused-parameter, matching
// the naive implementation's style.
size_t PaddingBackwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}
} // namespace cuda
} // namespace megdnn

+ 35
- 0
dnn/src/cuda/padding/opr_impl.h View File

@@ -0,0 +1,35 @@
/**
* \file dnn/src/cuda/padding/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"

namespace megdnn {
namespace cuda {
//! CUDA implementation of forward padding; kernels live in padding.cu.
class PaddingForwardImpl : public PaddingForward {
    using PaddingForward::PaddingForward;

public:
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
    //! always returns 0: the forward kernel needs no scratch memory
    size_t get_workspace_in_bytes(const TensorLayout& src,
                                  const TensorLayout& dst) override;
};

//! CUDA implementation of backward padding; kernels live in padding.cu.
class PaddingBackwardImpl : public PaddingBackward {
    using PaddingBackward::PaddingBackward;

public:
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
    //! always returns 0: the backward kernel needs no scratch memory
    size_t get_workspace_in_bytes(const TensorLayout& src,
                                  const TensorLayout& dst) override;
};
} // namespace cuda
} // namespace megdnn

+ 284
- 0
dnn/src/cuda/padding/padding.cu View File

@@ -0,0 +1,284 @@
/**
* \file dnn/src/cuda/padding/padding.cu
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include <algorithm>
#include <cstring>
#include <iostream>
#include "megdnn/basic_types.h"
#include "padding.cuh"
#include "src/cuda/int_fastdiv.cuh"
#include "src/cuda/query_blocksize.cuh"

namespace megdnn {
namespace cuda {
namespace padding {

//! Plain-old-data bundle of shapes, strides and offsets passed to the
//! kernels by value (device code cannot take TensorLayout/SmallVector).
//! Strides use Uint32Fastdiv so the kernels can divide cheaply.
struct ShapeParams {
    size_t src_shape[MEGDNN_MAX_NDIM];
    size_t dst_shape[MEGDNN_MAX_NDIM];
    Uint32Fastdiv src_stride[MEGDNN_MAX_NDIM];
    Uint32Fastdiv dst_stride[MEGDNN_MAX_NDIM];
    // flattened per-dim {front, back} padding offsets
    size_t offsets[MEGDNN_MAX_NDIM * 2];
};

/**
 * CONSTANT forward padding kernel: each thread handles one output element.
 * The linear output index is decomposed dim by dim via the precomputed
 * fast-division strides; elements outside the copied region get padding_val.
 */
template <typename T>
__global__ void paddingConst_kernel(const size_t ndim,
                                    const size_t total_out_nr,
                                    const T* const src, T* const dst,
                                    ShapeParams params,
                                    const float_t padding_val) {
    KERN_FOR(out_index, total_out_nr) {
        bool in_src_valid_area = true;
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        // fix: use `dim < ndim` instead of `dim <= ndim - 1` (equivalent for
        // ndim >= 1 but avoids size_t wraparound if ndim were 0); the old
        // commented-out variant of this loop body is removed as dead code
        for (size_t dim = 0; dim < ndim; ++dim) {
            Uint32Fastdiv dst_stride = params.dst_stride[dim],
                          src_stride = params.src_stride[dim];
            size_t src_shape = params.src_shape[dim];
            size_t offset = params.offsets[dim * 2];

            size_t dim_index = out_index_tmp / dst_stride;
            in_src_valid_area &=
                    (dim_index >= offset && dim_index < offset + src_shape);
            // once any coordinate falls outside the copied region the
            // element is pure padding; no need to finish the decomposition
            if (!in_src_valid_area)
                break;
            out_index_tmp -= dim_index * dst_stride.divisor();
            in_index += (dim_index - offset) * src_stride.divisor();
        }
        dst[out_index] = in_src_valid_area ? src[in_index] : padding_val;
    }
}

/**
 * REPLICATE forward padding kernel: out-of-range coordinates are clamped to
 * the nearest edge element of src. The trailing unnamed float_t keeps the
 * signature identical to the other forward kernels for uniform dispatch.
 */
template <typename T>
__global__ void paddingReplicate_kernel(const size_t ndim,
                                        const size_t total_out_nr,
                                        const T* const src, T* const dst,
                                        ShapeParams params, const float_t) {
    KERN_FOR(out_index, total_out_nr) {
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = out_index_tmp / params.dst_stride[dim];
            out_index_tmp -= dim_index * params.dst_stride[dim].divisor();
            // clamp (dim_index - front_offset) into [0, src_shape - 1]
            dim_index = (size_t)llmin(
                    (long long)params.src_shape[dim] - 1,
                    llmax((long long)dim_index -
                                  (long long)params.offsets[dim * 2],
                          (long long)0));
            in_index += dim_index * params.src_stride[dim].divisor();
        }
        dst[out_index] = src[in_index];
    }
}

/**
 * REFLECT forward padding kernel: coordinates are mirrored around the first
 * and last element without repeating the edge. For d = dim_index - front:
 * max(d, -d) reflects the front side, min(d, 2*shape - d - 2) the back side.
 */
template <typename T>
__global__ void paddingReflect_kernel(const size_t ndim,
                                      const size_t total_out_nr,
                                      const T* const src, T* const dst,
                                      ShapeParams params, const float_t) {
    KERN_FOR(out_index, total_out_nr) {
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            // signed arithmetic: d can go negative before reflection
            long long dim_index = out_index_tmp / params.dst_stride[dim];
            out_index_tmp -= dim_index * params.dst_stride[dim].divisor();
            dim_index -= (long long)params.offsets[dim * 2];
            dim_index = llmax(dim_index, -dim_index);
            dim_index = llmin(dim_index, 2 * (long long)params.src_shape[dim] -
                                                 dim_index - 2);
            in_index += size_t(dim_index) *
                        (size_t)params.src_stride[dim].divisor();
        }
        dst[out_index] = src[in_index];
    }
}

/**
 * CONSTANT backward padding kernel: src is the gradient of the padded output,
 * dst the gradient of the original input. Each input-grad element inside the
 * unpadded region is copied straight through; padding positions contribute
 * nothing, so no atomics are needed.
 */
template <typename T>
__global__ void paddingConstBackward_kernel(const size_t ndim,
                                            const size_t total_in_nr,
                                            const T* const src, T* const dst,
                                            ShapeParams params) {
    KERN_FOR(in_index, total_in_nr) {
        bool in_dst_valid_area = true;
        size_t out_index = 0;
        size_t in_index_tmp = in_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = in_index_tmp / params.src_stride[dim];
            in_index_tmp -= dim_index * params.src_stride[dim].divisor();
            in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] &&
                                  dim_index < params.offsets[dim * 2] +
                                                      params.dst_shape[dim]);
            // may wrap for out-of-area coordinates, but out_index is only
            // used when in_dst_valid_area holds
            out_index += (dim_index - params.offsets[dim * 2]) *
                         params.dst_stride[dim].divisor();
        }
        if (in_dst_valid_area) {
            dst[out_index] = src[in_index];
        }
    }
}

/**
 * REPLICATE backward padding kernel: multiple padded positions clamp to the
 * same input element, so their gradients are accumulated with atomic_add.
 * dst must be zero-initialized by the caller (see padding_backward_proxy).
 */
template <typename T>
__global__ void paddingReplicateBackward_kernel(const size_t ndim,
                                                const size_t total_in_nr,
                                                const T* const src,
                                                T* const dst,
                                                ShapeParams params) {
    KERN_FOR(in_index, total_in_nr) {
        size_t out_index = 0;
        size_t in_index_tmp = in_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = in_index_tmp / params.src_stride[dim];
            in_index_tmp -= dim_index * params.src_stride[dim].divisor();
            // same clamp as the forward kernel, mapping into the smaller dst
            dim_index = (size_t)llmin(
                    (long long)params.dst_shape[dim] - 1,
                    llmax((long long)dim_index -
                                  (long long)params.offsets[dim * 2],
                          (long long)0));
            out_index += dim_index * params.dst_stride[dim].divisor();
        }
        atomic_add(&dst[out_index], src[in_index]);
    }
}

/**
 * REFLECT backward padding kernel: mirrored positions fold their gradients
 * back onto the source element via atomic_add. dst must be zero-initialized
 * by the caller (see padding_backward_proxy).
 */
template <typename T>
__global__ void paddingReflectBackward_kernel(const size_t ndim,
                                              const size_t total_in_nr,
                                              const T* const src, T* const dst,
                                              ShapeParams params) {
    KERN_FOR(in_index, total_in_nr) {
        size_t out_index = 0;
        size_t in_index_tmp = in_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            long long dim_index = in_index_tmp / params.src_stride[dim];
            in_index_tmp -= dim_index * params.src_stride[dim].divisor();
            // same reflection as the forward kernel, into the smaller dst
            dim_index -= (long long)params.offsets[dim * 2];
            dim_index = llmax(dim_index, -dim_index);
            dim_index = llmin(dim_index, 2 * (long long)params.dst_shape[dim] -
                                                 dim_index - 2);
            out_index += size_t(dim_index) *
                         (size_t)params.dst_stride[dim].divisor();
        }
        atomic_add(&dst[out_index], src[in_index]);
    }
}

/**
 * Host-side launcher for forward padding: packs shapes/strides/offsets into
 * a by-value ShapeParams, selects the kernel for \p mode and launches it on
 * \p stream with one thread per output element.
 */
template <typename T>
void padding_forward_proxy(const TensorND& src, const TensorND& dst,
                           size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
                           const float_t padding_val, cudaStream_t stream) {
    ShapeParams params;
    for (size_t i = 0; i < src.layout.ndim; ++i) {
        params.src_shape[i] = src.layout.shape[i];
        params.dst_shape[i] = dst.layout.shape[i];
        params.src_stride[i] = src.layout.stride[i];
        params.dst_stride[i] = dst.layout.stride[i];
        params.offsets[i * 2] = offsets[i * 2];
        params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
    }

    // select the kernel matching the padding mode
    void (*fwd_kern)(const size_t, const size_t, const T* const, T* const,
                     ShapeParams, const float_t);
    switch (mode) {
        case param_enumv::Padding::PaddingMode::CONSTANT:
            fwd_kern = paddingConst_kernel<T>;
            break;
        case param_enumv::Padding::PaddingMode::REPLICATE:
            fwd_kern = paddingReplicate_kernel<T>;
            break;
        case param_enumv::Padding::PaddingMode::REFLECT:
            fwd_kern = paddingReflect_kernel<T>;
            break;
        default:
            megdnn_assert(false, "invalid padding mode");
    }

    size_t total_nr = dst.layout.total_nr_elems();

    // one thread per output element; block size tuned per kernel
    uint32_t nr_threads = query_blocksize_for_kernel(fwd_kern);
    dim3 threads(nr_threads);
    dim3 blocks(DIVUP(total_nr, nr_threads));
    fwd_kern<<<blocks, threads, 0, stream>>>(src.layout.ndim, total_nr,
                                             src.ptr<T>(), dst.ptr<T>(), params,
                                             padding_val);
    after_kernel_launch();
}

/**
 * Host-side launcher for backward padding: zero-fills the input-gradient
 * buffer (the kernels accumulate into it), selects the kernel for \p mode
 * and launches it on \p stream with one thread per output-gradient element.
 */
template <typename T>
void padding_backward_proxy(const TensorND& src, const TensorND& dst,
                            size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
                            cudaStream_t stream) {
    ShapeParams params;

    for (size_t i = 0; i < src.layout.ndim; ++i) {
        params.src_shape[i] = src.layout.shape[i];
        params.dst_shape[i] = dst.layout.shape[i];
        params.src_stride[i] = src.layout.stride[i];
        params.dst_stride[i] = dst.layout.stride[i];
        params.offsets[i * 2] = offsets[i * 2];
        params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
    }

    // fix: use the stream-ordered cudaMemsetAsync on the same stream as the
    // kernel launch (plain cudaMemset runs on the default stream and is not
    // ordered w.r.t. `stream`, risking a race with the accumulation below),
    // and check the return code like the rest of the cuda backend
    cuda_check(cudaMemsetAsync(dst.raw_ptr, 0, dst.layout.access_bytes(),
                               stream));

    void (*bwd_kern)(const size_t, const size_t, const T* const, T* const,
                     ShapeParams);

    switch (mode) {
        case param_enumv::Padding::PaddingMode::CONSTANT:
            bwd_kern = paddingConstBackward_kernel<T>;
            break;
        case param_enumv::Padding::PaddingMode::REPLICATE:
            bwd_kern = paddingReplicateBackward_kernel<T>;
            break;
        case param_enumv::Padding::PaddingMode::REFLECT:
            bwd_kern = paddingReflectBackward_kernel<T>;
            break;
        default:
            megdnn_assert(false, "invalid padding mode");
    }
    size_t total_nr = src.layout.total_nr_elems();
    uint32_t nr_threads = query_blocksize_for_kernel(bwd_kern);
    dim3 threads(nr_threads);
    dim3 blocks(DIVUP(total_nr, nr_threads));
    bwd_kern<<<blocks, threads, 0, stream>>>(
            src.layout.ndim, total_nr, src.ptr<T>(), dst.ptr<T>(), params);
    after_kernel_launch();
}

// Explicit instantiations: forward covers all computing dtypes, backward
// only the float dtypes (gradients are accumulated in floating point).
#define INST(T)                                                  \
    template void padding_forward_proxy<T>(                      \
            const TensorND& src, const TensorND& dst,            \
            size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,  \
            const float_t padding_val, cudaStream_t stream);
#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
#undef INST

#define INST(T)                                                  \
    template void padding_backward_proxy<T>(                     \
            const TensorND& src, const TensorND& dst,            \
            size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,  \
            cudaStream_t stream);
#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
#undef INST

} // namespace padding
} // namespace cuda
} // namespace megdnn

+ 36
- 0
dnn/src/cuda/padding/padding.cuh View File

@@ -0,0 +1,36 @@
/**
* \file dnn/src/cuda/padding/padding.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "cuda_runtime.h"
#include "megdnn/basic_types.h"
#include "src/common/opr_param_defs_enumv.cuh"
#include "src/cuda/utils.cuh"

namespace megdnn {
namespace cuda {
namespace padding {

//! launch the forward padding kernel for dtype T on the given stream;
//! offsets is the flattened per-dim {front, back} padding array
template <typename T>
void padding_forward_proxy(const TensorND& src, const TensorND& dst,
                           size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
                           const float_t padding_val, cudaStream_t stream);

//! launch the backward padding kernel for dtype T; src is the gradient of
//! the padded output, dst receives the gradient of the original input
template <typename T>
void padding_backward_proxy(const TensorND& src, const TensorND& dst,
                            size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
                            cudaStream_t stream);

} // namespace padding
} // namespace cuda
} // namespace megdnn

// vim: syntax=cuda.doxygen

+ 1
- 1
dnn/src/cuda/utils.cuh View File

@@ -78,7 +78,7 @@
#if MEGDNN_THREADS_512
#define NR_THREADS 512
#define NR_THREADS_X 32
#define NR_THREADS_Y 16
#define NR_THREADS_Y 16
#else
#define NR_THREADS 1024
#define NR_THREADS_X 32


+ 1
- 1
dnn/src/naive/argmxx/opr_impl.cpp View File

@@ -16,7 +16,7 @@

#include <numeric>

namespace megdnn {
namespace {

using namespace megdnn;



+ 1
- 0
dnn/src/naive/handle.cpp View File

@@ -14,6 +14,7 @@

#include "src/common/handle_impl.h"

#include "src/naive/padding/opr_impl.h"
#include "src/naive/adaptive_pooling/opr_impl.h"
#include "src/naive/add_update/opr_impl.h"
#include "src/naive/argmxx/opr_impl.h"


+ 281
- 0
dnn/src/naive/padding/opr_impl.cpp View File

@@ -0,0 +1,281 @@
/**
* \file dnn/src/naive/padding/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/naive/padding/opr_impl.h"
#include <math.h>
#include <stdio.h>
#include "src/common/utils.h"
#include "src/naive/handle.h"
namespace megdnn {
namespace naive {

//! Plain-old-data bundle of shapes, strides and flattened {front, back}
//! offsets, passed by value into the kernel helpers below.
struct ShapeParams {
    size_t src_shape[MEGDNN_MAX_NDIM];
    size_t dst_shape[MEGDNN_MAX_NDIM];
    ptrdiff_t src_stride[MEGDNN_MAX_NDIM];
    ptrdiff_t dst_stride[MEGDNN_MAX_NDIM];
    size_t offsets[MEGDNN_MAX_NDIM * 2];
};

/**
 * CONSTANT forward padding on CPU: for each output element, decompose its
 * linear index dim by dim via dst strides; copy from src inside the padded
 * region, otherwise write padding_val.
 */
template <typename T>
void exec_const_internal(const size_t ndim, const size_t total_out_nr,
                         const T* const src, T* const dst, ShapeParams params,
                         const T padding_val) MEGDNN_NOEXCEPT {
    rep(out_index, total_out_nr) {
        bool in_src_valid_area = true;
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = out_index_tmp / params.dst_stride[dim];
            out_index_tmp -= dim_index * params.dst_stride[dim];
            in_src_valid_area &= (dim_index >= params.offsets[dim * 2] &&
                                  dim_index < params.offsets[dim * 2] +
                                                      params.src_shape[dim]);
            // may wrap when outside the valid area; in_index is only read
            // when in_src_valid_area holds
            in_index += (dim_index - params.offsets[dim * 2]) *
                        params.src_stride[dim];
        }

        if (in_src_valid_area) {
            dst[out_index] = src[in_index];
        } else {
            dst[out_index] = padding_val;
        }
    }
}

/**
 * REPLICATE forward padding on CPU: out-of-range coordinates are clamped to
 * the nearest edge element of src.
 */
template <typename T>
void exec_replicate_internal(const size_t ndim, const size_t total_out_nr,
                             const T* const src, T* const dst,
                             ShapeParams params) MEGDNN_NOEXCEPT {
    rep(out_index, total_out_nr) {
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = out_index_tmp / params.dst_stride[dim];
            out_index_tmp -= dim_index * params.dst_stride[dim];
            // clamp (dim_index - front_offset) into [0, src_shape - 1]
            dim_index = (size_t)std::min(
                    (long long)params.src_shape[dim] - 1,
                    std::max((long long)dim_index -
                                     (long long)params.offsets[dim * 2],
                             (long long)0));
            in_index += dim_index * params.src_stride[dim];
        }
        dst[out_index] = src[in_index];
    }
}

/**
 * REFLECT forward padding on CPU: coordinates are mirrored around the first
 * and last element without repeating the edge. For d = dim_index - front:
 * max(d, -d) reflects the front side, min(d, 2*shape - d - 2) the back side.
 */
template <typename T>
void exec_reflect_internal(const size_t ndim, const size_t total_out_nr,
                           const T* const src, T* const dst,
                           ShapeParams params) MEGDNN_NOEXCEPT {
    rep(out_index, total_out_nr) {
        size_t in_index = 0;
        size_t out_index_tmp = out_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            // signed arithmetic: d can go negative before reflection
            long long dim_index = out_index_tmp / params.dst_stride[dim];
            out_index_tmp -= dim_index * params.dst_stride[dim];
            dim_index -= (long long)params.offsets[dim * 2];
            dim_index = std::max(dim_index, -dim_index);
            dim_index =
                    std::min(dim_index, 2 * (long long)params.src_shape[dim] -
                                                dim_index - 2);
            in_index += size_t(dim_index) * (size_t)params.src_stride[dim];
        }
        dst[out_index] = src[in_index];
    }
}

/**
 * CONSTANT backward padding on CPU: src is the gradient of the padded
 * output, dst the gradient of the original input. Interior gradients are
 * copied straight through; padding positions contribute nothing.
 */
template <typename T>
void backward_exec_const_internal(const size_t ndim, const size_t total_in_nr,
                                  const T* const src, T* const dst,
                                  ShapeParams params) MEGDNN_NOEXCEPT {
    rep(in_index, total_in_nr) {
        bool in_dst_valid_area = true;
        size_t out_index = 0;
        size_t in_index_tmp = in_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = in_index_tmp / params.src_stride[dim];
            in_index_tmp -= dim_index * params.src_stride[dim];
            in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] &&
                                  dim_index < params.offsets[dim * 2] +
                                                      params.dst_shape[dim]);
            // may wrap when outside the valid area; out_index is only read
            // when in_dst_valid_area holds
            out_index += (dim_index - params.offsets[dim * 2]) *
                         params.dst_stride[dim];
        }
        if (in_dst_valid_area) {
            dst[out_index] = src[in_index];
        }
    }
}

/**
 * REPLICATE backward padding on CPU: several padded positions clamp to the
 * same input element, so gradients are accumulated with +=. The caller must
 * zero-fill dst beforehand (see PaddingBackwardImpl::exec).
 */
template <typename T>
void backward_exec_replicate_internal(const size_t ndim,
                                      const size_t total_in_nr,
                                      const T* const src, T* const dst,
                                      ShapeParams params) MEGDNN_NOEXCEPT {
    rep(in_index, total_in_nr) {
        size_t out_index = 0;
        size_t in_index_tmp = in_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            size_t dim_index = in_index_tmp / params.src_stride[dim];
            in_index_tmp -= dim_index * params.src_stride[dim];
            // same clamp as the forward kernel, mapping into the smaller dst
            dim_index = (size_t)std::min(
                    (long long)params.dst_shape[dim] - 1,
                    std::max((long long)dim_index -
                                     (long long)params.offsets[dim * 2],
                             (long long)0));
            out_index += dim_index * params.dst_stride[dim];
        }
        dst[out_index] += src[in_index];
    }
}

/**
 * REFLECT backward padding on CPU: mirrored positions fold their gradients
 * back onto the source element with +=. The caller must zero-fill dst
 * beforehand (see PaddingBackwardImpl::exec).
 */
template <typename T>
void backward_exec_reflect_internal(const size_t ndim, const size_t total_in_nr,
                                    const T* const src, T* const dst,
                                    ShapeParams params) MEGDNN_NOEXCEPT {
    rep(in_index, total_in_nr) {
        size_t out_index = 0;
        size_t in_index_tmp = in_index;
        for (size_t dim = 0; dim <= ndim - 1; ++dim) {
            long long dim_index = in_index_tmp / params.src_stride[dim];
            in_index_tmp -= dim_index * params.src_stride[dim];
            // same reflection as the forward kernel, into the smaller dst
            dim_index -= (long long)params.offsets[dim * 2];
            dim_index = std::max(dim_index, -dim_index);
            dim_index =
                    std::min(dim_index, 2 * (long long)params.dst_shape[dim] -
                                                dim_index - 2);
            out_index += size_t(dim_index) * (size_t)params.dst_stride[dim];
        }
        dst[out_index] += src[in_index];
    }
}

/**
 * Naive forward padding entry point: validate, pack ShapeParams, then
 * dispatch the kernel matching (padding mode, dtype) through the handle's
 * CPU-kernel dispatcher.
 */
void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    forward_check_exec(src.layout, dst.layout);
    SmallVector<size_t> offsets(get_offsets());
    ShapeParams params;
    for (size_t i = 0; i < src.layout.ndim; ++i) {
        params.src_shape[i] = src.layout.shape[i];
        params.dst_shape[i] = dst.layout.shape[i];
        params.src_stride[i] = src.layout.stride[i];
        params.dst_stride[i] = dst.layout.stride[i];
        params.offsets[i * 2] = offsets[i * 2];
        params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
    }

    size_t n = dst.layout.total_nr_elems();
    switch (param().padding_mode) {
        case param::Padding::PaddingMode::CONSTANT:
            // CONSTANT also forwards the fill value, cast to the ctype
#define cb(DType)                                                        \
    if (src.layout.dtype == DType()) {                                   \
        using T = typename DTypeTrait<DType>::ctype;                     \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_const_internal<T>(             \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params,  \
                T(param().padding_val)));                                \
        return;                                                          \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REPLICATE:
#define cb(DType)                                                          \
    if (src.layout.dtype == DType()) {                                     \
        using T = typename DTypeTrait<DType>::ctype;                       \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_replicate_internal<T>(           \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params));  \
        return;                                                            \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REFLECT:
#define cb(DType)                                                          \
    if (src.layout.dtype == DType()) {                                     \
        using T = typename DTypeTrait<DType>::ctype;                       \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_reflect_internal<T>(             \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params));  \
        return;                                                            \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
            break;
        default:
            megdnn_assert(false, "unsupported padding mode!");
    }
}

//! Naive CPU padding gradient: accumulate the padded-output gradient `src`
//! back into the input-shaped gradient `dst`.
//! Note the kernels are only instantiated for float dtypes
//! (MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT), unlike the forward pass.
void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    backward_check_exec(src.layout, dst.layout);
    SmallVector<size_t> offsets(get_offsets());
    ShapeParams params;
    for (size_t i = 0; i < src.layout.ndim; ++i) {
        params.src_shape[i] = src.layout.shape[i];
        params.dst_shape[i] = dst.layout.shape[i];
        params.src_stride[i] = src.layout.stride[i];
        params.dst_stride[i] = dst.layout.stride[i];
        params.offsets[i * 2] = offsets[i * 2];
        params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
    }
    // backward kernels iterate over every element of src (the padded grad)
    size_t n = src.layout.total_nr_elems();

    // Zero-fill dst first: replicate/reflect kernels use `+=` accumulation
    // (multiple src cells can map to one dst cell, see L1038 above).
    // NOTE(review): this memset runs on the caller thread while the kernels go
    // through MEGDNN_DISPATCH_CPU_KERN_OPR — presumably the dispatch queue
    // preserves ordering relative to the caller here; confirm no async hazard.
    memset(dst.raw_ptr, 0, dst.layout.access_bytes());

    switch (param().padding_mode) {
        case param::Padding::PaddingMode::CONSTANT:
#define cb(DType)                                                         \
    if (src.layout.dtype == DType()) {                                    \
        using T = typename DTypeTrait<DType>::ctype;                      \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_const_internal<T>(     \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                           \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REPLICATE:
#define cb(DType)                                                         \
    if (src.layout.dtype == DType()) {                                    \
        using T = typename DTypeTrait<DType>::ctype;                      \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_replicate_internal<T>( \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                           \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REFLECT:
#define cb(DType)                                                         \
    if (src.layout.dtype == DType()) {                                    \
        using T = typename DTypeTrait<DType>::ctype;                      \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_reflect_internal<T>(   \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                           \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
            break;
        default:
            megdnn_assert(false, "unsupported padding mode!");
    }
}

//! The naive forward kernel writes dst in place and needs no scratch memory.
size_t PaddingForwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}

//! The naive backward kernel accumulates directly into dst; no workspace.
size_t PaddingBackwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}
} // namespace naive
} // namespace megdnn

+ 30
- 0
dnn/src/naive/padding/opr_impl.h View File

@@ -0,0 +1,30 @@
/**
* \file dnn/src/naive/padding/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"

namespace megdnn{
namespace naive{
//! naive (single-threaded CPU) implementation of the forward padding opr
class PaddingForwardImpl : public PaddingForward {
    // inherit the base-class constructors
    using PaddingForward::PaddingForward;

public:
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
    size_t get_workspace_in_bytes(
            const TensorLayout& src, const TensorLayout& dst) override;
};

//! naive (single-threaded CPU) implementation of the padding gradient opr
class PaddingBackwardImpl : public PaddingBackward {
    // inherit the base-class constructors
    using PaddingBackward::PaddingBackward;

public:
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
    size_t get_workspace_in_bytes(
            const TensorLayout& src, const TensorLayout& dst) override;
};
}
}

+ 441
- 0
dnn/test/common/padding.h View File

@@ -0,0 +1,441 @@
/**
* \file dnn/test/common/padding.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
*/
#pragma once
#include <cstddef>
#include <iostream>
#include "megdnn/basic_types.h"
#include "megdnn/opr_param_defs.h"

namespace megdnn {
namespace test {
namespace padding {

//! one padding test case: the opr param plus the src/dst tensor shapes
struct TestArg {
    param::Padding param;  //!< padding mode, offsets and fill value
    TensorShape src;       //!< input shape
    TensorShape dst;       //!< expected output shape (src + offsets per dim)

    TestArg(param::Padding _param, TensorShape _src, TensorShape _dst)
            : param(_param), src(_src), dst(_dst) {}
};

inline std::vector<TestArg> get_args() {
size_t src_shape_dim0 = 5;
size_t src_shape_dim1 = 5;
size_t src_shape_dim2 = 5;
size_t src_shape_dim3 = 5;
size_t src_shape_dim4 = 5;
size_t src_shape_dim5 = 5;
size_t src_shape_dim6 = 5;
size_t dst_shape_dim0 = 8;
size_t dst_shape_dim1 = 8;
size_t dst_shape_dim2 = 8;
size_t dst_shape_dim3 = 8;
size_t dst_shape_dim4 = 8;
size_t dst_shape_dim5 = 8;
size_t dst_shape_dim6 = 8;

std::vector<TestArg> args;

param::Padding cur_param;
cur_param.front_offset_dim0 = 0;
cur_param.front_offset_dim1 = 0;
cur_param.front_offset_dim2 = 0;
cur_param.front_offset_dim3 = 0;
cur_param.front_offset_dim4 = 0;
cur_param.front_offset_dim5 = 0;
cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim0 = 0;
cur_param.back_offset_dim1 = 0;
cur_param.back_offset_dim2 = 0;
cur_param.back_offset_dim3 = 0;
cur_param.back_offset_dim4 = 0;
cur_param.back_offset_dim5 = 0;
cur_param.back_offset_dim6 = 0;

cur_param.padding_val = 2;

cur_param.front_offset_dim0 = 1;
cur_param.back_offset_dim0 = 2;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});

cur_param.front_offset_dim1 = 2;
cur_param.back_offset_dim1 = 1;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});

cur_param.front_offset_dim2 = 1;
cur_param.back_offset_dim2 = 2;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});

cur_param.front_offset_dim3 = 0;
cur_param.back_offset_dim3 = 3;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});

cur_param.front_offset_dim4 = 3;
cur_param.back_offset_dim4 = 0;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});

cur_param.front_offset_dim5 = 1;
cur_param.back_offset_dim5 = 2;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});

cur_param.front_offset_dim6 = 0;
cur_param.front_offset_dim6 = 3;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});

return args;

}

inline std::vector<TestArg> get_args_backward() {
size_t src_shape_dim0 = 8;
size_t src_shape_dim1 = 8;
size_t src_shape_dim2 = 8;
size_t src_shape_dim3 = 8;
size_t src_shape_dim4 = 8;
size_t src_shape_dim5 = 8;
size_t src_shape_dim6 = 8;
size_t dst_shape_dim0 = 5;
size_t dst_shape_dim1 = 5;
size_t dst_shape_dim2 = 5;
size_t dst_shape_dim3 = 5;
size_t dst_shape_dim4 = 5;
size_t dst_shape_dim5 = 5;
size_t dst_shape_dim6 = 5;

std::vector<TestArg> args;

param::Padding cur_param;
cur_param.front_offset_dim0 = 0;
cur_param.front_offset_dim1 = 0;
cur_param.front_offset_dim2 = 0;
cur_param.front_offset_dim3 = 0;
cur_param.front_offset_dim4 = 0;
cur_param.front_offset_dim5 = 0;
cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim0 = 0;
cur_param.back_offset_dim1 = 0;
cur_param.back_offset_dim2 = 0;
cur_param.back_offset_dim3 = 0;
cur_param.back_offset_dim4 = 0;
cur_param.back_offset_dim5 = 0;
cur_param.back_offset_dim6 = 0;

cur_param.padding_val = 2;

cur_param.front_offset_dim0 = 1;
cur_param.back_offset_dim0 = 2;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0},
TensorShape{dst_shape_dim0});

cur_param.front_offset_dim1 = 2;
cur_param.back_offset_dim1 = 1;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1},
TensorShape{dst_shape_dim0, dst_shape_dim1});

cur_param.front_offset_dim2 = 1;
cur_param.back_offset_dim2 = 2;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2});

cur_param.front_offset_dim3 = 0;
cur_param.back_offset_dim3 = 3;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(cur_param,
TensorShape{src_shape_dim0, src_shape_dim1,
src_shape_dim2, src_shape_dim3},
TensorShape{dst_shape_dim0, dst_shape_dim1,
dst_shape_dim2, dst_shape_dim3});

cur_param.front_offset_dim4 = 3;
cur_param.back_offset_dim4 =0;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4});

cur_param.front_offset_dim5 = 1;
cur_param.back_offset_dim5 = 2;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5});

cur_param.front_offset_dim6 = 0;
cur_param.back_offset_dim6 = 3;

cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});

cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});

cur_param.padding_mode = param::Padding::PaddingMode::REFLECT;
args.emplace_back(
cur_param,
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2,
src_shape_dim3, src_shape_dim4, src_shape_dim5,
src_shape_dim6},
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2,
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5,
dst_shape_dim6});

return args;

}

} // namespace padding
} // namespace test
} // namespace megdnn

+ 219
- 0
dnn/test/cuda/padding.cpp View File

@@ -0,0 +1,219 @@
/**
* \file dnn/test/cuda/padding.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "test/cuda/benchmark.h"
#include "test/common/padding.h"
#include "megdnn/oprs.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/cuda/fixture.h"

using namespace megdnn;
using namespace test;

// Forward padding on CUDA over all generated shape/mode combinations,
// cross-checked against the naive backend with random int8 data.
TEST_F(CUDA, PADDING) {
    std::vector<padding::TestArg> args = padding::get_args();
    Checker<Padding> checker(handle_cuda());
    UniformIntNonZeroRNG rng(1, 9);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .execs({arg.src, arg.dst});
    }
}

// Padding gradient on CUDA; uses float32 because the backward kernels are
// only instantiated for float dtypes (MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT).
TEST_F(CUDA, PADDING_BACKWARD) {
    std::vector<padding::TestArg> args = padding::get_args_backward();
    Checker<PaddingBackward> checker(handle_cuda());
    UniformFloatRNG rng(1, 9);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execs({arg.src, arg.dst});
    }
}

// 1-D REFLECT padding with a hand-computed expected tensor: {1..5} padded by
// 2 in front and 3 behind mirrors around the edge elements (no repetition of
// the border itself). padding_val is set but unused in REFLECT mode.
TEST_F(CUDA, PADDING_REFLECT) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 3;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({5}, dtype::Int8(), {1, 2, 3, 4, 5}), {}},
            Testcase{{},
                     TensorValue({10}, dtype::Int8(),
                                 {3, 2, 1, 2, 3, 4, 5, 4, 3, 2})});
}

// 2-D REFLECT padding: a 3x3 tensor mirrored by 1 row on each side and
// 2 columns on each side, against a hand-computed 5x7 expected tensor.
TEST_F(CUDA, PADDING_REFLECT2) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 1;
    param.front_offset_dim1 = 2;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 1;
    param.back_offset_dim1 = 2;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({3, 3}, dtype::Int8(),
                                 {3, 5, 1, 3, 6, 1, 4, 7, 9}),
                     {}},
            Testcase{{},
                     TensorValue({5, 7}, dtype::Int8(),
                                 {1, 6, 3, 6, 1, 6, 3, 1, 5, 3, 5, 1,
                                  5, 3, 1, 6, 3, 6, 1, 6, 3, 9, 7, 4,
                                  7, 9, 7, 4, 1, 6, 3, 6, 1, 6, 3})});
}

// 1-D REPLICATE padding: border elements are repeated into the pad region
// (1 in front, 2 behind). padding_val is set but unused in REPLICATE mode.
TEST_F(CUDA, PADDING_REPLICATE) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 1;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 2;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({9}, dtype::Int8(),
                                 {1, 2, 3, 4, 5, 6, 7, 8, 9}),
                     {}},
            Testcase{{},
                     TensorValue({12}, dtype::Int8(),
                                 {1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9})});
}

// 2-D REPLICATE padding: a 3x3 tensor with 2 rows replicated in front,
// 1 column in front and 3 behind, against a hand-computed 5x7 expected tensor.
TEST_F(CUDA, PADDING_REPLICATE2) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 1;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 0;
    param.back_offset_dim1 = 3;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({3, 3}, dtype::Int8(),
                                 {1, 2, 3, 4, 5, 6, 7, 8, 9}),
                     {}},
            Testcase{{},
                     TensorValue({5, 7}, dtype::Int8(),
                                 {1, 1, 2, 3, 3, 3, 3, 1, 1, 2, 3, 3,
                                  3, 3, 1, 1, 2, 3, 3, 3, 3, 4, 4, 5,
                                  6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9})});
}

#if MEGDNN_WITH_BENCHMARK

// Bandwidth benchmark for the three padding modes on int8 data.
// Fix: the MEGDNN_WITH_BENCHMARK guard was commented out, so this benchmark
// was compiled and executed in every ordinary test run; re-enable it so it
// only builds when benchmarks are requested.
TEST_F(CUDA, BENCHMARK_PADDING_CONSTANT) {
    using Param = Padding::Param;

    // run one parameter set over all shapes, reporting time and bandwidth
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        CUBenchmarker<PaddingForward> benchmarker(handle_cuda());
        benchmarker.set_param(param);
        benchmarker.set_dtype(0, dtype::Int8()).set_dtype(1, dtype::Int8());

        for (auto&& shape : shapes) {
            // one read + one write per element -> x2; 1e-6 scales bytes/ms to GB/s
            double memaccess = double(TensorLayout(shape, dtype::Int8())
                                              .span()
                                              .dist_byte()) *
                               2e-6;
            auto time_ms = benchmarker.execs({shape, {}});
            printf("execute %s, time %.4f ms, %.4f GB/s\n",
                   shape.to_string().c_str(), time_ms, memaccess / time_ms);
        }
    };

    printf("mode -> constant; dtype -> int8\n");
    {
        TensorShapeArray shapes = {
                {16, 3, 736, 1280},
        };
        Param param;
        param.padding_mode = param::Padding::PaddingMode::CONSTANT;
        param.front_offset_dim1 = 1;
        run(shapes, param);
    }
    printf("mode -> replicate; dtype -> int8\n");
    {
        TensorShapeArray shapes = {
                {16, 3, 736, 1280},
        };
        Param param;
        param.padding_mode = param::Padding::PaddingMode::REPLICATE;
        param.front_offset_dim1 = 1;
        run(shapes, param);
    }
    printf("mode -> reflect; dtype -> int8\n");
    {
        TensorShapeArray shapes = {
                {16, 3, 736, 1280},
        };
        Param param;
        param.padding_mode = param::Padding::PaddingMode::REFLECT;
        param.front_offset_dim1 = 1;
        run(shapes, param);
    }
}

#endif  // MEGDNN_WITH_BENCHMARK

+ 132
- 0
dnn/test/naive/padding.cpp View File

@@ -0,0 +1,132 @@
/**
* \file dnn/test/naive/padding.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "test/common/padding.h"
#include "megdnn/dtype.h"
#include "megdnn/oprs.h"
#include "test/common/checker.h"
#include "test/naive/fixture.h"

namespace megdnn{
namespace test{

// Smoke-run the naive forward padding over all generated shape/mode
// combinations with float32 data (checked against itself for crashes/shape).
TEST_F(NAIVE, PADDING) {
    std::vector<padding::TestArg> args = padding::get_args();
    Checker<Padding> checker(handle());
    for(auto&& arg : args){
        checker.set_param(arg.param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execs({arg.src, arg.dst});
    }
}

// CONSTANT mode: a 1x1 tensor padded to 5x5; every new cell takes
// padding_val (10), only the cell at row 2 / col 1 keeps the original 1.
TEST_F(NAIVE, PADDING_CONSTANT) {
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::CONSTANT;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 1;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 2;
    param.back_offset_dim1 = 3;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({1,1}, dtype::Float32(), {1}), {}},
            Testcase{{},TensorValue({5,5}, dtype::Float32(), {10,10,10,10,10,10,10,10,10,10,10,1,10,10,10,10,10,10,10,10,10,10,10,10,10})}
    );
}

// REFLECT mode, 1-D: {1..5} mirrored around its edges (2 front, 3 back);
// padding_val is set but unused in this mode.
TEST_F(NAIVE, PADDING_REFLECT) {
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 3;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({5}, dtype::Float32(), {1,2,3,4,5}), {}},
            Testcase{{},TensorValue({10}, dtype::Float32(), {3,2,1,2,3,4,5,4,3,2})}
    );
}

// REPLICATE mode, 1-D: border values are repeated into the pad region
// (1 front, 2 back); padding_val is set but unused in this mode.
TEST_F(NAIVE, PADDING_REPLICATE) {
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 1;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 2;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({9}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}},
            Testcase{{},TensorValue({12}, dtype::Float32(), {1,1,2,3,4,5,6,7,8,9,9,9})}
    );
}

// REPLICATE mode, 2-D: a 3x3 tensor with 2 rows replicated in front,
// 1 column in front and 3 behind, expected as a hand-computed 5x7 tensor.
TEST_F(NAIVE, PADDING_REPLICATE2) {
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 1;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 0;
    param.back_offset_dim1 = 3;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({3,3}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}},
            Testcase{{},TensorValue({5,7}, dtype::Float32(), {1,1,2,3,3,3,3,1,1,2,3,3,3,3,1,1,2,3,3,3,3,4,4,5,6,6,6,6,7,7,8,9,9,9,9})}
    );
}

}
}

+ 5
- 0
src/opr/impl/misc.oprdecl View File

@@ -67,4 +67,9 @@ decl_opr('NvOf',
inputs=['src'], params='NvOf',
desc='opr Implements NVIDIA Optical Flow SDK.')

decl_opr('Padding',
inputs=['src'],
params='Padding',
desc='tensor padding opr.')

# vim: ft=python

+ 1
- 1
src/opr/impl/misc.sereg.h View File

@@ -68,7 +68,7 @@ namespace opr {
//! current cumsum version
using CumsumV1 = opr::Cumsum;
MGB_SEREG_OPR(CumsumV1, 1);
#if MGB_CUDA
MGB_SEREG_OPR(NvOf, 1);
#endif


+ 26
- 0
src/opr/impl/tensor_manip.cpp View File

@@ -1608,4 +1608,30 @@ void RelayoutFormat::init_output_format() {
// f}}}
//


/* f{{{ ======================= PaddingForward ======================= */

MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingForward);
// single-input megbrain wrapper around megdnn Padding, opr name "padding"
MEGDNN_OPR_INIT1(PaddingForward, "padding")

#if MGB_ENABLE_GRAD
MGB_IMPL_OPR_GRAD(PaddingForward) {
    mgb_assert(opr.input().size() == 1);
    if (wrt_idx == 0) {
        // the gradient of padding crops out_grad back to the input region;
        // input(0) is passed so PaddingBackward knows the target shape
        SymbolVar grad = PaddingBackward::make(out_grad[0], opr.input(0), opr.param());
        return grad.node();
    } else
        return InvalidGrad::make(opr, wrt_idx);
}
#endif

// f}}}

/* f{{{ ======================= PaddingBackward ======================= */

MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingBackward);
// two-input wrapper (grad tensor + shape provider); output takes the shape
// of input 1 per the MEGDNN_OPR_INIT2 arguments
MEGDNN_OPR_INIT2(PaddingBackward, "padding_backward", 1, false);

// f}}}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 33
- 0
src/opr/impl/tensor_manip.sereg.h View File

@@ -24,6 +24,35 @@ MGB_SEREG_MODIFY_SUBTENSOR_OPR(IncrSubtensor);
namespace mgb {

namespace serialization {

//! deserializer hook: rebuild a single-input Padding opr from its param
template <>
struct OprMaker<opr::Padding, 1> {
    using Opr = opr::Padding;
    using Param = Opr::Param;
    static cg::OperatorNodeBase* make(
            const Param& param, const cg::VarNodeArray& inputs,
            ComputingGraph& graph, const OperatorNodeConfig& config) {
        MGB_MARK_USED_VAR(graph);
        // reject malformed serialized graphs with a wrong input count
        if (inputs.size() != 1)
            return nullptr;
        return Opr::make(inputs[0], param, config).node()->owner_opr();
    }
};

//! deserializer hook: rebuild a two-input PaddingBackward opr from its param
template <>
struct OprMaker<opr::PaddingBackward, 2> {
    using Opr = opr::PaddingBackward;
    using Param = Opr::Param;
    static cg::OperatorNodeBase* make(
            const Param& param, const cg::VarNodeArray& inputs,
            ComputingGraph& graph, const OperatorNodeConfig& config) {
        MGB_MARK_USED_VAR(graph);
        // reject malformed serialized graphs with a wrong input count
        if (inputs.size() != 2)
            return nullptr;
        return Opr::make(inputs[0], inputs[1], param, config)
                .node()
                ->owner_opr();
    }
};

template<>
struct OprMaker<opr::Concat, 0>: public OprMakerVariadic<opr::Concat>{};

@@ -185,6 +214,10 @@ namespace opr {
using RelayoutFormatV1 = opr::RelayoutFormat;
MGB_SEREG_OPR(RelayoutFormatV1, 1);

MGB_SEREG_OPR(Padding, 1);

MGB_SEREG_OPR(PaddingBackward, 2);
} // namespace opr

} // namespace mgb


+ 20
- 0
src/opr/include/megbrain/opr/tensor_manip.h View File

@@ -635,6 +635,26 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat,
const OperatorNodeConfig &config = {});
void init_output_format() override final;
};

/*!
* \brief padding the src tensor to dst tensor
*/
MGB_DEFINE_OPR_CLASS(PaddingForward,
                     intl::MegDNNOprWrapperFwd<megdnn::PaddingForward>) // {
public:
    //! \param src the tensor to be padded; mode/offsets come from \p param
    PaddingForward(
            VarNode* src, const Param& param, const OperatorNodeConfig& config);
    static SymbolVar make(
            SymbolVar src, const Param& param = {},
            const OperatorNodeConfig& config = {});
};
using Padding = PaddingForward;

/*!
* \brief padding backward
*/
MGB_DEFINE_OPR_CLASS(PaddingBackward,
                     intl::MegDNNOprWrapperBwd<megdnn::PaddingBackward>) // {
public:
    //! \param src out-gradient of the forward opr
    //! \param in_for_shape forward input, used only to provide the out shape
    PaddingBackward(
            VarNode* src, VarNode* in_for_shape, const Param& param,
            const OperatorNodeConfig& config);
    static SymbolVar make(
            SymbolVar src, SymbolVar in_for_shape, const Param& param = {},
            const OperatorNodeConfig& config = {});
};

} // opr
} // mgb



+ 68
- 0
src/opr/test/dnn/padding.cpp View File

@@ -0,0 +1,68 @@
#include "./legacy_checker.h"
#include "megbrain/comp_node_env.h"

#include "megbrain/gopt/inference.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megdnn/dtype.h"
#include "megdnn/oprs/base.h"

#include <gmock/gmock.h>

#include <cmath>
#include <memory>
#include <random>

using namespace std;
using namespace mgb;

namespace {

// Round-trip test: dump a graph containing one PaddingForward opr to a file,
// then load it back and check exactly one output var survives.
TEST(TestOprDNN, PaddingForwardSerialization) {
    using namespace serialization;

    auto fname = output_file("PaddingForwardTest");
    auto dump = [&]() {
        opr::Padding::Param param;
        param.padding_mode = megdnn::param::Padding::PaddingMode(0);  // CONSTANT
        param.front_offset_dim0 = 3;
        param.front_offset_dim1 = 3;
        param.front_offset_dim2 = 3;
        param.front_offset_dim3 = 3;
        param.front_offset_dim4 = 0;
        param.front_offset_dim5 = 0;
        param.front_offset_dim6 = 0;
        param.back_offset_dim0 = 0;
        param.back_offset_dim1 = 0;
        param.back_offset_dim2 = 0;
        param.back_offset_dim3 = 0;
        param.back_offset_dim4 = 0;
        param.back_offset_dim5 = 0;
        param.back_offset_dim6 = 0;
        param.padding_val = 0;

        auto cn = CompNode::load("xpu");
        auto graph = ComputingGraph::make();
        HostTensorND inp_host{cn, {32, 4, 24, 24}, dtype::Float32()};
        auto inp = opr::ImmutableTensor::make(*graph, inp_host);
        auto opr = opr::PaddingForward::make(inp, param, {});
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
        auto rst = dumper->dump({opr});
        ASSERT_EQ(rst.outputs.size(), 1u);
    };

    auto load = [&]() {
        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
        auto rst = loader->load();
        ASSERT_EQ(rst.output_var_list.size(), 1u);
    };

    dump();
    load();
}

+ 44
- 0
src/opr/test/tensor_manip.cpp View File

@@ -10,6 +10,7 @@
*/

#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/tensor_gen.h"
@@ -2162,4 +2163,47 @@ TEST(TestParamPack, Split) {
test_param_pack_split<3>({{2, 9}, {123}, {5, 3}});
}

TEST(TestTensorManip, Padding_random) {
    // CONSTANT padding, front offset 3 on the first four dims, rest zero
    opr::Padding::Param param;
    param.padding_mode = megdnn::param::Padding::PaddingMode(0);
    param.front_offset_dim0 = 3;
    param.front_offset_dim1 = 3;
    param.front_offset_dim2 = 3;
    param.front_offset_dim3 = 3;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 0;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    param.padding_val = 0;

    using Checker = AutoOprChecker<1, 1>;

    // graph under test: a single Padding opr
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        return {opr::Padding::make(inputs[0], param)};
    };

    // reference forward: run the naive megdnn kernel directly
    auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto padding_opr = megdnn_naive_handle()->create_operator<megdnn::Padding>();
        padding_opr->param() = param;
        const auto& src_layout = inp[0]->as_megdnn().layout;
        // dst dim = src dim + front offset + back offset, per dimension
        TensorShape dst_shape{
                src_layout.shape[0] + param.front_offset_dim0 + param.back_offset_dim0,
                src_layout.shape[1] + param.front_offset_dim1 + param.back_offset_dim1,
                src_layout.shape[2] + param.front_offset_dim2 + param.back_offset_dim2,
                src_layout.shape[3] + param.front_offset_dim3 + param.back_offset_dim3};
        dest[0].resize(dst_shape);
        padding_opr->exec(inp[0]->as_megdnn(), dest[0].as_megdnn(), {});
    };

    Checker::RunOptions opt;
    opt.numdiff_max_err = 2e-3;
    Checker(make_graph, fwd, CompNode::load("xpu0"))
            .run({TensorShape{5, 5, 5, 5}}, opt)
            .run({TensorShape{4, 3, 4, 5}}, opt)
            .run({TensorShape{5, 4, 4, 5}}, opt);
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 1
- 0
src/serialization/impl/schema.fbs View File

@@ -113,6 +113,7 @@ union OperatorParam {
param.PermutationRNG = 79,
param.BetaRNG = 80,
param.SlidingWindowTranspose = 81,
param.Padding = 82,
}

table Operator {


Loading…
Cancel
Save