@@ -1353,6 +1353,53 @@ public: | |||
protected: | |||
void check_exec(const TensorLayout& dst, size_t workspace_in_bytes); | |||
}; | |||
/*! | |||
* \brief standard padding operator | |||
 * Inputs must have the same dtype, and the output tensor shape must be greater than or | |||
 * equal to the input tensor shape in every dimension; the extra space will be filled | |||
 * with padding_val, which defaults to 0. | |||
*/ | |||
class PaddingBase: public OperatorBase { | |||
DEF_OPR_PARAM(Padding); | |||
DEF_OPR_IMPL(PaddingBase, OperatorBase, 1, 1); | |||
public: | |||
using Mode = Param::PaddingMode; | |||
protected: | |||
SmallVector<size_t> get_offsets(); | |||
void check_exec(const TensorLayout& src, const TensorLayout& dst); | |||
}; | |||
class PaddingForward: public PaddingBase { | |||
DEF_OPR_IMPL(PaddingForward, PaddingBase, 1, 1); | |||
public: | |||
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0; | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||
_megdnn_workspace) { | |||
return exec(src, dst); | |||
} | |||
virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0; | |||
void deduce_layout(const TensorLayout &src, TensorLayout &dst); | |||
protected: | |||
void forward_check_exec(const TensorLayout& src, const TensorLayout& dst); | |||
}; | |||
using Padding = PaddingForward; | |||
class PaddingBackward: public PaddingBase { | |||
DEF_OPR_IMPL(PaddingBackward, PaddingBase, 1, 1); | |||
public: | |||
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0; | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||
_megdnn_workspace) { | |||
return exec(src, dst); | |||
} | |||
virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0; | |||
protected: | |||
void backward_check_exec(const TensorLayout& src, const TensorLayout& dst); | |||
}; | |||
} // namespace megdnn | |||
#include "megdnn/internal/opr_header_epilogue.h" | |||
@@ -1171,3 +1171,29 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o | |||
add_fields('int32', 'qmax', '2147483647') | |||
) | |||
pdef('Fill').add_fields('float32', 'value', '0') | |||
PADDING_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), | |||
Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'), | |||
Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii')] | |||
(pdef('Padding'). | |||
add_fields('uint32', Doc('front_offset_dim0','offset in dim 0'), 0). | |||
add_fields('uint32', Doc('front_offset_dim1','offset in dim 1'), 0). | |||
add_fields('uint32', Doc('front_offset_dim2','offset in dim 2'), 0). | |||
add_fields('uint32', Doc('front_offset_dim3','offset in dim 3'), 0). | |||
add_fields('uint32', Doc('front_offset_dim4','offset in dim 4'), 0). | |||
add_fields('uint32', Doc('front_offset_dim5','offset in dim 5'), 0). | |||
add_fields('uint32', Doc('front_offset_dim6','offset in dim 6'), 0). | |||
add_fields('uint32', Doc('back_offset_dim0', 'back offset in dim0'), 0). | |||
add_fields('uint32', Doc('back_offset_dim1', 'back offset in dim1'), 0). | |||
add_fields('uint32', Doc('back_offset_dim2', 'back offset in dim2'), 0). | |||
add_fields('uint32', Doc('back_offset_dim3', 'back offset in dim3'), 0). | |||
add_fields('uint32', Doc('back_offset_dim4', 'back offset in dim4'), 0). | |||
add_fields('uint32', Doc('back_offset_dim5', 'back offset in dim5'), 0). | |||
add_fields('uint32', Doc('back_offset_dim6', 'back offset in dim6'), 0). | |||
add_fields('float32', Doc('padding_val','param of padding opr'), 0). | |||
add_enum('PaddingMode', *PADDING_MODES, | |||
name_field='padding_mode', default=2, | |||
member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] | |||
) | |||
) |
@@ -217,7 +217,9 @@ private: | |||
cb(CheckHasInf) \ | |||
cb(LSQForward) \ | |||
cb(LSQBackward) \ | |||
cb(Fill) | |||
cb(Fill) \ | |||
cb(PaddingForward) \ | |||
cb(PaddingBackward) | |||
/*! | |||
* \brief specialize HandleImpl::create_operator for a single opr type; | |||
@@ -27,6 +27,8 @@ struct OprTrait {}; | |||
static const bool can_deduce_layout = CanDeduceLayout; \ | |||
} | |||
DEF(Padding, 2, false, true); | |||
DEF(PaddingBackward, 2, false, false); | |||
DEF(ConvolutionForward, 3, true, true); | |||
DEF(Convolution3DForward, 3, true, true); | |||
DEF(ConvolutionBackwardData, 3, true, false); | |||
@@ -0,0 +1,144 @@ | |||
/** | |||
* \file dnn/src/common/padding.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "megdnn/oprs.h" | |||
#include "megdnn/oprs/general.h" | |||
#include "megdnn/thin/small_vector.h" | |||
#include "src/common/opr_param_defs_enumv.cuh" | |||
#include "src/common/utils.h" | |||
namespace megdnn { | |||
using padding_param = megdnn::param_enumv::Padding; | |||
void PaddingForward::forward_check_exec(const TensorLayout& src, | |||
const TensorLayout& dst) { | |||
check_exec(src, dst); | |||
megdnn_assert(src.dtype.enumv() != DTypeEnum::Bool && | |||
src.dtype.enumv() != DTypeEnum::IntB1 && | |||
src.dtype.enumv() != DTypeEnum::IntB2 && | |||
src.dtype.enumv() != DTypeEnum::IntB4, | |||
"unsupported %s dtype for forward padding opr", | |||
src.dtype.name()); | |||
} | |||
void PaddingForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) { | |||
SmallVector<size_t> offsets(get_offsets()); | |||
TensorShape dst_shape; | |||
switch (src.ndim) { | |||
case 1: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1]}; | |||
break; | |||
case 2: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||
src.shape[1] + offsets[2] + offsets[3]}; | |||
break; | |||
case 3: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||
src.shape[1] + offsets[2] + offsets[3], | |||
src.shape[2] + offsets[4] + offsets[5]}; | |||
break; | |||
case 4: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||
src.shape[1] + offsets[2] + offsets[3], | |||
src.shape[2] + offsets[4] + offsets[5], | |||
src.shape[3] + offsets[6] + offsets[7]}; | |||
break; | |||
case 5: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||
src.shape[1] + offsets[2] + offsets[3], | |||
src.shape[2] + offsets[4] + offsets[5], | |||
src.shape[3] + offsets[6] + offsets[7], | |||
src.shape[4] + offsets[8] + offsets[9]}; | |||
break; | |||
case 6: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||
src.shape[1] + offsets[2] + offsets[3], | |||
src.shape[2] + offsets[4] + offsets[5], | |||
src.shape[3] + offsets[6] + offsets[7], | |||
src.shape[4] + offsets[8] + offsets[9], | |||
src.shape[5] + offsets[10] + offsets[11]}; | |||
break; | |||
case 7: | |||
dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||
src.shape[1] + offsets[2] + offsets[3], | |||
src.shape[2] + offsets[4] + offsets[5], | |||
src.shape[3] + offsets[6] + offsets[7], | |||
src.shape[4] + offsets[8] + offsets[9], | |||
src.shape[5] + offsets[10] + offsets[11], | |||
src.shape[6] + offsets[12] + offsets[13]}; | |||
break; | |||
default: | |||
megdnn_assert(false, "invalid tensor ndim %zu", src.ndim); | |||
break; | |||
} | |||
dst = TensorLayout(dst_shape, src.dtype); | |||
} | |||
void PaddingBackward::backward_check_exec(const TensorLayout& src, | |||
const TensorLayout& dst) { | |||
check_exec(dst, src); | |||
megdnn_assert(src.dtype.enumv() == | |||
DTypeEnum::Float32 DNN_INC_FLOAT16( | |||
|| src.dtype.enumv() == DTypeEnum::Float16 || | |||
src.dtype.enumv() == DTypeEnum::BFloat16), | |||
"unsupported %s dtype for backward padding opr", | |||
src.dtype.name()); | |||
} | |||
SmallVector<size_t> PaddingBase::get_offsets() { | |||
SmallVector<size_t> offsets = { | |||
param().front_offset_dim0, param().back_offset_dim0, | |||
param().front_offset_dim1, param().back_offset_dim1, | |||
param().front_offset_dim2, param().back_offset_dim2, | |||
param().front_offset_dim3, param().back_offset_dim3, | |||
param().front_offset_dim4, param().back_offset_dim4, | |||
param().front_offset_dim5, param().back_offset_dim5, | |||
param().front_offset_dim6, param().back_offset_dim6}; | |||
return offsets; | |||
} | |||
void PaddingBase::check_exec(const TensorLayout& src, const TensorLayout& dst) { | |||
SmallVector<size_t> offsets(get_offsets()); | |||
// make sure the src and dst tensor not empty | |||
megdnn_assert(src.ndim != 0 && dst.ndim != 0); | |||
// make sure src and dst is same dtype | |||
megdnn_assert_eq_dtype(src, dst); | |||
// make sure src and dst is same ndim | |||
megdnn_assert(src.ndim == dst.ndim, "the src.ndim = %zu the dst.ndim = %zu", | |||
src.ndim, dst.ndim); | |||
// make sure in every dimension dst is equal or greater than src | |||
for (size_t i = 0; i < src.ndim; ++i) { | |||
megdnn_assert(dst.shape[i] == | |||
src.shape[i] + offsets[i * 2] + offsets[i * 2 + 1]); | |||
} | |||
// check the padding mode is valid | |||
megdnn_assert(static_cast<uint32_t>(param().padding_mode) == | |||
padding_param::PaddingMode::REFLECT || | |||
static_cast<uint32_t>(param().padding_mode) == | |||
padding_param::PaddingMode::REPLICATE || | |||
static_cast<uint32_t>(param().padding_mode) == | |||
padding_param::PaddingMode::CONSTANT, | |||
"unsupported padding mode"); | |||
// additional check for reflect padding: make sure the reflected index is | |||
// valid | |||
if (static_cast<uint32_t>(param().padding_mode) == | |||
padding_param::PaddingMode::REFLECT) { | |||
for (size_t i = 0; i < src.ndim; ++i) { | |||
megdnn_assert(offsets[i * 2] < src.shape[i] && | |||
dst.shape[i] - offsets[i * 2] - src.shape[i] < | |||
src.shape[i]); | |||
} | |||
} | |||
} | |||
} // namespace megdnn |
@@ -12,6 +12,7 @@ | |||
#include "src/common/handle_impl.h" | |||
#include "src/cuda/padding/opr_impl.h" | |||
#include "src/cuda/adaptive_pooling/opr_impl.h" | |||
#include "src/cuda/add_update/opr_impl.h" | |||
#include "src/cuda/argmxx/opr_impl.h" | |||
@@ -0,0 +1,71 @@ | |||
/** | |||
* \file dnn/src/cuda/padding/opr_impl.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/cuda/padding/opr_impl.h" | |||
#include "src/common/utils.h" | |||
#include "src/cuda/handle.h" | |||
#include "src/cuda/padding/padding.cuh" | |||
#include "src/cuda/utils.h" | |||
namespace megdnn { | |||
namespace cuda { | |||
void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||
forward_check_exec(src.layout, dst.layout); | |||
SmallVector<size_t> offsets(get_offsets()); | |||
// SmallVector cannot be used as an argument in a .cu file | |||
size_t param_offsets[MEGDNN_MAX_NDIM * 2] = { | |||
offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], | |||
offsets[5], offsets[6], offsets[7], offsets[8], offsets[9], | |||
offsets[10], offsets[11], offsets[12], offsets[13]}; | |||
auto stream = cuda_stream(this->handle()); | |||
#define cb(DType) \ | |||
if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \ | |||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||
padding::padding_forward_proxy<ctype>(src, dst, param_offsets, \ | |||
uint32_t(param().padding_mode), \ | |||
param().padding_val, stream); \ | |||
} | |||
MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||
#undef cb | |||
} | |||
void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||
backward_check_exec(src.layout, dst.layout); | |||
SmallVector<size_t> offsets(get_offsets()); | |||
// SmallVector cannot be used as an argument in a .cu file | |||
size_t param_offsets[MEGDNN_MAX_NDIM * 2] = { | |||
offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], | |||
offsets[5], offsets[6], offsets[7], offsets[8], offsets[9], | |||
offsets[10], offsets[11], offsets[12], offsets[13]}; | |||
auto stream = cuda_stream(this->handle()); | |||
#define cb(DType) \ | |||
if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \ | |||
using ctype = typename DTypeTrait<DType>::ctype; \ | |||
padding::padding_backward_proxy<ctype>(src, dst, param_offsets, \ | |||
uint32_t(param().padding_mode), \ | |||
stream); \ | |||
} | |||
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||
#undef cb | |||
} | |||
size_t PaddingForwardImpl::get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& dst) { | |||
return 0; | |||
} | |||
size_t PaddingBackwardImpl::get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& dst) { | |||
return 0; | |||
} | |||
} // namespace cuda | |||
} // namespace megdnn |
@@ -0,0 +1,35 @@ | |||
/** | |||
* \file dnn/src/cuda/padding/opr_impl.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#pragma once | |||
#include "megdnn/oprs.h" | |||
namespace megdnn { | |||
namespace cuda { | |||
class PaddingForwardImpl : public PaddingForward { | |||
using PaddingForward::PaddingForward; | |||
public: | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; | |||
size_t get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& dst) override; | |||
}; | |||
class PaddingBackwardImpl : public PaddingBackward { | |||
using PaddingBackward::PaddingBackward; | |||
public: | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; | |||
size_t get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& dst) override; | |||
}; | |||
} // namespace cuda | |||
} // namespace megdnn |
@@ -0,0 +1,284 @@ | |||
/** | |||
* \file dnn/src/cuda/padding/padding.cu | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include <algorithm> | |||
#include <cstring> | |||
#include <iostream> | |||
#include "megdnn/basic_types.h" | |||
#include "padding.cuh" | |||
#include "src/cuda/int_fastdiv.cuh" | |||
#include "src/cuda/query_blocksize.cuh" | |||
namespace megdnn { | |||
namespace cuda { | |||
namespace padding { | |||
struct ShapeParams { | |||
size_t src_shape[MEGDNN_MAX_NDIM]; | |||
size_t dst_shape[MEGDNN_MAX_NDIM]; | |||
Uint32Fastdiv src_stride[MEGDNN_MAX_NDIM]; | |||
Uint32Fastdiv dst_stride[MEGDNN_MAX_NDIM]; | |||
size_t offsets[MEGDNN_MAX_NDIM * 2]; | |||
}; | |||
template <typename T> | |||
__global__ void paddingConst_kernel(const size_t ndim, | |||
const size_t total_out_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params, | |||
const float_t padding_val) { | |||
KERN_FOR(out_index, total_out_nr) { | |||
bool in_src_valid_area = true; | |||
size_t in_index = 0; | |||
size_t out_index_tmp = out_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
Uint32Fastdiv dst_stride = params.dst_stride[dim], src_stride = params.src_stride[dim]; | |||
size_t src_shape = params.src_shape[dim]; | |||
size_t offset = params.offsets[dim*2]; | |||
size_t dim_index = out_index_tmp / dst_stride; | |||
in_src_valid_area &= (dim_index >= offset && dim_index < offset+src_shape); | |||
if(!in_src_valid_area) break; | |||
out_index_tmp -= dim_index * dst_stride.divisor(); | |||
in_index += (dim_index - offset)*src_stride.divisor(); | |||
/* | |||
size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||
out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); | |||
in_src_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||
dim_index < params.offsets[dim * 2] + | |||
params.src_shape[dim]); | |||
in_index += (dim_index - params.offsets[dim * 2]) * | |||
params.src_stride[dim].divisor(); | |||
*/ | |||
} | |||
dst[out_index] = in_src_valid_area ? src[in_index] : padding_val; | |||
} | |||
} | |||
template <typename T> | |||
__global__ void paddingReplicate_kernel(const size_t ndim, | |||
const size_t total_out_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params, const float_t) { | |||
KERN_FOR(out_index, total_out_nr) { | |||
size_t in_index = 0; | |||
size_t out_index_tmp = out_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||
out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); | |||
dim_index = (size_t)llmin( | |||
(long long)params.src_shape[dim] - 1, | |||
llmax((long long)dim_index - | |||
(long long)params.offsets[dim * 2], | |||
(long long)0)); | |||
in_index += dim_index * params.src_stride[dim].divisor(); | |||
} | |||
dst[out_index] = src[in_index]; | |||
} | |||
} | |||
template <typename T> | |||
__global__ void paddingReflect_kernel(const size_t ndim, | |||
const size_t total_out_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params, const float_t) { | |||
KERN_FOR(out_index, total_out_nr) { | |||
size_t in_index = 0; | |||
size_t out_index_tmp = out_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
long long dim_index = out_index_tmp / params.dst_stride[dim]; | |||
out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); | |||
dim_index -= (long long)params.offsets[dim * 2]; | |||
dim_index = llmax(dim_index, -dim_index); | |||
dim_index = llmin(dim_index, 2 * (long long)params.src_shape[dim] - | |||
dim_index - 2); | |||
in_index += size_t(dim_index) * | |||
(size_t)params.src_stride[dim].divisor(); | |||
} | |||
dst[out_index] = src[in_index]; | |||
} | |||
} | |||
template <typename T> | |||
__global__ void paddingConstBackward_kernel(const size_t ndim, | |||
const size_t total_in_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) { | |||
KERN_FOR(in_index, total_in_nr) { | |||
bool in_dst_valid_area = true; | |||
size_t out_index = 0; | |||
size_t in_index_tmp = in_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||
in_index_tmp -= dim_index * params.src_stride[dim].divisor(); | |||
in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||
dim_index < params.offsets[dim * 2] + | |||
params.dst_shape[dim]); | |||
out_index += (dim_index - params.offsets[dim * 2]) * | |||
params.dst_stride[dim].divisor(); | |||
} | |||
if (in_dst_valid_area) { | |||
dst[out_index] = src[in_index]; | |||
} | |||
} | |||
} | |||
template <typename T> | |||
__global__ void paddingReplicateBackward_kernel(const size_t ndim, | |||
const size_t total_in_nr, | |||
const T* const src, | |||
T* const dst, | |||
ShapeParams params) { | |||
KERN_FOR(in_index, total_in_nr) { | |||
size_t out_index = 0; | |||
size_t in_index_tmp = in_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||
in_index_tmp -= dim_index * params.src_stride[dim].divisor(); | |||
dim_index = (size_t)llmin( | |||
(long long)params.dst_shape[dim] - 1, | |||
llmax((long long)dim_index - | |||
(long long)params.offsets[dim * 2], | |||
(long long)0)); | |||
out_index += dim_index * params.dst_stride[dim].divisor(); | |||
} | |||
atomic_add(&dst[out_index], src[in_index]); | |||
} | |||
} | |||
template <typename T> | |||
__global__ void paddingReflectBackward_kernel(const size_t ndim, | |||
const size_t total_in_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) { | |||
KERN_FOR(in_index, total_in_nr) { | |||
size_t out_index = 0; | |||
size_t in_index_tmp = in_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
long long dim_index = in_index_tmp / params.src_stride[dim]; | |||
in_index_tmp -= dim_index * params.src_stride[dim].divisor(); | |||
dim_index -= (long long)params.offsets[dim * 2]; | |||
dim_index = llmax(dim_index, -dim_index); | |||
dim_index = llmin(dim_index, 2 * (long long)params.dst_shape[dim] - | |||
dim_index - 2); | |||
out_index += size_t(dim_index) * | |||
(size_t)params.dst_stride[dim].divisor(); | |||
} | |||
atomic_add(&dst[out_index], src[in_index]); | |||
} | |||
} | |||
template <typename T> | |||
void padding_forward_proxy(const TensorND& src, const TensorND& dst, | |||
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, | |||
const float_t padding_val, cudaStream_t stream) { | |||
ShapeParams params; | |||
for (size_t i = 0; i < src.layout.ndim; ++i) { | |||
params.src_shape[i] = src.layout.shape[i]; | |||
params.dst_shape[i] = dst.layout.shape[i]; | |||
params.src_stride[i] = src.layout.stride[i]; | |||
params.dst_stride[i] = dst.layout.stride[i]; | |||
params.offsets[i * 2] = offsets[i * 2]; | |||
params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; | |||
} | |||
void (*fwd_kern)(const size_t, const size_t, const T* const, T* const, | |||
ShapeParams, const float_t); | |||
switch (mode) { | |||
case param_enumv::Padding::PaddingMode::CONSTANT: | |||
fwd_kern = paddingConst_kernel<T>; | |||
break; | |||
case param_enumv::Padding::PaddingMode::REPLICATE: | |||
fwd_kern = paddingReplicate_kernel<T>; | |||
break; | |||
case param_enumv::Padding::PaddingMode::REFLECT: | |||
fwd_kern = paddingReflect_kernel<T>; | |||
break; | |||
default: | |||
megdnn_assert(false, "invalid padding mode"); | |||
} | |||
size_t total_nr = dst.layout.total_nr_elems(); | |||
uint32_t nr_threads = query_blocksize_for_kernel(fwd_kern); | |||
dim3 threads(nr_threads); | |||
dim3 blocks(DIVUP(total_nr, nr_threads)); | |||
fwd_kern<<<blocks, threads, 0, stream>>>(src.layout.ndim, total_nr, | |||
src.ptr<T>(), dst.ptr<T>(), params, | |||
padding_val); | |||
after_kernel_launch(); | |||
} | |||
template <typename T> | |||
void padding_backward_proxy(const TensorND& src, const TensorND& dst, | |||
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, | |||
cudaStream_t stream) { | |||
ShapeParams params; | |||
for (size_t i = 0; i < src.layout.ndim; ++i) { | |||
params.src_shape[i] = src.layout.shape[i]; | |||
params.dst_shape[i] = dst.layout.shape[i]; | |||
params.src_stride[i] = src.layout.stride[i]; | |||
params.dst_stride[i] = dst.layout.stride[i]; | |||
params.offsets[i * 2] = offsets[i * 2]; | |||
params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; | |||
} | |||
cudaMemset(dst.raw_ptr, 0, dst.layout.access_bytes()); | |||
void (*bwd_kern)(const size_t, const size_t, const T* const, T* const, | |||
ShapeParams); | |||
switch (mode) { | |||
case param_enumv::Padding::PaddingMode::CONSTANT: | |||
bwd_kern = paddingConstBackward_kernel<T>; | |||
break; | |||
case param_enumv::Padding::PaddingMode::REPLICATE: | |||
bwd_kern = paddingReplicateBackward_kernel<T>; | |||
break; | |||
case param_enumv::Padding::PaddingMode::REFLECT: | |||
bwd_kern = paddingReflectBackward_kernel<T>; | |||
break; | |||
default: | |||
megdnn_assert(false, "invalid padding mode"); | |||
} | |||
size_t total_nr = src.layout.total_nr_elems(); | |||
uint32_t nr_threads = query_blocksize_for_kernel(bwd_kern); | |||
dim3 threads(nr_threads); | |||
dim3 blocks(DIVUP(total_nr, nr_threads)); | |||
bwd_kern<<<blocks, threads, 0, stream>>>( | |||
src.layout.ndim, total_nr, src.ptr<T>(), dst.ptr<T>(), params); | |||
after_kernel_launch(); | |||
} | |||
#define INST(T) \ | |||
template void padding_forward_proxy<T>( \ | |||
const TensorND& src, const TensorND& dst, \ | |||
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \ | |||
const float_t padding_val, cudaStream_t stream); | |||
#define cb(DType) INST(typename DTypeTrait<DType>::ctype) | |||
MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||
#undef cb | |||
#undef INST | |||
#define INST(T) \ | |||
template void padding_backward_proxy<T>( \ | |||
const TensorND& src, const TensorND& dst, \ | |||
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \ | |||
cudaStream_t stream); | |||
#define cb(DType) INST(typename DTypeTrait<DType>::ctype) | |||
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||
#undef cb | |||
#undef INST | |||
} // namespace padding | |||
} // namespace cuda | |||
} // namespace megdnn |
@@ -0,0 +1,36 @@ | |||
/** | |||
* \file dnn/src/cuda/padding/padding.cuh | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#pragma once | |||
#include "cuda_runtime.h" | |||
#include "megdnn/basic_types.h" | |||
#include "src/common/opr_param_defs_enumv.cuh" | |||
#include "src/cuda/utils.cuh" | |||
namespace megdnn { | |||
namespace cuda { | |||
namespace padding { | |||
template <typename T> | |||
void padding_forward_proxy(const TensorND& src, const TensorND& dst, | |||
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, | |||
const float_t padding_val, cudaStream_t stream); | |||
template <typename T> | |||
void padding_backward_proxy(const TensorND& src, const TensorND& dst, | |||
size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, | |||
cudaStream_t stream); | |||
} // namespace padding | |||
} // namespace cuda | |||
} // namespace megdnn | |||
// vim: syntax=cuda.doxygen |
@@ -78,7 +78,7 @@ | |||
#if MEGDNN_THREADS_512 | |||
#define NR_THREADS 512 | |||
#define NR_THREADS_X 32 | |||
#define NR_THREADS_Y 16 | |||
#define NR_THREADS_Y 16 | |||
#else | |||
#define NR_THREADS 1024 | |||
#define NR_THREADS_X 32 | |||
@@ -16,7 +16,7 @@ | |||
#include <numeric> | |||
namespace megdnn { | |||
namespace { | |||
using namespace megdnn; | |||
@@ -14,6 +14,7 @@ | |||
#include "src/common/handle_impl.h" | |||
#include "src/naive/padding/opr_impl.h" | |||
#include "src/naive/adaptive_pooling/opr_impl.h" | |||
#include "src/naive/add_update/opr_impl.h" | |||
#include "src/naive/argmxx/opr_impl.h" | |||
@@ -0,0 +1,281 @@ | |||
/** | |||
* \file dnn/src/naive/padding/opr_impl.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/naive/padding/opr_impl.h" | |||
#include <math.h> | |||
#include <stdio.h> | |||
#include "src/common/utils.h" | |||
#include "src/naive/handle.h" | |||
namespace megdnn { | |||
namespace naive { | |||
struct ShapeParams { | |||
size_t src_shape[MEGDNN_MAX_NDIM]; | |||
size_t dst_shape[MEGDNN_MAX_NDIM]; | |||
ptrdiff_t src_stride[MEGDNN_MAX_NDIM]; | |||
ptrdiff_t dst_stride[MEGDNN_MAX_NDIM]; | |||
size_t offsets[MEGDNN_MAX_NDIM * 2]; | |||
}; | |||
template <typename T> | |||
void exec_const_internal(const size_t ndim, const size_t total_out_nr, | |||
const T* const src, T* const dst, ShapeParams params, | |||
const T padding_val) MEGDNN_NOEXCEPT { | |||
rep(out_index, total_out_nr) { | |||
bool in_src_valid_area = true; | |||
size_t in_index = 0; | |||
size_t out_index_tmp = out_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||
out_index_tmp -= dim_index * params.dst_stride[dim]; | |||
in_src_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||
dim_index < params.offsets[dim * 2] + | |||
params.src_shape[dim]); | |||
in_index += (dim_index - params.offsets[dim * 2]) * | |||
params.src_stride[dim]; | |||
} | |||
if (in_src_valid_area) { | |||
dst[out_index] = src[in_index]; | |||
} else { | |||
dst[out_index] = padding_val; | |||
} | |||
} | |||
} | |||
template <typename T> | |||
void exec_replicate_internal(const size_t ndim, const size_t total_out_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) MEGDNN_NOEXCEPT { | |||
rep(out_index, total_out_nr) { | |||
size_t in_index = 0; | |||
size_t out_index_tmp = out_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||
out_index_tmp -= dim_index * params.dst_stride[dim]; | |||
dim_index = (size_t)std::min( | |||
(long long)params.src_shape[dim] - 1, | |||
std::max((long long)dim_index - | |||
(long long)params.offsets[dim * 2], | |||
(long long)0)); | |||
in_index += dim_index * params.src_stride[dim]; | |||
} | |||
dst[out_index] = src[in_index]; | |||
} | |||
} | |||
template <typename T> | |||
void exec_reflect_internal(const size_t ndim, const size_t total_out_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) MEGDNN_NOEXCEPT { | |||
rep(out_index, total_out_nr) { | |||
size_t in_index = 0; | |||
size_t out_index_tmp = out_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
long long dim_index = out_index_tmp / params.dst_stride[dim]; | |||
out_index_tmp -= dim_index * params.dst_stride[dim]; | |||
dim_index -= (long long)params.offsets[dim * 2]; | |||
dim_index = std::max(dim_index, -dim_index); | |||
dim_index = | |||
std::min(dim_index, 2 * (long long)params.src_shape[dim] - | |||
dim_index - 2); | |||
in_index += size_t(dim_index) * (size_t)params.src_stride[dim]; | |||
} | |||
dst[out_index] = src[in_index]; | |||
} | |||
} | |||
template <typename T> | |||
void backward_exec_const_internal(const size_t ndim, const size_t total_in_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) MEGDNN_NOEXCEPT { | |||
rep(in_index, total_in_nr) { | |||
bool in_dst_valid_area = true; | |||
size_t out_index = 0; | |||
size_t in_index_tmp = in_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||
in_index_tmp -= dim_index * params.src_stride[dim]; | |||
in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||
dim_index < params.offsets[dim * 2] + | |||
params.dst_shape[dim]); | |||
out_index += (dim_index - params.offsets[dim * 2]) * | |||
params.dst_stride[dim]; | |||
} | |||
if (in_dst_valid_area) { | |||
dst[out_index] = src[in_index]; | |||
} | |||
} | |||
} | |||
template <typename T> | |||
void backward_exec_replicate_internal(const size_t ndim, | |||
const size_t total_in_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) MEGDNN_NOEXCEPT { | |||
rep(in_index, total_in_nr) { | |||
size_t out_index = 0; | |||
size_t in_index_tmp = in_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||
in_index_tmp -= dim_index * params.src_stride[dim]; | |||
dim_index = (size_t)std::min( | |||
(long long)params.dst_shape[dim] - 1, | |||
std::max((long long)dim_index - | |||
(long long)params.offsets[dim * 2], | |||
(long long)0)); | |||
out_index += dim_index * params.dst_stride[dim]; | |||
} | |||
dst[out_index] += src[in_index]; | |||
} | |||
} | |||
template <typename T> | |||
void backward_exec_reflect_internal(const size_t ndim, const size_t total_in_nr, | |||
const T* const src, T* const dst, | |||
ShapeParams params) MEGDNN_NOEXCEPT { | |||
rep(in_index, total_in_nr) { | |||
size_t out_index = 0; | |||
size_t in_index_tmp = in_index; | |||
for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||
long long dim_index = in_index_tmp / params.src_stride[dim]; | |||
in_index_tmp -= dim_index * params.src_stride[dim]; | |||
dim_index -= (long long)params.offsets[dim * 2]; | |||
dim_index = std::max(dim_index, -dim_index); | |||
dim_index = | |||
std::min(dim_index, 2 * (long long)params.dst_shape[dim] - | |||
dim_index - 2); | |||
out_index += size_t(dim_index) * (size_t)params.dst_stride[dim]; | |||
} | |||
dst[out_index] += src[in_index]; | |||
} | |||
} | |||
//! forward padding: dispatch on padding mode and dtype, then run the
//! corresponding naive kernel over every element of the padded output
void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    // validates the src/dst dtype and shape relationship
    forward_check_exec(src.layout, dst.layout);
    // flattened (front, back) offset pairs per dimension, from param()
    SmallVector<size_t> offsets(get_offsets());
    // pack shapes, strides and offsets into a plain struct passed by value
    // to the kernels
    ShapeParams params;
    for (size_t i = 0; i < src.layout.ndim; ++i) {
        params.src_shape[i] = src.layout.shape[i];
        params.dst_shape[i] = dst.layout.shape[i];
        params.src_stride[i] = src.layout.stride[i];
        params.dst_stride[i] = dst.layout.stride[i];
        params.offsets[i * 2] = offsets[i * 2];
        params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
    }
    // forward kernels iterate over every element of the (larger) output
    size_t n = dst.layout.total_nr_elems();
    switch (param().padding_mode) {
        case param::Padding::PaddingMode::CONSTANT:
// CONSTANT is the only mode that consumes padding_val
#define cb(DType)                                                       \
    if (src.layout.dtype == DType()) {                                  \
        using T = typename DTypeTrait<DType>::ctype;                    \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_const_internal<T>(            \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params, \
                T(param().padding_val)));                               \
        return;                                                         \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REPLICATE:
#define cb(DType)                                                      \
    if (src.layout.dtype == DType()) {                                 \
        using T = typename DTypeTrait<DType>::ctype;                   \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_replicate_internal<T>(       \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                        \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REFLECT:
#define cb(DType)                                                      \
    if (src.layout.dtype == DType()) {                                 \
        using T = typename DTypeTrait<DType>::ctype;                   \
        MEGDNN_DISPATCH_CPU_KERN_OPR(exec_reflect_internal<T>(         \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                        \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
            break;
        default:
            megdnn_assert(false, "unsupported padding mode!");
    }
}
//! backward padding: scatter/accumulate the padded gradient `src` back into
//! `dst` (the gradient w.r.t. the original, smaller tensor). Only floating
//! point dtypes are dispatched, since gradients are floats.
void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) {
    backward_check_exec(src.layout, dst.layout);
    // flattened (front, back) offset pairs per dimension, from param()
    SmallVector<size_t> offsets(get_offsets());
    ShapeParams params;
    for (size_t i = 0; i < src.layout.ndim; ++i) {
        params.src_shape[i] = src.layout.shape[i];
        params.dst_shape[i] = dst.layout.shape[i];
        params.src_stride[i] = src.layout.stride[i];
        params.dst_stride[i] = dst.layout.stride[i];
        params.offsets[i * 2] = offsets[i * 2];
        params.offsets[i * 2 + 1] = offsets[i * 2 + 1];
    }
    // backward kernels iterate over every element of the (larger) input
    size_t n = src.layout.total_nr_elems();
    // REPLICATE/REFLECT kernels accumulate with +=, so dst must start at
    // zero. NOTE(review): this memset runs immediately while the kernel is
    // issued via MEGDNN_DISPATCH_CPU_KERN_OPR -- presumably the naive
    // dispatcher orders it before the kernel; confirm.
    memset(dst.raw_ptr, 0, dst.layout.access_bytes());
    switch (param().padding_mode) {
        case param::Padding::PaddingMode::CONSTANT:
#define cb(DType)                                                      \
    if (src.layout.dtype == DType()) {                                 \
        using T = typename DTypeTrait<DType>::ctype;                   \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_const_internal<T>(  \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                        \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REPLICATE:
#define cb(DType)                                                          \
    if (src.layout.dtype == DType()) {                                     \
        using T = typename DTypeTrait<DType>::ctype;                       \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_replicate_internal<T>(  \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params));  \
        return;                                                            \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
            break;
        case param::Padding::PaddingMode::REFLECT:
#define cb(DType)                                                        \
    if (src.layout.dtype == DType()) {                                   \
        using T = typename DTypeTrait<DType>::ctype;                     \
        MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_reflect_internal<T>(  \
                src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \
        return;                                                          \
    }
            MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
            break;
        default:
            megdnn_assert(false, "unsupported padding mode!");
    }
}
//! the naive forward kernels write dst directly; no scratch memory needed
size_t PaddingForwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}
//! the naive backward kernels accumulate into dst in place; no scratch
//! memory needed
size_t PaddingBackwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}
} // namespace naive | |||
} // namespace megdnn |
@@ -0,0 +1,30 @@ | |||
/** | |||
* \file dnn/src/naive/padding/opr_impl.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ | |||
#pragma once | |||
#include "megdnn/oprs.h" | |||
namespace megdnn{ | |||
namespace naive{ | |||
class PaddingForwardImpl: public PaddingForward{ | |||
using PaddingForward::PaddingForward; | |||
public: | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; | |||
size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override; | |||
}; | |||
class PaddingBackwardImpl: public PaddingBackward{ | |||
using PaddingBackward::PaddingBackward; | |||
public: | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; | |||
size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override; | |||
}; | |||
} | |||
} |
@@ -0,0 +1,441 @@ | |||
/** | |||
* \file dnn/test/common/padding.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||
*/ | |||
#pragma once | |||
#include <cstddef> | |||
#include <iostream> | |||
#include "megdnn/basic_types.h" | |||
#include "megdnn/opr_param_defs.h" | |||
namespace megdnn { | |||
namespace test { | |||
namespace padding { | |||
//! one padding test case: the operator parameters plus the source and
//! destination tensor shapes
struct TestArg {
    param::Padding param;  // padding mode, per-dimension offsets, fill value
    TensorShape src;       // shape of the un-padded input
    TensorShape dst;       // shape of the padded output
    TestArg(param::Padding _param, TensorShape _src, TensorShape _dst)
            : param(_param), src(_src), dst(_dst) {}
};
inline std::vector<TestArg> get_args() { | |||
size_t src_shape_dim0 = 5; | |||
size_t src_shape_dim1 = 5; | |||
size_t src_shape_dim2 = 5; | |||
size_t src_shape_dim3 = 5; | |||
size_t src_shape_dim4 = 5; | |||
size_t src_shape_dim5 = 5; | |||
size_t src_shape_dim6 = 5; | |||
size_t dst_shape_dim0 = 8; | |||
size_t dst_shape_dim1 = 8; | |||
size_t dst_shape_dim2 = 8; | |||
size_t dst_shape_dim3 = 8; | |||
size_t dst_shape_dim4 = 8; | |||
size_t dst_shape_dim5 = 8; | |||
size_t dst_shape_dim6 = 8; | |||
std::vector<TestArg> args; | |||
param::Padding cur_param; | |||
cur_param.front_offset_dim0 = 0; | |||
cur_param.front_offset_dim1 = 0; | |||
cur_param.front_offset_dim2 = 0; | |||
cur_param.front_offset_dim3 = 0; | |||
cur_param.front_offset_dim4 = 0; | |||
cur_param.front_offset_dim5 = 0; | |||
cur_param.front_offset_dim6 = 0; | |||
cur_param.back_offset_dim0 = 0; | |||
cur_param.back_offset_dim1 = 0; | |||
cur_param.back_offset_dim2 = 0; | |||
cur_param.back_offset_dim3 = 0; | |||
cur_param.back_offset_dim4 = 0; | |||
cur_param.back_offset_dim5 = 0; | |||
cur_param.back_offset_dim6 = 0; | |||
cur_param.padding_val = 2; | |||
cur_param.front_offset_dim0 = 1; | |||
cur_param.back_offset_dim0 = 2; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||
TensorShape{dst_shape_dim0}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||
TensorShape{dst_shape_dim0}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||
TensorShape{dst_shape_dim0}); | |||
cur_param.front_offset_dim1 = 2; | |||
cur_param.back_offset_dim1 = 1; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||
cur_param.front_offset_dim2 = 1; | |||
cur_param.back_offset_dim2 = 2; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||
cur_param.front_offset_dim3 = 0; | |||
cur_param.back_offset_dim3 = 3; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back(cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, | |||
src_shape_dim2, src_shape_dim3}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, | |||
dst_shape_dim2, dst_shape_dim3}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back(cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, | |||
src_shape_dim2, src_shape_dim3}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, | |||
dst_shape_dim2, dst_shape_dim3}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back(cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, | |||
src_shape_dim2, src_shape_dim3}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, | |||
dst_shape_dim2, dst_shape_dim3}); | |||
cur_param.front_offset_dim4 = 3; | |||
cur_param.back_offset_dim4 = 0; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4}); | |||
cur_param.front_offset_dim5 = 1; | |||
cur_param.back_offset_dim5 = 2; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||
cur_param.front_offset_dim6 = 0; | |||
cur_param.front_offset_dim6 = 3; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||
src_shape_dim6}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||
dst_shape_dim6}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||
src_shape_dim6}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||
dst_shape_dim6}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||
src_shape_dim6}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||
dst_shape_dim6}); | |||
return args; | |||
} | |||
inline std::vector<TestArg> get_args_backward() { | |||
size_t src_shape_dim0 = 8; | |||
size_t src_shape_dim1 = 8; | |||
size_t src_shape_dim2 = 8; | |||
size_t src_shape_dim3 = 8; | |||
size_t src_shape_dim4 = 8; | |||
size_t src_shape_dim5 = 8; | |||
size_t src_shape_dim6 = 8; | |||
size_t dst_shape_dim0 = 5; | |||
size_t dst_shape_dim1 = 5; | |||
size_t dst_shape_dim2 = 5; | |||
size_t dst_shape_dim3 = 5; | |||
size_t dst_shape_dim4 = 5; | |||
size_t dst_shape_dim5 = 5; | |||
size_t dst_shape_dim6 = 5; | |||
std::vector<TestArg> args; | |||
param::Padding cur_param; | |||
cur_param.front_offset_dim0 = 0; | |||
cur_param.front_offset_dim1 = 0; | |||
cur_param.front_offset_dim2 = 0; | |||
cur_param.front_offset_dim3 = 0; | |||
cur_param.front_offset_dim4 = 0; | |||
cur_param.front_offset_dim5 = 0; | |||
cur_param.front_offset_dim6 = 0; | |||
cur_param.back_offset_dim0 = 0; | |||
cur_param.back_offset_dim1 = 0; | |||
cur_param.back_offset_dim2 = 0; | |||
cur_param.back_offset_dim3 = 0; | |||
cur_param.back_offset_dim4 = 0; | |||
cur_param.back_offset_dim5 = 0; | |||
cur_param.back_offset_dim6 = 0; | |||
cur_param.padding_val = 2; | |||
cur_param.front_offset_dim0 = 1; | |||
cur_param.back_offset_dim0 = 2; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||
TensorShape{dst_shape_dim0}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||
TensorShape{dst_shape_dim0}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||
TensorShape{dst_shape_dim0}); | |||
cur_param.front_offset_dim1 = 2; | |||
cur_param.back_offset_dim1 = 1; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||
cur_param.front_offset_dim2 = 1; | |||
cur_param.back_offset_dim2 = 2; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||
cur_param.front_offset_dim3 = 0; | |||
cur_param.back_offset_dim3 = 3; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back(cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, | |||
src_shape_dim2, src_shape_dim3}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, | |||
dst_shape_dim2, dst_shape_dim3}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back(cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, | |||
src_shape_dim2, src_shape_dim3}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, | |||
dst_shape_dim2, dst_shape_dim3}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back(cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, | |||
src_shape_dim2, src_shape_dim3}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, | |||
dst_shape_dim2, dst_shape_dim3}); | |||
cur_param.front_offset_dim4 = 3; | |||
cur_param.back_offset_dim4 =0; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4}); | |||
cur_param.front_offset_dim5 = 1; | |||
cur_param.back_offset_dim5 = 2; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||
cur_param.front_offset_dim6 = 0; | |||
cur_param.back_offset_dim6 = 3; | |||
cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||
src_shape_dim6}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||
dst_shape_dim6}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||
src_shape_dim6}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||
dst_shape_dim6}); | |||
cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||
args.emplace_back( | |||
cur_param, | |||
TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||
src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||
src_shape_dim6}, | |||
TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||
dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||
dst_shape_dim6}); | |||
return args; | |||
} | |||
} // namespace padding | |||
} // namespace test | |||
} // namespace megdnn |
@@ -0,0 +1,219 @@ | |||
/** | |||
* \file dnn/test/cuda/padding.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||
*/ | |||
#include "test/cuda/benchmark.h" | |||
#include "test/common/padding.h" | |||
#include "megdnn/oprs.h" | |||
#include "test/common/checker.h" | |||
#include "test/common/rng.h" | |||
#include "test/cuda/fixture.h" | |||
using namespace megdnn; | |||
using namespace test; | |||
// forward padding on CUDA: run every generated test case with int8 data
TEST_F(CUDA, PADDING) {
    std::vector<padding::TestArg> args = padding::get_args();
    Checker<Padding> checker(handle_cuda());
    // non-zero inputs keep real data distinguishable from zero-filled areas
    UniformIntNonZeroRNG rng(1, 9);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .execs({arg.src, arg.dst});
    }
}
// backward padding on CUDA: arg.src is the padded gradient, arg.dst the
// gradient w.r.t. the original tensor; float32 only (the backward operator
// is dispatched for floating point dtypes)
TEST_F(CUDA, PADDING_BACKWARD) {
    std::vector<padding::TestArg> args = padding::get_args_backward();
    Checker<PaddingBackward> checker(handle_cuda());
    UniformFloatRNG rng(1, 9);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execs({arg.src, arg.dst});
    }
}
// REFLECT mode, 1-D: {1,2,3,4,5} padded front 2 / back 3. The output
// mirrors around the first and last element without repeating the border:
// {3,2,1,2,3,4,5,4,3,2}. padding_val is ignored in REFLECT mode.
TEST_F(CUDA, PADDING_REFLECT) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 3;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({5}, dtype::Int8(), {1, 2, 3, 4, 5}), {}},
            Testcase{{},
                     TensorValue({10}, dtype::Int8(),
                                 {3, 2, 1, 2, 3, 4, 5, 4, 3, 2})});
}
// REFLECT mode, 2-D: a 3x3 input padded to 5x7 (front/back 1 on rows,
// front/back 2 on columns); expected values are the row- and column-wise
// mirror of the input around its borders
TEST_F(CUDA, PADDING_REFLECT2) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;  // unused: REFLECT ignores padding_val
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 1;
    param.front_offset_dim1 = 2;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 1;
    param.back_offset_dim1 = 2;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({3, 3}, dtype::Int8(),
                                 {3, 5, 1, 3, 6, 1, 4, 7, 9}),
                     {}},
            Testcase{{},
                     TensorValue({5, 7}, dtype::Int8(),
                                 {1, 6, 3, 6, 1, 6, 3, 1, 5, 3, 5, 1,
                                  5, 3, 1, 6, 3, 6, 1, 6, 3, 9, 7, 4,
                                  7, 9, 7, 4, 1, 6, 3, 6, 1, 6, 3})});
}
// REPLICATE mode, 1-D: {1..9} padded front 1 / back 2 by repeating the edge
// elements: {1,1,2,...,9,9,9}. padding_val is ignored in REPLICATE mode.
TEST_F(CUDA, PADDING_REPLICATE) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 1;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 2;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({9}, dtype::Int8(),
                                 {1, 2, 3, 4, 5, 6, 7, 8, 9}),
                     {}},
            Testcase{{},
                     TensorValue({12}, dtype::Int8(),
                                 {1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9})});
}
// REPLICATE mode, 2-D: a 3x3 input padded to 5x7 (rows: front 2 / back 0,
// columns: front 1 / back 3) by repeating the nearest edge element
TEST_F(CUDA, PADDING_REPLICATE2) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;  // unused: REPLICATE ignores padding_val
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 1;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 0;
    param.back_offset_dim1 = 3;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({3, 3}, dtype::Int8(),
                                 {1, 2, 3, 4, 5, 6, 7, 8, 9}),
                     {}},
            Testcase{{},
                     TensorValue({5, 7}, dtype::Int8(),
                                 {1, 1, 2, 3, 3, 3, 3, 1, 1, 2, 3, 3,
                                  3, 3, 1, 1, 2, 3, 3, 3, 3, 4, 4, 5,
                                  6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9})});
}
#if MEGDNN_WITH_BENCHMARK
// throughput benchmark for forward padding on CUDA, one run per mode.
// Fix: the MEGDNN_WITH_BENCHMARK guard was commented out, so the benchmark
// executed in every ordinary test pass; the guard is restored.
TEST_F(CUDA, BENCHMARK_PADDING_CONSTANT) {
    using Param = Padding::Param;
    // benchmark one shape list under a given padding param, reporting an
    // approximate bandwidth (2x the input span, in GB, over the measured ms)
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        CUBenchmarker<PaddingForward> benchmarker(handle_cuda());
        benchmarker.set_param(param);
        benchmarker.set_dtype(0, dtype::Int8()).set_dtype(1, dtype::Int8());
        for (auto&& shape : shapes) {
            double memaccess =
                    double(TensorLayout(shape, dtype::Int8())
                                   .span()
                                   .dist_byte()) *
                    2e-6;
            auto time_ms = benchmarker.execs({shape, {}});
            printf("execute %s, time %.4f ms, %.4f GB/s\n",
                   shape.to_string().c_str(), time_ms, memaccess / time_ms);
        }
    };
    printf("mode -> constant; dtype -> int8\n");
    {
        TensorShapeArray shapes = {
                {16, 3, 736, 1280},
        };
        Param param;
        param.padding_mode = param::Padding::PaddingMode::CONSTANT;
        param.front_offset_dim1 = 1;
        run(shapes, param);
    }
    printf("mode -> replicate; dtype -> int8\n");
    {
        TensorShapeArray shapes = {
                {16, 3, 736, 1280},
        };
        Param param;
        param.padding_mode = param::Padding::PaddingMode::REPLICATE;
        param.front_offset_dim1 = 1;
        run(shapes, param);
    }
    printf("mode -> reflect; dtype -> int8\n");
    {
        TensorShapeArray shapes = {
                {16, 3, 736, 1280},
        };
        Param param;
        param.padding_mode = param::Padding::PaddingMode::REFLECT;
        param.front_offset_dim1 = 1;
        run(shapes, param);
    }
}
#endif  // MEGDNN_WITH_BENCHMARK
@@ -0,0 +1,132 @@ | |||
/** | |||
* \file dnn/test/naive/padding.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ | |||
#include "test/common/padding.h" | |||
#include "megdnn/dtype.h" | |||
#include "megdnn/oprs.h" | |||
#include "test/common/checker.h" | |||
#include "test/naive/fixture.h" | |||
namespace megdnn{ | |||
namespace test{ | |||
TEST_F(NAIVE, PADDING) {
    // Run every predefined padding test case (src/dst shape pairs plus a
    // Padding param) through the naive implementation in float32.
    Checker<Padding> checker(handle());
    for (auto&& test_case : padding::get_args()) {
        checker.set_param(test_case.param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execs({test_case.src, test_case.dst});
    }
}
TEST_F(NAIVE, PADDING_CONSTANT) {
    // CONSTANT mode: a 1x1 src is embedded into a 5x5 dst at offset (2, 1);
    // every other element takes padding_val (10).
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::CONSTANT;
    // dim0: pad 2 front / 2 back; dim1: pad 1 front / 3 back.
    param.front_offset_dim0 = 2;
    param.back_offset_dim0 = 2;
    param.front_offset_dim1 = 1;
    param.back_offset_dim1 = 3;
    // remaining dims are left unpadded
    param.front_offset_dim2 = 0;
    param.back_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.back_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.back_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.back_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({1, 1}, dtype::Float32(), {1}), {}},
            Testcase{{},
                     TensorValue(
                             {5, 5}, dtype::Float32(),
                             {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1,
                              10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                              10})});
}
TEST_F(NAIVE, PADDING_REFLECT) {
    // REFLECT mode on a 1-d tensor: {1,2,3,4,5} padded 2 front / 3 back
    // mirrors across the border elements; padding_val is ignored here.
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 2;
    param.back_offset_dim0 = 3;
    // only dim0 is padded
    param.front_offset_dim1 = 0;
    param.back_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.back_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.back_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.back_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.back_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({5}, dtype::Float32(), {1, 2, 3, 4, 5}), {}},
            Testcase{{},
                     TensorValue(
                             {10}, dtype::Float32(),
                             {3, 2, 1, 2, 3, 4, 5, 4, 3, 2})});
}
TEST_F(NAIVE, PADDING_REPLICATE) {
    // REPLICATE mode on a 1-d tensor: border values are repeated — 1 copy of
    // the first element in front, 2 copies of the last element in back.
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 1;
    param.back_offset_dim0 = 2;
    // only dim0 is padded
    param.front_offset_dim1 = 0;
    param.back_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.back_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.back_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.back_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.back_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue(
                             {9}, dtype::Float32(), {1, 2, 3, 4, 5, 6, 7, 8, 9}),
                     {}},
            Testcase{{},
                     TensorValue(
                             {12}, dtype::Float32(),
                             {1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9})});
}
TEST_F(NAIVE, PADDING_REPLICATE2) {
    // REPLICATE mode in 2-d: a 3x3 src grows to 5x7 with border rows/columns
    // repeated (2 rows in front of dim0, 1 column in front / 3 behind dim1).
    Checker<Padding> checker(handle(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REPLICATE;
    param.front_offset_dim0 = 2;
    param.back_offset_dim0 = 0;
    param.front_offset_dim1 = 1;
    param.back_offset_dim1 = 3;
    // remaining dims are left unpadded
    param.front_offset_dim2 = 0;
    param.back_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.back_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.back_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.back_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue(
                             {3, 3}, dtype::Float32(),
                             {1, 2, 3, 4, 5, 6, 7, 8, 9}),
                     {}},
            Testcase{{},
                     TensorValue(
                             {5, 7}, dtype::Float32(),
                             {1, 1, 2, 3, 3, 3, 3, 1, 1, 2, 3, 3, 3, 3, 1, 1,
                              2, 3, 3, 3, 3, 4, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9,
                              9, 9, 9})});
}
} | |||
} |
@@ -67,4 +67,9 @@ decl_opr('NvOf', | |||
inputs=['src'], params='NvOf', | |||
desc='opr Implements NVIDIA Optical Flow SDK.') | |||
# Declare the Padding operator: single input tensor, configured by the
# Padding param (per-dim front/back offsets, mode, constant value).
decl_opr('Padding',
         inputs=['src'],
         params='Padding',
         desc='tensor padding opr.')
# vim: ft=python |
@@ -68,7 +68,7 @@ namespace opr { | |||
//! current cumsum version | |||
using CumsumV1 = opr::Cumsum; | |||
MGB_SEREG_OPR(CumsumV1, 1); | |||
#if MGB_CUDA | |||
MGB_SEREG_OPR(NvOf, 1); | |||
#endif | |||
@@ -1608,4 +1608,30 @@ void RelayoutFormat::init_output_format() { | |||
// f}}} | |||
// | |||
/* f{{{ ======================= PaddingForward ======================= */ | |||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingForward); | |||
MEGDNN_OPR_INIT1(PaddingForward, "padding") | |||
#if MGB_ENABLE_GRAD | |||
MGB_IMPL_OPR_GRAD(PaddingForward) {
    // Padding has exactly one input; its gradient is computed by the
    // PaddingBackward opr, which receives the output gradient plus the
    // original input var (input(0)) — presumably used to recover the
    // unpadded shape (TODO confirm against PaddingBackward::make).
    mgb_assert(opr.input().size() == 1);
    if (wrt_idx == 0) {
        SymbolVar grad = PaddingBackward::make(out_grad[0], opr.input(0), opr.param());
        return grad.node();
    } else
        return InvalidGrad::make(opr, wrt_idx);
}
#endif | |||
// f}}} | |||
/* f{{{ ======================= PaddingBackward ======================= */ | |||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingBackward); | |||
MEGDNN_OPR_INIT2(PaddingBackward, "padding_backward", 1, false); | |||
// f}}} | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -24,6 +24,35 @@ MGB_SEREG_MODIFY_SUBTENSOR_OPR(IncrSubtensor); | |||
namespace mgb { | |||
namespace serialization { | |||
template<> | |||
struct OprMaker<opr::Padding, 1> { | |||
using Opr = opr::Padding; | |||
using Param = Opr::Param; | |||
static cg::OperatorNodeBase* make(const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph, const OperatorNodeConfig& config) { | |||
MGB_MARK_USED_VAR(graph); | |||
if(inputs.size() == 1) { | |||
return Opr::make(inputs[0], param, config).node()->owner_opr(); | |||
}else{ | |||
return nullptr; | |||
} | |||
} | |||
}; | |||
template<> | |||
struct OprMaker<opr::PaddingBackward, 2> { | |||
using Opr = opr::PaddingBackward; | |||
using Param = Opr::Param; | |||
static cg::OperatorNodeBase* make(const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph, const OperatorNodeConfig& config) { | |||
MGB_MARK_USED_VAR(graph); | |||
if(inputs.size() == 2) { | |||
return Opr::make(inputs[0], inputs[1], param, config).node()->owner_opr(); | |||
}else{ | |||
return nullptr; | |||
} | |||
} | |||
}; | |||
//! Concat takes a variadic number of inputs, so delegate to the variadic maker.
template<>
struct OprMaker<opr::Concat, 0>: public OprMakerVariadic<opr::Concat>{};
@@ -185,6 +214,10 @@ namespace opr { | |||
using RelayoutFormatV1 = opr::RelayoutFormat; | |||
MGB_SEREG_OPR(RelayoutFormatV1, 1); | |||
MGB_SEREG_OPR(Padding, 1); | |||
MGB_SEREG_OPR(PaddingBackward, 2); | |||
} // namespace opr | |||
} // namespace mgb | |||
@@ -635,6 +635,26 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat, | |||
const OperatorNodeConfig &config = {}); | |||
void init_output_format() override final; | |||
}; | |||
/*! | |||
* \brief padding the src tensor to dst tensor | |||
*/ | |||
MGB_DEFINE_OPR_CLASS(PaddingForward, intl::MegDNNOprWrapperFwd<megdnn::PaddingForward>) // {
    public:
        //! \param src tensor to be padded according to \p param
        PaddingForward(VarNode* src, const Param& param, const OperatorNodeConfig& config);
        //! construct the opr and return the symbolic padded output
        static SymbolVar make(SymbolVar src, const Param& param = {}, const OperatorNodeConfig &config = {});
};
using Padding = PaddingForward; | |||
/*! | |||
* \brief padding backward | |||
*/ | |||
MGB_DEFINE_OPR_CLASS(PaddingBackward, intl::MegDNNOprWrapperBwd<megdnn::PaddingBackward>) // { | |||
public: | |||
PaddingBackward(VarNode* src, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); | |||
static SymbolVar make(SymbolVar src, SymbolVar in_for_shape, const Param ¶m = {}, const OperatorNodeConfig &config = {}); | |||
}; | |||
} // opr | |||
} // mgb | |||
@@ -0,0 +1,68 @@ | |||
#include "./legacy_checker.h" | |||
#include "megbrain/comp_node_env.h" | |||
#include "megbrain/gopt/inference.h" | |||
#include "megbrain/opr/basic_arith.h" | |||
#include "megbrain/opr/dnn/convolution.h" | |||
#include "megbrain/opr/tensor_manip.h" | |||
#include "megbrain/serialization/serializer.h" | |||
#include "megbrain/test/autocheck.h" | |||
#include "megbrain/test/helper.h" | |||
#include "megbrain/test/megdnn_helper.h" | |||
#include "megdnn/dtype.h" | |||
#include "megdnn/oprs/base.h" | |||
#include <gmock/gmock.h> | |||
#include <cmath> | |||
#include <memory> | |||
#include <random> | |||
using namespace std; | |||
using namespace mgb; | |||
namespace { | |||
TEST(TestOprDNN, PaddingForwardSerialization) {
    using namespace serialization;
    auto fname = output_file("PaddingForwardTest");

    // Dump a graph containing a single PaddingForward opr, then reload it and
    // verify that exactly one output var survives the round trip.
    auto dump_graph = [&]() {
        opr::Padding::Param param;
        param.padding_mode = megdnn::param::Padding::PaddingMode(0);
        // pad 3 in front of the first four dims, nothing anywhere else
        param.front_offset_dim0 = 3;
        param.front_offset_dim1 = 3;
        param.front_offset_dim2 = 3;
        param.front_offset_dim3 = 3;
        param.front_offset_dim4 = 0;
        param.front_offset_dim5 = 0;
        param.front_offset_dim6 = 0;
        param.back_offset_dim0 = 0;
        param.back_offset_dim1 = 0;
        param.back_offset_dim2 = 0;
        param.back_offset_dim3 = 0;
        param.back_offset_dim4 = 0;
        param.back_offset_dim5 = 0;
        param.back_offset_dim6 = 0;
        param.padding_val = 0;

        auto cn = CompNode::load("xpu");
        auto graph = ComputingGraph::make();
        HostTensorND inp_host{cn, {32, 4, 24, 24}, dtype::Float32()};
        auto inp = opr::ImmutableTensor::make(*graph, inp_host);
        auto out = opr::PaddingForward::make(inp, param, {});
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
        auto rst = dumper->dump({out});
        ASSERT_EQ(rst.outputs.size(), 1u);
    };
    auto load_graph = [&]() {
        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
        auto rst = loader->load();
        ASSERT_EQ(rst.output_var_list.size(), 1u);
    };

    dump_graph();
    load_graph();
}
} // namespace |
@@ -10,6 +10,7 @@ | |||
*/ | |||
#include "megbrain/test/helper.h" | |||
#include "megbrain/test/megdnn_helper.h" | |||
#include "megbrain/test/autocheck.h" | |||
#include "megbrain/opr/tensor_manip.h" | |||
#include "megbrain/opr/tensor_gen.h" | |||
@@ -2162,4 +2163,47 @@ TEST(TestParamPack, Split) { | |||
test_param_pack_split<3>({{2, 9}, {123}, {5, 3}}); | |||
} | |||
TEST(TestTensorManip, Padding_random) {
    // Numeric-gradient check for the Padding opr: mode 0, padding value 0,
    // pad 3 in front of each of the first four dims.
    opr::Padding::Param param;
    param.padding_mode = megdnn::param::Padding::PaddingMode(0);
    param.padding_val = 0;
    param.front_offset_dim0 = 3;
    param.front_offset_dim1 = 3;
    param.front_offset_dim2 = 3;
    param.front_offset_dim3 = 3;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 0;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;

    using Checker = AutoOprChecker<1, 1>;
    auto make_graph =
            [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        return {opr::Padding::make(inputs[0], param)};
    };
    // Reference forward: run the naive megdnn kernel into a dst whose shape is
    // the 4-d src shape enlarged by the per-dim front/back offsets.
    auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto dnn_opr =
                megdnn_naive_handle()->create_operator<megdnn::Padding>();
        dnn_opr->param() = param;
        auto src = inp[0]->as_megdnn();
        TensorShape out_shp{
                src.layout.shape[0] + param.front_offset_dim0 + param.back_offset_dim0,
                src.layout.shape[1] + param.front_offset_dim1 + param.back_offset_dim1,
                src.layout.shape[2] + param.front_offset_dim2 + param.back_offset_dim2,
                src.layout.shape[3] + param.front_offset_dim3 + param.back_offset_dim3};
        dest[0].resize(out_shp);
        dnn_opr->exec(src, dest[0].as_megdnn(), {});
    };
    Checker::RunOptions opt;
    opt.numdiff_max_err = 2e-3;
    Checker(make_graph, fwd, CompNode::load("xpu0"))
            .run({TensorShape{5, 5, 5, 5}}, opt)
            .run({TensorShape{4, 3, 4, 5}}, opt)
            .run({TensorShape{5, 4, 4, 5}}, opt);
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -113,6 +113,7 @@ union OperatorParam { | |||
param.PermutationRNG = 79, | |||
param.BetaRNG = 80, | |||
param.SlidingWindowTranspose = 81, | |||
param.Padding = 82, | |||
} | |||
table Operator { | |||