
feat(mgb): support fp16 nhwc backward

GitOrigin-RevId: 954ac6405a
release-1.6
Megvii Engine Team 3 years ago
parent commit 722aecd437
6 changed files with 112 additions and 21 deletions
  1. +9  -19  dnn/src/cuda/convolution/backward_data/cudnn.cpp
  2. +9  -1   dnn/src/cuda/convolution/backward_filter/cudnn.cpp
  3. +2  -1   dnn/src/cuda/convolution/helper.cpp
  4. +10 -0   dnn/test/common/convolution.cpp
  5. +1  -0   dnn/test/common/convolution.h
  6. +81 -0   dnn/test/cuda/convolution.cpp

dnn/src/cuda/convolution/backward_data/cudnn.cpp  (+9, -19)

@@ -14,6 +14,7 @@
 #include "src/cuda/utils.h"
 #include "src/cuda/cudnn_wrapper.h"
 #include "src/cuda/convolution/helper.h"
+#include "src/cuda/conv_bias/helper.h"

 using namespace megdnn;
 using namespace cuda;
@@ -31,27 +32,16 @@ bool ConvolutionBackwardDataImpl::AlgoCUDNN::is_available(

     CUDNNBwdDataDescs D;

-    if (!is_cudnn_supported(args.as_fwd_args()))
+    TensorLayout bias_layout, z_layout;
+    conv_bias::CanonizedFilterMeta meta;
+    meta.copy_from(args.filter_meta);
+    conv_bias::BiasForwardSizeArgs bias_args{args.handle,
+            args.grad_layout, args.filter_layout, &bias_layout,
+            &z_layout, meta, args.diff_layout, param::ConvBias::NonlineMode::IDENTITY,
+    };
+    if (!conv_bias::is_cudnn_supported(bias_args))
         return false;

-#if CUDNN_VERSION >= 7500
-    // As in cuda10.0 and cudnn7.5, algo CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 with
-    // TensorCore operations produces incorrect result. So we disable
-    // this algo. Please remove the following code, when
-    // nvidia has fixed this issue.
-    // incorrect case:
-    // inp={2x8x18x18}, kern={8x8x2x2}, pad_h=pad_w=2, stride_h=stride_w=2,
-    // dtype=float16
-    if (args.filter_meta.dtype == dtype::Float16()) {
-        const char* algo_1 = "CUDNN_CONVOLUTION_BWD_DATA_ALGO_1";
-        auto cmp_len = strlen(algo_1);
-        if (is_compute_capability_required(7, 0) &&
-            strncmp(name(), algo_1, cmp_len) == 0) {
-            return false;
-        }
-    }
-#endif
-
     auto& cudnn = args.handle->cudnn();
     args.init_desc(D);
     size_t workspace_size;


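The new availability check reuses the conv_bias support query for backward data by swapping tensor roles: the input gradient (grad) has the forward src shape, the output gradient (diff) has the forward dst shape, and the bias/z layouts are left empty with an IDENTITY nonlinearity. A standalone sketch of just that role mapping, using hypothetical stand-in types rather than the MegDNN ones:

// Hypothetical stand-ins; the real code passes conv_bias::BiasForwardSizeArgs
// to conv_bias::is_cudnn_supported as shown in the diff above.
#include <cstdio>

enum class Format { NCHW, NHWC };

struct Layout {
    Format format;
};

// Forward-style support query: after this patch it accepts NHWC as well as NCHW.
bool forward_supported(const Layout& src, const Layout& filter, const Layout& dst) {
    auto ok = [](Format f) { return f == Format::NCHW || f == Format::NHWC; };
    return ok(src.format) && ok(filter.format) && ok(dst.format);
}

// Backward data reuses the forward query: grad (dL/d_input) takes the src role,
// diff (dL/d_output) takes the dst role, mirroring the
// {args.grad_layout, args.filter_layout, ..., args.diff_layout} order above.
bool backward_data_supported(const Layout& filter, const Layout& diff,
                             const Layout& grad) {
    return forward_supported(/*src=*/grad, filter, /*dst=*/diff);
}

int main() {
    Layout nhwc{Format::NHWC};
    std::printf("fp16 NHWC backward data available: %s\n",
                backward_data_supported(nhwc, nhwc, nhwc) ? "yes" : "no");
    return 0;
}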
dnn/src/cuda/convolution/backward_filter/cudnn.cpp  (+9, -1)

@@ -14,6 +14,7 @@
 #include "src/cuda/utils.h"
 #include "src/cuda/cudnn_wrapper.h"
 #include "src/cuda/convolution/helper.h"
+#include "src/cuda/conv_bias/helper.h"

 using namespace megdnn;
 using namespace cuda;
@@ -31,7 +32,14 @@ bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available(
     auto& cudnn = args.handle->cudnn();
     CUDNNBwdFilterDescs D;

-    if (!is_cudnn_supported(args.as_fwd_args()))
+    TensorLayout bias_layout, z_layout;
+    conv_bias::CanonizedFilterMeta meta;
+    meta.copy_from(args.grad_filter_meta);
+    conv_bias::BiasForwardSizeArgs bias_args{args.handle,
+            args.src_layout, args.grad_layout, &bias_layout,
+            &z_layout, meta, args.diff_layout, param::ConvBias::NonlineMode::IDENTITY,
+    };
+    if (!conv_bias::is_cudnn_supported(bias_args))
         return false;

     args.init_desc(D);


dnn/src/cuda/convolution/helper.cpp  (+2, -1)

@@ -33,7 +33,8 @@ bool convolution::is_cudnn_supported(const ForwardSizeArgs &args) {
             args.dst_layout->dtype.enumv() != DTypeEnum::QuantizedS8) {
             return false;
         }
-    } else if (args.filter_meta.format != param::Convolution::Format::NCHW) {
+    } else if (args.filter_meta.format != param::Convolution::Format::NCHW &&
+               args.filter_meta.format != param::Convolution::Format::NHWC) {
         return false;
     }
     auto& fm = args.filter_meta;


dnn/test/common/convolution.cpp  (+10, -0)

@@ -284,6 +284,16 @@ std::vector<TestArg> convolution::get_args_cudnn_5_1_failures() {
     return args;
 }

+std::vector<TestArg> convolution::get_args_cudnn_5_1_backward() {
+    std::vector<TestArg> args;
+    args.emplace_back(
+            param::Convolution{param::Convolution::Mode::CROSS_CORRELATION, 2,
+                               2, 2, 2},
+            TensorShape{2, 8, 18, 18}, TensorShape{8, 8, 2, 2});
+
+    return args;
+}
+
 std::vector<TestArg> convolution::get_args_x86_winograd_algorithm() {
     std::vector<TestArg> args;
     for (size_t ic_size : {8, 16}) {


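The shapes above match the failing case cited in the workaround removed from backward_data/cudnn.cpp. For orientation, the forward output shape behind this backward-data case follows from the standard convolution shape formula; a small standalone check (not part of the diff):

#include <cstdio>

int main() {
    // Forward configuration behind the backward-data test case:
    // src = {2, 8, 18, 18} (NCHW), filter = {8, 8, 2, 2}, pad = 2, stride = 2.
    const int N = 2, OC = 8, H = 18, W = 18, FH = 2, FW = 2;
    const int pad = 2, stride = 2, dilation = 1;
    auto out = [&](int in, int f) {
        return (in + 2 * pad - dilation * (f - 1) - 1) / stride + 1;
    };
    // (18 + 4 - 1 - 1) / 2 + 1 = 11, so diff = {2, 8, 11, 11} while
    // grad keeps the src shape {2, 8, 18, 18}.
    std::printf("diff shape: {%d, %d, %d, %d}\n", N, OC, out(H, FH), out(W, FW));
    return 0;
}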
dnn/test/common/convolution.h  (+1, -0)

@@ -40,6 +40,7 @@ std::vector<TestArg> get_args_x86_direct_case_2();
 std::vector<TestArg> get_args_fallback_templated_impl();
 std::vector<TestArg> get_args_fallback_non_templated_impl();
 std::vector<TestArg> get_args_cudnn_5_1_failures();
+std::vector<TestArg> get_args_cudnn_5_1_backward();
 std::vector<TestArg> get_args_x86_winograd_algorithm();
 std::vector<TestArg> get_args_BRAIN_481();
 std::vector<TestArg> get_args();


dnn/test/cuda/convolution.cpp  (+81, -0)

@@ -238,6 +238,87 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA) {
     }
 }

+TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_FP16_CUDNN7_5) {
+    // With CUDA 10.0 and cuDNN 7.5, algo CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
+    // with TensorCore operations produced incorrect results.
+    // NVIDIA may have fixed this issue since; this test covers the
+    // previously failing case:
+    // inp={2x8x18x18}, kern={8x8x2x2}, pad_h=pad_w=2, stride_h=stride_w=2,
+    // dtype=float16
+    using namespace convolution;
+    std::vector<TestArg> args = get_args_cudnn_5_1_backward();
+    Checker<ConvolutionBackwardData> checker(handle_cuda());
+    NormalRNG default_rng;
+    for (auto&& arg : args) {
+        float scale =
+                128.f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
+        scale = std::max(scale, 1.f);
+        UniformFloatRNG rng(scale, 2 * scale);
+        arg.param.format = param::Convolution::Format::NHWC;
+        arg.src = cvt_src_or_dst_nchw2nhwc(arg.src);
+        arg.filter = cvt_filter_nchw2nhwc(arg.filter);
+        auto src = TensorLayout(arg.src, dtype::Float32());
+        auto filter = TensorLayout(arg.filter, dtype::Float32());
+        TensorLayout dst;
+        {
+            auto opr = handle_cuda()->create_operator<Convolution>();
+            opr->param() = arg.param;
+            opr->deduce_layout(src, filter, dst);
+        }
+        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
+        arg.param.compute_mode = param::Convolution::ComputeMode::FLOAT32;
+        checker.set_rng(0, &rng)
+                .set_rng(1, &rng)
+                .set_epsilon(1e-2)
+                .set_param(arg.param)
+                .exec(TensorLayoutArray{filter, dst, src});
+        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
+        arg.param.compute_mode = param::Convolution::ComputeMode::DEFAULT;
+        checker.set_rng(0, &rng)
+                .set_rng(1, &rng)
+                .set_epsilon(1e-2)
+                .set_param(arg.param)
+                .exec(TensorLayoutArray{filter, dst, src});
+    }
+}
+
+TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_NHWC) {
+    using namespace convolution;
+    std::vector<TestArg> args = get_args_cuda_conv_bwd_data();
+    Checker<ConvolutionBackwardData> checker(handle_cuda());
+    NormalRNG default_rng;
+    for (auto&& arg : args) {
+        float scale =
+                64.f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
+        UniformFloatRNG rng(scale, 2 * scale);
+        arg.param.format = param::Convolution::Format::NHWC;
+        arg.src = cvt_src_or_dst_nchw2nhwc(arg.src);
+        arg.filter = cvt_filter_nchw2nhwc(arg.filter);
+        auto src = TensorLayout(arg.src, dtype::Float32());
+        auto filter = TensorLayout(arg.filter, dtype::Float32());
+        TensorLayout dst;
+        {
+            auto opr = handle_cuda()->create_operator<Convolution>();
+            opr->param() = arg.param;
+            opr->deduce_layout(src, filter, dst);
+        }
+        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
+        arg.param.compute_mode = param::Convolution::ComputeMode::FLOAT32;
+        checker.set_rng(0, &rng)
+                .set_rng(1, &rng)
+                .set_epsilon(1e-2)
+                .set_param(arg.param)
+                .exec(TensorLayoutArray{filter, dst, src});
+        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
+        arg.param.compute_mode = param::Convolution::ComputeMode::DEFAULT;
+        checker.set_rng(0, &rng)
+                .set_rng(1, &rng)
+                .set_epsilon(1e-2)
+                .set_param(arg.param)
+                .exec(TensorLayoutArray{filter, dst, src});
+    }
+}
+
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_CUDNN) {
     if (cuda::is_compute_capability_required(7, 0))
         return;


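Both new tests take the existing NCHW test args and convert them with cvt_src_or_dst_nchw2nhwc and cvt_filter_nchw2nhwc, which are not shown in this diff. The sketch below is an assumption about what those helpers do for dense (non-grouped) filters, based on cuDNN's NHWC conventions (src/dst reorder to N, H, W, C; the filter to OC, FH, FW, IC):

#include <array>
#include <cstdio>

using Shape4 = std::array<int, 4>;

// Assumed behavior of cvt_src_or_dst_nchw2nhwc: {N, C, H, W} -> {N, H, W, C}.
Shape4 src_or_dst_nchw2nhwc(const Shape4& s) {
    return {s[0], s[2], s[3], s[1]};
}

// Assumed behavior of cvt_filter_nchw2nhwc for a dense filter:
// {OC, IC, FH, FW} -> {OC, FH, FW, IC}.
Shape4 filter_nchw2nhwc(const Shape4& f) {
    return {f[0], f[2], f[3], f[1]};
}

int main() {
    Shape4 src = src_or_dst_nchw2nhwc({2, 8, 18, 18});  // -> {2, 18, 18, 8}
    Shape4 filter = filter_nchw2nhwc({8, 8, 2, 2});     // -> {8, 2, 2, 8}
    std::printf("src: {%d, %d, %d, %d}, filter: {%d, %d, %d, %d}\n",
                src[0], src[1], src[2], src[3],
                filter[0], filter[1], filter[2], filter[3]);
    return 0;
}

The RNG scales in the tests (128.f or 64.f divided by sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]), the first clamped to at least 1) appear chosen so that each backward-data output, which accumulates on the order of OC * FH * FW products, stays within float16's representable range.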