
fix(dnn/cuda): fix cudnn conv impl for nchw4_nchw hybrid layout

The conv_bias algo *_IMPLICIT_GEMM is disabled for cuDNN versions earlier than 8.0.0, since it produces incorrect results for int8x4->f32 configs.
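For context, "int8x4->f32" denotes a quantized int8 input (channels packed four per element, i.e. NCHW4) convolved into a float32 output. A minimal reference sketch of that semantic, assuming per-tensor scales; the helper below is illustrative, not MegDNN API:

    #include <cstdint>

    // One output element of an int8 -> f32 conv: accumulate in int32, then
    // fold both dequantization scales into the result; no requantization.
    float conv_elem_ref(const int8_t* src, const int8_t* flt, float src_scale,
                        float flt_scale, int len) {
        int32_t acc = 0;
        for (int i = 0; i < len; ++i)
            acc += int32_t(src[i]) * int32_t(flt[i]);
        return float(acc) * src_scale * flt_scale;
    }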

GitOrigin-RevId: 7cc52d0a85
Branch: release-1.7
Author: Megvii Engine Team (3 years ago)
Parent commit: 10af44abba
2 changed files with 9 additions and 11 deletions
  1. +6 -4  dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
  2. +3 -7  dnn/test/cuda/conv_bias.cpp

dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp  (+6 -4)

@@ -73,10 +73,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
         return false;
     }
 
-    //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-    //! memory access errors, so we have to disable this kernel here.
-    if (param.format == param::ConvBias::Format::NCHW4_NCHW ||
-        param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
+#if CUDNN_MAJOR < 8
+    if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM &&
+        param.format == param::ConvBias::Format::NCHW4_NCHW)
+        return false;
+#endif
+    if (param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
         param.format == param::ConvBias::Format::NCHW32_NCHW4)
         return false;
     if (param.format == param::ConvBias::Format::NCHW &&
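For reference, NCHW4_NCHW is a hybrid layout: the input is NCHW4 (channels packed in groups of four along the innermost axis) while the output is plain NCHW. A hypothetical offset computation for an NCHW4 tensor of logical shape (N, C, H, W), stored as (N, C/4, H, W, 4); the helper name is illustrative:

    #include <cstddef>

    // Element offset of logical (n, c, h, w) inside an NCHW4 tensor.
    size_t nchw4_offset(size_t n, size_t c, size_t h, size_t w, size_t C,
                        size_t H, size_t W) {
        return (((n * (C / 4) + c / 4) * H + h) * W + w) * 4 + c % 4;
    }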


dnn/test/cuda/conv_bias.cpp  (+3 -7)

@@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
     checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}});
 }
 
-//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-//! memory access errors, so we have to disable this test here.
-#if 0
 TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     require_compute_capability(6, 1);
     using namespace conv_bias;
@@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     auto run = [&](const TensorShapeArray& shapes) {
         opr->param() = param;
         TensorLayout dst_layout;
-        opr->deduce_layout({shapes[0], dtype::Float32()},
-                           {shapes[1], dtype::Float32()}, {}, {}, dst_layout);
+        opr->deduce_layout(
+                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()}, {}, {},
+                dst_layout);
         checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}});
     };


@@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
 }
 #endif
 
-#endif
-
 TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) {
     Checker<ConvBiasForward> checker(handle_cuda());
     std::vector<TestArg> args = get_chanwise_args();
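With the #if 0 guard gone, the revived test is picked up by the normal test build and can be selected via gtest's standard filter, e.g. --gtest_filter=CUDA.CONV_BIAS_FORWARD_NCHW4_NCHW (the exact runner invocation depends on the build setup).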

