
fix(dnn/cuda): fix cudnn conv impl for nchw4_nchw hybrid layout

The conv_bias algo *_IMPLICIT_GEMM is disabled for cuDNN versions earlier than 8.0.0, since it produces incorrect results for int8x4->f32 configs.
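For context, "int8x4->f32" denotes a quantized int8 input (channels packed four per element, i.e. NCHW4) convolved into a float32 output. A minimal reference sketch of that semantic, assuming per-tensor scales; the helper below is illustrative, not MegDNN API:

    #include <cstdint>

    // One output element of an int8 -> f32 conv: accumulate in int32, then
    // fold both dequantization scales into the result; no requantization.
    float conv_elem_ref(const int8_t* src, const int8_t* flt, float src_scale,
                        float flt_scale, int len) {
        int32_t acc = 0;
        for (int i = 0; i < len; ++i)
            acc += int32_t(src[i]) * int32_t(flt[i]);
        return float(acc) * src_scale * flt_scale;
    }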

GitOrigin-RevId: 7cc52d0a85
Branch: release-1.7
Author: Megvii Engine Team (3 years ago)
Parent commit: 10af44abba
2 changed files with 9 additions and 11 deletions
  1. +6 -4  dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
  2. +3 -7  dnn/test/cuda/conv_bias.cpp

dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp  (+6 -4)

@@ -73,10 +73,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
         return false;
     }
 
-    //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-    //! memory access errors, so we have to disable this kernel here.
-    if (param.format == param::ConvBias::Format::NCHW4_NCHW ||
-        param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
+#if CUDNN_MAJOR < 8
+    if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM &&
+        param.format == param::ConvBias::Format::NCHW4_NCHW)
+        return false;
+#endif
+    if (param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
         param.format == param::ConvBias::Format::NCHW32_NCHW4)
         return false;
     if (param.format == param::ConvBias::Format::NCHW &&
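For reference, NCHW4_NCHW is a hybrid layout: the input is NCHW4 (channels packed in groups of four along the innermost axis) while the output is plain NCHW. A hypothetical offset computation for an NCHW4 tensor of logical shape (N, C, H, W), stored as (N, C/4, H, W, 4); the helper name is illustrative:

    #include <cstddef>

    // Element offset of logical (n, c, h, w) inside an NCHW4 tensor.
    size_t nchw4_offset(size_t n, size_t c, size_t h, size_t w, size_t C,
                        size_t H, size_t W) {
        return (((n * (C / 4) + c / 4) * H + h) * W + w) * 4 + c % 4;
    }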


dnn/test/cuda/conv_bias.cpp  (+3 -7)

@@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
     checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}});
 }
 
-//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-//! memory access errors, so we have to disable this test here.
-#if 0
 TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     require_compute_capability(6, 1);
     using namespace conv_bias;
@@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     auto run = [&](const TensorShapeArray& shapes) {
         opr->param() = param;
         TensorLayout dst_layout;
-        opr->deduce_layout({shapes[0], dtype::Float32()},
-                           {shapes[1], dtype::Float32()}, {}, {}, dst_layout);
+        opr->deduce_layout(
+                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()}, {}, {},
+                dst_layout);
         checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}});
     };


@@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
 }
 #endif
 
-#endif
-
 TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) {
     Checker<ConvBiasForward> checker(handle_cuda());
     std::vector<TestArg> args = get_chanwise_args();
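With the #if 0 guard gone, the revived test is picked up by the normal test build and can be selected via gtest's standard filter, e.g. --gtest_filter=CUDA.CONV_BIAS_FORWARD_NCHW4_NCHW (the exact runner invocation depends on the build setup).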

