diff --git a/dnn/src/cuda/conv_bias/algo.cpp b/dnn/src/cuda/conv_bias/algo.cpp
index 8f251344..961e2eac 100644
--- a/dnn/src/cuda/conv_bias/algo.cpp
+++ b/dnn/src/cuda/conv_bias/algo.cpp
@@ -164,15 +164,14 @@ std::string ConvBiasForwardImpl::AlgoBase::SizeArgs::to_string() const {
             megdnn_throw("invalid conv bias nonlinear mode");
     }
     return ssprintf(
-            "src=%s, filter=%u{%u,%u,%u,%u}, bias=%s, z=%s, dst=%s, "
+            "src=%s, filter=%s, bias=%s, z=%s, dst=%s, "
             "pad=%ux%u, stride=%ux%u, dilate=%ux%u, xcorr=%d, dtype=%s,%s, "
             "nonlinear_mode=%s",
-            src_layout->to_string().c_str(), fm.group, fm.ocpg, fm.icpg,
-            fm.spatial[0], fm.spatial[1], bias_layout->to_string().c_str(),
-            z_layout->to_string().c_str(), dst_layout->to_string().c_str(),
-            fm.padding[0], fm.padding[1], fm.stride[0], fm.stride[1],
-            fm.dilation[0], fm.dilation[1], !fm.should_flip,
-            src_layout->dtype.name(), dst_layout->dtype.name(),
+            src_layout->to_string().c_str(), filter_layout->to_string().c_str(),
+            bias_layout->to_string().c_str(), z_layout->to_string().c_str(),
+            dst_layout->to_string().c_str(), fm.padding[0], fm.padding[1],
+            fm.stride[0], fm.stride[1], fm.dilation[0], fm.dilation[1],
+            !fm.should_flip, src_layout->dtype.name(), dst_layout->dtype.name(),
             nonlinear_mode_str.c_str());
 }
 
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
index b3f3df78..2ae27776 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
@@ -35,6 +35,17 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
         return false;
     }
     auto&& param = args.opr->param();
+
+#if (CUDNN_MAJOR == 8 && CUDNN_MINOR < 2)
+    if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
+        param.format == param::ConvBias::Format::NCHW4 &&
+        args.filter_meta.group * args.filter_meta.ocpg > 256 &&
+        args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8 &&
+        args.filter_layout->dtype.enumv() == DTypeEnum::QuantizedS8) {
+        return false;
+    }
+#endif
+
     //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
     //! memory access errors, so we have to disable this kernel here.
     if (param.format == param::ConvBias::Format::NCHW4_NCHW ||
diff --git a/dnn/src/cuda/conv_bias/opr_impl.cpp b/dnn/src/cuda/conv_bias/opr_impl.cpp
index 075cb7c7..b6ed2586 100644
--- a/dnn/src/cuda/conv_bias/opr_impl.cpp
+++ b/dnn/src/cuda/conv_bias/opr_impl.cpp
@@ -97,8 +97,9 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
             auto conv_bias_algo = cb(algo_perf[i].algo);
             if (conv_bias_algo->is_available_attribute(
                         args, positive_attr, negative_attr,
-                        workspace_limit_in_bytes))
+                        workspace_limit_in_bytes)) {
                 return conv_bias_algo;
+            }
         }
 #else
     cudnnConvolutionFwdAlgo_t algo;
diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp
index 95be9b71..30fe47b2 100644
--- a/dnn/test/cuda/conv_bias.cpp
+++ b/dnn/test/cuda/conv_bias.cpp
@@ -523,6 +523,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
     };
 
     run({{1, 4, 4, 4, 4}, {4, 4, 3, 3, 4}, {1, 1, 1, 1, 4}});
+    run({{1, 4, 4, 4, 4}, {260, 4, 3, 3, 4}, {1, 65, 1, 1, 4}});
     run({{20, 1, 24, 24, 4}, {24, 1, 2, 2, 4}, {1, 6, 1, 1, 4}});
     run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}});