@@ -164,15 +164,14 @@ std::string ConvBiasForwardImpl::AlgoBase::SizeArgs::to_string() const { | |||||
megdnn_throw("invalid conv bias nonlinear mode"); | megdnn_throw("invalid conv bias nonlinear mode"); | ||||
} | } | ||||
return ssprintf( | return ssprintf( | ||||
"src=%s, filter=%u{%u,%u,%u,%u}, bias=%s, z=%s, dst=%s, " | |||||
"src=%s, filter=%s, bias=%s, z=%s, dst=%s, " | |||||
"pad=%ux%u, stride=%ux%u, dilate=%ux%u, xcorr=%d, dtype=%s,%s, " | "pad=%ux%u, stride=%ux%u, dilate=%ux%u, xcorr=%d, dtype=%s,%s, " | ||||
"nonlinear_mode=%s", | "nonlinear_mode=%s", | ||||
src_layout->to_string().c_str(), fm.group, fm.ocpg, fm.icpg, | |||||
fm.spatial[0], fm.spatial[1], bias_layout->to_string().c_str(), | |||||
z_layout->to_string().c_str(), dst_layout->to_string().c_str(), | |||||
fm.padding[0], fm.padding[1], fm.stride[0], fm.stride[1], | |||||
fm.dilation[0], fm.dilation[1], !fm.should_flip, | |||||
src_layout->dtype.name(), dst_layout->dtype.name(), | |||||
src_layout->to_string().c_str(), filter_layout->to_string().c_str(), | |||||
bias_layout->to_string().c_str(), z_layout->to_string().c_str(), | |||||
dst_layout->to_string().c_str(), fm.padding[0], fm.padding[1], | |||||
fm.stride[0], fm.stride[1], fm.dilation[0], fm.dilation[1], | |||||
!fm.should_flip, src_layout->dtype.name(), dst_layout->dtype.name(), | |||||
nonlinear_mode_str.c_str()); | nonlinear_mode_str.c_str()); | ||||
} | } | ||||
@@ -35,6 +35,17 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available( | |||||
return false; | return false; | ||||
} | } | ||||
auto&& param = args.opr->param(); | auto&& param = args.opr->param(); | ||||
#if (CUDNN_MAJOR == 8 && CUDNN_MINOR < 2) | |||||
if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM && | |||||
param.format == param::ConvBias::Format::NCHW4 && | |||||
args.filter_meta.group * args.filter_meta.ocpg > 256 && | |||||
args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8 && | |||||
args.filter_layout->dtype.enumv() == DTypeEnum::QuantizedS8) { | |||||
return false; | |||||
} | |||||
#endif | |||||
//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal | //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal | ||||
//! memory access errors, so we have to disable this kernel here. | //! memory access errors, so we have to disable this kernel here. | ||||
if (param.format == param::ConvBias::Format::NCHW4_NCHW || | if (param.format == param::ConvBias::Format::NCHW4_NCHW || | ||||
@@ -97,8 +97,9 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic( | |||||
auto conv_bias_algo = cb(algo_perf[i].algo); | auto conv_bias_algo = cb(algo_perf[i].algo); | ||||
if (conv_bias_algo->is_available_attribute( | if (conv_bias_algo->is_available_attribute( | ||||
args, positive_attr, negative_attr, | args, positive_attr, negative_attr, | ||||
workspace_limit_in_bytes)) | |||||
workspace_limit_in_bytes)) { | |||||
return conv_bias_algo; | return conv_bias_algo; | ||||
} | |||||
} | } | ||||
#else | #else | ||||
cudnnConvolutionFwdAlgo_t algo; | cudnnConvolutionFwdAlgo_t algo; | ||||
@@ -523,6 +523,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) { | |||||
}; | }; | ||||
run({{1, 4, 4, 4, 4}, {4, 4, 3, 3, 4}, {1, 1, 1, 1, 4}}); | run({{1, 4, 4, 4, 4}, {4, 4, 3, 3, 4}, {1, 1, 1, 1, 4}}); | ||||
run({{1, 4, 4, 4, 4}, {260, 4, 3, 3, 4}, {1, 65, 1, 1, 4}}); | |||||
run({{20, 1, 24, 24, 4}, {24, 1, 2, 2, 4}, {1, 6, 1, 1, 4}}); | run({{20, 1, 24, 24, 4}, {24, 1, 2, 2, 4}, {1, 6, 1, 1, 4}}); | ||||
run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}); | run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}); | ||||