GitOrigin-RevId: 44c586f912
release-1.5
@@ -1189,7 +1189,12 @@ ConvolutionBackwardFilter::check_exec(const TensorLayout& src,
                   diff.dtype.category() == DTypeCategory::FLOAT &&
                   grad.dtype.category() == DTypeCategory::FLOAT,
           "only float type is supported for conv backward filter");
-    auto ret = check_layout_fwd(src, grad, diff);
+    auto src_fwd = src;
+    auto diff_fwd = diff;
+    src_fwd.init_contiguous_stride();
+    diff_fwd.init_contiguous_stride();
+    auto ret = check_layout_fwd(src_fwd, grad, diff_fwd);
     auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
     return ret;
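Note: the hunk above loosens check_exec so it no longer validates the caller's (possibly noncontiguous) src/diff strides; the forward-layout check runs on contiguous copies, and stride handling is left to the individual algorithms. A minimal sketch of what init_contiguous_stride does here, assuming megdnn::TensorLayout semantics and reusing a layout from the tests below:

    // a noncontiguous (2, 16, 7, 7) Float32 view: dense strides would be
    // {784, 49, 7, 1}, but the batch stride here is 1568
    TensorLayout diff{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()};
    TensorLayout diff_fwd = diff;       // copy keeps shape and dtype
    diff_fwd.init_contiguous_stride();  // strides reset to {784, 49, 7, 1}
    // check_layout_fwd(src_fwd, grad, diff_fwd) now validates only shapes
    // and dtypes, not the caller's strides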
@@ -20,6 +20,10 @@ using namespace convolution;
 bool ConvolutionBackwardDataImpl::AlgoChanwise::is_available(
         const SizeArgs& args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     if ((args.diff_layout->dtype == args.filter_layout->dtype &&
          args.diff_layout->dtype == dtype::BFloat16()) ||
         (args.diff_layout->dtype == args.filter_layout->dtype &&
@@ -30,6 +30,10 @@ inline bool is_available_small(const chanwise::Param& param) {
 bool ConvolutionBackwardDataImpl::AlgoChanwiseSmall::is_available(
         const SizeArgs& args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     if ((args.diff_layout->dtype == args.filter_layout->dtype &&
          args.diff_layout->dtype == dtype::BFloat16()) ||
         (args.diff_layout->dtype == args.filter_layout->dtype &&
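Note: the chanwise kernels assume densely packed operands, so both backward-data variants simply opt out of is_available() when either layout carries nonstandard strides and let dispatch fall through to an algorithm that can. A small sketch of the is_contiguous() distinction the guard relies on, assuming megdnn::TensorLayout semantics:

    TensorLayout dense{{2, 16, 7, 7}, dtype::Float32()};  // strides {784, 49, 7, 1}
    TensorLayout strided{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()};
    // dense.is_contiguous()   -> true
    // strided.is_contiguous() -> false: batch stride 1568 != 16 * 7 * 7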
@@ -71,7 +71,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
         ConvolutionBackwardFilterImpl *o,
         const TensorLayout &src, const TensorLayout &diff,
         const TensorLayout &grad):
-    SizeArgs(o, src, diff, grad, o->check_layout_fwd(src, grad, diff))
+    SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad))
 {
 }
@@ -19,6 +19,10 @@ using namespace convolution;
 bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available(
         const SizeArgs &args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     if (args.src_layout->dtype == args.src_layout->dtype &&
         args.diff_layout->dtype == dtype::BFloat16()) {
         return false;
@@ -21,6 +21,13 @@ using namespace convolution;
 bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available(
         const SizeArgs &args) const {
+    if (args.grad_filter_meta.format != Param::Format::NCHW &&
+        args.grad_filter_meta.format != Param::Format::NHWC) {
+        if (!args.grad_layout->is_contiguous() ||
+            !args.diff_layout->is_contiguous()) {
+            return false;
+        }
+    }
     auto& cudnn = args.handle->cudnn();
     CUDNNBwdFilterDescs D;
@@ -64,8 +64,9 @@ ConvolutionBackwardFilterImpl::AlgoMatmul::get_subopr_list(
         const TensorLayoutArray& layouts, const OperatorBase* opr) const {
     const ConvolutionBackwardFilterImpl* conv_backward_filter_opr =
             static_cast<const ConvolutionBackwardFilterImpl*>(opr);
-    CanonizedFilterMeta fm = conv_backward_filter_opr->check_layout_fwd(
-            layouts[0], layouts[2], layouts[1]);
+    CanonizedFilterMeta fm =
+            conv_backward_filter_opr->make_canonized_filter_meta(
+                    layouts[0].ndim, layouts[2]);
     auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[2],
                                    conv_backward_filter_opr);
@@ -519,6 +519,38 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) {
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{src, dst, filter});
     }
+    //! noncontiguous case
+    {
+        NormalRNG default_rng;
+        param::Convolution param;
+        param.pad_h = param.pad_w = 1;
+        checker.set_rng(0, &default_rng)
+                .set_rng(1, &default_rng)
+                .set_param(param)
+                .execl(TensorLayoutArray{
+                        {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                        {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                        {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}});
+    }
+}
+
+TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) {
+    if (cuda::is_compute_capability_required(7, 0))
+        return;
+    using namespace convolution;
+    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
+            "CUDNN_CONVOLUTION"));
+    //! noncontiguous case
+    {
+        param::Convolution param;
+        param.pad_h = param.pad_w = 1;
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}
+        });
+    }
 }
 
 TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
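Note: in both new test cases only src and diff are noncontiguous; the (16, 16, 3, 3) filter with strides {144, 9, 3, 1} is dense. The {2, 16, 7, 7} / {1568, 49, 7, 1} layouts describe the kind of view a channel slice of a larger tensor would produce. A hedged sketch, with the slice itself being illustrative rather than taken from the patch:

    // full tensor: (2, 32, 7, 7) Float32, contiguous strides {1568, 49, 7, 1}
    TensorLayout full{{2, 32, 7, 7}, dtype::Float32()};
    // a hypothetical slice [:, 0:16, :, :] keeps the strides but halves the
    // channel dim; this is exactly the layout Checker::execl() feeds to
    // ConvolutionBackwardFilter in the tests above
    TensorLayout sliced{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()};
    megdnn_assert(!sliced.is_contiguous());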