/**
 * \file dnn/src/cuda/convolution/backward_data/algo.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "./algo.h"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;

ConvolutionBackwardDataImpl::AlgoPack::AlgoPack() {
    non_cudnn_algos.push_back(&chanwise);
    non_cudnn_algos.push_back(&chanwise_small);
    non_cudnn_algos.push_back(&matmul);

    all_algos.push_back(&chanwise);        // prefer chanwise
    all_algos.push_back(&chanwise_small);  // prefer small chanwise

    fill_cudnn_algos();
    for (auto&& i : cudnn) {
        all_algos.push_back(&i);
    }
    all_algos.push_back(&matmul);

    fill_int8_dp4a_algos();
    for (auto&& algo : int8_nchw4_dotprod) {
        all_algos.push_back(&algo);
        int8_algos.push_back(&algo);
    }

    fill_int8_imma_algos();
    for (auto&& algo : int8_nhwc_imma) {
        all_algos.push_back(&algo);
        int8_algos.push_back(&algo);
    }
    fill_dwconv_algos();

    int8_algos.push_back(&int8_nchw_dotprod);
    all_algos.push_back(&int8_nchw_dotprod);

    all_algos.push_back(&bfloat16);
    bfloat16_algos.push_back(&bfloat16);

    all_algos.push_back(&group);

    for (auto&& algo : all_algos) {
        m_all_algos_map.emplace(algo->info().desc, algo);
    }
}
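/// The brace-initialized AlgoParam lists below encode tile shapes for the
/// implicit batched-GEMM depthwise kernels. Following the cutlass-style
/// convention used by the other implicit-GEMM algos in megdnn, the seven FMA
/// fields are presumably {threadblock_m, threadblock_n, threadblock_k, warp_m,
/// warp_n, warp_k, stage}, and the HMMA variant inserts {instruction_m,
/// instruction_n, instruction_k} before the stage count; see the AlgoParam
/// definitions in algo.h for the authoritative field layout.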
void ConvolutionBackwardDataImpl::AlgoPack::fill_dwconv_algos() {
    {
        using AlgoParam = AlgoFloat32NCHWFMAImplicitBatchedGemm::AlgoParam;
        /// preferred algo
        implbmm_nchw_fma.emplace_back(AlgoParam{64, 128, 8, 32, 64, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{128, 128, 8, 32, 64, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{128, 64, 8, 64, 32, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{128, 32, 8, 64, 32, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{32, 128, 8, 32, 64, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{64, 64, 8, 32, 64, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{32, 64, 8, 32, 64, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{32, 32, 8, 32, 32, 8, 2});
        implbmm_nchw_fma.emplace_back(AlgoParam{64, 32, 8, 64, 32, 8, 2});
        for (auto&& algo : implbmm_nchw_fma) {
            all_algos.push_back(&algo);
        }
    }
#if CUDA_VERSION >= 10010
    {
        using AlgoParam = AlgoFloat16NCHWHMMAImplicitBatchedGemm::AlgoParam;
        /// preferred algo
        implbmm_nchw_hmma.emplace_back(AlgoParam{64, 128, 32, 32, 32, 32, 8, 8, 4, 2});
        implbmm_nchw_hmma.emplace_back(
                AlgoParam{128, 128, 32, 32, 32, 32, 8, 8, 4, 2});
        implbmm_nchw_hmma.emplace_back(
                AlgoParam{128, 256, 32, 64, 64, 32, 8, 8, 4, 2});
        implbmm_nchw_hmma.emplace_back(AlgoParam{128, 64, 32, 32, 32, 32, 8, 8, 4, 2});
        implbmm_nchw_hmma.emplace_back(AlgoParam{64, 64, 32, 32, 32, 32, 8, 8, 4, 2});
        for (auto&& algo : implbmm_nchw_hmma) {
            all_algos.push_back(&algo);
        }
    }
#endif
}

MEGDNN_DEF_GET_ALGO_FROM_DESC(ConvolutionBackwardDataImpl)

ConvolutionBackwardDataImpl::AlgoCUDNN* ConvolutionBackwardDataImpl::AlgoPack::
        cudnn_from_enum(cudnnConvolutionBwdDataAlgo_t algo) {
    for (auto&& i : cudnn) {
        if (i.cudnn_enum() == algo)
            return &i;
    }
    megdnn_throw(ssprintf(
            "can not find cudnn bwd_data algorithm %d", static_cast<int>(algo)));
}

ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;

ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
        const ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
        const TensorLayout& diff, const TensorLayout& grad)
        : SizeArgs(
                  o, filter, o->make_canonized_filter_meta(grad.ndim, filter), diff,
                  grad) {}

ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
        const ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
        const CanonizedFilterMeta& filter_meta, const TensorLayout& diff,
        const TensorLayout& grad)
        : handle{concrete_handle(o->handle())},
          filter_meta{filter_meta},
          diff_layout{&diff},
          grad_layout{&grad},
          filter_layout{&filter},
          opr{o} {}

ConvolutionBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs(
        const ConvolutionBackwardDataImpl* opr, _megdnn_tensor_in filter,
        _megdnn_tensor_in diff, _megdnn_tensor_out grad, _megdnn_workspace workspace)
        : SizeArgs(opr, filter.layout, diff.layout, grad.layout),
          filter_tensor{&filter},
          diff_tensor{&diff},
          grad_tensor{&grad},
          workspace{workspace} {}

std::string ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::to_string() const {
    auto&& fm = filter_meta;
    MEGDNN_MARK_USED_VAR(fm);
    return ssprintf(
            "filter=%u{%u,%u,%u,%u}, diff=%s, grad=%s, "
            "pad=%ux%u, stride=%ux%u, dilate=%ux%u, xcorr=%d, dtype=%s,%s",
            fm.group, fm.ocpg, fm.icpg, fm.spatial[0], fm.spatial[1],
            diff_layout->to_string().c_str(), grad_layout->to_string().c_str(),
            fm.padding[0], fm.padding[1], fm.stride[0], fm.stride[1],
            fm.dilation[0], fm.dilation[1], !fm.should_flip,
            diff_layout->dtype.name(), grad_layout->dtype.name());
}

// vim: syntax=cpp.doxygen