/**
 * \file dnn/src/cuda/convolution3d/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "./opr_impl.h"
#include "./backward_data/algo.h"
#include "./backward_filter/algo.h"
#include "./forward/algo.h"
#include "./helper.h"

#include "src/common/algo_chooser.h"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace convolution3d;

// Two-level stringification so that the CUDNN_* macros are expanded to their
// numeric values before being turned into string literals.
#define TO_STRING2(v) #v
#define TO_STRING(v) TO_STRING2(v)
#define CUDNN_VERSION_STR  \
    TO_STRING(CUDNN_MAJOR) \
    "." TO_STRING(CUDNN_MINOR) "." TO_STRING(CUDNN_PATCHLEVEL)

/* ============== Convolution3DForwardImpl ============== */

/**
 * Heuristic algorithm selection from raw tensor layouts.
 *
 * Derives the filter meta with check_layout_fwd(src, filter, dst) and forwards
 * to the CanonizedFilterMeta overload below.
 */
Convolution3DForwardImpl::Algorithm*
Convolution3DForwardImpl::get_algorithm_heuristic(
        const TensorLayout& src, const TensorLayout& filter,
        const TensorLayout& dst, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto fm = check_layout_fwd(src, filter, dst);
    return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes,
                                   positive_attr, negative_attr);
}

/**
 * Heuristic algorithm selection for the forward pass.
 *
 * Candidates are tried in this order; the first one that is accepted wins:
 *   1. (only when built against cuDNN < 7.5) the chanwise algorithm, for
 *      grouped convolutions;
 *   2. the 1x1x1 algorithm, for batch sizes up to 4;
 *   3. the algorithm suggested by the cuDNN helper, when
 *      is_cudnn_supported(args) holds;
 *   4. for grouped convolutions: steps 2 and 3 again on the size args rewritten
 *      by AlgoGroupConvGeneral::modify_size_args, with the result mapped
 *      through sm_algo_pack.algo2gconv;
 *   5. whatever megdnn::get_algo_match_attribute picks from
 *      sm_algo_pack.non_cudnn_algos.
 *
 * \param workspace_limit_in_bytes workspace limit passed to every
 *      availability query
 * \param positive_attr, negative_attr attribute constraints passed to every
 *      availability query
 */
Convolution3DForwardImpl::Algorithm*
Convolution3DForwardImpl::get_algorithm_heuristic(
        const TensorLayout& src, const CanonizedFilterMeta& filter,
        const TensorLayout& dst, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    AlgoBase::SizeArgs args(this, src, filter, dst);

#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5)
    if (args.filter_meta.group > 1) {
        // Prefer the special chanwise impl: the group conv of cuDNN versions
        // lower than v7.5.0 is still slower than our implementation in many
        // channel-wise cases.
        if (sm_algo_pack.chanwise.is_available_attribute(
                    args, positive_attr, negative_attr,
                    workspace_limit_in_bytes)) {
            return &sm_algo_pack.chanwise;
        }
    }
#endif

    // `args` is captured by reference on purpose: the lambdas below are invoked
    // a second time after `args` has been rewritten for the grouped case.
    auto prefer_1x1x1 = [&args, positive_attr, negative_attr,
                         workspace_limit_in_bytes]() {
        const size_t MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO = 4;
        size_t batch_size = args.src_layout->shape[0];
        if (batch_size > MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO) {
            return false;
        }
        return sm_algo_pack.a1x1x1.is_available_attribute(
                args, positive_attr, negative_attr, workspace_limit_in_bytes);
    };

    // Returns the algorithm object matching the enum chosen by the cuDNN
    // helper, or nullptr when the helper reports that nothing was found.
    auto get_cudnn_algo =
            [this, &args, workspace_limit_in_bytes, positive_attr,
             negative_attr]() -> Convolution3DForwardImpl::AlgoBase* {
        auto cudnn_handle = cuda::cudnn_handle(this->handle());
        cudnnConvolutionFwdAlgo_t algo;
        CUDNNForwardDescs desc;
        args.init_desc(desc);

        bool got = cudnn_get_convolution_fwd_algo_helper(
                cudnn_handle, desc.src_desc.desc, desc.filter_desc.desc,
                desc.conv_desc.desc, desc.dst_desc.desc,
                workspace_limit_in_bytes, &algo, positive_attr, negative_attr);
        if (got) {
            return static_cast<Convolution3DForwardImpl::AlgoBase*>(
                    megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
                            sm_algo_pack.cudnn_from_enum(algo), positive_attr,
                            negative_attr));
        } else {
            return nullptr;
        }
    };

    if (prefer_1x1x1()) {
        return &sm_algo_pack.a1x1x1;
    }

    if (is_cudnn_supported(args)) {
        if (auto algo = get_cudnn_algo())
            return algo;
    }

    if (args.filter_meta.group > 1) {
        auto orig_args = args;
        // NOTE(review): modify_size_args presumably points `args` at the
        // scratch layouts `a`/`b`; `args` is restored before they leave scope.
        TensorLayout a, b;
        AlgoGroupConvGeneral::modify_size_args(args, a, b);
        if (prefer_1x1x1()) {
            return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1);
        }
        if (is_cudnn_supported(args)) {
            if (auto algo = get_cudnn_algo())
                return sm_algo_pack.algo2gconv.at(algo);
        }
        args = orig_args;
    }

    return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
            sm_algo_pack.non_cudnn_algos, args, workspace_limit_in_bytes,
            "cuda conv3d fwd", positive_attr, negative_attr);
}

//! Forwards to megdnn::get_all_algorithms with size args built from the
//! given layouts.
std::vector<Convolution3DForwardImpl::Algorithm*>
Convolution3DForwardImpl::get_all_algorithms(const TensorLayout& src,
                                             const TensorLayout& filter,
                                             const TensorLayout& dst) {
    return megdnn::get_all_algorithms<Convolution3DForwardImpl>(
            {this, src, filter, dst});
}

//! Workspace size reported by the algorithm that get_algorithm() resolves for
//! these layouts.
size_t Convolution3DForwardImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& filter,
        const TensorLayout& dst) {
    AlgoBase::SizeArgs args(this, src, filter, dst);
    return get_algorithm(this, src, args.filter_meta, dst)
            ->get_workspace_in_bytes(args);
}

//! Resolves the algorithm via get_algorithm(), checks the supplied workspace
//! against it and executes it.
void Convolution3DForwardImpl::exec(_megdnn_tensor_in src,
                                    _megdnn_tensor_in filter,
                                    _megdnn_tensor_out dst,
                                    _megdnn_workspace workspace) {
    AlgoBase::ExecArgs args(this, src, filter, dst, workspace);
    auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout);
    algo->check_workspace(args, workspace).exec(args);
}

//! Algorithm-set identifier; embeds the cuDNN version this file was compiled
//! against.
const char* Convolution3DForwardImpl::get_algorithm_set_name() const {
    return "CUDACONV0+CUDNN" CUDNN_VERSION_STR;
}

/* ============== Convolution3DBackwardDataImpl ============== */

//! Resolves the algorithm via get_algorithm(), checks the supplied workspace
//! against it and executes it.
void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                         _megdnn_tensor_in diff,
                                         _megdnn_tensor_out grad,
                                         _megdnn_workspace workspace) {
    AlgoBase::ExecArgs args(this, filter, diff, grad, workspace);
    auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout);
    algo->check_workspace(args, workspace).exec(args);
}

//! Forwards to megdnn::get_all_algorithms with size args built from the
//! given layouts.
std::vector<Convolution3DBackwardDataImpl::Algorithm*>
Convolution3DBackwardDataImpl::get_all_algorithms(const TensorLayout& filter,
                                                  const TensorLayout& diff,
                                                  const TensorLayout& grad) {
    return megdnn::get_all_algorithms<Convolution3DBackwardDataImpl>(
            {this, filter, diff, grad});
}

/**
 * Heuristic algorithm selection from raw tensor layouts.
 *
 * The filter meta is derived with check_layout_fwd(grad, filter, diff), i.e.
 * \p grad is passed in the forward-source position and \p diff in the
 * forward-destination position.
 */
Convolution3DBackwardDataImpl::Algorithm*
Convolution3DBackwardDataImpl::get_algorithm_heuristic(
        const TensorLayout& filter, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto fm = check_layout_fwd(grad, filter, diff);
    return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes,
                                   positive_attr, negative_attr);
}

/**
 * Heuristic algorithm selection for the backward-data pass.
 *
 * Order of preference: the chanwise algorithm for grouped convolutions, then
 * the algorithm suggested by the cuDNN helper, then (grouped only) the cuDNN
 * suggestion for the size args rewritten by
 * AlgoGroupConvGeneral::modify_size_args mapped through
 * sm_algo_pack.algo2gconv, and finally a pick from
 * sm_algo_pack.non_cudnn_algos.
 */
Convolution3DBackwardDataImpl::Algorithm*
Convolution3DBackwardDataImpl::get_algorithm_heuristic(
        const CanonizedFilterMeta& filter, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    AlgoBase::SizeArgs args(this, filter, diff, grad);

    if (args.filter_meta.group > 1 &&
        sm_algo_pack.chanwise.is_available_attribute(
                args, positive_attr, negative_attr,
                workspace_limit_in_bytes)) {
        return &sm_algo_pack.chanwise;
    }

    // `args` is captured by reference on purpose: the lambda is invoked again
    // after `args` has been rewritten for the grouped case.
    auto get_cudnn_algo =
            [this, &args, workspace_limit_in_bytes, positive_attr,
             negative_attr]() -> Convolution3DBackwardDataImpl::AlgoBase* {
        auto cudnn_handle = cuda::cudnn_handle(this->handle());
        cudnnConvolutionBwdDataAlgo_t algo;
        CUDNNBwdDataDescs desc;
        args.init_desc(desc);
        bool got = cudnn_get_convolution_bwd_data_algo_helper(
                cudnn_handle, desc.filter_desc.desc, desc.diff_desc.desc,
                desc.conv_desc.desc, desc.grad_desc.desc,
                workspace_limit_in_bytes, &algo, positive_attr, negative_attr);
        if (got) {
            return static_cast<Convolution3DBackwardDataImpl::AlgoBase*>(
                    megdnn::get_algo_match_attribute<
                            Convolution3DBackwardDataImpl>(
                            sm_algo_pack.cudnn_from_enum(algo), positive_attr,
                            negative_attr));
        } else {
            return nullptr;
        }
    };

    if (is_cudnn_supported(args.as_fwd_args())) {
        if (auto algo = get_cudnn_algo())
            return algo;
    }

    if (args.filter_meta.group > 1) {
        auto orig_args = args;
        // NOTE(review): modify_size_args presumably points `args` at the
        // scratch layouts `a`/`b`; `args` is restored before they leave scope.
        TensorLayout a, b;
        AlgoGroupConvGeneral::modify_size_args(args, a, b);
        if (is_cudnn_supported(args.as_fwd_args())) {
            if (auto algo = get_cudnn_algo())
                return sm_algo_pack.algo2gconv.at(algo);
        }
        args = orig_args;
    }

    return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>(
            sm_algo_pack.non_cudnn_algos, args, workspace_limit_in_bytes,
            "cuda conv3d bwd data", positive_attr, negative_attr);
}

//! Workspace size reported by the algorithm that get_algorithm() resolves for
//! these layouts.
size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes(
        const TensorLayout& filter, const TensorLayout& diff,
        const TensorLayout& grad) {
    AlgoBase::SizeArgs args(this, filter, diff, grad);
    return get_algorithm(this, args.filter_meta, diff, grad)
            ->get_workspace_in_bytes(args);
}

//! Algorithm-set identifier; embeds the cuDNN version this file was compiled
//! against.
const char* Convolution3DBackwardDataImpl::get_algorithm_set_name() const {
    return "CUDACONV0+CUDNN" CUDNN_VERSION_STR;
}

/* ============== Convolution3DBackwardFilterImpl ============== */

//! Resolves the algorithm via get_algorithm(), checks the supplied workspace
//! against it and executes it.
void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src,
                                           _megdnn_tensor_in diff,
                                           _megdnn_tensor_out grad,
                                           _megdnn_workspace workspace) {
    AlgoBase::ExecArgs args(this, src, diff, grad, workspace);
    auto algo = get_algorithm(this, src.layout, diff.layout,
                              args.grad_filter_meta);
    algo->check_workspace(args, workspace).exec(args);
}

//! Forwards to megdnn::get_all_algorithms with size args built from the
//! given layouts.
std::vector<Convolution3DBackwardFilterImpl::Algorithm*>
Convolution3DBackwardFilterImpl::get_all_algorithms(const TensorLayout& src,
                                                    const TensorLayout& diff,
                                                    const TensorLayout& grad) {
    return megdnn::get_all_algorithms<Convolution3DBackwardFilterImpl>(
            {this, src, diff, grad});
}

/**
 * Heuristic algorithm selection from raw tensor layouts.
 *
 * The filter meta is derived with check_layout_fwd(src, grad, diff), i.e.
 * \p grad is passed in the filter position and \p diff in the
 * forward-destination position.
 */
Convolution3DBackwardFilterImpl::Algorithm*
Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
        const TensorLayout& src, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto fm = check_layout_fwd(src, grad, diff);
    return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes,
                                   positive_attr, negative_attr);
}

/**
 * Heuristic algorithm selection for the backward-filter pass.
 *
 * Order of preference: the chanwise algorithm for grouped convolutions, then
 * the algorithm suggested by the cuDNN helper, then (grouped only) the cuDNN
 * suggestion for the size args rewritten by
 * AlgoGroupConvGeneral::modify_size_args mapped through
 * sm_algo_pack.algo2gconv, and finally a pick from
 * sm_algo_pack.non_cudnn_algos.
 */
Convolution3DBackwardFilterImpl::Algorithm*
Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
        const TensorLayout& src, const TensorLayout& diff,
        const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    AlgoBase::SizeArgs args(this, src, diff, grad);

    if (args.grad_filter_meta.group > 1 &&
        sm_algo_pack.chanwise.is_available_attribute(
                args, positive_attr, negative_attr,
                workspace_limit_in_bytes)) {
        return &sm_algo_pack.chanwise;
    }

    // `args` is captured by reference on purpose: the lambda is invoked again
    // after `args` has been rewritten for the grouped case.
    auto get_cudnn_algo =
            [this, &args, workspace_limit_in_bytes, positive_attr,
             negative_attr]() -> Convolution3DBackwardFilterImpl::AlgoBase* {
        auto cudnn_handle = cuda::cudnn_handle(this->handle());
        cudnnConvolutionBwdFilterAlgo_t algo;
        CUDNNBwdFilterDescs desc;
        args.init_desc(desc);
        bool got = cudnn_get_convolution_bwd_filter_algo_helper(
                cudnn_handle, desc.src_desc.desc, desc.diff_desc.desc,
                desc.conv_desc.desc, desc.grad_desc.desc,
                workspace_limit_in_bytes, &algo, positive_attr, negative_attr);
        if (got) {
            return static_cast<Convolution3DBackwardFilterImpl::AlgoBase*>(
                    megdnn::get_algo_match_attribute<
                            Convolution3DBackwardFilterImpl>(
                            sm_algo_pack.cudnn_from_enum(algo), positive_attr,
                            negative_attr));
        } else {
            return nullptr;
        }
    };

    if (is_cudnn_supported(args.as_fwd_args())) {
        if (auto algo = get_cudnn_algo())
            return algo;
    }

    if (args.grad_filter_meta.group > 1) {
        auto orig_args = args;
        // NOTE(review): modify_size_args presumably points `args` at the
        // scratch layouts `a`/`b`; `args` is restored before they leave scope.
        TensorLayout a, b;
        AlgoGroupConvGeneral::modify_size_args(args, a, b);
        if (is_cudnn_supported(args.as_fwd_args())) {
            if (auto algo = get_cudnn_algo())
                return sm_algo_pack.algo2gconv.at(algo);
        }
        args = orig_args;
    }

    return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>(
            sm_algo_pack.non_cudnn_algos, args, workspace_limit_in_bytes,
            "cuda conv3d bwd filter", positive_attr, negative_attr);
}

//! Workspace size reported by the algorithm that get_algorithm() resolves for
//! these layouts.
size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& diff,
        const TensorLayout& grad) {
    AlgoBase::SizeArgs args(this, src, diff, grad);
    return get_algorithm(this, src, diff, args.grad_filter_meta)
            ->get_workspace_in_bytes(args);
}

//! Algorithm-set identifier; embeds the cuDNN version this file was compiled
//! against.
const char* Convolution3DBackwardFilterImpl::get_algorithm_set_name() const {
    return "CUDACONV0+CUDNN" CUDNN_VERSION_STR;
}

// vim: syntax=cpp.doxygen