From b8febaf91fe968763c74822a9f67410be917d406 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Thu, 21 Jan 2021 22:45:20 +0800
Subject: [PATCH] refactor(megdnn): refactor bfloat16 convolutionbackwardfilter
 to recursive interface

GitOrigin-RevId: 37c08a5b8b2484df300acf71c651640eca041144
---
 .../cuda/convolution/backward_data/bfloat16.cpp    |   4 +-
 dnn/src/cuda/convolution/backward_filter/algo.cpp  |   8 +-
 dnn/src/cuda/convolution/backward_filter/algo.h    |  22 ++--
 .../cuda/convolution/backward_filter/bfloat16.cpp  | 101 +++++++++++++--------
 dnn/src/cuda/convolution/opr_impl.h                |  11 +++
 dnn/test/cuda/convolution.cpp                      |   6 ++
 6 files changed, 91 insertions(+), 61 deletions(-)
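Background for reviewers: before this patch, AlgoPack wrapped every float32
backward-filter algorithm in its own heap-allocated AlgoBFloat16, and each
wrapper pinned its sub-algorithm through an m_algorithm pointer fixed at
construction time. After this patch there is a single AlgoBFloat16 that only
reports which sub-operator it needs via get_subopr_list(); the generic searcher
then searches that sub-operator and records the choice in
ExecutionPolicy::sub_policy, so nested algorithms are resolved recursively.
The standalone sketch below models the shape of that interface; SearchItem,
ExecutionPolicy, resolve() and the param strings are simplified stand-ins for
the megdnn types, not their real signatures.

    #include <string>
    #include <vector>

    struct SearchItem {
        std::string opr_type;  // operator the sub-search runs on
        std::string param;     // serialized param for that operator
    };

    struct ExecutionPolicy {
        std::string algo;                         // chosen algorithm name
        std::vector<ExecutionPolicy> sub_policy;  // one entry per sub-operator
    };

    struct Algorithm {
        virtual ~Algorithm() = default;
        virtual std::string name() const = 0;
        // Algorithms that forward work to another operator report it here, so
        // one generic searcher can recurse; the old design instead held a
        // concrete sub-algorithm pointer (m_algorithm) per wrapper instance.
        virtual std::vector<SearchItem> get_subopr_list() const { return {}; }
    };

    struct MatmulAlgo final : Algorithm {
        std::string name() const override { return "MATMUL"; }
    };

    struct BF16Algo final : Algorithm {
        std::string name() const override {
            return "CONVOLUTION_BACKWARD_FILTER_BFLOAT16";
        }
        std::vector<SearchItem> get_subopr_list() const override {
            // Same layouts, float32 compute mode, run by a sub-operator.
            return {{"CONVOLUTION_BACKWARD_FILTER", "compute_mode=DEFAULT"}};
        }
    };

    // Hypothetical resolver standing in for the sub-operator's heuristic.
    const Algorithm& resolve(const SearchItem&) {
        static MatmulAlgo leaf;
        return leaf;
    }

    // The search recurses through get_subopr_list() and records each choice
    // in sub_policy, exactly the slot that exec() unpacks in the hunks below.
    ExecutionPolicy search(const Algorithm& algo) {
        ExecutionPolicy policy{algo.name(), {}};
        for (auto&& item : algo.get_subopr_list())
            policy.sub_policy.push_back(search(resolve(item)));
        return policy;
    }

    int main() {
        ExecutionPolicy p = search(BF16Algo{});
        // p.algo == "CONVOLUTION_BACKWARD_FILTER_BFLOAT16"
        // p.sub_policy[0].algo == "MATMUL", the pair the test asserts below.
    }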
diff --git a/dnn/src/cuda/convolution/backward_data/bfloat16.cpp b/dnn/src/cuda/convolution/backward_data/bfloat16.cpp
index 322a8e8b..6e2e85b9 100644
--- a/dnn/src/cuda/convolution/backward_data/bfloat16.cpp
+++ b/dnn/src/cuda/convolution/backward_data/bfloat16.cpp
@@ -60,7 +60,7 @@ bool ConvolutionBackwardDataImpl::AlgoBFloat16::is_available(
     auto&& config = sub_opr_config(
             {*args.filter_layout, *args.diff_layout, *args.grad_layout},
             args.opr);
-    conv_back_data_opr->param() = config.second;
+    conv_back_data_opr->param() = config.second;
     return args.diff_layout->dtype == args.filter_layout->dtype &&
            args.diff_layout->dtype == dtype::BFloat16() &&
            get_algorithm(static_cast<ConvolutionBackwardDataImpl*>(
@@ -80,7 +80,7 @@ WorkspaceBundle ConvolutionBackwardDataImpl::AlgoBFloat16::get_workspace_bundle(
     auto&& config = sub_opr_config(
             {*args.filter_layout, *args.diff_layout, *args.grad_layout},
             args.opr);
-    conv_back_data_opr->param() = config.second;
+    conv_back_data_opr->param() = config.second;
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
diff --git a/dnn/src/cuda/convolution/backward_filter/algo.cpp b/dnn/src/cuda/convolution/backward_filter/algo.cpp
index fc89b30b..f7ea856a 100644
--- a/dnn/src/cuda/convolution/backward_filter/algo.cpp
+++ b/dnn/src/cuda/convolution/backward_filter/algo.cpp
@@ -43,12 +43,8 @@ ConvolutionBackwardFilterImpl::AlgoPack::AlgoPack() {
     megdnn_assert(all_algos_data == all_algos.data());

     non_cudnn_algos.push_back(all_algos.rbegin()[0]);  // group matmul
-    size_t algo_size = all_algos.size();
-    for (size_t i = 0; i < algo_size; ++i) {
-        bfloat16_refhold.emplace_back(new AlgoBFloat16(all_algos[i]));
-        all_algos.push_back(bfloat16_refhold.back().get());
-        non_cudnn_algos.push_back(bfloat16_refhold.back().get());
-    }
+    all_algos.push_back(&bfloat16);
+
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
diff --git a/dnn/src/cuda/convolution/backward_filter/algo.h b/dnn/src/cuda/convolution/backward_filter/algo.h
index d224ee00..9da63ffe 100644
--- a/dnn/src/cuda/convolution/backward_filter/algo.h
+++ b/dnn/src/cuda/convolution/backward_filter/algo.h
@@ -158,27 +158,21 @@ public:

 class ConvolutionBackwardFilterImpl::AlgoBFloat16 final : public AlgoBase {
 public:
-    AlgoBFloat16(ConvolutionBackwardFilterImpl::AlgoBase*);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;

-    const char* name() const override { return m_name.c_str(); }
-    bool is_reproducible() const override { return true; }
-    MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;

-    std::string param() const override {
-        std::string ret;
-        serialize_write_pod(m_algorithm, ret);
-        return ret;
+    const char* name() const override {
+        return "CONVOLUTION_BACKWARD_FILTER_BFLOAT16";
     }
+    bool is_reproducible() const override { return true; }
+    MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)

 private:
-    std::string m_name;
-    ConvolutionBackwardFilterImpl::AlgoBase* m_algorithm = nullptr;
-    SizeArgs float_args(const SizeArgs& args,
-                        ConvolutionBackwardFilterImpl* opr, TensorLayout& fsrc,
-                        TensorLayout& ffilter, TensorLayout& fdst) const;
     WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };

@@ -225,7 +219,7 @@ public:
     AlgoChanwise chanwise;
     std::vector<AlgoGroupConvGeneral> gconv;
     std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
-    std::vector<std::unique_ptr<AlgoBFloat16>> bfloat16_refhold;
+    AlgoBFloat16 bfloat16;

     std::vector<AlgoBase*>
             //! all algorithms
diff --git a/dnn/src/cuda/convolution/backward_filter/bfloat16.cpp b/dnn/src/cuda/convolution/backward_filter/bfloat16.cpp
index 6e8dd709..81898795 100644
--- a/dnn/src/cuda/convolution/backward_filter/bfloat16.cpp
+++ b/dnn/src/cuda/convolution/backward_filter/bfloat16.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */

 #include "./algo.h"
@@ -17,33 +18,39 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution;

-ConvolutionBackwardFilterImpl::AlgoBFloat16::AlgoBFloat16(
-        ConvolutionBackwardFilterImpl::AlgoBase* algorithm)
-        : m_algorithm(algorithm) {
-    megdnn_assert_internal(algorithm);
-    m_name = ssprintf("CONVOLUTION_BACKWARD_Filter_BFLOAT16:%s",
-                      m_algorithm->name());
-}
-
-ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs
-ConvolutionBackwardFilterImpl::AlgoBFloat16::float_args(
-        const SizeArgs& args, ConvolutionBackwardFilterImpl* opr,
-        TensorLayout& fsrc, TensorLayout& fdiff, TensorLayout& fgrad) const {
-    fsrc = *args.src_layout;
-    fdiff = *args.diff_layout;
-    fgrad = *args.grad_layout;
+namespace {
+std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param>
+sub_opr_config(const TensorLayoutArray& layouts,
+               const ConvolutionBackwardFilterImpl* opr) {
+    megdnn_assert(layouts.size() >= 3);
+    std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param> ret;
+    ret.first = layouts;
     auto change_dtype = [](TensorLayout& layout) {
         if (layout.dtype == dtype::BFloat16()) {
             layout.dtype = dtype::Float32();
         }
     };
-    change_dtype(fsrc);
-    change_dtype(fdiff);
-    change_dtype(fgrad);
-    opr->param() = args.opr->param();
-    opr->param().compute_mode = Param::ComputeMode::DEFAULT;
-    opr->execution_policy() = {m_algorithm->desc(), {}};
-    return SizeArgs(opr, fsrc, fdiff, fgrad);
+    change_dtype(ret.first[0]);
+    change_dtype(ret.first[1]);
+    change_dtype(ret.first[2]);
+
+    ret.second = opr->param();
+    ret.second.compute_mode =
+            ConvolutionBackwardFilter::Param::ComputeMode::DEFAULT;
+    return ret;
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvolutionBackwardFilterImpl::AlgoBFloat16::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    auto&& config = sub_opr_config(
+            layouts, static_cast<const ConvolutionBackwardFilterImpl*>(opr));
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION_BACKWARD_FILTER, param_str,
+             config.first}};
 }

 bool ConvolutionBackwardFilterImpl::AlgoBFloat16::is_available(
@@ -51,25 +58,33 @@ bool ConvolutionBackwardFilterImpl::AlgoBFloat16::is_available(
     TensorLayout fsrc, fdiff, fgrad;
     auto conv_back_filter_opr =
             args.handle->create_operator<ConvolutionBackwardFilter>();
-    SizeArgs fargs = float_args(args,
-                                static_cast<ConvolutionBackwardFilterImpl*>(
-                                        conv_back_filter_opr.get()),
-                                fsrc, fdiff, fgrad);
+
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.diff_layout, *args.grad_layout},
+            args.opr);
+    conv_back_filter_opr->param() = config.second;
     return args.src_layout->dtype == args.diff_layout->dtype &&
            args.src_layout->dtype == dtype::BFloat16() &&
-           m_algorithm->is_available(fargs);
+           get_algorithm(static_cast<ConvolutionBackwardFilterImpl*>(
+                                 conv_back_filter_opr.get()),
+                         config.first[0], config.first[1], config.first[2]);
 }

 WorkspaceBundle
 ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_bundle(
         void* ptr, const SizeArgs& args) const {
-    TensorLayout fsrc, fdiff, fgrad;
     auto conv_back_filter_opr =
             args.handle->create_operator<ConvolutionBackwardFilter>();
-    SizeArgs fargs = float_args(args,
-                                static_cast<ConvolutionBackwardFilterImpl*>(
-                                        conv_back_filter_opr.get()),
-                                fsrc, fdiff, fgrad);
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        conv_back_filter_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.diff_layout, *args.grad_layout},
+            args.opr);
+
+    conv_back_filter_opr->param() = config.second;
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
@@ -77,11 +92,14 @@ ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_bundle(
             sizes.push_back(dst.span().dist_byte());
         }
     };
-    get_workspace(*args.src_layout, fsrc);
-    get_workspace(*args.diff_layout, fdiff);
-    get_workspace(*args.grad_layout, fgrad);
-    sizes.push_back(m_algorithm->get_workspace_in_bytes(fargs));
-    return {ptr, std::move(sizes)};
+
+    get_workspace(*args.src_layout, config.first[0]);
+    get_workspace(*args.diff_layout, config.first[1]);
+    get_workspace(*args.grad_layout, config.first[2]);
+    sizes.push_back(conv_back_filter_opr->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]));
+    auto ret = WorkspaceBundle{ptr, std::move(sizes)};
+    return ret;
 }

 size_t ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_in_bytes(
@@ -107,7 +125,12 @@ void ConvolutionBackwardFilterImpl::AlgoBFloat16::exec(
     conv_back_filter_opr->param() = args.opr->param();
     conv_back_filter_opr->param().compute_mode =
             Param::ComputeMode::DEFAULT;
-    conv_back_filter_opr->execution_policy() = {m_algorithm->desc(), {}};
+
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        conv_back_filter_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
     conv_back_filter_opr->exec(fsrc_tensor, fdiff_tensor, fgrad_tensor,
                                cvter.workspace());
 }
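A note on the two if (args.opr->execution_policy().algo.valid()) blocks above:
the wrapper owns exactly one sub-operator, so when the caller fixed an
algorithm it hands down the first (and only) sub-policy, and otherwise it
leaves the created float32 operator free to pick its own heuristic. A
condensed sketch of that contract, with stand-in types rather than megdnn's:

    #include <cassert>
    #include <vector>

    struct Policy {
        bool valid = false;       // has the caller fixed an algorithm?
        std::vector<Policy> sub;  // policies for nested sub-operators
    };

    struct Operator {
        Policy policy;
    };

    void forward_sub_policy(const Operator& outer, Operator& sub_opr) {
        if (outer.policy.valid) {
            assert(outer.policy.sub.size() == 1);  // one sub-operator only
            sub_opr.policy = outer.policy.sub[0];
        }
        // else: leave sub_opr.policy empty, so the sub-operator falls back
        // to its own heuristic when it executes
    }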
diff --git a/dnn/src/cuda/convolution/opr_impl.h b/dnn/src/cuda/convolution/opr_impl.h
index ec2d2671..8610325c 100644
--- a/dnn/src/cuda/convolution/opr_impl.h
+++ b/dnn/src/cuda/convolution/opr_impl.h
@@ -152,6 +152,17 @@ public:
                 ->info();
     }

+    AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& filter,
+                                               const TensorLayout& diff,
+                                               const TensorLayout& grad,
+                                               size_t workspace_limit_in_bytes,
+                                               bool reproducible) {
+        return get_algorithm_heuristic(filter, diff, grad,
+                                       workspace_limit_in_bytes, reproducible)
+                ->info();
+    }
+
+
     const char* get_algorithm_set_name() const override;

     class AlgoBase;
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index 18ade4c1..93f083c9 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -328,12 +328,18 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER)
                 .set_epsilon(1e-1)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{src, dst, filter});
+
+        checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
+                ExecutionPolicyAlgoName{"CONVOLUTION_BACKWARD_FILTER_BFLOAT16",
+                                        {{"MATMUL", {}}}}));
         src.dtype = dst.dtype = filter.dtype = dtype::BFloat16();
         checker.set_rng(0, &rng)
                 .set_rng(1, &rng)
                 .set_epsilon(1e-1)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{src, dst, filter});
+        checker.reset_before_exec_callback();
+        checker.opr()->execution_policy() = {};
     }
 }
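A note on the test change: the checker, and the operator instance behind
checker.opr(), is reused by the surrounding argument loop, so the bfloat16 run
clears its callback and its fixed execution policy before the next iteration;
otherwise later float32 cases would still be forced through the bfloat16
algorithm. The nested ExecutionPolicyAlgoName also reads as a small tree,
matching the recursive policy this patch introduces. An illustrative stand-in
structure (PolicyName is hypothetical, not the test framework's type):

    #include <vector>

    struct PolicyName {
        const char* name;
        std::vector<PolicyName> sub;
    };

    // Outer name selects the bfloat16 wrapper; the inner pair selects the
    // algorithm of its single float32 sub-operator (sub_policy[0]).
    const PolicyName policy{
            "CONVOLUTION_BACKWARD_FILTER_BFLOAT16",
            {{"MATMUL", {}}}};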