From 8773926ef8cc0fdde164ed8d9984f9f19c484651 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 26 Jan 2021 21:22:02 +0800
Subject: [PATCH] refactor(megdnn): refactor matmul algo in conv bias

GitOrigin-RevId: 932f7d6f811f7f587fe1f9afd2a571fd7d51c2f5
---
 dnn/src/cuda/conv_bias/algo.h         |  9 ++--
 dnn/src/cuda/conv_bias/helper.cpp     |  5 +-
 dnn/src/cuda/conv_bias/helper.h       |  2 +-
 dnn/src/cuda/conv_bias/matmul.cpp     | 95 +++++++++++++++++++++++++++--------
 dnn/test/common/checker.h             |  7 +++
 dnn/test/cuda/conv_bias.cpp           | 10 ++--
 dnn/test/cuda/dilated_convolution.cpp |  2 +-
 7 files changed, 99 insertions(+), 31 deletions(-)

diff --git a/dnn/src/cuda/conv_bias/algo.h b/dnn/src/cuda/conv_bias/algo.h
index 8c26c602..8368aafa 100644
--- a/dnn/src/cuda/conv_bias/algo.h
+++ b/dnn/src/cuda/conv_bias/algo.h
@@ -318,17 +318,20 @@ public:
     const char* name() const override {
         if (m_name.empty()) {
-            m_name = ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
-                    "MATMUL", {});
+            m_name = ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>("MATMUL",
+                                                                              {});
         }
         return m_name.c_str();
     }
+
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
     bool is_reproducible() const override { return true; }
     MEGDNN_DECL_ALGO_TYPE(CUDA_MATMUL)
 
 private:
     WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
-
     mutable std::string m_name;
 };
 
diff --git a/dnn/src/cuda/conv_bias/helper.cpp b/dnn/src/cuda/conv_bias/helper.cpp
index 44cf4e36..1f45f170 100644
--- a/dnn/src/cuda/conv_bias/helper.cpp
+++ b/dnn/src/cuda/conv_bias/helper.cpp
@@ -196,7 +196,8 @@ bool check_bias_share_in_channel(const TensorLayout& bias,
     return share_in_channel;
 }
 
-WorkspaceBundle matmul_get_workspace_bundle(const BiasForwardSizeArgs& args) {
+SmallVector<size_t> matmul_get_workspace_bundle(
+        const BiasForwardSizeArgs& args) {
     auto dtype = args.src_layout->dtype;
     auto&& fm = args.filter_meta;
     megdnn_assert(fm.group == 1);
@@ -208,7 +209,7 @@ WorkspaceBundle matmul_get_workspace_bundle(const BiasForwardSizeArgs& args) {
     if (args.filter_meta.should_flip) {
         sizes.push_back(dtype.size() * OC * IC * FH * FW);
     }
-    return {nullptr, std::move(sizes)};
+    return sizes;
 }
 
 void flip_filter(const BiasForwardSizeArgs& args, const Workspace& workspace,
diff --git a/dnn/src/cuda/conv_bias/helper.h b/dnn/src/cuda/conv_bias/helper.h
index dae22853..ceade354 100644
--- a/dnn/src/cuda/conv_bias/helper.h
+++ b/dnn/src/cuda/conv_bias/helper.h
@@ -50,7 +50,7 @@ namespace conv_bias {
     bool is_cudnn_supported(const BiasForwardSizeArgs& args);
 
     //! get workspace bundle for matmul algo
-    WorkspaceBundle matmul_get_workspace_bundle(
+    SmallVector<size_t> matmul_get_workspace_bundle(
             const BiasForwardSizeArgs& args);
 
     /*!
diff --git a/dnn/src/cuda/conv_bias/matmul.cpp b/dnn/src/cuda/conv_bias/matmul.cpp
index 78038009..79f2e2fb 100644
--- a/dnn/src/cuda/conv_bias/matmul.cpp
+++ b/dnn/src/cuda/conv_bias/matmul.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "src/common/conv_bias.h"
@@ -19,6 +20,43 @@ using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
 
+namespace {
+std::pair<TensorLayoutArray, MatrixMulForward::Param> sub_opr_config(
+        const ConvBiasForwardImpl::CanonizedFilterMeta& fm,
+        const TensorLayout& src_layout, const TensorLayout& filter_layout,
+        const TensorLayout& dst_layout, const ConvBiasForwardImpl* opr) {
+    size_t N = src_layout.shape[0], IC = fm.icpg, OC = fm.ocpg,
+           OH = dst_layout.shape[2], OW = dst_layout.shape[3],
+           FH = fm.spatial[0], FW = fm.spatial[1];
+
+    megdnn_assert(src_layout.dtype.category() == DTypeCategory::FLOAT);
+    TensorLayout Al({OC, IC * FH * FW}, filter_layout.dtype),
+            Bl({IC * FH * FW, OH * OW * N}, filter_layout.dtype),
+            Cl({OC, OH * OW * N}, filter_layout.dtype);
+    MatrixMulForward::Param param;
+    if (opr->param().compute_mode == param::Convolution::ComputeMode::FLOAT32) {
+        param.compute_mode = param::MatrixMul::ComputeMode::FLOAT32;
+    }
+
+    return {{Al, Bl, Cl}, param};
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvBiasForwardImpl::AlgoMatmul::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    const ConvBiasForwardImpl* conv_bias_opr =
+            static_cast<const ConvBiasForwardImpl*>(opr);
+    CanonizedFilterMeta fm =
+            conv_bias_opr->check_layout_fwd(layouts[0], layouts[1], layouts[4]);
+    auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[4],
+                                   conv_bias_opr);
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::MATRIX_MUL_FORWARD, param_str, config.first}};
+}
+
 bool ConvBiasForwardImpl::AlgoMatmul::is_available(const SizeArgs& args) const {
     if (args.src_layout->dtype == args.filter_layout->dtype &&
         args.src_layout->dtype == dtype::BFloat16()) {
@@ -47,11 +85,24 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoMatmul::get_workspace_bundle(
     SizeArgs conv_args = args;
     conv_args.dst_layout = &dst_layout;
 
-    SmallVector<size_t> matmul_sizes;
-    WorkspaceBundle matmul_bundle = matmul_get_workspace_bundle(conv_args);
-    for (size_t i = 0; i < matmul_bundle.nr_workspace(); ++i) {
-        matmul_sizes.push_back(matmul_bundle.get_size(i));
+    SmallVector<size_t> matmul_sizes = matmul_get_workspace_bundle(conv_args);
+
+    auto matmul_opr = args.handle->create_operator<MatrixMulForward>();
+    if (args.opr->execution_policy().algo.valid() &&
+        !args.opr->execution_policy().sub_policy.empty()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        matmul_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
     }
+
+    auto&& config =
+            sub_opr_config(args.filter_meta, *args.src_layout,
+                           *args.filter_layout, *args.dst_layout, args.opr);
+    matmul_opr->param() = config.second;
+    size_t mm_ws = matmul_opr->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]);
+    matmul_sizes.push_back(mm_ws);
+
     sizes.insert(sizes.begin(), matmul_sizes.begin(), matmul_sizes.end());
     return {ptr, std::move(sizes)};
 }
@@ -110,24 +161,28 @@ void ConvBiasForwardImpl::AlgoMatmul::exec_internal(
     conv_bias::im2col<T>(args.src_tensor->ptr<T>(), col, N,
                          args.src_layout->stride[0], IC, IH, IW, FH, FW, OH,
                          OW, PH, PW, SH, SW, DH, DW, stream);
-    TensorLayout Al({OC, IC * FH * FW}, typename DTypeTrait<T>::dtype()),
-            Bl({IC * FH * FW, OH * OW * N}, typename DTypeTrait<T>::dtype()),
-            Cl({OC, OH * OW * N}, typename DTypeTrait<T>::dtype());
-    TensorND A(args.filter_tensor->ptr<T>(), Al), B(col, Bl), C(dst_t, Cl);
+
+    auto matmul_opr = args.handle->create_operator<MatrixMulForward>();
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        matmul_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
+
+    auto&& config =
+            sub_opr_config(args.filter_meta, *args.src_layout,
+                           *args.filter_layout, *args.dst_layout, args.opr);
+    matmul_opr->param() = config.second;
+
+    TensorND A(args.filter_tensor->ptr<T>(), config.first[0]),
+            B(col, config.first[1]), C(dst_t, config.first[2]);
+    size_t matmul_ws_idx = 2;
     if (fm.should_flip) {
         conv_bias::flip_filter(args, bundle.get_workspace(2), A.raw_ptr);
+        matmul_ws_idx = 3;
     }
-    auto&& matmul_opr = args.handle->create_operator<MatrixMulForward>();
-    if (args.opr->param().compute_mode ==
-        param::Convolution::ComputeMode::FLOAT32) {
-        matmul_opr->param().compute_mode =
-                param::MatrixMul::ComputeMode::FLOAT32;
-    }
-    megdnn_assert(matmul_opr->get_workspace_in_bytes(A.layout, B.layout,
-                                                     C.layout) == 0_z,
-                  "Assume matmul opr in algo MATMUL doesn't need extra "
-                  "workspace");
-    matmul_opr->exec(A, B, C, Workspace());
+
+    matmul_opr->exec(A, B, C, bundle.get_workspace(matmul_ws_idx));
 
     TensorLayout C2l({OC * OH * OW, N}, typename DTypeTrait<T>::dtype()),
             C3l = C2l;
diff --git a/dnn/test/common/checker.h b/dnn/test/common/checker.h
index 640b4ad6..ba9d1681 100644
--- a/dnn/test/common/checker.h
+++ b/dnn/test/common/checker.h
@@ -491,6 +491,13 @@ public:
         Algorithm* algo = opr->get_algorithm_from_desc(algo_info.desc);
         std::vector<Algorithm::SearchItem>&& sub_items =
                 algo->get_subopr_list(layouts, opr.get());
+        if (sub_items.size() != policy_name.sub_policy_names.size()) {
+            printf("Invalid sub_policy_names in %s, expected %zu but got "
+                   "%zu\n",
+                   algo_info.name.c_str(), sub_items.size(),
+                   policy_name.sub_policy_names.size());
+            return {};
+        }
         FOREACH_OPR_TYPE_DISPATCH(sub_items, {
             ExecutionPolicy policy =
                     AlgoChecker<_Opr>::construct_execution_policy_from_name(
diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp
index 9b362c76..5c382d4d 100644
--- a/dnn/test/cuda/conv_bias.cpp
+++ b/dnn/test/cuda/conv_bias.cpp
@@ -704,10 +704,12 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL) {
     std::vector<conv_bias::TestArg> args = get_args();
     Checker<ConvBiasForward> checker(handle_cuda());
 
-    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(
-            ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>("MATMUL",
-                                                                     {})
-                    .c_str()));
+    checker.set_before_exec_callback(
+            AlgoChecker<ConvBiasForward>(ExecutionPolicyAlgoName{
+                    ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
+                            "MATMUL", {})
+                            .c_str(),
+                    {{"CUBLAS", {}}}}));
     param::ConvBias cur_param;
     using NLMode = param::ConvBias::NonlineMode;
     cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
diff --git a/dnn/test/cuda/dilated_convolution.cpp b/dnn/test/cuda/dilated_convolution.cpp
index 14979bf6..97c13a30 100644
--- a/dnn/test/cuda/dilated_convolution.cpp
+++ b/dnn/test/cuda/dilated_convolution.cpp
@@ -49,7 +49,7 @@ TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD) {
                     {{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
                               "MATMUL", {})
                               .c_str(),
-                      {}}}}));
+                      {{"CUBLAS", {}}}}}}));
 #endif
     NormalRNG default_rng;
     for (auto &&arg: args) {