From 60c6d59fc92c2211d911fa46edae1b797a25ae39 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Fri, 13 Nov 2020 15:12:38 +0800
Subject: [PATCH] feat(mgb/core): support bias preprocess in conv_bias

GitOrigin-RevId: d2e1e14d414a9dcb9a6816515100b854eedb8a0e
---
 dnn/include/megdnn/oprs/nn.h                      |  6 +-
 dnn/src/cuda/conv_bias/opr_impl.cpp               |  4 +-
 dnn/src/cuda/conv_bias/opr_impl.h                 |  2 +-
 dnn/src/fallback/conv_bias/opr_impl.cpp           | 11 ++-
 dnn/src/fallback/conv_bias/opr_impl.h             |  2 +-
 dnn/src/naive/conv_bias/opr_impl.h                |  2 +-
 dnn/test/common/opr_proxy.h                       |  2 +-
 src/core/test/graph/misc.cpp                      | 83 ++++++++++++++++++++++
 src/opr/impl/dnn/convolution.cpp                  | 32 +++++++--
 src/opr/impl/search_policy/profiler.cpp           | 23 +++++-
 .../include/megbrain/opr/search_policy/profiler.h |  5 ++
 11 files changed, 148 insertions(+), 24 deletions(-)

diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h
index 62fcf3c8..c05d73a4 100644
--- a/dnn/include/megdnn/oprs/nn.h
+++ b/dnn/include/megdnn/oprs/nn.h
@@ -378,15 +378,15 @@ public:
                       _megdnn_workspace workspace) = 0;
 
     /**
-     * \brief execute weight preprocessing, read weights form filter and write
-     * to preprocessed_filter after preprocessed.
+     * \brief execute weight preprocessing, read weights from filter and bias,
+     * write to preprocessed_filter after preprocessing.
      *
      * \praram[in] workspace the needed tmp workspace when exec_preprocess
      * running, the size is got by get_preprocess_workspace_in_bytes
      */
     virtual void exec_preprocess(const TensorLayout& src_layout,
                                  _megdnn_tensor_in filter,
-                                 const TensorLayout& bias_layout,
+                                 _megdnn_tensor_in bias,
                                  const TensorLayout& z_layout,
                                  const TensorLayout& dst_layout,
                                  PreprocessedFilter* preprocessed_filter,
diff --git a/dnn/src/cuda/conv_bias/opr_impl.cpp b/dnn/src/cuda/conv_bias/opr_impl.cpp
index 5205e40a..6bccb117 100644
--- a/dnn/src/cuda/conv_bias/opr_impl.cpp
+++ b/dnn/src/cuda/conv_bias/opr_impl.cpp
@@ -238,11 +238,11 @@ ConvBiasForwardImpl::deduce_preprocessed_filter_layout(
 
 void ConvBiasForwardImpl::exec_preprocess(
         const TensorLayout& src_layout, _megdnn_tensor_in filter,
-        const TensorLayout& bias_layout, const TensorLayout& z_layout,
+        _megdnn_tensor_in bias, const TensorLayout& z_layout,
         const TensorLayout& dst_layout, PreprocessedFilter* preprocessed_filter,
         _megdnn_workspace workspace) {
     TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout},
-            z{nullptr, z_layout}, bias{nullptr, bias_layout};
+            z{nullptr, z_layout};
     AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace,
                             preprocessed_filter);
     auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout,
diff --git a/dnn/src/cuda/conv_bias/opr_impl.h b/dnn/src/cuda/conv_bias/opr_impl.h
index 81f3f771..09c87973 100644
--- a/dnn/src/cuda/conv_bias/opr_impl.h
+++ b/dnn/src/cuda/conv_bias/opr_impl.h
@@ -49,7 +49,7 @@ public:
             const TensorLayout&, const TensorLayout&, const TensorLayout&,
             const TensorLayout&, const TensorLayout&) override;
     void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
-                         const TensorLayout&, const TensorLayout&,
+                         _megdnn_tensor_in, const TensorLayout&,
                          const TensorLayout&, PreprocessedFilter*,
                          _megdnn_workspace) override;
     const char* get_algorithm_set_name() const override;
diff --git a/dnn/src/fallback/conv_bias/opr_impl.cpp b/dnn/src/fallback/conv_bias/opr_impl.cpp
index e8be6c08..efefbfea 100644
--- a/dnn/src/fallback/conv_bias/opr_impl.cpp
+++ b/dnn/src/fallback/conv_bias/opr_impl.cpp
@@ -178,15 +178,14 @@ void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
 
 void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
                                    _megdnn_tensor_in filter,
-                                   const TensorLayout& bias_layout,
+                                   _megdnn_tensor_in bias,
                                    const TensorLayout& z_layout,
                                    const TensorLayout& dst_layout,
                                    PreprocessedFilter* preprocessed_filter,
                                    _megdnn_workspace workspace) {
-    //! exec_preprocess currently only support preprocess weights before exec,
-    //! src/dst/bias/z will be ignored, just set to nullptr
-    TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout},
-            bias{nullptr, bias_layout};
+    //! exec_preprocess currently only supports preprocessing weights and bias
+    //! before exec; src/dst/z will be ignored, just set to nullptr
+    TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout};
     auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace,
                                       preprocessed_filter);
     //! should not pass workspace_size limit otherwise can not find match algo
@@ -196,7 +195,7 @@ void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
         exec_preprocess_with_ncb_kern(fparam, algo);
     } else {
         naive::ConvBiasForwardImpl::exec_preprocess(
-                src_layout, filter, bias_layout, z_layout, dst_layout,
+                src_layout, filter, bias, z_layout, dst_layout,
                 preprocessed_filter, workspace);
     }
 }
diff --git a/dnn/src/fallback/conv_bias/opr_impl.h b/dnn/src/fallback/conv_bias/opr_impl.h
index 8a44770b..34318cb9 100644
--- a/dnn/src/fallback/conv_bias/opr_impl.h
+++ b/dnn/src/fallback/conv_bias/opr_impl.h
@@ -57,7 +57,7 @@ public:
 
     void exec_preprocess(const TensorLayout& src_layout,
                          _megdnn_tensor_in filter,
-                         const TensorLayout& bias_layout,
+                         _megdnn_tensor_in bias,
                          const TensorLayout& z_layout,
                          const TensorLayout& dst_layout,
                          PreprocessedFilter* preprocessed_filter,
diff --git a/dnn/src/naive/conv_bias/opr_impl.h b/dnn/src/naive/conv_bias/opr_impl.h
index bbe35155..47b9d4c6 100644
--- a/dnn/src/naive/conv_bias/opr_impl.h
+++ b/dnn/src/naive/conv_bias/opr_impl.h
@@ -59,7 +59,7 @@ public:
     }
 
     void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
-                         const TensorLayout&, const TensorLayout&,
+                         _megdnn_tensor_in, const TensorLayout&,
                          const TensorLayout&, PreprocessedFilter*,
                          _megdnn_workspace) override {}
 
diff --git a/dnn/test/common/opr_proxy.h b/dnn/test/common/opr_proxy.h
index c34ff06a..2a9e21f1 100644
--- a/dnn/test/common/opr_proxy.h
+++ b/dnn/test/common/opr_proxy.h
@@ -601,7 +601,7 @@ struct OprWeightPreprocessProxy
                         tensors[3].layout, tensors[4].layout);
         WorkspaceWrapper preprocess_workspace(opr->handle(),
                                               preprocess_workspace_size);
-        opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2].layout,
+        opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2],
                              tensors[3].layout, tensors[4].layout,
                              &preprocessed_filter,
                              preprocess_workspace.workspace());
diff --git a/src/core/test/graph/misc.cpp b/src/core/test/graph/misc.cpp
index 3655587f..9676f185 100644
--- a/src/core/test/graph/misc.cpp
+++ b/src/core/test/graph/misc.cpp
@@ -1955,6 +1955,39 @@ typename DnnOp::Algorithm* try_find_any_weight_preprocess_algo(
     return nullptr;
 }
 
+template <typename DnnOp, typename... Args>
+typename DnnOp::Algorithm* try_find_any_bias_preprocess_algo(
+        DnnOp* dnn_op, const char* mgb_info, Maybe<bool>& found,
+        Args&& ...args) {
+    if (found.valid()) {
+        if (found.val()) {
+            return dnn_op->execution_policy().algorithm;
+        } else {
+            return nullptr;
+        }
+    }
+    for (auto&& algo : dnn_op->get_all_algorithms(
+                 std::forward<Args>(args)...)) {
+        dnn_op->execution_policy().algorithm = algo;
+        auto layouts = dnn_op->deduce_preprocessed_filter_layout(
+                std::forward<Args>(args)...);
+        if (layouts.size() <= 1)
+            continue;
+        bool valid = false;
+        if (!layouts[1].is_empty()) {
+            //! layouts[1] is the preprocessed bias layout
+            valid = true;
+        }
+        if (valid) {
+            found.emplace(true);
+            return algo;
+        }
+    }
+    found.emplace(false);
+    mgb_log_warn("Can't find bias preprocess algo for op %s", mgb_info);
+    return nullptr;
+}
+
 void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) {
     HostTensorGenerator<> gen;
     auto graph = ComputingGraph::make();
@@ -2152,4 +2185,54 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) {
                         .empty());
 }
 
+TEST(TestGraph, FreeBias) {
+    HostTensorGenerator<> gen;
+    auto graph = ComputingGraph::make();
+    auto cn = CompNode::load("xpu0");
+    graph->options().graph_opt.weight_preprocess = true;
+    auto mkvar = [&](const char* name, const TensorShape& shp) {
+        return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
+    };
+    auto mkcvar = [&](const char* name, const TensorShape& shp) {
+        return opr::SharedDeviceTensor::make_const(*graph, *gen(shp, cn))
+                .rename(name);
+    };
+    auto x = mkvar("x", {1, 32, 16, 16});
+    // ConvBias test dense
+    opr::ConvBias::Param param_conv_bias;
+    param_conv_bias.pad_h = param_conv_bias.pad_w = 0;
+    param_conv_bias.sparse = opr::ConvBias::Param::Sparse::DENSE;
+    auto w1 = mkcvar("w1", {32, 32, 1, 1}), b1 = mkcvar("b1", {1, 32, 1, 1});
+    auto conv1 = opr::ConvBias::make(x, w1, b1, param_conv_bias);
+    Maybe<bool> wp1;
+    conv1.node()->owner_opr()->cast_final_safe<opr::ConvBias>()
+            .setup_algo_chooser([&](const cg::OperatorNodeBase* opr) {
+                return try_find_any_bias_preprocess_algo(
+                        opr->cast_final_safe<opr::ConvBias>().megdnn_opr(),
+                        opr->cname(), wp1,
+                        opr->input(0)->layout(), opr->input(1)->layout(),
+                        opr->input(2)->layout(), TensorLayout{},
+                        opr->output(0)->layout());
+            });
+
+    HostTensorND host_y;
+    auto func = graph->compile({make_callback_copy(conv1, host_y)});
+    //! flag the no longer needed memory of vars
+    func->execute();
+    //! free the no longer needed memory of vars
+    func->execute();
+    auto check = [&](SymbolVar v) {
+        ASSERT_TRUE(v.node()->contain_flag(VarNode::Flag::MEMORY_NO_NEED));
+        ASSERT_TRUE(v.node()->dev_tensor().empty());
+        ASSERT_TRUE(v.node()->owner_opr()
+                            ->cast_final_safe<opr::SharedDeviceTensor>()
+                            .get_dev_tensor()
+                            .empty());
+    };
+    ASSERT_TRUE(wp1.valid());
+    if (wp1.val()) {
+        check(b1);
+    }
+}
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/opr/impl/dnn/convolution.cpp b/src/opr/impl/dnn/convolution.cpp
index 2188e5ee..91e5ee57 100644
--- a/src/opr/impl/dnn/convolution.cpp
+++ b/src/opr/impl/dnn/convolution.cpp
@@ -961,17 +961,37 @@ void ConvBiasForward::scn_do_execute_preprocess() {
         z_layout = input(3)->layout();
     }
     megdnn_opr()->exec_preprocess(
-            input(0)->layout(), input(1)->dev_tensor().as_megdnn(), bias_layout,
-            z_layout, output(0)->layout(), preprocessed_filter(),
+            input(0)->layout(), input(1)->dev_tensor().as_megdnn(),
+            input(2)->dev_tensor().as_megdnn(), z_layout, output(0)->layout(),
+            preprocessed_filter(),
             intl::get_megdnn_workspace_from_var(output().back()));
-    //! Flag the input(1) no use later, which can be freed when no other
+    //! Flag the weight and bias no use later, which can be freed when no other
     //! var depend on its dev_value, host_value and shape.
-    auto receiver_info =
+    auto receiver_info_weight =
             input(1)->owner_graph()->var_receiver_in_current_comp_seq(input(1));
-    if (receiver_info.dev_value == 1 && receiver_info.host_value == 0 &&
-        receiver_info.shape == 0) {
+    if (receiver_info_weight.dev_value == 1 &&
+        receiver_info_weight.host_value == 0 &&
+        receiver_info_weight.shape == 0) {
         input(1)->add_flag(VarNode::Flag::MEMORY_NO_NEED);
     }
+    //! if bias is preprocessed
+    if (input().size() > 2) {
+        auto preprocessed_layouts =
+                megdnn_opr()->deduce_preprocessed_filter_layout(
+                        input(0)->layout(), input(1)->layout(), bias_layout,
+                        z_layout, output(0)->layout());
+        if (preprocessed_layouts.size() > 1 &&
+            !preprocessed_layouts[1].is_empty()) {
+            auto receiver_info_bias =
+                    input(2)->owner_graph()->var_receiver_in_current_comp_seq(
+                            input(2));
+            if (receiver_info_bias.dev_value == 1 &&
+                receiver_info_bias.host_value == 0 &&
+                receiver_info_bias.shape == 0) {
+                input(2)->add_flag(VarNode::Flag::MEMORY_NO_NEED);
+            }
+        }
+    }
 }
 
 /* ===================== LocalShareForward ==================== */
diff --git a/src/opr/impl/search_policy/profiler.cpp b/src/opr/impl/search_policy/profiler.cpp
index d6048851..506a8330 100644
--- a/src/opr/impl/search_policy/profiler.cpp
+++ b/src/opr/impl/search_policy/profiler.cpp
@@ -178,9 +178,26 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
                 for (size_t i = 0; i < flt_val.size(); i++) {
                     pf.tensors[i] = flt_val[i].as_megdnn();
                 }
-                APPLY(_(megdnn_opr)->exec_preprocess(args..., &pf, mdn_workspace),
-                      std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()),
-                      array_skip<2>(layouts));
+                if_constexpr<opr_contain_bias<Opr>()>(
+                        //! convbias
+                        [&](auto __) {
+                            APPLY(__(megdnn_opr)
+                                          ->exec_preprocess(args..., &pf,
+                                                            mdn_workspace),
+                                  std::forward_as_tuple(layouts[0],
+                                                        inp_val[1].as_megdnn(),
+                                                        inp_val[2].as_megdnn()),
+                                  array_skip<3>(layouts));
+                        },
+                        //! Convolution
+                        [&](auto __) {
+                            APPLY(__(megdnn_opr)
+                                          ->exec_preprocess(args..., &pf,
+                                                            mdn_workspace),
+                                  std::forward_as_tuple(layouts[0],
+                                                        inp_val[1].as_megdnn()),
+                                  array_skip<2>(layouts));
+                        });
             }
         });
 
diff --git a/src/opr/include/megbrain/opr/search_policy/profiler.h b/src/opr/include/megbrain/opr/search_policy/profiler.h
index 03708715..a7d1aaac 100644
--- a/src/opr/include/megbrain/opr/search_policy/profiler.h
+++ b/src/opr/include/megbrain/opr/search_policy/profiler.h
@@ -75,6 +75,11 @@ constexpr bool opr_supports_preprocess() {
            std::is_same<Opr, megdnn::ConvBias>::value;
 }
 
+template <typename Opr>
+constexpr bool opr_contain_bias() {
+    return std::is_same<Opr, megdnn::ConvBias>::value;
+}
+
 template <typename Opr>
 struct PreprocessFilterImpl {
     using T = union {};