From 777f3ea9709c2fee69a640e2c74066a0aea842d0 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 20 Aug 2020 10:59:12 +0800 Subject: [PATCH] refactor(gopt): format code GitOrigin-RevId: 9d5c87000fdfa291d91306365f7401c8af443dc1 --- src/gopt/impl/tensor_reformat.cpp | 295 ++++++++++++++-------------- src/gopt/test/inference.cpp | 404 +++++++++++++++++++------------------- 2 files changed, 353 insertions(+), 346 deletions(-) diff --git a/src/gopt/impl/tensor_reformat.cpp b/src/gopt/impl/tensor_reformat.cpp index a5f1f79a..c9cfbcdd 100644 --- a/src/gopt/impl/tensor_reformat.cpp +++ b/src/gopt/impl/tensor_reformat.cpp @@ -10,23 +10,23 @@ * implied. */ -#include "megbrain/gopt/inference.h" -#include "megbrain/gopt/gtrans.h" #include "megbrain/gopt/basic_arith.h" +#include "megbrain/gopt/gtrans.h" +#include "megbrain/gopt/inference.h" #include "megbrain/graph/event.h" -#include "megbrain/opr/dnn/batch_norm.h" -#include "megbrain/opr/dnn/local.h" -#include "megbrain/utils/shared_set.h" -#include "megbrain/serialization/opr_shallow_copy.h" #include "megbrain/opr/basic_arith.h" -#include "megbrain/opr/dnn/convolution.h" #include "megbrain/opr/blas.h" -#include "megbrain/opr/misc.h" -#include "megbrain/opr/utility.h" +#include "megbrain/opr/dnn/batch_norm.h" +#include "megbrain/opr/dnn/convolution.h" +#include "megbrain/opr/dnn/local.h" #include "megbrain/opr/dnn/pooling.h" -#include "megbrain/opr/tensor_manip.h" #include "megbrain/opr/imgproc.h" +#include "megbrain/opr/misc.h" #include "megbrain/opr/nn_int.h" +#include "megbrain/opr/tensor_manip.h" +#include "megbrain/opr/utility.h" +#include "megbrain/serialization/opr_shallow_copy.h" +#include "megbrain/utils/shared_set.h" #include "megdnn/opr_param_defs.h" #include "megdnn/tensor_format.h" @@ -66,71 +66,74 @@ using namespace gopt; * oprs should not get involved in any actual computing. */ MGB_DEFINE_OPR_CLASS(TensorReformatPass::RelayoutPlaceholder, - cg::SingleCNOperatorNodeBase) // { + cg::SingleCNOperatorNodeBase) // { public: - //! relayout type of this opr - enum class LayoutType { - NCHW4_TO_NCHW32, //!< from nchw4 layout to nchw32 layout - NCHW32_TO_NCHW4, //!< from nchw32 layout to nchw4 layout - NCHW4_TO_CHWN4, //!< from nchw4 layout to chwn4 layout - CHWN4_TO_NCHW4, //!< from chwn4 layout to nchw4 layout - NCHW_TO_NCHW4, //!< from nchw layout to nchw4 layout - NCHW_TO_NCHW4_IC_SMALL_CONV, ///< from nchw layout to nchw4 whose - ///< channel size less than 4 - NCHW4_TO_NCHW, //!< from nchw4 layout to nchw layout - NCHW_TO_NCHW88, //!< from nchw layout to nchw88 layout - NCHW88_TO_NCHW, //!< from nchw88 layout to nchw layout - - WEIGHT_NCHW_TO_NCHW4_DENSE, //!< weight from nchw layout to nchw4 - //!< layout - WEIGHT_NCHW_TO_NCHW4_GROUP, //!< group weight from nchw layout to - //!< nchw4 layout - WEIGHT_NCHW_TO_NCHW4_DENSE_IC_SMALL_CONV, //!< weight from nchw layout - //!< to nchw4 layout whose - //! channel size less than 4 - - WEIGHT_NCHW_TO_NCHW88_DENSE, //!< weight from nchw layout to nchw88 - //!< layout - WEIGHT_NCHW_TO_NCHW88_GROUP, //!< group weight from nchw layout to - //!< nchw88 layout - WEIGHT_NCHW_TO_NCHW88_CHAN, //!< channel wise weight from nchw layout - //!< to nchw88 layout - //!< the weight layout of input is nchw output is nchw88, special for - //!< shape weight in nchw like {64, 2, 3, 3} to {8, 3, 3, 2, 8} - WEIGHT_HYBIRD_NCHW_NCHW88, - - WEIGHT_NCHW_TO_NCHW44_DENSE, //!< weight from nchw layout to nchw44 - //!< layout - WEIGHT_NCHW_TO_NCHW44_GROUP, //!< group weight from nchw layout to - //!< nchw44 layout - WEIGHT_NCHW_TO_NCHW44_CHAN, //!< channel wise weight from nchw layout - //!< to nchw44 layout - //!< the weight layout of input is nchw output is nchw44, special for - //!< shape weight in nchw like {64, 2, 3, 3} to {16, 3, 3, 2, 4} - WEIGHT_HYBIRD_NCHW_NCHW44, - WEIGHT_NCHW_TO_NCHW44_DOT_DENSE, //!< weight from NCHW44 layout to - //!< NCHW44_DOT layout dense - WEIGHT_NCHW_TO_NCHW44_DOT_GROUP, //!< weight from NCHW44 layout to - //!< NCHW44_DOT layout group - }; - - RelayoutPlaceholder(VarNode* src_var, LayoutType layout_type); - - /*! - * \param src_var the input var - * \param layout_type tensor layout transform type of this relayout - * placeholder as described in LayoutType - */ - static SymbolVar make(VarNode* src_var, LayoutType layout_type); - - LayoutType layout_type() const { return m_layout_type; } +//! relayout type of this opr +enum class LayoutType { + NCHW4_TO_NCHW32, //!< from nchw4 layout to nchw32 layout + NCHW32_TO_NCHW4, //!< from nchw32 layout to nchw4 layout + NCHW4_TO_CHWN4, //!< from nchw4 layout to chwn4 layout + CHWN4_TO_NCHW4, //!< from chwn4 layout to nchw4 layout + NCHW_TO_NCHW4, //!< from nchw layout to nchw4 layout + NCHW_TO_NCHW4_IC_SMALL_CONV, ///< from nchw layout to nchw4 whose + ///< channel size less than 4 + NCHW4_TO_NCHW, //!< from nchw4 layout to nchw layout + NCHW_TO_NCHW88, //!< from nchw layout to nchw88 layout + NCHW88_TO_NCHW, //!< from nchw88 layout to nchw layout + + WEIGHT_NCHW_TO_NCHW4_DENSE, //!< weight from nchw layout to nchw4 + //!< layout + WEIGHT_NCHW_TO_NCHW4_GROUP, //!< group weight from nchw layout to + //!< nchw4 layout + WEIGHT_NCHW_TO_NCHW4_DENSE_IC_SMALL_CONV, //!< weight from nchw layout + //!< to nchw4 layout whose + //! channel size less than 4 + + WEIGHT_NCHW_TO_NCHW88_DENSE, //!< weight from nchw layout to nchw88 + //!< layout + WEIGHT_NCHW_TO_NCHW88_GROUP, //!< group weight from nchw layout to + //!< nchw88 layout + WEIGHT_NCHW_TO_NCHW88_CHAN, //!< channel wise weight from nchw layout + //!< to nchw88 layout + //!< the weight layout of input is nchw output is nchw88, special for + //!< shape weight in nchw like {64, 2, 3, 3} to {8, 3, 3, 2, 8} + WEIGHT_HYBIRD_NCHW_NCHW88, + + WEIGHT_NCHW_TO_NCHW44_DENSE, //!< weight from nchw layout to nchw44 + //!< layout + WEIGHT_NCHW_TO_NCHW44_GROUP, //!< group weight from nchw layout to + //!< nchw44 layout + WEIGHT_NCHW_TO_NCHW44_CHAN, //!< channel wise weight from nchw layout + //!< to nchw44 layout + //!< the weight layout of input is nchw output is nchw44, special for + //!< shape weight in nchw like {64, 2, 3, 3} to {16, 3, 3, 2, 4} + WEIGHT_HYBIRD_NCHW_NCHW44, + WEIGHT_NCHW_TO_NCHW44_DOT_DENSE, //!< weight from NCHW44 layout to + //!< NCHW44_DOT layout dense + WEIGHT_NCHW_TO_NCHW44_DOT_GROUP, //!< weight from NCHW44 layout to + //!< NCHW44_DOT layout group +}; + +RelayoutPlaceholder(VarNode* src_var, LayoutType layout_type); + +/*! + * \param src_var the input var + * \param layout_type tensor layout transform type of this relayout + * placeholder as described in LayoutType + */ +static SymbolVar make(VarNode* src_var, LayoutType layout_type); + +LayoutType layout_type() const { + return m_layout_type; +} private: - void init_output_static_infer_desc() override; - void scn_do_execute() override; - void init_output_comp_node() override; - const LayoutType m_layout_type; -}; +void init_output_static_infer_desc() override; +void scn_do_execute() override; +void init_output_comp_node() override; +const LayoutType m_layout_type; +} +; MGB_DYN_TYPE_OBJ_FINAL_IMPL(TensorReformatPass::RelayoutPlaceholder); @@ -211,7 +214,7 @@ void TensorReformatPass::RelayoutPlaceholder::init_output_static_infer_desc() { dst[3] = inp_shape[3]; dst[4] = 4; } else if (layout_type() == - RelayoutPlaceholder::LayoutType::NCHW4_TO_NCHW){ + RelayoutPlaceholder::LayoutType::NCHW4_TO_NCHW) { mgb_assert(inp_shape.ndim == 5 && inp_shape[4] == 4); dst.ndim = 4; dst[0] = inp_shape[0]; @@ -249,7 +252,7 @@ void TensorReformatPass::RelayoutPlaceholder::init_output_static_infer_desc() { dst[3] = inp_shape[3]; dst[4] = inp_shape[4]; dst[5] = 4; - }else if (layout_type() == + } else if (layout_type() == RelayoutPlaceholder::LayoutType::NCHW_TO_NCHW88) { mgb_assert(inp_shape.ndim == 4 && inp_shape[1] % 8 == 0); dst.ndim = 5; @@ -489,7 +492,7 @@ void TensorReformatPass::translate_pass(OptState& opt) const { return opr::IndexAt::make(xshp, {{0, cv(idx)}}); }; auto tshp0 = opr::Concat::make( - {sub(0), sub(1) / 4, cv(4), sub(2), sub(3)}, 0); + {sub(0), sub(1) / 4, cv(4), sub(2), sub(3)}, 0); auto y0 = opr::Reshape::make(x, tshp0); auto y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 4, 2}); return y1.node(); @@ -1033,7 +1036,6 @@ EnableTensorCorePass::make_tensorcore_converter() { "can not be changed in this opt " "pass"); return serialization::copy_opr_shallow(*opr, new_inp, opr->config()); - }; auto replace_warp_affine_opr = [replace_inps_to_nchw4, replace_non_nchw4_opr]( @@ -1247,7 +1249,8 @@ std::unique_ptr EnableCHWN4Pass::make_chwn4_converter() { } if (nr_shape_changed) { auto inps = new_inp; - if (nr_shape_changed >= nr_inps / 2) { // CHWN4 > NCHW4 -> use CHWN4 + if (nr_shape_changed >= + nr_inps / 2) { // CHWN4 > NCHW4 -> use CHWN4 for (size_t i = 0; i < nr_inps; ++i) { if (varshape_changed.count(new_inp[i]) == 0) { auto symvar = RelayoutPlaceholder::make( @@ -1309,7 +1312,6 @@ std::unique_ptr EnableCHWN4Pass::make_chwn4_converter() { "can not be changed in this opt " "pass"); return serialization::copy_opr_shallow(*opr, new_inp, opr->config()); - }; // capture by copy to avoid use after return auto replace_warp_affine_opr = @@ -1410,7 +1412,7 @@ VarNode* EnableNCHW4Pass::on_graph_endpoint_var(VarNode* new_var, return new_var; } -std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ +std::unique_ptr EnableNCHW4Pass::make_nchw4_converter() { MIDOUT_B("EnableNCHW4Pass::make") auto ret = std::make_unique(); ret->set_var_replace_check_flag(VarReplaceCheckFlag::NOCHECK); @@ -1469,14 +1471,12 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ return serialization::copy_opr_shallow(*opr, new_inp, opr->config()); } - auto conv_mode = - trans_nchw4(conv_opr.param().sparse, new_inp[1]); + auto conv_mode = trans_nchw4(conv_opr.param().sparse, new_inp[1]); VarNode *conv_src = new_inp[0], *conv_filter = new_inp[1]; // src: NCHW --> NCWH4 if (new_inp[0]->shape().ndim != 5) { mgb_assert(new_inp[0]->shape().ndim == 4); - auto new_src = - RelayoutPlaceholder::make(new_inp[0], conv_mode.src); + auto new_src = RelayoutPlaceholder::make(new_inp[0], conv_mode.src); conv_src = new_src.node(); } // weight: NCHW --> NCHW4 @@ -1488,21 +1488,21 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ new_param.format = conv_format; // dst auto new_conv_opr = opr::Convolution::make( - conv_src, conv_filter, new_param, - conv_opr.execution_policy(), conv_opr.config()); + conv_src, conv_filter, new_param, conv_opr.execution_policy(), + conv_opr.config()); OperatorNodeBase* new_opr = new_conv_opr.node()->owner_opr(); mgb_assert(new_conv_opr.shape().ndim == 5, - "The conv dst dim is not trans to nchw4"); + "The conv dst dim is not trans to nchw4"); return new_opr; }; auto replace_batch_conv_bias_opr = [batch_conv_bias_format, - src_to_nchw4_mode]( - OperatorNodeBase* opr, - const VarNodeArray& new_inp) { + src_to_nchw4_mode]( + OperatorNodeBase* opr, + const VarNodeArray& new_inp) { mgb_assert(opr->input().size() == new_inp.size()); auto& batch_conv_bias_opr = - opr->cast_final_safe(); + opr->cast_final_safe(); if (batch_conv_bias_opr.param().format != megdnn::param::BatchConvBias::Format::NCHW) { return serialization::copy_opr_shallow(*opr, new_inp, @@ -1515,10 +1515,10 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ // what should be converted: src, weight VarNode *src = new_inp[0], *filter = new_inp[1]; // src: NCHW --> NCHW4 - if (new_inp[0]->shape().ndim !=5) { + if (new_inp[0]->shape().ndim != 5) { mgb_assert(new_inp[0]->shape().ndim == 4); - auto new_src = RelayoutPlaceholder::make(new_inp[0], - src_to_nchw4_mode); + auto new_src = + RelayoutPlaceholder::make(new_inp[0], src_to_nchw4_mode); src = new_src.node(); } // weight: BNCHW --> BNCHW4 @@ -1531,9 +1531,10 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ auto new_param = batch_conv_bias_opr.param(); new_param.format = batch_conv_bias_format; if (new_inp.size() == 2) { - auto dst = opr::BatchConvBias::make(src, filter, new_param, - batch_conv_bias_opr.execution_policy(), - batch_conv_bias_opr.config()); + auto dst = opr::BatchConvBias::make( + src, filter, new_param, + batch_conv_bias_opr.execution_policy(), + batch_conv_bias_opr.config()); OperatorNodeBase* new_opr = dst.node()->owner_opr(); mgb_assert(dst.shape().ndim == 5, "The conv_bias dst dim is not trans to nchw4"); @@ -1542,14 +1543,15 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ // bias: NCHW --> NCHW4 VarNode* bias = new_inp[2]; if (new_inp[2]->shape().ndim == 4) { - auto new_bias = RelayoutPlaceholder::make(new_inp[2], - src_to_nchw4_mode); + auto new_bias = + RelayoutPlaceholder::make(new_inp[2], src_to_nchw4_mode); bias = new_bias.node(); } if (new_inp.size() == 3) { - auto dst = opr::BatchConvBias::make(src, filter, bias, new_param, - batch_conv_bias_opr.execution_policy(), - batch_conv_bias_opr.config()); + auto dst = opr::BatchConvBias::make( + src, filter, bias, new_param, + batch_conv_bias_opr.execution_policy(), + batch_conv_bias_opr.config()); OperatorNodeBase* new_opr = dst.node()->owner_opr(); mgb_assert(dst.shape().ndim == 5, "The conv_bias dst dim is not trans to nchw4"); @@ -1558,13 +1560,14 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ // z_inp: NCHW --> NCHW4 VarNode* z_inp = new_inp[3]; if (new_inp[3]->shape().ndim == 4) { - auto new_z = RelayoutPlaceholder::make(new_inp[3], - src_to_nchw4_mode); + auto new_z = + RelayoutPlaceholder::make(new_inp[3], src_to_nchw4_mode); z_inp = new_z.node(); } - auto dst = opr::BatchConvBias::make(src, filter, bias, z_inp, - new_param,batch_conv_bias_opr.execution_policy(), - batch_conv_bias_opr.config()); + auto dst = + opr::BatchConvBias::make(src, filter, bias, z_inp, new_param, + batch_conv_bias_opr.execution_policy(), + batch_conv_bias_opr.config()); OperatorNodeBase* new_opr = dst.node()->owner_opr(); mgb_assert(dst.shape().ndim == 5, "The conv_bias dst dim is not trans to nchw4"); @@ -1584,13 +1587,11 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ // what should be converted: src, weight VarNode *conv_bias_src = new_inp[0], *conv_bias_filter = new_inp[1]; - auto conv_mode = - trans_nchw4(conv_bias_opr.param().sparse, new_inp[1]); + auto conv_mode = trans_nchw4(conv_bias_opr.param().sparse, new_inp[1]); // src: NCHW --> NCHW4 if (new_inp[0]->shape().ndim != 5) { mgb_assert(new_inp[0]->shape().ndim == 4); - auto new_src = - RelayoutPlaceholder::make(new_inp[0], conv_mode.src); + auto new_src = RelayoutPlaceholder::make(new_inp[0], conv_mode.src); conv_bias_src = new_src.node(); } // weight: NCHW --> NCHW4 or GNCHW --> GNCHW4 @@ -1602,8 +1603,8 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ new_param.format = conv_bias_format; if (new_inp.size() == 2) { auto new_conv_bias_opr = opr::ConvBias::make( - conv_bias_src, conv_bias_filter, new_param, - conv_bias_opr.execution_policy(), conv_bias_opr.config()); + conv_bias_src, conv_bias_filter, new_param, + conv_bias_opr.execution_policy(), conv_bias_opr.config()); OperatorNodeBase* new_opr = new_conv_bias_opr.node()->owner_opr(); mgb_assert(new_conv_bias_opr.shape().ndim == 5, "The conv_bias dst dim is not trans to nchw4"); @@ -1618,8 +1619,8 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ } if (new_inp.size() == 3) { auto new_conv_bias_opr = opr::ConvBias::make( - conv_bias_src, conv_bias_filter, conv_bias_bias, new_param, - conv_bias_opr.execution_policy(), conv_bias_opr.config()); + conv_bias_src, conv_bias_filter, conv_bias_bias, new_param, + conv_bias_opr.execution_policy(), conv_bias_opr.config()); OperatorNodeBase* new_opr = new_conv_bias_opr.node()->owner_opr(); mgb_assert(new_conv_bias_opr.shape().ndim == 5, "The conv_bias dst dim is not trans to nchw4"); @@ -1632,9 +1633,10 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ RelayoutPlaceholder::make(new_inp[3], src_to_nchw4_mode); z_inp = new_z.node(); } - auto new_conv_bias_opr = opr::ConvBias::make(conv_bias_src, - conv_bias_filter, conv_bias_bias, z_inp, new_param, - conv_bias_opr.execution_policy(), conv_bias_opr.config()); + auto new_conv_bias_opr = opr::ConvBias::make( + conv_bias_src, conv_bias_filter, conv_bias_bias, z_inp, + new_param, conv_bias_opr.execution_policy(), + conv_bias_opr.config()); OperatorNodeBase* new_opr = new_conv_bias_opr.node()->owner_opr(); mgb_assert(new_conv_bias_opr.shape().ndim == 5, "The conv_bias dst dim is not trans to nchw4"); @@ -1654,8 +1656,8 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ auto temp_inp = new_inp; for (size_t i = 0; i < opr->input().size(); i++) { if (new_inp[i]->shape().ndim == 4) { - auto new_var = RelayoutPlaceholder::make( - new_inp[i], src_to_nchw4_mode); + auto new_var = RelayoutPlaceholder::make(new_inp[i], + src_to_nchw4_mode); temp_inp[i] = new_var.node(); } else { mgb_assert((new_inp[i]->shape().ndim == 5) || @@ -1670,7 +1672,7 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ } }; auto relayout_inp_to_nchw = [=](OperatorNodeBase* opr, - const VarNodeArray& new_inp) { + const VarNodeArray& new_inp) { mgb_assert(opr->input().size() == new_inp.size()); VarNodeArray temp_inp = new_inp; for (size_t i = 0; i < opr->input().size(); i++) { @@ -1697,8 +1699,8 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ mgb_assert(new_inp[0]->dtype().enumv() == DTypeEnum::QuantizedS8); auto new_param = pooling.param(); new_param.format = Format::NCHW4; - auto new_pooling = - opr::PoolingForward::make(new_inp[0], new_param, opr->config()); + auto new_pooling = opr::PoolingForward::make(new_inp[0], new_param, + opr->config()); mgb_assert(new_pooling.shape().ndim == 5, "out var of Pooling opr after transform must be 5 (got: " "%zu).", @@ -1767,8 +1769,7 @@ std::unique_ptr EnableNCHW4Pass::make_nchw4_converter(){ //! supportted nchw4 replace_func[opr::Convolution::typeinfo()] = replace_conv_opr; replace_func[opr::ConvBias::typeinfo()] = replace_conv_bias_opr; - replace_func[opr::BatchConvBias::typeinfo()] = - replace_batch_conv_bias_opr; + replace_func[opr::BatchConvBias::typeinfo()] = replace_batch_conv_bias_opr; replace_func[opr::PoolingForward::typeinfo()] = replace_pooling_opr; replace_func[opr::ResizeForward::typeinfo()] = replace_resize_opr; replace_func[opr::WarpPerspectiveForward::typeinfo()] = @@ -1811,7 +1812,7 @@ VarNode* EnableNchwxxPass::on_graph_endpoint_var(VarNode* new_var, return new_var; } -void EnableNchwxxPass::fill_opr_convert_fun(size_t pack_c_size){ +void EnableNchwxxPass::fill_opr_convert_fun(size_t pack_c_size) { using RelayoutMode = RelayoutPlaceholder::LayoutType; using TestFilterResult = std::pair; RelayoutMode weight_to_nchwxx_mode_dense = @@ -2045,7 +2046,7 @@ void EnableNchwxxPass::fill_opr_convert_fun(size_t pack_c_size){ }; auto replace_pooling_opr = [=](OperatorNodeBase* opr, - const VarNodeArray& new_inp) { + const VarNodeArray& new_inp) { mgb_assert(opr->input().size() == new_inp.size()); auto& pooling_opr = opr->cast_final_safe(); mgb_assert(pooling_opr.param().format == @@ -2115,7 +2116,7 @@ void EnableNchwxxPass::fill_opr_convert_fun(size_t pack_c_size){ }; auto relayout_inp_to_nchw = [=](OperatorNodeBase* opr, - const VarNodeArray& new_inp) { + const VarNodeArray& new_inp) { mgb_assert(opr->input().size() == new_inp.size()); VarNodeArray temp_inp = new_inp; for (size_t i = 0; i < opr->input().size(); i++) { @@ -2205,7 +2206,8 @@ EnableNchw44DotPass::make_nchw44_dot_converter() { size_t OC = filter->shape()[0]; if ((IC % pack_c_size == 0) && (OC % pack_c_size == 0)) { ret.trans_type = TransType::TRANS_PURE_NCHWXX; - ret.relayout_mod = RelayoutMode::WEIGHT_NCHW_TO_NCHW44_DOT_DENSE; + ret.relayout_mod = + RelayoutMode::WEIGHT_NCHW_TO_NCHW44_DOT_DENSE; ret.conv_format = megdnn::param::ConvBias::Format::NCHW44_DOT; } else if (IC < pack_c_size && OC % pack_c_size == 0) { ret.trans_type = TransType::TRANS_HYBIRD_NCHWXX; @@ -2223,7 +2225,8 @@ EnableNchw44DotPass::make_nchw44_dot_converter() { ret.conv_format = megdnn::param::ConvBias::Format::NCHW44; } else if ((icpg % pack_c_size == 0) && (ocpg % pack_c_size == 0)) { ret.trans_type = TransType::TRANS_PURE_NCHWXX; - ret.relayout_mod = RelayoutMode::WEIGHT_NCHW_TO_NCHW44_DOT_GROUP; + ret.relayout_mod = + RelayoutMode::WEIGHT_NCHW_TO_NCHW44_DOT_GROUP; ret.conv_format = megdnn::param::ConvBias::Format::NCHW44_DOT; } } @@ -2538,7 +2541,6 @@ public: param.mode = megdnn::param::RelayoutFormat::Mode::NCHW4_CHWN4; auto reformat = opr::RelayoutFormat::make(inp, param); return reformat.node(); - }; m_reformat[std::make_pair(TensorFormat::CHWN4, TensorFormat::NCHW4)] = @@ -2563,7 +2565,6 @@ public: auto y0 = opr::Reshape::make(x, tshp); auto y1 = opr::Dimshuffle::make(y0, {1, 3, 4, 0, 2}); return y1.node(); - }; m_reformat[std::make_pair(TensorFormat::CHWN4, TensorFormat::NCHW)] = @@ -2591,24 +2592,29 @@ public: * \brief abstract operator representation of shuffle operation */ MGB_DEFINE_OPR_CLASS(ShuffleShuffleRemovePass::Impl::AbstractShuffleOpr, - cg::SingleCNOperatorNodeBase) // { + cg::SingleCNOperatorNodeBase) // { public: - AbstractShuffleOpr(VarNode* inpvar, TensorFormat inp_format, - TensorFormat out_format); +AbstractShuffleOpr(VarNode* inpvar, TensorFormat inp_format, + TensorFormat out_format); - static SymbolVar make(VarNode* inpvar, TensorFormat inp_format, - TensorFormat out_format); +static SymbolVar make(VarNode* inpvar, TensorFormat inp_format, + TensorFormat out_format); - TensorFormat inp_format() const { return m_inp_format; } +TensorFormat inp_format() const { + return m_inp_format; +} - TensorFormat out_format() const { return m_out_format; } +TensorFormat out_format() const { + return m_out_format; +} private: - void init_output_static_infer_desc() override; - void scn_do_execute() override; - const TensorFormat m_inp_format; - const TensorFormat m_out_format; -}; +void init_output_static_infer_desc() override; +void scn_do_execute() override; +const TensorFormat m_inp_format; +const TensorFormat m_out_format; +} +; MGB_DYN_TYPE_OBJ_FINAL_IMPL(ShuffleShuffleRemovePass::Impl::AbstractShuffleOpr); @@ -2914,7 +2920,8 @@ void ShuffleShuffleRemovePass::Impl::do_replace() { bool force_folding_typecvt = false; bool first_shuffle = false; // initialize inp_format and out_format - TensorFormat out_format = TensorFormat::NCHW, inp_format = out_format; + TensorFormat out_format = TensorFormat::NCHW, + inp_format = out_format; megdnn::DType inp_dtype = cur->input(0)->dtype(), out_dtype = cur->output(0)->dtype(); SmallVector out_dtype_vec; diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp index 54ffd19e..6a8c42a4 100644 --- a/src/gopt/test/inference.cpp +++ b/src/gopt/test/inference.cpp @@ -6,32 +6,31 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #include "megbrain/opr/dnn/local.h" #include "megbrain/test/helper.h" -#include "megbrain/gopt/inference.h" #include "megbrain/gopt/basic_arith.h" #include "megbrain/gopt/gtrans.h" +#include "megbrain/gopt/inference.h" -#include "megbrain/opr/io.h" #include "megbrain/opr/basic_arith_wrapper.h" -#include "megbrain/opr/tensor_manip.h" +#include "megbrain/opr/blas.h" #include "megbrain/opr/dnn/batch_norm.h" #include "megbrain/opr/dnn/convolution.h" -#include "megbrain/opr/utility.h" +#include "megbrain/opr/dnn/pooling.h" #include "megbrain/opr/imgproc.h" -#include "megbrain/opr/tensor_manip.h" +#include "megbrain/opr/io.h" #include "megbrain/opr/nn_int.h" -#include "megbrain/opr/imgproc.h" -#include "megbrain/opr/dnn/pooling.h" #include "megbrain/opr/tensor_gen.h" -#include "megbrain/opr/blas.h" +#include "megbrain/opr/tensor_manip.h" +#include "megbrain/opr/utility.h" -#include "megbrain/comp_node_env.h" #include "./helper.h" +#include "megbrain/comp_node_env.h" #include "megdnn/tensor_format.h" @@ -121,20 +120,17 @@ TEST(TestGoptInference, ParamFuseConstEndPoint) { graph->options().graph_opt_level = 0; auto x = opr::SharedDeviceTensor::make(*graph, *host_x), y = opr::SharedDeviceTensor::make(*graph, *host_y), - p = opr::Host2DeviceCopy::make(*graph, host_p), - q = p + x, - a = y + 3, - z0 = a + q, - z1 = a + 4; + p = opr::Host2DeviceCopy::make(*graph, host_p), q = p + x, a = y + 3, + z0 = a + q, z1 = a + 4; HostTensorND host_z0, host_z1; SymbolVar z0_1, z1_1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - apply({{z1, z0}}).endpoint_vars(), - z1_1, z0_1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .apply({{z1, z0}}) + .endpoint_vars(), + z1_1, z0_1); auto func = graph->compile({make_callback_copy(z0_1, host_z0), make_callback_copy(z1_1, host_z1)}); @@ -143,7 +139,10 @@ TEST(TestGoptInference, ParamFuseConstEndPoint) { func->execute(); int nr_opr = 0; - func->iter_opr_seq([&](cg::OperatorNodeBase*) {++ nr_opr; return true; }); + func->iter_opr_seq([&](cg::OperatorNodeBase*) { + ++nr_opr; + return true; + }); ASSERT_EQ(8, nr_opr); auto px = host_x->ptr(), pz0 = host_z0.ptr(); @@ -151,13 +150,12 @@ TEST(TestGoptInference, ParamFuseConstEndPoint) { auto yv = host_y->ptr()[0], pv = host_p->ptr()[0], pz1 = host_z1.ptr()[0]; - for (size_t i = 0; i < SIZE; ++ i) { + for (size_t i = 0; i < SIZE; ++i) { MGB_ASSERT_FLOAT_EQ(px[i] + yv + 3 + pv, pz0[i]); } MGB_ASSERT_FLOAT_EQ(yv + 7, pz1); } - TEST(TestGoptInference, ParamFuse) { constexpr size_t SIZE = 23; HostTensorGenerator<> gen; @@ -168,35 +166,37 @@ TEST(TestGoptInference, ParamFuse) { auto x = opr::SharedDeviceTensor::make(*graph, *host_x), y = opr::SharedDeviceTensor::make(*graph, *host_y), p = opr::Host2DeviceCopy::make(*graph, host_p), - z = x + y, // endpoint - q = x * y + p; // middle point + z = x + y, // endpoint + q = x * y + p; // middle point SymbolVar z1, q1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - apply({{z, q}}).endpoint_vars(), - z1, q1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .apply({{z, q}}) + .endpoint_vars(), + z1, q1); ASSERT_TRUE(z1.node()->owner_opr()->same_type()); ASSERT_NE(q1.node()->owner_opr(), q.node()->owner_opr()); ASSERT_EQ(q1.node()->owner_opr()->dyn_typeinfo(), - q.node()->owner_opr()->dyn_typeinfo()); + q.node()->owner_opr()->dyn_typeinfo()); HostTensorND host_z, host_q; auto func = graph->compile( - {make_callback_copy(z1, host_z), - make_callback_copy(q1, host_q)}); + {make_callback_copy(z1, host_z), make_callback_copy(q1, host_q)}); func->execute(); int nr_opr = 0; - func->iter_opr_seq([&](cg::OperatorNodeBase*) {++ nr_opr; return true; }); + func->iter_opr_seq([&](cg::OperatorNodeBase*) { + ++nr_opr; + return true; + }); ASSERT_EQ(6, nr_opr); auto px = host_x->ptr(), pz = host_z.ptr(), pq = host_q.ptr(); auto yv = host_y->ptr()[0], pv = host_p->ptr()[0]; - for (size_t i = 0; i < SIZE; ++ i) { + for (size_t i = 0; i < SIZE; ++i) { MGB_ASSERT_FLOAT_EQ(px[i] + yv, pz[i]); MGB_ASSERT_FLOAT_EQ(px[i] * yv + pv, pq[i]); } @@ -212,8 +212,8 @@ TEST(TestGoptInference, ParamFuseMultiDeviceTensorHolder) { auto x = opr::SharedDeviceTensor::make(*graph, *host_x), y = opr::SharedDeviceTensor::make(*graph, *host_y), p = opr::Host2DeviceCopy::make(*graph, host_p), - z = x + y, // endpoint - q = x * y + p; // middle point + z = x + y, //! endpoint + q = x * y + p; //! middle point SymbolVar z1, q1; unpack_vector(gopt::GraphOptimizer{} @@ -223,34 +223,38 @@ TEST(TestGoptInference, ParamFuseMultiDeviceTensorHolder) { z1); ASSERT_TRUE(z1.node() - ->owner_opr()->input(0)->owner_opr() + ->owner_opr() + ->input(0) + ->owner_opr() ->same_type()); - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - add_pass(). - apply({{z, q}}).endpoint_vars(), - z1, q1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .add_pass() + .apply({{z, q}}) + .endpoint_vars(), + z1, q1); ASSERT_TRUE(z1.node()->owner_opr()->same_type()); ASSERT_NE(q1.node()->owner_opr(), q.node()->owner_opr()); ASSERT_EQ(q1.node()->owner_opr()->dyn_typeinfo(), - q.node()->owner_opr()->dyn_typeinfo()); + q.node()->owner_opr()->dyn_typeinfo()); HostTensorND host_z, host_q; auto func = graph->compile( - {make_callback_copy(z1, host_z), - make_callback_copy(q1, host_q)}); + {make_callback_copy(z1, host_z), make_callback_copy(q1, host_q)}); func->execute(); int nr_opr = 0; - func->iter_opr_seq([&](cg::OperatorNodeBase*op) {++ nr_opr; return true; }); + func->iter_opr_seq([&](cg::OperatorNodeBase* op) { + ++nr_opr; + return true; + }); ASSERT_EQ(6, nr_opr); auto px = host_x->ptr(), pz = host_z.ptr(), pq = host_q.ptr(); auto yv = host_y->ptr()[0], pv = host_p->ptr()[0]; - for (size_t i = 0; i < SIZE; ++ i) { + for (size_t i = 0; i < SIZE; ++i) { MGB_ASSERT_FLOAT_EQ(px[i] + yv, pz[i]); MGB_ASSERT_FLOAT_EQ(px[i] * yv + pv, pq[i]); } @@ -262,33 +266,42 @@ TEST(TestGoptInference, ParamFuseMultiRead) { auto graph = ComputingGraph::make(); graph->options().graph_opt_level = 0; - auto mkvar = [&](const char *name, const TensorShape &shp) { + auto mkvar = [&](const char* name, const TensorShape& shp) { return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name); }; - auto mkcvar = [&](const char *name, const TensorShape &shp) { + auto mkcvar = [&](const char* name, const TensorShape& shp) { return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name); }; - auto x = mkvar("x", {23}), - p0 = mkcvar("p0", {1}), - p1 = mkcvar("p1", {1}), + auto x = mkvar("x", {23}), p0 = mkcvar("p0", {1}), p1 = mkcvar("p1", {1}), z0 = x * (p0 + p1) + x / (p0 + p1); SymbolVar z1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - apply({{z0}}).endpoint_vars(), - z1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .apply({{z0}}) + .endpoint_vars(), + z1); ASSERT_NE(z0.node(), z1.node()); - ASSERT_TRUE(z1.node()->owner_opr()->input(0)->owner_opr() - ->input(1)->owner_opr()->same_type()); - ASSERT_TRUE(z1.node()->owner_opr()->input(1)->owner_opr() - ->input(1)->owner_opr()->same_type()); + ASSERT_TRUE(z1.node() + ->owner_opr() + ->input(0) + ->owner_opr() + ->input(1) + ->owner_opr() + ->same_type()); + ASSERT_TRUE(z1.node() + ->owner_opr() + ->input(1) + ->owner_opr() + ->input(1) + ->owner_opr() + ->same_type()); HostTensorND host_z0, host_z1; graph->compile({make_callback_copy(z0, host_z0), - make_callback_copy(z1, host_z1)})->execute(); + make_callback_copy(z1, host_z1)}) + ->execute(); MGB_ASSERT_TENSOR_EQ(host_z0, host_z1); } @@ -297,10 +310,10 @@ TEST(TestGoptInference, ParamFuseStaticInfer) { auto graph = ComputingGraph::make(); - auto mkvar = [&](const char *name, const TensorShape &shp) { + auto mkvar = [&](const char* name, const TensorShape& shp) { return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name); }; - auto mkcvar = [&](const char *name, const TensorShape &shp) { + auto mkcvar = [&](const char* name, const TensorShape& shp) { return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name); }; @@ -308,11 +321,11 @@ TEST(TestGoptInference, ParamFuseStaticInfer) { b = a.reshape(opr::GetVarShape::make(mkcvar("tshp", {2, 2}))); SymbolVar b1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - apply({{b}}).endpoint_vars(), - b1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .apply({{b}}) + .endpoint_vars(), + b1); ASSERT_EQ(b1, a.reshape({2, 2})); } @@ -333,11 +346,11 @@ TEST(TestGoptInference, ParamRedistributeConvMul) { y0 = opr::Convolution::make(x * k, w); SymbolVar y1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - apply({{y0}}).endpoint_vars(), - y1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .apply({{y0}}) + .endpoint_vars(), + y1); ASSERT_NE(y0.node(), y1.node()); @@ -364,18 +377,18 @@ TEST(TestGoptInference, ParamRedistributeConvMulUniqReader) { {-1, 0, -1, -1}), w = opr::SharedDeviceTensor::make(*graph, *host_w), // y0 should be replaced - y0 = opr::powf(opr::Convolution::make(x * k, w).rename("y0") + 2, 2), + y0 = opr::powf(opr::Convolution::make(x * k, w).rename("y0") + 2, + 2), y0k = (y0 * k).rename("y0k"), // y0k is accessed twice, so it should not be replaced - y1 = opr::Convolution::make(y0k, w).rename("y1"), - z0 = y1 / y0k; + y1 = opr::Convolution::make(y0k, w).rename("y1"), z0 = y1 / y0k; SymbolVar z1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - apply({{z0}}).endpoint_vars(), - z1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .apply({{z0}}) + .endpoint_vars(), + z1); ASSERT_NE(z0.node(), z1.node()); auto y1_repl = z1.node()->owner_opr()->input(0)->owner_opr(); @@ -394,10 +407,8 @@ TEST(TestGoptInference, ParamRedistributeMulConvMul) { constexpr size_t N = 4, IC = 3, IH = 5, IW = 4, OC = 4, KH = 3, KW = 2; HostTensorGenerator<> gen; - auto host_x = gen({N, IC, IH, IW}), - host_k1 = gen({IC}), - host_k2 = gen({1, OC, 1, 1}), - host_w = gen({OC, IC, KH, KW}); + auto host_x = gen({N, IC, IH, IW}), host_k1 = gen({IC}), + host_k2 = gen({1, OC, 1, 1}), host_w = gen({OC, IC, KH, KW}); auto graph = ComputingGraph::make(); auto x = opr::Host2DeviceCopy::make(*graph, host_x), @@ -409,12 +420,12 @@ TEST(TestGoptInference, ParamRedistributeMulConvMul) { y0 = opr::Convolution::make(x * k1, w) * k2; SymbolVar y1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - add_pass(). - apply({{y0}}).endpoint_vars(), - y1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .add_pass() + .apply({{y0}}) + .endpoint_vars(), + y1); auto y1opr = y1.node()->owner_opr(); ASSERT_TRUE(y1opr->same_type()); @@ -444,12 +455,12 @@ TEST(TestGoptInference, ParamRedistributeConvAdd) { y0 = opr::Convolution::make(x + b, w); SymbolVar y1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - add_pass(). - apply({{y0}}).endpoint_vars(), - y1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .add_pass() + .apply({{y0}}) + .endpoint_vars(), + y1); ASSERT_NE(y0.node(), y1.node()); @@ -462,41 +473,37 @@ TEST(TestGoptInference, ParamRedistributeConvAdd) { } TEST(TestGoptInference, ParamRedistributeDistThenReasso) { - constexpr size_t N = 4, IC0 = 3, IC1 = 6, IH = 5, - IW = 4, OC = 4, KH = 3, KW = 2; + constexpr size_t N = 4, IC0 = 3, IC1 = 6, IH = 5, IW = 4, OC = 4, KH = 3, + KW = 2; HostTensorGenerator<> gen; auto graph = ComputingGraph::make(); - auto mkvar = [&](const char *name, const TensorShape &shp) { + auto mkvar = [&](const char* name, const TensorShape& shp) { return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name); }; - auto mkcvar = [&](const char *name, const TensorShape &shp) { + auto mkcvar = [&](const char* name, const TensorShape& shp) { return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name); }; - auto x0 = mkvar("x0", {N, IC0, IH, IW}), - x1 = mkvar("x1", {N, IC1, IH, IW}), - k0 = opr::Dimshuffle::make( - mkcvar("x1_", {IC0}), {-1, 0, -1, -1}).rename("x1"), + auto x0 = mkvar("x0", {N, IC0, IH, IW}), x1 = mkvar("x1", {N, IC1, IH, IW}), + k0 = opr::Dimshuffle::make(mkcvar("x1_", {IC0}), {-1, 0, -1, -1}) + .rename("x1"), w0 = mkcvar("w0", {OC, IC0, KH, KW}), k1 = mkcvar("k1", {1, IC1, 1, 1}), - w1 = mkcvar("w1", {OC, IC1, KH, KW}), - b0 = mkvar("b0", {1, OC, 1, 1}), - b1 = mkcvar("b1", {1}), - k2 = mkcvar("k2", {1}), - y0 = ( - opr::Convolution::make(x0 * k0, w0) + - opr::Convolution::make(x1 + k1, w1) + - b0 + b1) * k2; + w1 = mkcvar("w1", {OC, IC1, KH, KW}), b0 = mkvar("b0", {1, OC, 1, 1}), + b1 = mkcvar("b1", {1}), k2 = mkcvar("k2", {1}), + y0 = (opr::Convolution::make(x0 * k0, w0) + + opr::Convolution::make(x1 + k1, w1) + b0 + b1) * + k2; SymbolVar y1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - add_pass( - gopt::ConstVarType::IMMUTABLE_AND_PARAM). - add_pass(). - apply({{y0}}).endpoint_vars(), - y1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .add_pass( + gopt::ConstVarType::IMMUTABLE_AND_PARAM) + .add_pass() + .apply({{y0}}) + .endpoint_vars(), + y1); ASSERT_NE(y0.node(), y1.node()); HostTensorND host_y0, host_y1; @@ -506,19 +513,21 @@ TEST(TestGoptInference, ParamRedistributeDistThenReasso) { MGB_ASSERT_TENSOR_NEAR(host_y0, host_y1, 1e-5); - auto chain = gopt::extract_opr_leaves(y1.node(), - [](cg::OperatorNodeBase*opr){ + auto chain = + gopt::extract_opr_leaves(y1.node(), [](cg::OperatorNodeBase* opr) { return gopt::as_elem_opr(opr, opr::Elemwise::Mode::ADD); }); size_t nr_conv = 0; - for (auto i: chain) { + for (auto i : chain) { auto opr = i->owner_opr(); if (opr->same_type()) { - ++ nr_conv; - ASSERT_TRUE(opr->input(0)->owner_opr() - ->same_type()); - ASSERT_TRUE(opr->input(1)->owner_opr() - ->same_type()); + ++nr_conv; + ASSERT_TRUE(opr->input(0) + ->owner_opr() + ->same_type()); + ASSERT_TRUE(opr->input(1) + ->owner_opr() + ->same_type()); } } ASSERT_EQ(2u, nr_conv); @@ -531,27 +540,24 @@ TEST(TestGoptInference, ParamRedistributeMultiChange) { HostTensorGenerator<> gen; auto graph = ComputingGraph::make(); graph->options().graph_opt_level = 0; - auto mkvar = [&](const char *name, const TensorShape &shp) { + auto mkvar = [&](const char* name, const TensorShape& shp) { return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name); }; - auto mkcvar = [&](const char *name, const TensorShape &shp) { + auto mkcvar = [&](const char* name, const TensorShape& shp) { return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name); }; - auto x = mkvar("x", {N, IC, IH, IW}), - k0 = mkcvar("k0", {1, IC, 1, 1}), - b0 = mkcvar("b0", {1, IC, 1, 1}), - k1 = mkcvar("k0", {1}), - b1 = mkcvar("b0", {1}), - w = mkcvar("w", {OC, IC, KH, KW}), + auto x = mkvar("x", {N, IC, IH, IW}), k0 = mkcvar("k0", {1, IC, 1, 1}), + b0 = mkcvar("b0", {1, IC, 1, 1}), k1 = mkcvar("k0", {1}), + b1 = mkcvar("b0", {1}), w = mkcvar("w", {OC, IC, KH, KW}), y0 = (opr::Convolution::make(x * k0 + b0, w) + b1) * k1; SymbolVar y1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - add_pass(). - apply({{y0}}).endpoint_vars(), - y1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .add_pass() + .apply({{y0}}) + .endpoint_vars(), + y1); ASSERT_NE(y0.node(), y1.node()); HostTensorND host_y0, host_y1; @@ -577,16 +583,15 @@ TEST(TestGoptInference, ParamRedistributeMultiReader) { auto graph = ComputingGraph::make(); graph->options().graph_opt_level = 0; - auto mkvar = [&](const char *name, const TensorShape &shp) { + auto mkvar = [&](const char* name, const TensorShape& shp) { return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name); }; - auto mkcvar = [&](const char *name, const TensorShape &shp) { + auto mkcvar = [&](const char* name, const TensorShape& shp) { return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name); }; - auto x = mkvar("x", {N, IC, IH, IW}), - k = mkcvar("k", {1, OC, 1, 1}), + auto x = mkvar("x", {N, IC, IH, IW}), k = mkcvar("k", {1, OC, 1, 1}), w = mkcvar("w", {OC, IC, KH, KW}); auto conv = opr::Convolution::make(x, w); @@ -594,12 +599,12 @@ TEST(TestGoptInference, ParamRedistributeMultiReader) { auto y0 = t * 4.2f + t * 2.4f; SymbolVar y1; - unpack_vector( - gopt::GraphOptimizer{}. - add_pass(). - add_pass(). - apply({{y0}}).endpoint_vars(), - y1); + unpack_vector(gopt::GraphOptimizer{} + .add_pass() + .add_pass() + .apply({{y0}}) + .endpoint_vars(), + y1); ASSERT_NE(y0.node(), y1.node()); HostTensorND host_y0, host_y1; @@ -616,13 +621,11 @@ TEST(TestGoptInference, ParamRedistributeMultiReader) { ASSERT_TRUE(ymul0); ASSERT_TRUE(ymul1); auto yconv = ymul0->input(0)->owner_opr(); - if (!yconv->same_type()) - { + if (!yconv->same_type()) { yconv = ymul0->input(1)->owner_opr(); } ASSERT_TRUE(yconv->same_type()); - if (ymul1->input(0) != yconv->output(0)) - { + if (ymul1->input(0) != yconv->output(0)) { ASSERT_EQ(yconv->output(0), ymul1->input(1)); } ASSERT_EQ(x.node(), yconv->input(0)); @@ -751,9 +754,9 @@ TEST(TestGoptInference, Float16IOFloat32ComputeRemap) { auto a = mkvar("a", {N, 4, INP_H, INP_W}); auto gen_map = [&](HostTensorND& mat) { auto ptr = mat.ptr(); - for(size_t n = 0; n < N; ++n){ - for(int h = 0; h < 5; ++h){ - for(int w = 0; w < 5; ++w){ + for (size_t n = 0; n < N; ++n) { + for (int h = 0; h < 5; ++h) { + for (int w = 0; w < 5; ++w) { *ptr++ = (h * 5 * 2) + 5 * 2 + 0; *ptr++ = (h * 5 * 2) + 5 * 2 + 1; } @@ -905,21 +908,26 @@ TEST(TestGoptInference, Float32TOFloat16C32) { }; auto make_f16_graph = [&]() { - auto d0 = opr::TypeCvt::make(opr::TypeCvt::make( - opr::Host2DeviceCopy::make(*graph, host_x0), - dtype::Float16{}), dtype::Float32{}), - d1 = opr::TypeCvt::make(opr::TypeCvt::make( - opr::Host2DeviceCopy::make(*graph, host_x1), - dtype::Float16{}), dtype::Float32{}), - d2 = opr::TypeCvt::make(opr::TypeCvt::make( - opr::SharedDeviceTensor::make(*graph, *host_x2), - dtype::Float16{}), dtype::Float32{}); + auto d0 = opr::TypeCvt::make( + opr::TypeCvt::make( + opr::Host2DeviceCopy::make(*graph, host_x0), + dtype::Float16{}), + dtype::Float32{}), + d1 = opr::TypeCvt::make( + opr::TypeCvt::make( + opr::Host2DeviceCopy::make(*graph, host_x1), + dtype::Float16{}), + dtype::Float32{}), + d2 = opr::TypeCvt::make( + opr::TypeCvt::make( + opr::SharedDeviceTensor::make(*graph, *host_x2), + dtype::Float16{}), + dtype::Float32{}); auto y = opr::ConvBias::make(d1, d2, d0); y = opr::Reduce::make(y, {}, y.make_scalar(1)); - y = opr::TypeCvt::make( - opr::TypeCvt::make(y, dtype::Float16{}), - dtype::Float32{}); + y = opr::TypeCvt::make(opr::TypeCvt::make(y, dtype::Float16{}), + dtype::Float32{}); return y; }; @@ -927,7 +935,7 @@ TEST(TestGoptInference, Float32TOFloat16C32) { auto y_opt = make_f32_to_f16_graph(); auto y = make_f16_graph(); ASSERT_EQ(find_opr(y_opt).param().compute_mode, - opr::ConvBias::Param::ConvBias::ComputeMode::FLOAT32); + opr::ConvBias::Param::ConvBias::ComputeMode::FLOAT32); ASSERT_EQ(y_opt.dtype(), dtype::Float32{}); ASSERT_EQ(y.dtype(), dtype::Float32{}); @@ -1061,16 +1069,14 @@ TEST(TestGoptInference, Float32TOFloat16Endpoints) { }; auto mkcvar = [&](const char* name, const TensorShape& shp) { - return opr::SharedDeviceTensor::make(*graph, *gen(shp)) - .rename(name); + return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name); }; graph->options().graph_opt_level = 0; opr::Convolution::Param param; param.pad_h = param.pad_w = 0; - auto x = mkvar("x", {8, 8, 8, 8}), - y = mkvar("y", {8, 8, 8, 8}), + auto x = mkvar("x", {8, 8, 8, 8}), y = mkvar("y", {8, 8, 8, 8}), w = mkcvar("w", {4, 8, 3, 3}), z = opr::Convolution::make(x + y, w, param); @@ -1277,9 +1283,8 @@ TEST(TestGoptInference, ConvertFormatNHWCD4Qint8) { param.pad_h = param.pad_w = 0; auto w = mkcvar("w", {4, 8, 3, 3}, dtype::QuantizedS8(0.1f)), b = mkcvar("b", {1, 4, 1, 1}, dtype::QuantizedS32(0.02f)), - y = opr::ConvBias::make( - x, w, b, param, {}, - OperatorNodeConfig{dtype::QuantizedS8(0.2f)}); + y = opr::ConvBias::make(x, w, b, param, {}, + OperatorNodeConfig{dtype::QuantizedS8(0.2f)}); SymbolVar y_opt; auto options = gopt::OptimizeForInferenceOptions{}; @@ -1542,7 +1547,6 @@ TEST(TestGoptInference, AlgoWorkspaceLimit) { ASSERT_EQ(10000u, conv.execution_policy().workspace_limit); } - TEST_PASS(FuseConvBiasNonlinPass, Basic) { auto cn = CompNode::load("xpux"); @@ -1563,11 +1567,9 @@ TEST_PASS(FuseConvBiasNonlinPass, Basic) { dtype); }; - for (auto format : { - opr::Convolution::Param::Format::NCHW, + for (auto format : {opr::Convolution::Param::Format::NCHW, opr::Convolution::Param::Format::NHWC, - opr::Convolution::Param::Format::NCHW4 - }) { + opr::Convolution::Param::Format::NCHW4}) { opr::Convolution::Param param; param.format = format; SymbolVar x, w, b; @@ -1670,7 +1672,6 @@ TEST(TestEnableTensorCore, SmallInputShape) { MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt); } - TEST(TestEnableTensorCore, Nchw4Nchw) { REQUIRE_GPU(1); auto cn = CompNode::load("gpu0"); @@ -2085,12 +2086,11 @@ TEST(TestEnableTensorCore, ShuffleMerge) { return y1; }; - auto x = mkvar("x", {32, 64, 16, 16}, dtype::QuantizedS8(2.5f)), w = mkcvar("w1", {64, 64, 3, 3}, dtype::QuantizedS8(2.5f)), b = mkcvar("b", {1, 64, 1, 1}, dtype::QuantizedS32(6.25f)), z = mkvar("b1", {32, 64, 16, 16}, dtype::QuantizedS8(2.5f)); - x = nchw2nchw4(x), w = nchw2nchw4(w), b = nchw2nchw4(b), z= nchw2nchw4(z); + x = nchw2nchw4(x), w = nchw2nchw4(w), b = nchw2nchw4(b), z = nchw2nchw4(z); opr::ConvBias::Param param; param.format = opr::ConvBias::Param::Format::NCHW4; param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU; @@ -2350,7 +2350,8 @@ TEST(TestGoptInference, EnableCHWN4WarpPespective) { opr::WarpPerspective::Param warp_param; warp_param.format = opr::WarpPerspective::Param::Format::NCHW4; - auto y1 = opr::WarpPerspective::make(y, mat_var, TensorShape{16, 16}, warp_param); + auto y1 = opr::WarpPerspective::make(y, mat_var, TensorShape{16, 16}, + warp_param); y1 = opr::TypeCvt::make(y1, dtype::Float32()); auto nchw42nchw = [](SymbolVar x) { auto xshp = opr::GetVarShape::make(x); @@ -2366,7 +2367,8 @@ TEST(TestGoptInference, EnableCHWN4WarpPespective) { }; y1 = nchw42nchw(y1); warp_param.format = opr::WarpPerspective::Param::Format::NCHW; - auto y2 = opr::WarpPerspective::make(y1, mat_var, TensorShape{16, 16}, warp_param); + auto y2 = opr::WarpPerspective::make(y1, mat_var, TensorShape{16, 16}, + warp_param); SymbolVar y_opt; SymbolVar y_cudnn; { @@ -2833,8 +2835,7 @@ TEST(TestGoptInference, ConvertFormatNCHW4Ic3) { auto mkcvar = [&](const char* name, const TensorShape& shp, const DType& dtype) { return opr::TypeCvt::make( - opr::SharedDeviceTensor::make(*graph, *gen(shp)) - .rename(name), + opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name), dtype); }; @@ -2878,7 +2879,6 @@ TEST(TestGoptInference, ConvertFormatNCHW4Ic3) { MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3); } - TEST(TestGoptInference, ConvertFormatNCHW88) { HostTensorGenerator<> gen; auto cn = CompNode::load("cpu0"); @@ -2894,12 +2894,12 @@ TEST(TestGoptInference, ConvertFormatNCHW88) { auto host_x = gen({2, 3, 16, 16}, cn); auto x = opr::Host2DeviceCopy::make(*graph, host_x); - //!Hybrid nchw88 mode + //! Hybrid nchw88 mode opr::Convolution::Param param_conv; param_conv.pad_h = param_conv.pad_w = 1; auto w1 = mkcvar("w1", {8, 3, 3, 3}), conv1 = opr::Convolution::make(x, w1, param_conv); - //!channel wise + //! channel wise opr::ConvBias::Param param_conv_bias; param_conv_bias.pad_h = param_conv_bias.pad_w = 1; param_conv_bias.sparse = opr::ConvBias::Param::Sparse::GROUP; @@ -2976,12 +2976,12 @@ TEST(TestGoptInference, ConvertFormatNCHW44) { auto host_x = gen({2, 3, 16, 16}, cn); auto x = opr::Host2DeviceCopy::make(*graph, host_x); - //!Hybrid nchw88 mode + //! Hybrid nchw88 mode opr::Convolution::Param param_conv; param_conv.pad_h = param_conv.pad_w = 1; auto w1 = mkcvar("w1", {8, 3, 3, 3}), conv1 = opr::Convolution::make(x, w1, param_conv); - //!channel wise + //! channel wise opr::ConvBias::Param param_conv_bias; param_conv_bias.pad_h = param_conv_bias.pad_w = 1; param_conv_bias.sparse = opr::ConvBias::Param::Sparse::GROUP; @@ -3140,12 +3140,12 @@ TEST(TestGoptInference, ConvertFormatNCHW44_DOT) { auto host_x = gen({2, 3, 16, 16}, cn); auto x = opr::Host2DeviceCopy::make(*graph, host_x); - //!Hybrid nchw88 mode + //! Hybrid nchw88 mode opr::Convolution::Param param_conv; param_conv.pad_h = param_conv.pad_w = 1; auto w1 = mkcvar("w1", {8, 3, 3, 3}), conv1 = opr::Convolution::make(x, w1, param_conv); - //!channel wise + //! channel wise opr::ConvBias::Param param_conv_bias; param_conv_bias.pad_h = param_conv_bias.pad_w = 1; param_conv_bias.sparse = opr::ConvBias::Param::Sparse::GROUP;