diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h index c05d73a4..7a475c5a 100644 --- a/dnn/include/megdnn/oprs/nn.h +++ b/dnn/include/megdnn/oprs/nn.h @@ -435,16 +435,6 @@ public: const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst) = 0; - /** - * \brief deduce the origin filter layout and conv_bias param after winograd - * transform, this used in fast-run to construct the origin cache-key - */ - static void deduce_winograd_origin_layout_and_param( - const Param::Format format, const size_t output_block_size, - const TensorLayout& src_layout, - const TensorLayout& winograd_filter_layout, - TensorLayout& origin_layout, Param& origin_param); - enum class BiasMode : uint32_t { NO_BIAS = 0, //!< no bias BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1] diff --git a/dnn/include/megdnn/oprs/utils.h b/dnn/include/megdnn/oprs/utils.h index 03957fd2..de7e72e1 100644 --- a/dnn/include/megdnn/oprs/utils.h +++ b/dnn/include/megdnn/oprs/utils.h @@ -91,29 +91,6 @@ class MaxTensorDiff : public OperatorBase { void check_exec(const TensorLayout& layout1, const TensorLayout& layout2, size_t workspace_in_bytes); }; - -/*! - * \brief winograd preprocess opr. 
- * - * for the detail \see src/fallback/conv_bias/winograd/winograd.h - * - */ -class WinogradFilterPreprocess : public OperatorBase { - DEF_OPR_PARAM(Winograd); - DEF_OPR_IMPL(WinogradFilterPreprocess, OperatorBase, 1, 1); - -public: - virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace) = 0; - - size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&); - - void deduce_layout(const TensorLayout& src, TensorLayout& dst); - -protected: - void check_exec(const TensorLayout& src, const TensorLayout& dst, - size_t workspace_in_bytes); -}; } // namespace megdnn #include "megdnn/internal/opr_header_epilogue.h" diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py index c8fbeb3a..85eb3ca6 100755 --- a/dnn/scripts/opr_param_defs.py +++ b/dnn/scripts/opr_param_defs.py @@ -39,7 +39,7 @@ pdef('Axis').add_fields('int32', 'axis', 0) 'NCHW44','NCHW44_DOT', Doc('NCHW_WINOGRAD', 'NCHW layout with weights tranformed by winograd'), Doc('NCHW88_WINOGRAD', 'NCHW88 layout with weights tranformed by winograd'), - Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights tranformed by winograd'), + Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights tranformed by winograd'), Doc('NCHW4_NCHW32', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), Doc('NCHW32_NCHW4', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), Doc('NCHW4_NCHW', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), @@ -456,15 +456,6 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) 'layout is (K/4, M/4, 4(m), 4(k)) x (K/4, N, 4(k))')) ) -(pdef('Winograd', 'winograd param used in convbias'). - add_fields( - 'uint32', - Doc('output_block_size', 'output block size, detail meaning see winograd ' - 'in convbias, equals to the meaning of m in F(m, r)'), 0). - add_enum_alias('Format', 'MatrixMul'). 
- add_enum_alias('ComputeMode', 'Convolution', name_field='compute_mode') - ) - (pdef('SVD'). add_fields('bool', Doc('full_matrices', diff --git a/dnn/src/arm_common/conv_bias/f16/algos.cpp b/dnn/src/arm_common/conv_bias/f16/algos.cpp index 9ab11020..fd256fd9 100644 --- a/dnn/src/arm_common/conv_bias/f16/algos.cpp +++ b/dnn/src/arm_common/conv_bias/f16/algos.cpp @@ -27,7 +27,7 @@ using namespace arm_common; /* ======================= AlgoFP16WinogradF23 ======================== */ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( - const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { @@ -37,12 +37,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -78,12 +73,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 4 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 5) && @@ -117,12 
+107,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 6 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -162,12 +147,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK8)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && diff --git a/dnn/src/arm_common/conv_bias/fp32/algos.cpp b/dnn/src/arm_common/conv_bias/fp32/algos.cpp index fef97afe..bdca4781 100644 --- a/dnn/src/arm_common/conv_bias/fp32/algos.cpp +++ b/dnn/src/arm_common/conv_bias/fp32/algos.cpp @@ -47,12 +47,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && 
!param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -86,12 +81,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 6 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -125,12 +115,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 5 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 4) && @@ -164,12 +149,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 4 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] 
== 5) && @@ -209,12 +189,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 6 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -257,12 +232,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && - (param.filter_meta.format == param::ConvBias::Format::NCHW44 || - (param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + param.filter_meta.format == param::ConvBias::Format::NCHW44 && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -303,12 +273,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && - (param.filter_meta.format == param::ConvBias::Format::NCHW44 || - (param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD && - param.output_block_size == 6 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + param.filter_meta.format == param::ConvBias::Format::NCHW44 && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -350,12 +315,7 @@ 
bool ConvBiasImpl::AlgoFP32WinogradF73_4x4_NCHW44::usable( return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && - (param.filter_meta.format == param::ConvBias::Format::NCHW44 || - (param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD && - param.output_block_size == 7 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + param.filter_meta.format == param::ConvBias::Format::NCHW44 && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && diff --git a/dnn/src/arm_common/conv_bias/int8/algos.cpp b/dnn/src/arm_common/conv_bias/int8/algos.cpp index 4df46930..e8654f52 100644 --- a/dnn/src/arm_common/conv_bias/int8/algos.cpp +++ b/dnn/src/arm_common/conv_bias/int8/algos.cpp @@ -242,14 +242,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - ((param.filter_meta.format == param::ConvBias::Format::NCHW && - param.filter_type.enumv() == DTypeEnum::QuantizedS8) || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK8 && - param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && + (param.filter_meta.format == param::ConvBias::Format::NCHW && + param.filter_type.enumv() == DTypeEnum::QuantizedS8) && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -293,13 +287,8 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( .get_matmul_kern_param(param)); return is_matmul_usable && m_matmul_algo->packmode() == PackMode::NO_PACK && - ((param.filter_meta.format == param::ConvBias::Format::NCHW44 && - param.filter_type.enumv() == 
DTypeEnum::QuantizedS8) || - ((param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD) && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + (param.filter_meta.format == param::ConvBias::Format::NCHW44 && + param.filter_type.enumv() == DTypeEnum::QuantizedS8) && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -341,14 +330,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( .get_matmul_kern_param(param); bool is_matmul_usable = m_matmul_algo->usable(matmul_param); return is_matmul_usable && - ((param.filter_meta.format == param::ConvBias::Format::NCHW44 && - param.filter_type.enumv() == DTypeEnum::QuantizedS8) || - (param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK8 && - param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && + (param.filter_meta.format == param::ConvBias::Format::NCHW44 && + param.filter_type.enumv() == DTypeEnum::QuantizedS8) && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && diff --git a/dnn/src/arm_common/conv_bias/int8/strategy_nchw44_2x3_4x4.cpp b/dnn/src/arm_common/conv_bias/int8/strategy_nchw44_2x3_4x4.cpp index 3177fd1d..7f080f39 100644 --- a/dnn/src/arm_common/conv_bias/int8/strategy_nchw44_2x3_4x4.cpp +++ b/dnn/src/arm_common/conv_bias/int8/strategy_nchw44_2x3_4x4.cpp @@ -240,7 +240,6 @@ void winograd_2x3_4x4_s8_f32_nchw44::filter(const int8_t* filter, float* transform_mid_buf, size_t OC, size_t IC, size_t oc_start, size_t oc_end) { constexpr int alpha = 2 + 3 - 1; - /** * origin: (4x3) * (3 x 3) * (3 x 4) */ diff --git a/dnn/src/arm_common/conv_bias/opr_impl.cpp b/dnn/src/arm_common/conv_bias/opr_impl.cpp index 99a13d0a..4da12b88 100644 --- 
a/dnn/src/arm_common/conv_bias/opr_impl.cpp +++ b/dnn/src/arm_common/conv_bias/opr_impl.cpp @@ -290,8 +290,8 @@ ConvBiasImpl::get_all_packed_algo() { bool ConvBiasImpl::is_matmul_quantized_prefer( const ConvBiasImpl::NCBKernSizeParam& param) const { fallback::ConvBiasImpl::NCBKernSizeParam conv_ncb_param( - param, 0, param::MatrixMul::Format::DEFAULT, {}, 0, - BiasMode::NO_BIAS, param::ConvBias::NonlineMode::IDENTITY); + param, {}, 0, BiasMode::NO_BIAS, + param::ConvBias::NonlineMode::IDENTITY); conv_ncb_param.dst_type = param.bias_type; conv_ncb_param.filter_meta.group = 1; @@ -320,11 +320,6 @@ SmallVector ConvBiasImpl::suggest_algo_category_order( auto FH = param.filter_meta.spatial[0]; auto FW = param.filter_meta.spatial[1]; //! TODO: now winograd only support fast-run - if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD || - param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD || - param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) { - return {AlgoCategory::WINOGRAD}; - } //! im2col bool im2col_prefer = (IC >= 32 || OC >= 32); //! 
quantized algo use matmul when direct algo is unusable diff --git a/dnn/src/arm_common/handle.cpp b/dnn/src/arm_common/handle.cpp index b26c33cb..8fca423f 100644 --- a/dnn/src/arm_common/handle.cpp +++ b/dnn/src/arm_common/handle.cpp @@ -27,7 +27,7 @@ #include "src/arm_common/type_cvt/opr_impl.h" #include "src/arm_common/reduce/opr_impl.h" #include "src/arm_common/conv_bias/opr_impl.h" -#include "src/arm_common/winograd_filter_preprocess/opr_impl.h" + namespace megdnn { namespace arm_common { @@ -50,7 +50,6 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(WarpPerspective) MEGDNN_SPECIALIZE_CREATE_OPERATOR(TypeCvt) MEGDNN_SPECIALIZE_CREATE_OPERATOR(Reduce) MEGDNN_SPECIALIZE_CREATE_OPERATOR(ConvBias) -MEGDNN_SPECIALIZE_CREATE_OPERATOR(WinogradFilterPreprocess) MEGDNN_SPECIALIZE_CREATE_OPERATOR(ConvolutionBackwardData) #pragma GCC diagnostic push diff --git a/dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp b/dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp deleted file mode 100644 index 82e7af0e..00000000 --- a/dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/** - * \file dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ - -#include "src/arm_common/winograd_filter_preprocess/opr_impl.h" -#include "src/arm_common/handle.h" -#include "src/common/utils.h" -#include "src/arm_common/conv_bias/fp32/strategy.h" -#include "src/arm_common/conv_bias/int8/strategy.h" -#include "src/arm_common/conv_bias/f16/strategy.h" - -#include "midout.h" -MIDOUT_DECL(megdnn_arm_common_winograd_filter_preprocess) - -using namespace megdnn; -using namespace arm_common; - -void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, - _megdnn_tensor_out dst, - _megdnn_workspace workspace) { - using namespace winograd; - check_exec(src.layout, dst.layout, workspace.size); - - //! NCHW44 group conv or NCHW group conv or both dense conv - size_t flt_start = 0; - size_t pack_c_size = 1; - size_t group = 1; - if (src.layout.ndim == 5) { //! {g, OC, IC, FH, FW} - flt_start = 1; - group = src.layout[0]; - } else if (src.layout.ndim == 6) { //! {OC/4, IC/4, FH, FW, 4, 4} - pack_c_size = src.layout[5]; - } else if (src.layout.ndim == 7) { //! {g, OC/4, IC/4, FH, FW, 4, 4} - flt_start = 1; - group = src.layout[0]; - pack_c_size = src.layout[6]; - } - size_t OC = src.layout[flt_start] * pack_c_size, - IC = src.layout[flt_start + 1] * pack_c_size, - FW = src.layout[flt_start + 3]; - size_t m = param().output_block_size; - - bool execed = false; - -#define DISPATCH(_strategy, _format, ...) 
\ - MIDOUT_BEGIN(megdnn_arm_common_winograd_filter_preprocess, \ - ##__VA_ARGS__) { \ - if (param().format == _format) { \ - for (size_t g = 0; g < group; g++) { \ - auto run = [=]() { \ - _strategy strategy(src.layout.dtype, src.layout.dtype, \ - src.layout.dtype); \ - megdnn::winograd::ConvBias<_strategy, _format>(strategy, \ - 1_z) \ - .filter_process(src_ptr, dst_ptr, workspace_ptr, \ - OC, IC); \ - }; \ - MEGDNN_DISPATCH_CPU_KERN_OPR(run()); \ - src_ptr += src.layout.stride[0]; \ - dst_ptr += dst.layout.stride[0]; \ - } \ - execed = true; \ - } \ - } \ - MIDOUT_END(); - - if (src.layout.dtype.enumv() == DTypeEnum::Float32) { - const float* src_ptr = src.ptr(); - float* dst_ptr = dst.ptr(); - float* workspace_ptr = workspace.ptr(); - if (FW == 3) { - if (m == 2) { - if (pack_c_size == 1) { - DISPATCH(winograd_2x3_4x4_f, param::Winograd::Format::MK4, - 0, 0); - } else if (pack_c_size == 4) { - DISPATCH(winograd_F23_mk4_f_nchw44, - param::Winograd::Format::MK4, 0, 5); - } - } else if (m == 6) { - DISPATCH(winograd_6x3_1x1_f, param::Winograd::Format::DEFAULT, - 0, 1); - if (pack_c_size == 1) { - DISPATCH(winograd_6x3_4x4_f, param::Winograd::Format::MK4, - 0, 2); - } else if (pack_c_size == 4) { - DISPATCH(winograd_F63_mk4_f_nchw44, - param::Winograd::Format::MK4, 0, 6); - } - } else if (m == 7) { - megdnn_assert(pack_c_size == 4, "WINOGRAD F(7,3) Only Supports NCHW44"); - DISPATCH(winograd_F73_mk4_f_nchw44, - param::Winograd::Format::MK4, 0, 7); - } - } else if (FW == 4) { - if (m == 5) { - DISPATCH(winograd_5x4_1x1_f, param::Winograd::Format::DEFAULT, - 0, 3); - } - } else if (FW == 5) { - if (m == 4) { - DISPATCH(winograd_4x5_1x1_f, param::Winograd::Format::DEFAULT, - 0, 4); - } - } - } - if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { - const dt_int8* src_ptr = src.compatible_ptr(); - if (param().compute_mode == param::ConvBias::ComputeMode::DEFAULT) { - dt_int16* dst_ptr = dst.compatible_ptr(); - dt_int16* workspace_ptr = workspace.ptr(); - if (FW == 
3) { - if (m == 2) { - if (pack_c_size == 1) { - DISPATCH(winograd_2x3_8x8_s8, - param::Winograd::Format::MK8, 1, 0); - } else if (pack_c_size == 4) { - DISPATCH(winograd_2x3_8x8_s8_nchw44, - param::Winograd::Format::MK8, 1, 0); - }else{ - megdnn_throw("only support pack_c_size = 1 or 4"); - } - } - } - } else { - dt_int32* dst_ptr_tmp = dst.compatible_ptr(); - dt_int32* workspace_ptr_tmp = workspace.ptr(); - float* dst_ptr = reinterpret_cast(dst_ptr_tmp); - float* workspace_ptr = reinterpret_cast(workspace_ptr_tmp); - if (pack_c_size == 4) { - if (FW == 3) { - if (m == 2) { - DISPATCH(winograd_2x3_4x4_s8_f32_nchw44, - param::Winograd::Format::MK4, 1, 1); - } - } - } else { - megdnn_throw("only support pack_c_size == 4"); - } - } - } -#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - if (src.layout.dtype.enumv() == DTypeEnum::Float16) { - const dt_float16* src_ptr = src.ptr(); - dt_float16* dst_ptr = dst.ptr(); - dt_float16* workspace_ptr = workspace.ptr(); - if (FW == 3) { - if (m == 2) { - DISPATCH(winograd_2x3_4x4_f16, param::Winograd::Format::DEFAULT, - 2, 0); - DISPATCH(winograd_2x3_8x8_f16, param::Winograd::Format::MK8, 2, - 1); - } else if (m == 6) { - DISPATCH(winograd_6x3_1x1_f16, param::Winograd::Format::DEFAULT, - 2, 2); - } - } else if (FW == 5) { - if (m == 4) { - DISPATCH(winograd_4x5_1x1_f16, param::Winograd::Format::DEFAULT, - 2, 3); - } - } - } -#endif -#undef DISPATCH - - megdnn_assert(execed, - "Unsupport winograd filter preprocess. 
m: %zu src: %s", m, - src.layout.to_string().c_str()); -} - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h b/dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h deleted file mode 100644 index e2e5bb65..00000000 --- a/dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * \file dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -#pragma once -#include "megdnn/oprs.h" -#include "src/common/utils.h" - -namespace megdnn { -namespace arm_common { - -class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess { -public: - using WinogradFilterPreprocess::WinogradFilterPreprocess; - void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; -}; - -} // namespace arm_common -} // namespace megdnn - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/common/conv_bias.cpp b/dnn/src/common/conv_bias.cpp index dc3541ce..7b66b502 100644 --- a/dnn/src/common/conv_bias.cpp +++ b/dnn/src/common/conv_bias.cpp @@ -35,37 +35,11 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst, size_t workspace_in_bytes, const PreprocessedFilter* preprocessed_filter) { - if ((param().format == param::ConvBias::Format::NCHW_WINOGRAD || - param().format == param::ConvBias::Format::NCHW88_WINOGRAD || - param().format == param::ConvBias::Format::NCHW44_WINOGRAD) && - src.dtype.category() == DTypeCategory::QUANTIZED) { - megdnn_assert(filter.dtype.enumv() == DTypeEnum::QuantizedS16 || - //!int8 
winogradf23_44 using float,QuantizedS32 take the scale - filter.dtype.enumv() == DTypeEnum::QuantizedS32); - megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 || - src.dtype.enumv() == DTypeEnum::Quantized8Asymm); - } else { - megdnn_assert(src.dtype.enumv() == filter.dtype.enumv()); - } + megdnn_assert(src.dtype.enumv() == filter.dtype.enumv()); if (src.dtype.enumv() == DTypeEnum::QuantizedS8) { if (bias.dtype.enumv() == DTypeEnum::QuantizedS32) { float scale_src = src.dtype.param().scale; - float scale_filter = 0.f; - if (param().format == param::ConvBias::Format::NCHW_WINOGRAD || - param().format == param::ConvBias::Format::NCHW88_WINOGRAD || - param().format == param::ConvBias::Format::NCHW44_WINOGRAD) { - if (filter.dtype.enumv() == DTypeEnum::QuantizedS32) { - //! int8 winogradf23_44 using float,QuantizedS32 take the - //! scale - scale_filter = - filter.dtype.param().scale; - } else { - scale_filter = - filter.dtype.param().scale; - } - } else { - scale_filter = filter.dtype.param().scale; - } + float scale_filter = filter.dtype.param().scale; float scale_bias = bias.dtype.param().scale; megdnn_assert( std::abs(scale_src * scale_filter - scale_bias) < 1e-6, @@ -77,15 +51,8 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( } else if (src.dtype.enumv() == DTypeEnum::Quantized8Asymm) { if (bias.dtype.enumv() == DTypeEnum::QuantizedS32) { float scale_src = src.dtype.param().scale; - float scale_filter = 0.f; - if (param().format == param::ConvBias::Format::NCHW_WINOGRAD || - param().format == param::ConvBias::Format::NCHW88_WINOGRAD || - param().format == param::ConvBias::Format::NCHW44_WINOGRAD) { - scale_filter = filter.dtype.param().scale; - } else { - scale_filter = - filter.dtype.param().scale; - } + float scale_filter = + filter.dtype.param().scale; float scale_bias = bias.dtype.param().scale; megdnn_assert( std::abs(scale_src * scale_filter - scale_bias) < 1e-6, @@ -115,7 +82,6 @@ ConvBiasForward::CanonizedFilterMeta 
ConvBiasForward::check_exec( if (check_eq(bias, dst)) return ret; if (param().format == param::ConvBias::Format::NCHW || - param().format == param::ConvBias::Format::NCHW_WINOGRAD || param().format == param::ConvBias::Format::NCHW4_NCHW) { megdnn_assert(bias.shape[0] == 1); megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s", @@ -131,7 +97,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( } else if (param().format == param::ConvBias::Format::NCHW4 || param().format == param::ConvBias::Format::NCHW44 || param().format == param::ConvBias::Format::NCHW44_DOT || - param().format == param::ConvBias::Format::NCHW44_WINOGRAD || param().format == param::ConvBias::Format::NCHW32_NCHW4) { megdnn_assert(bias.shape[0] == 1); megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s", @@ -140,8 +105,7 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( megdnn_assert(bias.shape[3] == 1); megdnn_assert(bias.shape[4] == 4); } else if (param().format == param::ConvBias::Format::NCHW8 || - param().format == param::ConvBias::Format::NCHW88 || - param().format == param::ConvBias::Format::NCHW88_WINOGRAD) { + param().format == param::ConvBias::Format::NCHW88 ) { megdnn_assert(bias.shape[0] == 1); megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s", bias.to_string().c_str(), dst.to_string().c_str()); @@ -175,11 +139,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( } if (z.ndim != 0) { - megdnn_assert(param().format != param::ConvBias::Format::NCHW_WINOGRAD); - megdnn_assert(param().format != - param::ConvBias::Format::NCHW88_WINOGRAD); - megdnn_assert(param().format != - param::ConvBias::Format::NCHW44_WINOGRAD); megdnn_assert(param().format != param::ConvBias::Format::NCHW4_NCHW32); megdnn_assert(param().format != param::ConvBias::Format::NCHW32_NCHW4); megdnn_assert(z.dtype.enumv() == dst.dtype.enumv()); @@ -187,105 +146,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( } return 
ret; } -/*! - * \brief deduce the origin filter layout and param after winograd transformed - */ -void ConvBiasForward::deduce_winograd_origin_layout_and_param( - const Param::Format format, const size_t output_block_size, - const TensorLayout& src_layout, - const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout, - Param& origin_param) { - if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD || - format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD || - format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { - //! change NCHWxx_WINOGRAD to NCHWxx - size_t OC = 0; - size_t IC = 0; - size_t GROUP = 1; - size_t FH = winograd_filter_layout[1] - output_block_size + 1; - - //! {alpha, alpha, IC, OC} - if (winograd_filter_layout.ndim == 4) { - OC = winograd_filter_layout[3]; - IC = winograd_filter_layout[2]; - } - //! {group, alpha, alpha, IC, OC} - else if (winograd_filter_layout.ndim == 5) { - OC = winograd_filter_layout[4]; - IC = winograd_filter_layout[3]; - GROUP = winograd_filter_layout[0]; - } - //! {alpha, alpha, OC/f, IC/f, f, f} - else if (winograd_filter_layout.ndim == 6) { - OC = winograd_filter_layout[2] * winograd_filter_layout[5]; - IC = winograd_filter_layout[3] * winograd_filter_layout[4]; - } - //! {group, alpha, alpha, OC/f, IC/f, f, f} - else if (winograd_filter_layout.ndim == 7) { - OC = winograd_filter_layout[3] * winograd_filter_layout[6]; - IC = winograd_filter_layout[4] * winograd_filter_layout[5]; - GROUP = winograd_filter_layout[0]; - } - auto origin_data_type = winograd_filter_layout.dtype; - if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) { - if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) { - float scale = - origin_data_type.param().scale; - origin_data_type = megdnn::dtype::QuantizedS8(scale); - } else { - //! In order to braing the sacle of filter, the transformed - //! 
qint8 winograd filter computing with float dtype is Qint32 - megdnn_assert(origin_data_type.enumv() == - DTypeEnum::QuantizedS32); - float scale = - origin_data_type.param().scale; - origin_data_type = megdnn::dtype::QuantizedS8(scale); - } - } - - if (GROUP == 1) { - if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { - origin_layout = - TensorLayout({OC, IC, FH, FH}, origin_data_type); - } else if (format == - megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { - origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4}, - origin_data_type); - } else { - megdnn_assert(format == - megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); - origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8}, - origin_data_type); - } - } else { - if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { - origin_layout = - TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type); - } else if (format == - megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { - origin_layout = - TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4}, - origin_data_type); - } else { - megdnn_assert(format == - megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); - origin_layout = - TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8}, - origin_data_type); - } - } - origin_param.output_block_size = 0; - if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { - origin_param.format = megdnn::param::ConvBias::Format::NCHW; - } else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { - origin_param.format = megdnn::param::ConvBias::Format::NCHW44; - } else { - megdnn_assert(format == - megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); - origin_param.format = megdnn::param::ConvBias::Format::NCHW88; - } - } -} template struct NCHWParamTrait; diff --git a/dnn/src/common/convolution.cpp b/dnn/src/common/convolution.cpp index 19e4b1a9..a30867ff 100644 --- a/dnn/src/common/convolution.cpp +++ b/dnn/src/common/convolution.cpp @@ -41,36 +41,12 @@ uint32_t spatial_getter(uint32_t 
filter, const Param&) { return filter; } -template <> -uint32_t -spatial_getter( - uint32_t filter, const param::ConvBias& param) { - //! f = m + r - 1 -> r = f + 1 - m - return filter - param.output_block_size + 1; -} - -template <> -uint32_t -spatial_getter( - uint32_t filter, const param::ConvBias& param) { - //! f = m + r - 1 -> r = f + 1 - m - return filter - param.output_block_size + 1; -} -template <> -uint32_t -spatial_getter( - uint32_t filter, const param::ConvBias& param) { - //! f = m + r - 1 -> r = f + 1 - m - return filter - param.output_block_size + 1; -} - template void make_canonized_filter_meta_nchw_nhwc( size_t src_ndim, const TensorLayout& filter, const Param& param, typename ConvolutionBase::CanonizedFilterMeta& ret) { megdnn_assert(param.format == Param::Format::NCHW || - param.format == Param::Format::NHWC || - param.format == Param::Format::NCHW_WINOGRAD); + param.format == Param::Format::NHWC ); auto img_ndim = src_ndim - 2; size_t flt_start, flt_spatial_start, ocpg_pos, icpg_pos; if (param.sparse == Param::Sparse::DENSE) { @@ -101,20 +77,6 @@ void make_canonized_filter_meta_nchw_nhwc( flt_spatial_start = 2; ocpg_pos = 0; icpg_pos = 1; - } else if (param.format == Param::Format::NCHW_WINOGRAD) { - // filter should be (alphah, alphaw, ic, oc) or (alphah, alphaw, ocb, - // icb, ic_block_size, oc_block_size) - flt_spatial_start = 0; - if (filter.ndim == flt_start + 4) { - ocpg_pos = 3; - icpg_pos = 2; - } else { - megdnn_assert(filter.ndim == flt_start + 6); - ic_block_size = filter[flt_start + 4]; - oc_block_size = filter[flt_start + 5]; - ocpg_pos = 2; - icpg_pos = 3; - } } else { megdnn_assert(param.format == Param::Format::NHWC, "invalid conv tensor format"); @@ -136,14 +98,8 @@ void make_canonized_filter_meta_nchw_nhwc( megdnn_assert(dilation[i] > 0, "invalid dilation on spatial dim %zu: %u", i, dilation[i]); - if (param.format == Param::Format::NCHW_WINOGRAD) { - ret.spatial[i] = - spatial_getter( - filter[i + flt_start + 
flt_spatial_start], param); - } else { - ret.spatial[i] = spatial_getter( - filter[i + flt_start + flt_spatial_start], param); - } + ret.spatial[i] = spatial_getter( + filter[i + flt_start + flt_spatial_start], param); ret.dilated_spatial[i] = (ret.spatial[i] - 1) * dilation[i] + 1; } } @@ -295,20 +251,12 @@ void make_canonized_filter_meta_nchwxx( * FH, FW, pack_size(IC), pack_size(OC)} [group] * {GROUP/pack_size, 1, 1, FH, FW, pack_size} [chan] * - ** NCHW88_WINOGRAD and NCHW44_WINOGRAD mode - * filter: - * {alpha, alpha, OC/pack_size, IC/pack_size, pack_size(IC), - *pack_size(OC)} [dense] - * {GROUP, alpha, alpha, OC_PER_GROUP/pack_size, - * IC_PER_GROUP/pack_size, pack_size(IC), pack_size(OC)} [group] * */ megdnn_assert(param.format == Param::Format::NCHW88 || param.format == Param::Format::NCHW44 || - param.format == Param::Format::NCHW44_WINOGRAD || - param.format == Param::Format::NCHW44_DOT || - param.format == Param::Format::NCHW88_WINOGRAD); + param.format == Param::Format::NCHW44_DOT); size_t img_ndim = 2; size_t flt_start = 0; size_t flt_spatial_start = 2; @@ -325,10 +273,6 @@ void make_canonized_filter_meta_nchwxx( filter[filter.ndim - 1]); ret.group = 1; flt_start = 0; - if (param.format == Param::Format::NCHW88_WINOGRAD || - param.format == Param::Format::NCHW44_WINOGRAD) { - flt_start = 2; - } if (filter[filter.ndim - 2] == 2 * pack_size && filter[filter.ndim - 1] == 2 * pack_size) { pack_c_size = 2 * pack_size; @@ -339,10 +283,6 @@ void make_canonized_filter_meta_nchwxx( ret.icpg = filter[flt_start + 1] * pack_c_size; } else if (filter.ndim == img_ndim + 3) { // ohwi8o - megdnn_assert(param.format != Param::Format::NCHW88_WINOGRAD, - "Hybrid nchw88 mode in not support winograd"); - megdnn_assert(param.format != Param::Format::NCHW44_WINOGRAD, - "Hybrid nchw44 mode in not support winograd"); flt_start = 0; flt_spatial_start = 1; ret.group = 1; @@ -357,15 +297,9 @@ void make_canonized_filter_meta_nchwxx( megdnn_assert(param.sparse == 
Param::Sparse::GROUP, "invalid convolution sparse type"); flt_start = 1; - if (param.format == Param::Format::NCHW88_WINOGRAD || - param.format == Param::Format::NCHW44_WINOGRAD) { - flt_start = 3; - } auto filter_oc = filter[flt_start]; auto filter_ic = filter[flt_start + 1]; - if (filter_oc == 1 && filter_ic == 1 && filter.ndim == (img_ndim + 4) && - param.format != Param::Format::NCHW88_WINOGRAD && - param.format != Param::Format::NCHW44_WINOGRAD) { + if (filter_oc == 1 && filter_ic == 1 && filter.ndim == (img_ndim + 4)) { // Depthwise case goihw8g megdnn_assert(filter.ndim == img_ndim + 4, "bad filter ndim for group convolution: " @@ -416,17 +350,7 @@ void make_canonized_filter_meta_nchwxx( "NCHWXX has invalid dilation on spatial dim %zu: %u, " "require to be 1", i, dilation[i]); - if (param.format == Param::Format::NCHW88_WINOGRAD) { - ret.spatial[i] = - spatial_getter( - filter[i + flt_start - 2], param); - } else if (param.format == Param::Format::NCHW44_WINOGRAD) { - ret.spatial[i] = - spatial_getter( - filter[i + flt_start - 2], param); - } else { - ret.spatial[i] = filter[i + flt_start + flt_spatial_start]; - } + ret.spatial[i] = filter[i + flt_start + flt_spatial_start]; ret.dilated_spatial[i] = (ret.spatial[i] - 1) * dilation[i] + 1; } } @@ -579,13 +503,11 @@ ConvolutionBase::make_canonized_filter_meta( } else if (param().format == Param::Format::NCHW8) { make_canonized_filter_meta_nchwx<8, Parameter>(src_ndim, filter, param(), ret); - } else if (param().format == Param::Format::NCHW88 || - param().format == Param::Format::NCHW88_WINOGRAD) { + } else if (param().format == Param::Format::NCHW88) { make_canonized_filter_meta_nchwxx<8, Parameter>(src_ndim, filter, param(), ret); } else if (param().format == Param::Format::NCHW44 || - param().format == Param::Format::NCHW44_DOT || - param().format == Param::Format::NCHW44_WINOGRAD) { + param().format == Param::Format::NCHW44_DOT) { make_canonized_filter_meta_nchwxx<4, Parameter>(src_ndim, filter, param(), 
ret); } else if (param().format == Param::Format::NCHW32 || @@ -597,8 +519,7 @@ ConvolutionBase::make_canonized_filter_meta( param(), ret); } else { megdnn_assert(param().format == Param::Format::NHWC || - param().format == Param::Format::NCHW || - param().format == Param::Format::NCHW_WINOGRAD); + param().format == Param::Format::NCHW); make_canonized_filter_meta_nchw_nhwc(src_ndim, filter, param(), ret); } @@ -619,17 +540,8 @@ void ConvolutionBase::check_or_deduce_dtype_fwd(DType src, } else if (src.enumv() == DTypeEnum::QuantizedS8 || src.enumv() == DTypeEnum::Quantized8Asymm || src.enumv() == DTypeEnum::Quantized4Asymm) { - //! Qint8 winograd compute with float, in order to bringing the filter - //! scale, here just use QuantizedS32 as filter type. - if (src.enumv() == DTypeEnum::QuantizedS8 && - filter.enumv() == DTypeEnum::QuantizedS32) { - supported_dst_dtype.push_back(dtype::QuantizedS32( - src.param().scale * - filter.param().scale)); - } else { - supported_dst_dtype.push_back( - dtype::QuantizedS32(mul_scale(src, filter))); - } + supported_dst_dtype.push_back( + dtype::QuantizedS32(mul_scale(src, filter))); if (dst.valid() && dst.enumv() == src.enumv()) { supported_dst_dtype.push_back(dst); } @@ -681,24 +593,12 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, megdnn_assert_contiguous(src); megdnn_assert_contiguous(filter); megdnn_assert(src.ndim >= 3_z, "%s", errmsg().c_str()); - if ((param().format == Param::Format::NCHW_WINOGRAD || - param().format == Param::Format::NCHW44_WINOGRAD) && - src.dtype.category() == DTypeCategory::QUANTIZED) { - megdnn_assert((filter.dtype.enumv() == DTypeEnum::QuantizedS16 || - filter.dtype.enumv() == DTypeEnum::QuantizedS32), - "%s", errmsg().c_str()); - megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 || - src.dtype.enumv() == DTypeEnum::Quantized8Asymm, - "%s", errmsg().c_str()); - } else { - megdnn_assert(src.dtype.enumv() == filter.dtype.enumv(), "%s", - errmsg().c_str()); - } + 
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv(), "%s", + errmsg().c_str()); check_or_deduce_dtype_fwd(src.dtype, filter.dtype, dst.dtype); size_t img_dim; if (param().format == Param::Format::NCHW || - param().format == Param::Format::NHWC || - param().format == Param::Format::NCHW_WINOGRAD) { + param().format == Param::Format::NHWC) { img_dim = src.ndim - 2; megdnn_assert(filter.ndim >= img_dim + 2 && filter.ndim <= img_dim + 6, "%s", errmsg().c_str()); @@ -714,8 +614,6 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, param().format == Param::Format::NCHW32 || param().format == Param::Format::NCHW32_NCHW4 || param().format == Param::Format::NCHW88 || - param().format == Param::Format::NCHW88_WINOGRAD || - param().format == Param::Format::NCHW44_WINOGRAD || param().format == Param::Format::CHWN4); img_dim = src.ndim - 3; if ((param().format == Param::Format::NCHW88 || @@ -770,8 +668,7 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, "but got src %s, filter %s", src.to_string().c_str(), filter.to_string().c_str()); } - if (param().format == Param::Format::NCHW88 || - param().format == Param::Format::NCHW88_WINOGRAD) { + if (param().format == Param::Format::NCHW88) { megdnn_assert((src.ndim == 4 && filter.ndim == 5 && filter[filter.ndim - 1] == 8) || (src.ndim == 5 && @@ -786,8 +683,7 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, src.to_string().c_str(), filter.to_string().c_str()); } if (param().format == Param::Format::NCHW44 || - param().format == Param::Format::NCHW44_DOT || - param().format == Param::Format::NCHW44_WINOGRAD) { + param().format == Param::Format::NCHW44_DOT) { //!support nchw44 filter change to 88 for int8 winogradf23_88 using MK8 mamtul megdnn_assert((src.ndim == 4 && filter.ndim == 5 && filter[filter.ndim - 1] == 4) || @@ -820,12 +716,10 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, "currently only convolution on 2D image is supported"); auto cflt = 
make_canonized_filter_meta(src.ndim, filter); if (param().format == Param::Format::NCHW || - param().format == Param::Format::NHWC || - param().format == Param::Format::NCHW_WINOGRAD) { + param().format == Param::Format::NHWC ) { size_t src_or_dst_c_pos = 0; size_t src_or_dst_spatial_start = 0; - if (param().format == Param::Format::NCHW || - param().format == Param::Format::NCHW_WINOGRAD) { + if (param().format == Param::Format::NCHW) { src_or_dst_c_pos = 1; src_or_dst_spatial_start = 2; } else { @@ -836,10 +730,6 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, } megdnn_assert(cflt.icpg * cflt.group == src[src_or_dst_c_pos], "%s", errmsg().c_str()); - if (param().format == Param::Format::NCHW_WINOGRAD) { - megdnn_assert(cflt.spatial[0] == cflt.spatial[1], - "NCHW_WINOGRAD only support conv with fh == fw"); - } dst.ndim = src.ndim; dst[0] = src[0]; dst[src_or_dst_c_pos] = cflt.ocpg * cflt.group; @@ -900,8 +790,7 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, dst[3] = infer_conv_shape(src[3], cflt.dilated_spatial[1], cflt.stride[1], cflt.padding[1]); dst[4] = 32; - } else if (param().format == Param::Format::NCHW88 || - param().format == Param::Format::NCHW88_WINOGRAD) { + } else if (param().format == Param::Format::NCHW88 ) { megdnn_assert(src.ndim == 5 || (src.ndim == 4 && src[1] <= 8), "invalid src ndim for NCHW88, expected=5 or 4, got=%zu", src.ndim); @@ -923,8 +812,7 @@ ConvolutionBase::deduce_layout_fwd(const TensorLayout& src, } } else if (param().format == Param::Format::NCHW44 || - param().format == Param::Format::NCHW44_DOT || - param().format == Param::Format::NCHW44_WINOGRAD) { + param().format == Param::Format::NCHW44_DOT) { megdnn_assert(src.ndim == 5 || (src.ndim == 4 && src[1] <= 4), "invalid src ndim for NCHW44, expected=5 or 4, got=%zu", src.ndim); diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 17d217c8..5adb79de 100644 --- a/dnn/src/common/handle_impl.h +++ 
b/dnn/src/common/handle_impl.h @@ -189,7 +189,6 @@ private: cb(RelayoutFormat) \ cb(TopK) \ cb(PowC) \ - cb(WinogradFilterPreprocess) \ cb(LocalShareForward) \ cb(LocalShareBackwardData) \ cb(LocalShareBackwardFilter) \ diff --git a/dnn/src/common/winograd_filter_preprocess.cpp b/dnn/src/common/winograd_filter_preprocess.cpp deleted file mode 100644 index 486aadb6..00000000 --- a/dnn/src/common/winograd_filter_preprocess.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/** - * \file dnn/src/common/winograd_filter_preprocess.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -#include "megdnn/oprs.h" - -#include -#include "src/common/utils.h" - -using namespace megdnn; -void WinogradFilterPreprocess::deduce_layout(const TensorLayout& src, - TensorLayout& dst) { - auto errmsg = [&]() { - return "invalid filter layout:" + megdnn_layout_msg(src); - }; - MEGDNN_MARK_USED_VAR(errmsg); - //! NCHW88 weight layout include - //! dense{oc/8, ic/8, fh, fw, 8, 8}; group {g, oc/8, ic/8, fh, fw, 8, 8}; - //! channel wise{g/8, 1, 1, fh, fw, 8} - megdnn_assert( - src.ndim == 4 || src.ndim == 5 || src.ndim == 6 || src.ndim == 7, - "%s", errmsg().c_str()); - //! nchw88 channel wise conv - megdnn_assert(!(src.ndim == 6 && src[1] == 1 && src[2] == 1), - "chennel wise nchw88 can not use winograd "); - //! nchw88 group conv - size_t flt_start = 0; - size_t pack_c_size = 1; - size_t group = 1; - //! group conv - if (src.ndim == 5) { - flt_start = 1; - group = src[0]; - //! nchw88 dense conv - } else if (src.ndim == 6) { - pack_c_size = src[5]; - //! 
nchw88 group conv - } else if (src.ndim == 7) { - flt_start = 1; - group = src[0]; - pack_c_size = src[6]; - } - size_t OC = src[flt_start] * pack_c_size, - IC = src[flt_start + 1] * pack_c_size, FH = src[flt_start + 2], - FW = src[flt_start + 3]; - size_t m = param().output_block_size; - megdnn_assert(FH == FW, "%s", errmsg().c_str()); - - size_t alpha = FH + m - 1; - DType dst_type = src.dtype; - if (src.dtype.category() == DTypeCategory::QUANTIZED) { - megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8); - if (param().compute_mode == - param::ConvBias::ComputeMode::DEFAULT) { - //! input int8 compute short - dst_type = dtype::QuantizedS16( - src.dtype.param().scale); - } else { - //! input int8 compute float32 - dst_type = dtype::QuantizedS32( - src.dtype.param().scale); - } - } - - if (src.ndim == 4 || src.ndim == 6) { - if (param().format == param::Winograd::Format::DEFAULT) { - dst = TensorLayout({alpha, alpha, IC, OC}, dst_type); - } else { - megdnn_assert(param().format == param::Winograd::Format::MK4 || - param().format == param::Winograd::Format::MK8); - size_t pack_size = MatrixMulForward::pack_size(param().format); - dst = TensorLayout({alpha, alpha, OC / pack_size, IC / pack_size, - pack_size, pack_size}, - dst_type); - } - } else { - megdnn_assert(src.ndim == 5 || src.ndim == 7); - if (param().format == param::Winograd::Format::DEFAULT) { - dst = TensorLayout({group, alpha, alpha, IC, OC}, dst_type); - } else { - megdnn_assert(param().format == param::Winograd::Format::MK4 || - param().format == param::Winograd::Format::MK8); - size_t pack_size = MatrixMulForward::pack_size(param().format); - dst = TensorLayout({group, alpha, alpha, OC / pack_size, - IC / pack_size, pack_size, pack_size}, - dst_type); - } - } -} - -void WinogradFilterPreprocess::check_exec(const TensorLayout& src, - const TensorLayout& dst, - size_t workspace_in_bytes) { - auto errmsg = [&]() { - return megdnn_layout_msg(src) + ", " + megdnn_layout_msg(dst); - }; - 
MEGDNN_MARK_USED_VAR(errmsg); - megdnn_assert_contiguous(src); - megdnn_assert_contiguous(dst); - //! nchwxx now only support Format MKx - if (param().format == param::Winograd::Format::DEFAULT) { - megdnn_assert(src.ndim == dst.ndim && (src.ndim == 4 || src.ndim == 5), - "%s", errmsg().c_str()); - } else { - megdnn_assert( - (param().format == param::Winograd::Format::MK4 || - param().format == param::Winograd::Format::MK8) && - (src.ndim == dst.ndim - 2 || src.ndim == dst.ndim) && - (src.ndim == 4 || src.ndim == 5 || src.ndim == 6 || - src.ndim == 7), - "%s", errmsg().c_str()); - } - - TensorLayout dst_expected; - deduce_layout(src, dst_expected); - megdnn_assert_eq_layout(dst_expected, dst); - auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst); - megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); -} - -size_t WinogradFilterPreprocess::get_workspace_in_bytes( - const TensorLayout& src, const TensorLayout& dst) { - MEGDNN_MARK_USED_VAR(dst); - DType output_compute_dtype = src.dtype; - if (src.dtype.category() == DTypeCategory::QUANTIZED) { - megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 || - src.dtype.enumv() == DTypeEnum::Quantized8Asymm); - if (param().compute_mode == - param::ConvBias::ComputeMode::DEFAULT) { - //! input int8 compute short - output_compute_dtype = dtype::QuantizedS16( - src.dtype.param().scale); - } else { - //! 
input int8 compute float32 - output_compute_dtype = dtype::QuantizedS32( - src.dtype.param().scale); - } - } - - size_t FW = src[3]; - if (src.ndim == 5 || src.ndim == 7) { - FW = src[4]; - } - - size_t pack_size = MatrixMulForward::pack_size(param().format); - size_t alpha = param().output_block_size + FW - 1; - return 2 * alpha * alpha * output_compute_dtype.size() * pack_size * - pack_size; -} - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/handle_create.cpp b/dnn/src/cuda/handle_create.cpp index 1dc4015d..af4949fb 100644 --- a/dnn/src/cuda/handle_create.cpp +++ b/dnn/src/cuda/handle_create.cpp @@ -72,7 +72,6 @@ #include "src/cuda/type_cvt/opr_impl.h" #include "src/cuda/warp_affine/opr_impl.h" #include "src/cuda/warp_perspective/opr_impl.h" -#include "src/cuda/winograd_filter_preprocess/opr_impl.h" #include "src/cuda/local_share/opr_impl.h" #include "src/cuda/roi_align/opr_impl.h" #include "src/cuda/batch_conv_bias/opr_impl.h" diff --git a/dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp b/dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp deleted file mode 100644 index 9bad1877..00000000 --- a/dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp +++ /dev/null @@ -1,22 +0,0 @@ -/** - * \file dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ -#include "src/cuda/winograd_filter_preprocess/opr_impl.h" -#include "src/common/utils.h" - -using namespace megdnn; -using namespace cuda; - -void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in, _megdnn_tensor_in, - _megdnn_workspace) { - megdnn_throw("WinogradFilterPreprocess is not supported in CUDA"); -} - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/winograd_filter_preprocess/opr_impl.h b/dnn/src/cuda/winograd_filter_preprocess/opr_impl.h deleted file mode 100644 index 19e60490..00000000 --- a/dnn/src/cuda/winograd_filter_preprocess/opr_impl.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - * \file dnn/src/cuda/winograd_filter_preprocess/opr_impl.h - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ -#pragma once -#include "megdnn/oprs.h" - -namespace megdnn { -namespace cuda { - -class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess { -public: - using WinogradFilterPreprocess::WinogradFilterPreprocess; - void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; -}; - -} // namespace cuda -} // namespace megdnn - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/conv_bias/algos.cpp b/dnn/src/fallback/conv_bias/algos.cpp index af26286f..dd5d5534 100644 --- a/dnn/src/fallback/conv_bias/algos.cpp +++ b/dnn/src/fallback/conv_bias/algos.cpp @@ -259,12 +259,7 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( strategy, UNIT_TILE_SIZE, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -329,12 +324,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( strategy, UNIT_TILE_SIZE, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK4)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -397,12 +387,7 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) 
&& - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::DEFAULT)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -467,12 +452,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( strategy, UNIT_TILE_SIZE, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW || - (param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK8)) && + param.filter_meta.format == param::ConvBias::Format::NCHW && param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && diff --git a/dnn/src/fallback/conv_bias/opr_impl.cpp b/dnn/src/fallback/conv_bias/opr_impl.cpp index b0b5e2d9..0c3f6f88 100644 --- a/dnn/src/fallback/conv_bias/opr_impl.cpp +++ b/dnn/src/fallback/conv_bias/opr_impl.cpp @@ -342,10 +342,7 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( param().format == Param::Format::NCHW4 || param().format == Param::Format::NCHW44 || param().format == Param::Format::NCHW44_DOT || - param().format == Param::Format::NCHW || - param().format == Param::Format::NCHW_WINOGRAD || - param().format == Param::Format::NCHW88_WINOGRAD || - param().format == Param::Format::NCHW44_WINOGRAD) { + param().format == Param::Format::NCHW) { spatial_pos = 2; } else if (param().format == Param::Format::NHWC) { spatial_pos = 1; @@ -370,25 +367,7 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( "should be equal"); auto&& fm = check_layout_fwd(src, 
filter, dst); auto& conv_fm = reinterpret_cast(fm); - - param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT; - if (param().format == Param::Format::NCHW_WINOGRAD || - param().format == Param::Format::NCHW88_WINOGRAD || - param().format == Param::Format::NCHW44_WINOGRAD) { - size_t flt_start = 0; - if (param().sparse == Param::Sparse::GROUP) { - flt_start = 1; - } - - if (filter.ndim == 6 + flt_start) { - if (filter[5] == 4) { - format = param::MatrixMul::Format::MK4; - } else { - megdnn_assert(filter[5] == 8); - format = param::MatrixMul::Format::MK8; - } - } - } + size_t nr_threads = static_cast(handle()) ->megcore_dispatcher() ->nr_threads(); @@ -407,8 +386,6 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( nr_threads, reinterpret_cast( preprocessed_filter)}, - param().output_block_size, - format, bias.dtype, bias.stride[0], bias_mode, @@ -537,11 +514,7 @@ SmallVector ConvBiasImpl::suggest_algo_category_order( auto FH = param.filter_meta.spatial[0]; auto FW = param.filter_meta.spatial[1]; //! TODO: now winograd only support in fast-run - if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD || - param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD || - param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) { - return {AlgoCategory::WINOGRAD}; - } + //! im2col + matmul bool im2col_prefer = (IC >= 32 || OC >= 32); //! quantized algo use matmul when direct algo is unusable @@ -632,21 +605,6 @@ const T* ConvBiasImpl::NCBKernParam::filter(size_t group_pack_id, break; } - case ConvBiasImpl::Param::Format::NCHW_WINOGRAD: - case ConvBiasImpl::Param::Format::NCHW44_WINOGRAD: - case ConvBiasImpl::Param::Format::NCHW88_WINOGRAD: { - //! four format of weight layout - //! 1. {g, alpha, alpha, ocpg/8, icpg/8, 8, 8} - //! 2. {alpha, alpha, ocpg/8, icpg/8, 8, 8} - //! 3. {g, alpha, alpha, oc, ic, 8, 8} - //! 4. 
{alpha, alpha, oc, ic} - group_offset = pack_group_size * group_pack_id * filter_meta.icpg * - filter_meta.ocpg * - (filter_meta.spatial[0] + output_block_size - 1) * - (filter_meta.spatial[1] + output_block_size - 1) * - filter_type.size(); - break; - } default: megdnn_assert(0, "other filter format is not support yet"); } diff --git a/dnn/src/fallback/conv_bias/opr_impl.h b/dnn/src/fallback/conv_bias/opr_impl.h index 7ac49b0b..40d26d40 100644 --- a/dnn/src/fallback/conv_bias/opr_impl.h +++ b/dnn/src/fallback/conv_bias/opr_impl.h @@ -103,19 +103,13 @@ public: struct NCBKernSizeParam : ConvolutionImpl::NCBKernSizeParam { NCBKernSizeParam() = default; NCBKernSizeParam(const ConvolutionImpl::NCBKernSizeParam& param, - size_t output_block_size, - param::MatrixMul::Format winograd_matmul_format, DType bias_type, ptrdiff_t bias_bs, BiasMode bias_mode, Param::NonlineMode nonlineMode) : ConvolutionImpl::NCBKernSizeParam(param), - output_block_size{output_block_size}, - winograd_matmul_format{winograd_matmul_format}, bias_type{bias_type}, bias_bs{bias_bs}, bias_mode{bias_mode}, nonlineMode{nonlineMode} {} - size_t output_block_size; //!< used in winograd algo - param::MatrixMul::Format winograd_matmul_format; DType bias_type; //! stride for batch of bias ptrdiff_t bias_bs; diff --git a/dnn/src/fallback/conv_bias/winograd/winograd.h b/dnn/src/fallback/conv_bias/winograd/winograd.h index fda01133..2484d3dd 100644 --- a/dnn/src/fallback/conv_bias/winograd/winograd.h +++ b/dnn/src/fallback/conv_bias/winograd/winograd.h @@ -88,13 +88,7 @@ class ConvBias { size_t filter_transform_buf_size = 0; //! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE, //! 
OC_BLOCK_SIZE) - if (param.preprocessed_filter == nullptr && - param.filter_meta.format != - param::ConvBias::Format::NCHW_WINOGRAD && - param.filter_meta.format != - param::ConvBias::Format::NCHW88_WINOGRAD && - param.filter_meta.format != - param::ConvBias::Format::NCHW44_WINOGRAD) { + if (param.preprocessed_filter == nullptr) { filter_transform_buf_size = Strategy::ALPHA * Strategy::ALPHA * OC * IC * sizeof(input_filter_compute_type); } @@ -108,12 +102,7 @@ class ConvBias { nullptr, {winograd_comput_size, filter_transform_buf_size * GROUP}); } else { - megdnn_assert(param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD || - param.filter_meta.format == - param::ConvBias::Format::NCHW88_WINOGRAD || - param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD); + megdnn_assert(param.preprocessed_filter != nullptr); return WorkspaceBundle(nullptr, {winograd_comput_size}); } } @@ -499,7 +488,6 @@ public: const TensorND& preprocessed_dst = param.preprocessed_filter->tensors[0]; WorkspaceBundle bundle = get_preprocess_wbundle(param); - Strategy strategy = m_strategy; SmallVector kerns; auto filter_process_kern = @@ -558,13 +546,7 @@ public: param.filter_meta.stride[1] == 1 && (param.filter_meta.format == param::ConvBias::Format::NCHW || param.filter_meta.format == param::ConvBias::Format::NCHW88 || - param.filter_meta.format == param::ConvBias::Format::NCHW44 || - param.filter_meta.format == - param::ConvBias::Format::NCHW_WINOGRAD || - param.filter_meta.format == - param::ConvBias::Format::NCHW88_WINOGRAD || - param.filter_meta.format == - param::ConvBias::Format::NCHW44_WINOGRAD)); + param.filter_meta.format == param::ConvBias::Format::NCHW44)); SmallVector kerns; if (param.preprocessed_filter == nullptr && diff --git a/dnn/src/fallback/convolution/algos.cpp b/dnn/src/fallback/convolution/algos.cpp index 36cb709b..f8dca2c5 100644 --- a/dnn/src/fallback/convolution/algos.cpp +++ b/dnn/src/fallback/convolution/algos.cpp @@ -316,8 +316,6 @@ 
ConvolutionImpl::AlgoDefault::init_conv_bias_param( mul_scale(param.src_type, param.filter_type)); } return {param, - 0, - param::MatrixMul::Format::DEFAULT, bias_type, 0, BiasMode::NO_BIAS, diff --git a/dnn/src/fallback/convolution/opr_impl.cpp b/dnn/src/fallback/convolution/opr_impl.cpp index ec7352ef..4a2ed249 100644 --- a/dnn/src/fallback/convolution/opr_impl.cpp +++ b/dnn/src/fallback/convolution/opr_impl.cpp @@ -225,8 +225,7 @@ ConvolutionImpl::NCBKernSizeParam ConvolutionImpl::make_ncb_kern_size_param( param().format == Param::Format::NCHW44_DOT || param().format == Param::Format::NCHW44) { spatial_pos = 2; - } else if (param().format == Param::Format::NCHW || - param().format == Param::Format::NCHW_WINOGRAD) { + } else if (param().format == Param::Format::NCHW) { spatial_pos = 2; } else if (param().format == Param::Format::NHWC) { spatial_pos = 1; diff --git a/dnn/src/naive/handle.cpp b/dnn/src/naive/handle.cpp index 2f235ef8..4a91dbf9 100644 --- a/dnn/src/naive/handle.cpp +++ b/dnn/src/naive/handle.cpp @@ -78,7 +78,6 @@ #include "src/naive/type_cvt/opr_impl.h" #include "src/naive/warp_affine/opr_impl.h" #include "src/naive/warp_perspective/opr_impl.h" -#include "src/naive/winograd_filter_preprocess/opr_impl.h" #include "src/naive/remap/opr_impl.h" #include "src/naive/fake_quant/opr_impl.h" diff --git a/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp b/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp deleted file mode 100644 index 148440e4..00000000 --- a/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp +++ /dev/null @@ -1,234 +0,0 @@ -/** - * \file dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
- * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. - */ - -#include "src/naive/winograd_filter_preprocess/opr_impl.h" -#include "src/common/utils.h" -#include "src/common/winograd/winograd_helper.h" -#include "src/naive/handle.h" - -#include "midout.h" -MIDOUT_DECL(megdnn_naive_winograd_filter_preprocess) - -using namespace megdnn; -using namespace naive; - -void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, - _megdnn_tensor_out dst, - _megdnn_workspace workspace) { - check_exec(src.layout, dst.layout, workspace.size); - - //! nchw88 group conv - size_t flt_start = 0; - size_t pack_c_size = 1; - size_t group = 1; - //! group conv - if (src.layout.ndim == 5) { - flt_start = 1; - group = src.layout[0]; - //! nchw88 dense conv - } else if (src.layout.ndim == 6) { - pack_c_size = src.layout[5]; - //! nchw88 group conv - } else if (src.layout.ndim == 7) { - flt_start = 1; - group = src.layout[0]; - pack_c_size = src.layout[6]; - } - size_t OC = src.layout[flt_start] * pack_c_size, - IC = src.layout[flt_start + 1] * pack_c_size, - FW = src.layout[flt_start + 3]; - - size_t m = param().output_block_size; - - bool execed = false; - -#define cb(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format, rescale) \ - if (param().format == _format) { \ - return winograd::StrategyHelper< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::ConvBias::Format::NCHW, \ - _format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ - OC, m, FW, interp_points, src.layout.dtype, \ - rescale); \ - } - -#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _rescale) \ - cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ - param::Winograd::Format::DEFAULT, _rescale); \ - 
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ - param::Winograd::Format::MK4, _rescale); - -#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _rescale) \ - cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ - param::Winograd::Format::DEFAULT, _rescale); \ - cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ - param::Winograd::Format::MK8, _rescale); - -#define DISPATCH_KERNEL(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _kern, _rescale, ...) \ - const _ctype* src_ptr = src.compatible_ptr<_ctype>(); \ - _input_filter_compute_type* dst_ptr = \ - dst.compatible_ptr<_input_filter_compute_type>(); \ - _input_filter_compute_type* workspace_ptr = \ - workspace.ptr<_input_filter_compute_type>(); \ - MIDOUT_BEGIN(megdnn_naive_winograd_filter_preprocess, ##__VA_ARGS__) { \ - for (size_t g = 0; g < group; g++) { \ - auto run = [=]() { \ - _kern(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _rescale); \ - }; \ - MEGDNN_DISPATCH_CPU_KERN_OPR(run()); \ - src_ptr += src.layout.stride[0]; \ - dst_ptr += dst.layout.stride[0]; \ - } \ - execed = true; \ - } \ - MIDOUT_END(); - -#define DISPATCH_DTYPE(_midout_tag) \ - if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \ - DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ - DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \ - } \ - if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { \ - DISPATCH_KERNEL(dt_int8, dt_int8, dt_int16, dt_int32, \ - DISPATCH_FORMAT_MK8, 2.0f, _midout_tag, 1); \ - } \ - MEGDNN_INC_FLOAT16(if (src.layout.dtype.enumv() == DTypeEnum::Float16) { \ - DISPATCH_KERNEL(dt_float16, dt_float16, dt_float16, dt_float16, \ - DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 2); \ - }) - - if (src.layout.ndim <= 5) { - //! dispatch_dtype with consider layout and format. 
- if (FW == 3) { - if (m == 2) { - std::vector interp_points = {0, 1, -1}; - DISPATCH_DTYPE(0); - } else if (m == 6) { - std::vector interp_points = {0, 1, -1, 2, -2, 0.5, -0.5}; - DISPATCH_DTYPE(1); - } - } else if (FW == 4) { - if (m == 5) { - std::vector interp_points = {0, 0.5, -0.5, 1, -1, 2, -2}; - DISPATCH_DTYPE(2); - } - } else if (FW == 5) { - if (m == 4) { - std::vector interp_points = {0, 1, -1, 0.5, -0.5, 2, -2}; - DISPATCH_DTYPE(3); - } - } -#undef cb -#undef DISPATCH_FORMAT_MK4 -#undef DISPATCH_FORMAT_MK8 -#undef DISPATCH_DTYPE - } else { - megdnn_assert(src.layout.ndim == 6 || src.layout.ndim == 7); -#define cb(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format, rescale) \ - if (param().format == _format) { \ - return winograd::StrategyHelper< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::ConvBias::Format::NCHW88, \ - _format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ - OC, m, FW, interp_points, src.layout.dtype, \ - rescale); \ - } - -#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _rescale) \ - cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ - param::Winograd::Format::MK8, _rescale); - -#define DISPATCH_DTYPE(_midout_tag) \ - if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \ - DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ - DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 0); \ - } - if (pack_c_size == 8) { //! NCHW88 - if (FW == 3) { - if (m == 2) { - std::vector interp_points = {0, 1, -1}; - DISPATCH_DTYPE(4); - } else if (m == 6) { - std::vector interp_points = {0, 1, -1, 2, - -2, 0.5, -0.5}; - DISPATCH_DTYPE(5); - } - } -#undef cb -#undef DISPATCH_DTYPE - } - else if (pack_c_size == 4) { //! 
NCHW44 -#define cb(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format, rescale) \ - if (param().format == _format) { \ - return winograd::StrategyHelper< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::ConvBias::Format::NCHW44, \ - _format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ - OC, m, FW, interp_points, src.layout.dtype, \ - rescale); \ - } - -#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _rescale) \ - cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ - param::Winograd::Format::MK4, _rescale); - -#define DISPATCH_DTYPE(_midout_tag) \ - if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \ - DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ - DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \ - } \ - if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { \ - if (param().format == param::Winograd::Format::MK4) { \ - DISPATCH_KERNEL(dt_int8, dt_int8, dt_float32, dt_float32, \ - DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \ - } else if (param().format == param::Winograd::Format::MK8) { \ - DISPATCH_KERNEL(dt_int8, dt_int8, dt_int16, dt_int32, \ - DISPATCH_FORMAT_MK8, 2.0f, _midout_tag, 0); \ - } \ - } - if (FW == 3) { - if (m == 2) { - std::vector interp_points = {0, 1, -1}; - DISPATCH_DTYPE(6); - } else if (m == 6) { - std::vector interp_points = {0, 1, -1, 2, - -2, 0.5, -0.5}; - DISPATCH_DTYPE(7); - } else if (m == 7) { - std::vector interp_points = {0, 1, -1, 2, - -2, 0.5, -0.5, 1.5}; - DISPATCH_DTYPE(8); - } - } -#undef cb -#undef DISPATCH_FORMAT_MK8 -#undef DISPATCH_FORMAT_MK4 -#undef DISPATCH_KERNEL -#undef DISPATCH_DTYPE - } - } - - megdnn_assert(execed, - "Unsupport winograd filter preprocess. 
m: %zu src: %s", m, - src.layout.to_string().c_str()); -} - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/naive/winograd_filter_preprocess/opr_impl.h b/dnn/src/naive/winograd_filter_preprocess/opr_impl.h deleted file mode 100644 index 3ec9652c..00000000 --- a/dnn/src/naive/winograd_filter_preprocess/opr_impl.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * \file dnn/src/naive/winograd_filter_preprocess/opr_impl.h - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -#pragma once -#include "megdnn/oprs.h" -#include "src/common/utils.h" - -namespace megdnn { -namespace naive { - -class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess { -public: - using WinogradFilterPreprocess::WinogradFilterPreprocess; - void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, - _megdnn_workspace workspace) override; -}; - -} // namespace naive -} // namespace megdnn - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/x86/conv_bias/f32/winograd_algo.cpp b/dnn/src/x86/conv_bias/f32/winograd_algo.cpp index 95dd34a5..39468312 100644 --- a/dnn/src/x86/conv_bias/f32/winograd_algo.cpp +++ b/dnn/src/x86/conv_bias/f32/winograd_algo.cpp @@ -43,12 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW88 || - (param.filter_meta.format == - param::ConvBias::Format::NCHW88_WINOGRAD && - param.output_block_size == 6 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK8)) && + param.filter_meta.format == param::ConvBias::Format::NCHW88 && !param.filter_meta.should_flip && 
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && @@ -89,12 +84,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (param.filter_meta.format == param::ConvBias::Format::NCHW88 || - (param.filter_meta.format == - param::ConvBias::Format::NCHW88_WINOGRAD && - param.output_block_size == 2 && - param.winograd_matmul_format == - param::MatrixMul::Format::MK8)) && + param.filter_meta.format == param::ConvBias::Format::NCHW88 && !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && diff --git a/dnn/src/x86/conv_bias/opr_impl.cpp b/dnn/src/x86/conv_bias/opr_impl.cpp index 36fa51f1..aa0180c5 100644 --- a/dnn/src/x86/conv_bias/opr_impl.cpp +++ b/dnn/src/x86/conv_bias/opr_impl.cpp @@ -173,11 +173,7 @@ SmallVector ConvBiasImpl::suggest_algo_category_order( auto FH = param.filter_meta.spatial[0]; auto FW = param.filter_meta.spatial[1]; //! TODO: now winograd only support fast-run - if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD || - param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD || - param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) { - return {AlgoCategory::WINOGRAD}; - } + //! 
nchw88 use mkl-dnn which algo is direct if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { return {AlgoCategory::DIRECT, AlgoCategory::IM2COL}; diff --git a/dnn/test/arm_common/conv_bias_multi_thread.cpp b/dnn/test/arm_common/conv_bias_multi_thread.cpp index 08b25268..3fe6e395 100644 --- a/dnn/test/arm_common/conv_bias_multi_thread.cpp +++ b/dnn/test/arm_common/conv_bias_multi_thread.cpp @@ -629,6 +629,35 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_8x8x32) { #endif +TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { + using namespace conv_bias; + std::vector args = get_winograd_args(3); + + Checker checker(handle()); + + auto run = [&checker](const std::vector& args, DType A_dtype, + DType B_dtype, DType C_dtype, DType D_dtype, + const float eps) { + for (auto&& arg : args) { + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); + } + }; + run(args, dtype::Float32(), dtype::Float32(), dtype::Float32(), + dtype::Float32(), 1e-3f); +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); + checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); + run(args, dtype::Float16(), dtype::Float16(), dtype::Float16(), + dtype::Float16(), 0.35f); +#endif +} + TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) { using namespace conv_bias; std::vector args = get_winograd_mk_packed_args(); @@ -717,207 +746,97 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { check_winograd("1:4:32", checker, args); } - - -TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { - using namespace conv_bias; - std::vector args = get_winograd_args(3); - - Checker checker(handle()); - - auto extra_impl = [](const TensorNDArray& tensors, uint32_t m, - param::ConvBias param, Handle* handle) { - megdnn_assert(param.format == 
param::ConvBias::Format::NCHW); - auto winograd_preprocess_opr = - handle->create_operator(); - winograd_preprocess_opr->param().output_block_size = m; - TensorLayout filter_transform_layout; - winograd_preprocess_opr->deduce_layout(tensors[1].layout, - filter_transform_layout); - size_t winograd_preprocess_workspace_in_bytes = - winograd_preprocess_opr->get_workspace_in_bytes( - tensors[1].layout, filter_transform_layout); - - auto conv_bias_opr = handle->create_operator(); - conv_bias_opr->param() = param; - conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD; - conv_bias_opr->param().output_block_size = m; - size_t conv_bias_workspace_in_bytes = - conv_bias_opr->get_workspace_in_bytes( - tensors[0].layout, filter_transform_layout, - tensors[2].layout, tensors[3].layout, tensors[4].layout, - nullptr); - - WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), - conv_bias_workspace_in_bytes, - winograd_preprocess_workspace_in_bytes}); - wb.set(malloc(wb.total_size_in_bytes())); - - TensorND filter_transform_tensor(wb.get(0), - std::move(filter_transform_layout)); - winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, - wb.get_workspace(2)); - conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], nullptr, - wb.get_workspace(1)); - - free(wb.ptr()); - }; - - auto run = [&checker, &extra_impl]( - Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, - DType B_dtype, DType C_dtype, DType D_dtype, - const float eps) { - for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind(extra_impl, - std::placeholders::_1, m, - arg.param, handle)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } - } - }; - run(handle(), args, {6}, dtype::Float32(), 
dtype::Float32(), - dtype::Float32(), dtype::Float32(), 1e-3f); -#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); - checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); - run(handle(), args, {6}, dtype::Float16(), dtype::Float16(), - dtype::Float16(), dtype::Float16(), 0.35f); -#endif -} - - - TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; std::vector args = get_winograd_mk_packed_args(8); std::vector args_first_half(args.begin(), args.begin() + args.size() / 2); - run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, - dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, - 1e-3f); + run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, + dtype::Float32{}, 1e-3f); } - - TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run 
= [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; std::vector args = get_winograd_mk_packed_args(8); std::vector args_second_half(args.begin() + args.size() / 2, args.end()); - run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, - dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, - 1e-3f); + run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, + dtype::Float32{}, 1e-3f); } - - #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + 
checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; std::vector args = get_winograd_mk_packed_args(8); Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); - run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{}, - dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, - 0.25); + run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{}, + dtype::Float16{}, 0.25); } - #endif TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; @@ -933,24 +852,19 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { get_quantized_winograd_mk_packed_args(8); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), - dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), - 
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); + run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), + dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); } TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, + DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); checker.set_dtype(0, A_dtype) .set_dtype(1, B_dtype) .set_dtype(2, C_dtype) @@ -958,7 +872,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) { .set_epsilon(eps) .set_param(arg.param) .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } } }; @@ -973,118 +886,99 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) { std::vector quantized_args = get_int8_nchw44_args(3, 4); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), + run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), - dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); + dtype::QuantizedS8(60.25f),1e-3); } TEST_F(ARM_COMMON_MULTI_THREADS, - CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) { + CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType 
B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; + float epsilon = 0.001; #if MEGDNN_AARCH64 - const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; + const char* matmul_name = "AARCH64_F32_MK4_4x16"; #else - const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; + const char* matmul_name = "ARMV7_F32_MK4_4x8"; #endif checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker( - ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str())); - + ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); std::vector quantized_args = - get_int8_nchw44_args(3, 4, false, true); + get_int8_nchw44_args(3, 4, true, true); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), - dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), - dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); + run(quantized_args, dtype::QuantizedS8(0.41113496f), + dtype::QuantizedS8(0.01887994f), + dtype::QuantizedS32(0.41113496f * 0.01887994f), + dtype::QuantizedS8(0.49550694f), epsilon); } TEST_F(ARM_COMMON_MULTI_THREADS, - CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) { + CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, 
const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; - float epsilon = 0.001; #if MEGDNN_AARCH64 - const char* matmul_name = "AARCH64_F32_MK4_4x16"; + const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; #else - const char* matmul_name = "ARMV7_F32_MK4_4x8"; + const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; #endif checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker( - ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); - std::vector quantized_args = get_int8_nchw44_args(3, 4, true); + ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str())); + + std::vector quantized_args = + get_int8_nchw44_args(3, 4, false, true); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), - dtype::QuantizedS8(0.01887994f), - dtype::QuantizedS32(0.41113496f * 0.01887994f), - dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, - epsilon); + run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), + dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); } TEST_F(ARM_COMMON_MULTI_THREADS, - 
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) { + CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) { using namespace conv_bias; Checker checker(handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; @@ -1096,23 +990,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, #endif checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker( ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); - std::vector quantized_args = - get_int8_nchw44_args(3, 4, true, true); + std::vector quantized_args = get_int8_nchw44_args(3, 4, true); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), + run(quantized_args, dtype::QuantizedS8(0.41113496f), dtype::QuantizedS8(0.01887994f), dtype::QuantizedS32(0.41113496f * 0.01887994f), - dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, - epsilon); + dtype::QuantizedS8(0.49550694f), epsilon); } - - - - - - #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) { using namespace conv_bias; @@ -1170,7 +1056,6 @@ 
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) { check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, param::MatrixMul::Format::MK8); } - #endif TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { using namespace conv_bias; @@ -1187,6 +1072,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); } + TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) { using namespace conv_bias; diff --git a/dnn/test/arm_common/conv_bias_multi_thread_weight_preprocess.cpp b/dnn/test/arm_common/conv_bias_multi_thread_weight_preprocess.cpp index 8f0e52be..03225c0c 100644 --- a/dnn/test/arm_common/conv_bias_multi_thread_weight_preprocess.cpp +++ b/dnn/test/arm_common/conv_bias_multi_thread_weight_preprocess.cpp @@ -83,56 +83,12 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) { Checker checker(handle()); - auto extra_impl = [](const TensorNDArray& tensors, uint32_t m, - param::ConvBias param, Handle* handle) { - megdnn_assert(param.format == param::ConvBias::Format::NCHW44); - auto winograd_preprocess_opr = - handle->create_operator(); - winograd_preprocess_opr->param().output_block_size = m; - winograd_preprocess_opr->param().format = param::MatrixMul::Format::MK4; - TensorLayout filter_transform_layout; - winograd_preprocess_opr->deduce_layout(tensors[1].layout, - filter_transform_layout); - size_t winograd_preprocess_workspace_in_bytes = - winograd_preprocess_opr->get_workspace_in_bytes( - tensors[1].layout, filter_transform_layout); - - auto conv_bias_opr = handle->create_operator(); - conv_bias_opr->param() = param; - conv_bias_opr->param().format = - param::ConvBias::Format::NCHW44_WINOGRAD; - conv_bias_opr->param().output_block_size = m; - size_t conv_bias_workspace_in_bytes = - conv_bias_opr->get_workspace_in_bytes( - tensors[0].layout, filter_transform_layout, - tensors[2].layout, tensors[3].layout, 
tensors[4].layout, - nullptr); - - WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), - conv_bias_workspace_in_bytes, - winograd_preprocess_workspace_in_bytes}); - wb.set(malloc(wb.total_size_in_bytes())); - - TensorND filter_transform_tensor(wb.get(0), - std::move(filter_transform_layout)); - winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, - wb.get_workspace(2)); - conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], nullptr, - wb.get_workspace(1)); - free(wb.ptr()); - }; - - auto run = [&checker, &extra_impl]( - Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker]( + const std::vector& args, + DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, const float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind(extra_impl, - std::placeholders::_1, m, - arg.param, handle)); checker.set_dtype(0, A_dtype) .set_dtype(1, B_dtype) .set_dtype(2, C_dtype) @@ -140,7 +96,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) { .set_epsilon(eps) .set_param(arg.param) .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } } }; @@ -149,7 +104,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) { // dtype::Float32(), dtype::Float32(), 1e-2f); //! 
remove this when low precision mode is ok - run(handle(), nchw44_args, {2, 6}, dtype::Float32(), dtype::Float32(), + run(nchw44_args, dtype::Float32(), dtype::Float32(), dtype::Float32(), dtype::Float32(), 1e-3f); } TEST_F(ARM_COMMON_MULTI_THREADS, @@ -158,31 +113,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; std::vector args = get_winograd_mk_packed_args(8); std::vector args_first_half(args.begin(), args.begin() + args.size() / 2); - run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, - dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, - 1e-3f); + run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, + dtype::Float32{}, 1e-3f); } TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) { @@ -190,31 +138,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - 
param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; std::vector args = get_winograd_mk_packed_args(8); std::vector args_second_half(args.begin() + args.size() / 2, args.end()); - run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, - dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, - 1e-3f); + run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, + dtype::Float32{}, 1e-3f); } #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_F(ARM_COMMON_MULTI_THREADS, @@ -223,32 +164,25 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + 
.set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; std::vector args = get_winograd_mk_packed_args(8); Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); - run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{}, - dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, - 0.25); + run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{}, + dtype::Float16{}, 0.25); } #endif TEST_F(ARM_COMMON_MULTI_THREADS, @@ -257,23 +191,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; @@ -289,9 +217,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, get_quantized_winograd_mk_packed_args(8); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), - dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), - dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); + run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), + dtype::QuantizedS32(6.25f), 
dtype::QuantizedS8(60.25f), 1e-3); } TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) { @@ -299,15 +226,11 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, + DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); checker.set_dtype(0, A_dtype) .set_dtype(1, B_dtype) .set_dtype(2, C_dtype) @@ -315,7 +238,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, .set_epsilon(eps) .set_param(arg.param) .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } } }; @@ -330,9 +252,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, std::vector quantized_args = get_int8_nchw44_args(3, 4); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), - dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), - dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); + run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), + dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); } TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) { @@ -340,23 +261,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - 
checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; @@ -372,9 +287,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, get_int8_nchw44_args(3, 4, false, true); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), - dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), - dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); + run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), + dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); } TEST_F(ARM_COMMON_MULTI_THREADS, @@ -383,23 +297,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + 
.set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; @@ -414,11 +322,10 @@ TEST_F(ARM_COMMON_MULTI_THREADS, std::vector quantized_args = get_int8_nchw44_args(3, 4, true); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), + run(quantized_args, dtype::QuantizedS8(0.41113496f), dtype::QuantizedS8(0.01887994f), dtype::QuantizedS32(0.41113496f * 0.01887994f), - dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, - epsilon); + dtype::QuantizedS8(0.49550694f), epsilon); } TEST_F(ARM_COMMON_MULTI_THREADS, @@ -427,23 +334,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, Checker> checker( handle()); - auto run = [&checker](Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, + auto run = [&checker](const std::vector& args, DType A_dtype, DType B_dtype, DType C_dtype, DType D_dtype, - param::MatrixMul::Format format, float eps) { + float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind( - winograd_algo_extra_impl, std::placeholders::_1, m, - arg.param, handle, format)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; @@ -459,11 +360,10 @@ TEST_F(ARM_COMMON_MULTI_THREADS, get_int8_nchw44_args(3, 4, true, true); UniformIntRNG int_rng{-50, 50}; checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); - run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), + run(quantized_args, dtype::QuantizedS8(0.41113496f), dtype::QuantizedS8(0.01887994f), 
dtype::QuantizedS32(0.41113496f * 0.01887994f), - dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, - epsilon); + dtype::QuantizedS8(0.49550694f), epsilon); } #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) { diff --git a/dnn/test/arm_common/winograd_filter_preprocess.cpp b/dnn/test/arm_common/winograd_filter_preprocess.cpp deleted file mode 100644 index 0126b1f6..00000000 --- a/dnn/test/arm_common/winograd_filter_preprocess.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/** - * \file dnn/test/arm_common/winograd_filter_preprocess.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -#include "test/common/checker.h" -#include "test/common/benchmarker.h" -#include "test/common/winograd_filter_preprocess.h" - -#include "test/arm_common/fixture.h" - -using namespace megdnn; -using namespace test; - -TEST_F(ARM_COMMON, WinogradFilterPreprocessF32) { - using namespace winograd_filter_preprocess; - Checker checker(handle()); - // default - std::vector args = get_args(6, 3); - std::vector args54 = get_args(5, 4); - std::vector args45 = get_args(4, 5); - - // mk4 - std::vector args_mk4_out2 = - get_mk_packed_args(2, param::Winograd::Format::MK4, 4); - std::vector args_mk4_out6 = - get_mk_packed_args(6, param::Winograd::Format::MK4, 4); - - args.insert(args.end(), args54.begin(), args54.end()); - args.insert(args.end(), args45.begin(), args45.end()); - args.insert(args.end(), args_mk4_out2.begin(), args_mk4_out2.end()); - args.insert(args.end(), args_mk4_out6.begin(), args_mk4_out6.end()); - for (auto&& arg : args) { - checker.set_param(arg.param) - .set_dtype(0, dtype::Float32{}) - 
.set_dtype(1, dtype::Float32{}) - .execs({arg.src, {}}); - } -} - -TEST_F(ARM_COMMON, WinogradFilterPreprocessQs8) { - using namespace winograd_filter_preprocess; - std::vector args = - get_mk_packed_args(2, param::Winograd::Format::MK8, 8); - Checker checker(handle()); - UniformIntRNG rng{-50, 50}; - checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng); - for (auto&& arg : args) { - checker.set_param(arg.param) - .set_dtype(0, dtype::QuantizedS8(2.5f)) - .set_dtype(1, dtype::QuantizedS16(2.5f)) - .execs({arg.src, {}}); - } -} - -#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -TEST_F(ARM_COMMON, WinogradFilterPreprocessF16) { - using namespace winograd_filter_preprocess; - Checker checker(handle()); - // default - std::vector args = get_args(6, 3); - std::vector args_23 = - get_mk_packed_args(2, param::Winograd::Format::DEFAULT, 4); - std::vector args45 = get_args(4, 5); - - // mk8 - std::vector args_mk8_out2 = - get_mk_packed_args(2, param::Winograd::Format::MK8, 8); - - args.insert(args.end(), args_23.begin(), args_23.end()); - args.insert(args.end(), args45.begin(), args45.end()); - args.insert(args.end(), args_mk8_out2.begin(), args_mk8_out2.end()); - - Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); - for (auto&& arg : args) { - checker.set_param(arg.param) - .set_rng(0, rng) - .set_dtype(0, dtype::Float16{}) - .set_dtype(1, dtype::Float16{}) - .execs({arg.src, {}}); - } -} - -#endif - -// vim: syntax=cpp.doxygen diff --git a/dnn/test/common/conv_bias.cpp b/dnn/test/common/conv_bias.cpp index 14a19ec4..3d00d40e 100644 --- a/dnn/test/common/conv_bias.cpp +++ b/dnn/test/common/conv_bias.cpp @@ -1152,50 +1152,6 @@ void check_conv_bias_preprocess(std::vector args, } -void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m, - param::ConvBias param, Handle* handle, - param::MatrixMul::Format format) { - megdnn_assert(param.format == param::ConvBias::Format::NCHW || - param.format == param::ConvBias::Format::NCHW44); - auto 
winograd_preprocess_opr = - handle->create_operator(); - winograd_preprocess_opr->param().output_block_size = m; - winograd_preprocess_opr->param().format = format; - winograd_preprocess_opr->param().compute_mode = param.compute_mode; - TensorLayout filter_transform_layout; - winograd_preprocess_opr->deduce_layout(tensors[1].layout, - filter_transform_layout); - size_t winograd_preprocess_workspace_in_bytes = - winograd_preprocess_opr->get_workspace_in_bytes( - tensors[1].layout, filter_transform_layout); - - auto conv_bias_opr = handle->create_operator(); - conv_bias_opr->param() = param; - if (param.format == param::ConvBias::Format::NCHW) { - conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD; - } else { - conv_bias_opr->param().format = - param::ConvBias::Format::NCHW44_WINOGRAD; - } - conv_bias_opr->param().output_block_size = m; - size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes( - tensors[0].layout, filter_transform_layout, tensors[2].layout, - tensors[3].layout, tensors[4].layout, nullptr); - - WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), - conv_bias_workspace_in_bytes, - winograd_preprocess_workspace_in_bytes}); - wb.set(malloc(wb.total_size_in_bytes())); - - TensorND filter_transform_tensor(wb.get(0), - std::move(filter_transform_layout)); - winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, - wb.get_workspace(2)); - conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], nullptr, wb.get_workspace(1)); - free(wb.ptr()); -}; - void checker_conv_bias_common(std::vector args, Handle* handle, RNG* rng, float epsilon, DType type0, DType type1, DType type2, DType type3, const char* algo_name) { @@ -1388,7 +1344,6 @@ std::vector get_nchw44_conv_bias_args( } return args; } - } // namespace conv_bias } // namespace test } // namespace megdnn diff --git a/dnn/test/common/conv_bias.h b/dnn/test/common/conv_bias.h index 365d9156..4d5a1cc3 
100644 --- a/dnn/test/common/conv_bias.h +++ b/dnn/test/common/conv_bias.h @@ -94,9 +94,6 @@ void checker_conv_bias_int8x8x16( std::vector args, megdnn::Handle* handle, const char* algo_name); -void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m, - param::ConvBias param, Handle* handle, - param::MatrixMul::Format format); void checker_conv_bias_common(std::vector args, Handle* handle, RNG* rng, float epsilon, DType type0, DType type1, DType type2, diff --git a/dnn/test/common/opr_trait.h b/dnn/test/common/opr_trait.h index cad36421..bf0af976 100644 --- a/dnn/test/common/opr_trait.h +++ b/dnn/test/common/opr_trait.h @@ -95,7 +95,6 @@ DEF(MaskConvolution, 4, true, true); DEF(MaskPropagate, 2, true, true); DEF(RelayoutFormat, 2, true, true); DEF(MaxTensorDiff, 2, true, false); -DEF(WinogradFilterPreprocess, 2, true, true); DEF(LocalShareForward, 3, true, true); DEF(LocalShareBackwardData, 3, true, false); DEF(LocalShareBackwardFilter, 3, true, false); diff --git a/dnn/test/x86/conv_bias.cpp b/dnn/test/x86/conv_bias.cpp index e17582b5..d7669c4b 100644 --- a/dnn/test/x86/conv_bias.cpp +++ b/dnn/test/x86/conv_bias.cpp @@ -1814,69 +1814,22 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { using namespace conv_bias; std::vector args = get_winograd_mk_nchw88_args(); Checker checker(handle()); - auto extra_impl = [](const TensorNDArray& tensors, uint32_t m, - param::ConvBias param, Handle* handle) { - megdnn_assert(param.format == param::ConvBias::Format::NCHW88); - auto winograd_preprocess_opr = - handle->create_operator(); - winograd_preprocess_opr->param().output_block_size = m; - winograd_preprocess_opr->param().format = param::MatrixMul::Format::MK8; - TensorLayout filter_transform_layout; - winograd_preprocess_opr->deduce_layout(tensors[1].layout, - filter_transform_layout); - size_t winograd_preprocess_workspace_in_bytes = - winograd_preprocess_opr->get_workspace_in_bytes( - tensors[1].layout, filter_transform_layout); - - auto 
conv_bias_opr = handle->create_operator(); - conv_bias_opr->param() = param; - conv_bias_opr->param().format = - param::ConvBias::Format::NCHW88_WINOGRAD; - conv_bias_opr->param().output_block_size = m; - size_t conv_bias_workspace_in_bytes = - conv_bias_opr->get_workspace_in_bytes( - tensors[0].layout, filter_transform_layout, - tensors[2].layout, tensors[3].layout, tensors[4].layout, - nullptr); - - WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), - conv_bias_workspace_in_bytes, - winograd_preprocess_workspace_in_bytes}); - wb.set(malloc(wb.total_size_in_bytes())); - - TensorND filter_transform_tensor(wb.get(0), - std::move(filter_transform_layout)); - winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, - wb.get_workspace(2)); - conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], - tensors[3], tensors[4], nullptr, - wb.get_workspace(1)); - - free(wb.ptr()); - }; - auto run = [&checker, &extra_impl]( - Handle* handle, const std::vector& args, - const std::vector& out_size, DType A_dtype, - DType B_dtype, DType C_dtype, DType D_dtype, - const float eps) { + auto run = [&checker](const std::vector& args, DType A_dtype, + DType B_dtype, DType C_dtype, DType D_dtype, + const float eps) { for (auto&& arg : args) { - for (uint32_t m : out_size) { - checker.set_extra_opr_impl(std::bind(extra_impl, - std::placeholders::_1, m, - arg.param, handle)); - checker.set_dtype(0, A_dtype) - .set_dtype(1, B_dtype) - .set_dtype(2, C_dtype) - .set_dtype(4, D_dtype) - .set_epsilon(eps) - .set_param(arg.param) - .execs({arg.src, arg.filter, arg.bias, {}, {}}); - } + checker.set_dtype(0, A_dtype) + .set_dtype(1, B_dtype) + .set_dtype(2, C_dtype) + .set_dtype(4, D_dtype) + .set_epsilon(eps) + .set_param(arg.param) + .execs({arg.src, arg.filter, arg.bias, {}, {}}); } }; - run(handle(), args, {2, 6}, dtype::Float32(), dtype::Float32(), - dtype::Float32(), dtype::Float32(), 1e-3f); + run(args, dtype::Float32(), dtype::Float32(), 
dtype::Float32(), + dtype::Float32(), 1e-3f); } /*********************************** End winograd ************************/ diff --git a/src/core/impl/graph/cg_impl.cpp b/src/core/impl/graph/cg_impl.cpp index 3341c3a4..46e22e9d 100644 --- a/src/core/impl/graph/cg_impl.cpp +++ b/src/core/impl/graph/cg_impl.cpp @@ -32,7 +32,6 @@ #include "megbrain/jit/fusion_pass.h" #endif -#include "megbrain/gopt/weights_preprocess.h" using namespace mgb; using namespace cg; diff --git a/src/gopt/impl/framework.cpp b/src/gopt/impl/framework.cpp index ad57e59f..e0b80eaa 100644 --- a/src/gopt/impl/framework.cpp +++ b/src/gopt/impl/framework.cpp @@ -14,7 +14,6 @@ #include "megbrain/gopt/gtrans.h" #include "megbrain/gopt/inference.h" #include "megbrain/gopt/misc.h" -#include "megbrain/gopt/weights_preprocess.h" #include "megbrain/graph/cg.h" #include "megbrain/graph/event.h" #include "megbrain/graph/exc_extra_info.h" @@ -780,8 +779,6 @@ const GraphOptimizer& GraphOptimizer::add_passes_for_optimize_options( add_pass(); }); - cb(weight_winograd_transform, - { add_pass(); }); #undef cb if (need_param_fuse) { diff --git a/src/gopt/impl/weights_preprocess.cpp b/src/gopt/impl/weights_preprocess.cpp deleted file mode 100644 index 25e6d535..00000000 --- a/src/gopt/impl/weights_preprocess.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/** - * \file src/gopt/impl/weights_preprocess.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ - -#include "megbrain/gopt/weights_preprocess.h" -#include "megbrain/gopt/inference.h" -#include "megbrain/opr/dnn/convolution.h" -#include "megbrain/opr/tensor_manip.h" - -#include "megbrain/utils/hash_ct.h" -#include "midout.h" - -MIDOUT_DECL(megbrain_weight_preprocess) -#define MIDOUT_B(tag) \ - MIDOUT_BEGIN(megbrain_weight_preprocess, midout_iv(MGB_HASH_STR(tag))) { -#define MIDOUT_E \ - } \ - MIDOUT_END(); - -using namespace mgb; -using namespace gopt; -using namespace cg; - -const char* WinogradTransformReplacePass::name() const { - return "winograd_transform"; -} - -void WinogradTransformReplacePass::apply(OptState& opt) const { - MIDOUT_B("WinogradTransformReplacePass::apply") - auto rewriter = opt.graph().make_rewriter(); - ConstVarPropogate cvprop{ConstVarType::IMMUTABLE_AND_PARAM}; - opt.graph().iter([&cvprop](OperatorNodeBase *opr) { - cvprop.add_opr(opr); - }); - - auto get_algo = [](const opr::ConvBias& opr) -> std::string { - auto&& inputs = opr.input(); - SmallVector layouts; - mgb_assert(inputs.size() >= 2 && inputs.size() <= 4); - auto&& mo = opr.megdnn_opr(); - for (size_t i = 0; i < 4; i++) { - if (inputs.size() <= i) { - if (i == 2) { - //! 
bias - DType dtype; - mo->deduce_dtype(inputs[0]->dtype(), inputs[1]->dtype(), - DType{}, DType{}, dtype); - layouts.emplace_back(TensorShape{}, dtype); - } else { - layouts.emplace_back(TensorShape{}, opr.output(0)->dtype(), - opr.output(0)->format()); - } - } else { - layouts.emplace_back(inputs[i]->shape(), inputs[i]->dtype(), - inputs[i]->format()); - } - } - layouts.emplace_back(opr.output(0)->shape(), opr.output(0)->dtype(), - opr.output(0)->format()); - - AlgoChooserProfileCache& cache = opr.profile_cache(); - auto param_blob = opr.param_blob(); - AlgoChooserProfileCache::Key cache_key{layouts.data(), layouts.size(), - param_blob.first, - param_blob.second}; - auto&& rst = cache.get(cache_key); - if (!rst.valid()) - return ""; - auto prof = rst.val(); - if (prof.empty()) - return ""; - return prof[0].algo; - }; - auto on_opr = [&](OperatorNodeBase* opr) { - auto type = opr->dyn_typeinfo(); - do { - if (type != opr::ConvBias::typeinfo()) - break; - auto&& conv_bias_opr = opr->cast_final_safe(); - auto&& inputs = conv_bias_opr.input(); - VarNodeArray new_inp; - new_inp.reserve(inputs.size()); - for (auto i : inputs) { - new_inp.push_back(rewriter.get_var(i)); - } - if (!(cvprop.is_midconst(inputs[1]) || - cvprop.is_const(inputs[1]))) { - break; - } - auto algo_name = get_algo(conv_bias_opr); - auto winograd_param = - megdnn::ConvBias::parse_winograd_name(algo_name); - if (winograd_param == megdnn::ConvBias::INVALID_WINOGRAD_PARAM) - break; - mgb_assert( - conv_bias_opr.param().format == - megdnn::ConvBias::Param::Format::NCHW || - conv_bias_opr.param().format == - megdnn::ConvBias::Param::Format::NCHW88 || - conv_bias_opr.param().format == - megdnn::ConvBias::Param::Format::NCHW44, - "currently winograd only suppport NCHW and NCHW44 and " - "NCHW88"); - opr::ConvBiasForward::check_winograd_param_valid( - winograd_param, conv_bias_opr.input(0)->dtype()); - megdnn::param::Winograd winograd_preprocess_param; - winograd_preprocess_param.format = - 
opr::ConvBiasForward::get_matmul_format(winograd_param); - winograd_preprocess_param.output_block_size = - winograd_param.output_block_size; - - auto conv_bias_param = conv_bias_opr.param(); - //! If input dtype is Qint8 and matmul format is MK4, The winograd - //! compute type is float. - if (conv_bias_opr.input(0)->dtype().enumv() == - DTypeEnum::QuantizedS8 && - winograd_preprocess_param.format == - megdnn::param::MatrixMul::Format::MK4) { - winograd_preprocess_param.compute_mode = - megdnn::param::ConvBias::ComputeMode::FLOAT32; - conv_bias_param.compute_mode = - megdnn::param::ConvBias::ComputeMode::FLOAT32; - } - - auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make( - new_inp[1], winograd_preprocess_param); - mgb_assert(inputs.size() == 2 || inputs.size() == 3, - "input size need to be 2/3, but got: %zu", - inputs.size()); - SymbolVar new_conv_bias_opr; - - if (new_inp[0]->shape().ndim == 4) { - conv_bias_param.format = - megdnn::ConvBias::Param::Format::NCHW_WINOGRAD; - } else { - mgb_assert(new_inp[0]->shape().ndim == 5); - size_t pack_size = new_inp[0]->shape()[4]; - if (pack_size == 8) { - conv_bias_param.format = - megdnn::ConvBias::Param::Format::NCHW88_WINOGRAD; - } else if (pack_size == 4) { - conv_bias_param.format = - megdnn::ConvBias::Param::Format::NCHW44_WINOGRAD; - } else { - mgb_assert(0, "Invalid pack size %zu in algo %s", pack_size, - algo_name.c_str()); - } - } - - conv_bias_param.output_block_size = - winograd_param.output_block_size; - if (inputs.size() == 2) { - new_conv_bias_opr = opr::ConvBias::make( - new_inp[0], winograd_preprocess_opr.node(), - conv_bias_param, conv_bias_opr.execution_policy(), - conv_bias_opr.config()); - } else { - new_conv_bias_opr = opr::ConvBias::make( - new_inp[0], winograd_preprocess_opr.node(), new_inp[2], - conv_bias_param, conv_bias_opr.execution_policy(), - conv_bias_opr.config()); - } - - auto&& origin_out = conv_bias_opr.output(); - auto&& cur_out = 
new_conv_bias_opr.node()->owner_opr()->output(); - mgb_assert(origin_out.size() == cur_out.size()); - for (size_t i = 0; i < origin_out.size(); i++) { - if (!origin_out[i]->contain_flag( - VarNode::Flag::VOLATILE_CONTENT)) { - rewriter.replace_var(origin_out[i], cur_out[i], nullptr); - } - } - return; - } while (0); - - rewriter.auto_replace_outputs(opr); - }; - - opt.graph().iter(on_opr); - rewriter.apply_inplace(); - MIDOUT_E -} - -/** - * \warning WinogradTransformReplacePass implies that we run ParamFuse pass - * before(currently run ParamFuse in optimize_for_inference when dump model), - * othwise it can not deal with \c ConvBias(x, W+1), as the node of W+1 has no - * flag PERSISTENT_DEVICE_VALUE, it's a mid-const node, we should use - * ConstVarPropogate strictly speaking. - */ -void gopt::transform_vars_inplace_with_winograd( - mgb::cg::VarNodeArray& dest_vars) { - gopt::GraphOptimizer optimizer; - optimizer.add_pass(); - optimizer.add_pass(); - optimizer.apply_inplace(dest_vars); -} - -// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/gopt/include/megbrain/gopt/weights_preprocess.h b/src/gopt/include/megbrain/gopt/weights_preprocess.h deleted file mode 100644 index 50205fa2..00000000 --- a/src/gopt/include/megbrain/gopt/weights_preprocess.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * \file src/gopt/include/megbrain/gopt/weights_preprocess.h - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ - -#pragma once - -#include "megbrain/gopt/framework.h" - -namespace mgb { -namespace gopt { - -class WinogradTransformReplacePass final : public Pass { - class Impl; - -public: - const char* name() const override; - void apply(OptState& opt) const override; -}; - -void transform_vars_inplace_with_winograd(mgb::cg::VarNodeArray& dest_vars); - -} // namespace gopt -} // namespace mgb - -// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/impl/search_policy/algo_chooser.cpp b/src/opr/impl/search_policy/algo_chooser.cpp index 4816d159..c9d11350 100644 --- a/src/opr/impl/search_policy/algo_chooser.cpp +++ b/src/opr/impl/search_policy/algo_chooser.cpp @@ -46,7 +46,6 @@ AlgoChooserProfileCache::Result AlgoChooser::get_profile_result( ConvTensorLayouts origin_layouts = ctx.layouts(); typename Opr::Param origin_param = ctx.mgb_opr()->param(); - get_origin_param_and_layouts(ctx, origin_layouts, origin_param); AlgoChooserProfileCache::Key cache_key{origin_layouts.data(), origin_layouts.size(), &origin_param, sizeof(origin_param)}; @@ -104,18 +103,6 @@ AlgoChooserProfileCache::Result AlgoChooser::get_profile_result( return prof_rst; } -template <> -void AlgoChooser::get_origin_param_and_layouts( - const ExeContext& ctx, ConvTensorLayouts& layouts, - megdnn::ConvBias::Param& param) { - auto format = static_cast( - ctx.megdnn_opr()->param().format); - size_t output_block_size = ctx.megdnn_opr()->param().output_block_size; - megdnn::ConvBias::deduce_winograd_origin_layout_and_param( - format, output_block_size, ctx.layouts()[0], ctx.layouts()[1], - layouts[1], param); -} - template typename AlgoChooser::ImplAlgo AlgoChooser::choose_by_profile( ExeContext& ctx, bool require_reproducible, bool enable_update) { diff --git a/src/opr/impl/tensor_manip.cpp b/src/opr/impl/tensor_manip.cpp index af3f5e65..1a71a5af 100644 --- a/src/opr/impl/tensor_manip.cpp +++ b/src/opr/impl/tensor_manip.cpp @@ -1607,15 +1607,5 @@ void 
RelayoutFormat::init_output_format() { } // f}}} // -/* f{{{ ===================== WinogradFilterPreprocess ===================== */ -MGB_DYN_TYPE_OBJ_FINAL_IMPL(WinogradFilterPreprocess); -MEGDNN_OPR_INIT1(WinogradFilterPreprocess, "winograd_filter_preprocess") -void WinogradFilterPreprocess::init_output_dtype() { - TensorLayout dst; - TensorLayout src{input(0)->shape(), input(0)->dtype(), input(0)->format()}; - megdnn_opr()->deduce_layout(src, dst); - output(0)->dtype(dst.dtype); -} -// f}}} // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/impl/tensor_manip.sereg.h b/src/opr/impl/tensor_manip.sereg.h index b6a57460..adb5db42 100644 --- a/src/opr/impl/tensor_manip.sereg.h +++ b/src/opr/impl/tensor_manip.sereg.h @@ -184,7 +184,6 @@ namespace opr { MGB_REG_OPR_SHALLOW_COPY(ParamPackConcat, opr_shallow_copy_param_pack_concat); MGB_SEREG_OPR(RelayoutFormat, 1); - MGB_SEREG_OPR(WinogradFilterPreprocess, 1); } // namespace opr } // namespace mgb diff --git a/src/opr/include/megbrain/opr/search_policy/algo_chooser.h b/src/opr/include/megbrain/opr/search_policy/algo_chooser.h index 7219fe5c..cb0e5861 100644 --- a/src/opr/include/megbrain/opr/search_policy/algo_chooser.h +++ b/src/opr/include/megbrain/opr/search_policy/algo_chooser.h @@ -113,10 +113,6 @@ class AlgoChooser { //! entrance for getting algorithm according to execution strategy static ImplAlgo get_algo(ExeContext& ctx); - static void get_origin_param_and_layouts(const ExeContext&, - ConvTensorLayouts&, - typename Opr::Param&) {} - //! 
get all profile result, either by retrieving cache or profiling static AlgoChooserProfileCache::Result get_profile_result( ExeContext& ctx, bool enable_update); diff --git a/src/opr/include/megbrain/opr/tensor_manip.h b/src/opr/include/megbrain/opr/tensor_manip.h index d030e239..1c7b15aa 100644 --- a/src/opr/include/megbrain/opr/tensor_manip.h +++ b/src/opr/include/megbrain/opr/tensor_manip.h @@ -635,22 +635,6 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat, const OperatorNodeConfig &config = {}); void init_output_format() override final; }; - -/*! - * \brief change conv weights layout base on winograd transform. - * - * See docs of megdnn params for more details - */ -MGB_DEFINE_OPR_CLASS(WinogradFilterPreprocess, - intl::MegDNNOprWrapperFwd<megdnn::WinogradFilterPreprocess>) - public: - WinogradFilterPreprocess(VarNode* p0, const Param& param, - const OperatorNodeConfig& config); - static SymbolVar make(SymbolVar p0, const Param& param = {}, - const OperatorNodeConfig& config = {}); - void init_output_dtype() override final; -}; - } // opr } // mgb diff --git a/src/plugin/impl/opr_footprint.cpp b/src/plugin/impl/opr_footprint.cpp index 9a70b767..557ff467 100644 --- a/src/plugin/impl/opr_footprint.cpp +++ b/src/plugin/impl/opr_footprint.cpp @@ -171,12 +171,6 @@ uint64_t eval_conv_computation(const TensorShape& src_shape, cpos = 1; spatial_start = 2; break; - case Param::Format::NCHW_WINOGRAD: - case Param::Format::NCHW44_WINOGRAD: - case Param::Format::NCHW88_WINOGRAD: - cpos = 1; - spatial_start = 0; - break; case Param::Format::NHWC: cpos = 3; spatial_start = 1; break; @@ -203,29 +197,9 @@ uint64_t eval_conv_computation(const TensorShape& src_shape, uint64_t fh = static_cast<uint64_t>(filter_shape[spatial_start]); uint64_t fw = static_cast<uint64_t>(filter_shape[spatial_start + 1]); - if (param.format == Param::Format::NCHW_WINOGRAD || - param.format == Param::Format::NCHW44_WINOGRAD || - param.format == Param::Format::NCHW88_WINOGRAD) { - mgb_assert(opr->same_type<opr::ConvBias>(), - "Only conv bias support WINOGRAD"); - auto&& conv_bias_opr =
opr->cast_final_safe<opr::ConvBias>(); - uint32_t output_block_size = conv_bias_opr.param().output_block_size; - mgb_assert(fh == fw, - "NCHW_WINOGRAD, NCHW88_WINOGRAD need fw==fh, got fw: %u fh " - "%u\n", - static_cast<uint32_t>(fh), static_cast<uint32_t>(fw)); - fh = fh + 1 - output_block_size; - fw = fw + 1 - output_block_size; - } + // mul and add are counted as 2 operations - if(param.format == Param::Format::NCHW88_WINOGRAD){ - return dst_shape.total_nr_elems() * fh * fw * - static_cast<uint64_t>(src_shape[cpos] * 8) / group * 2; - } - if (param.format == Param::Format::NCHW44_WINOGRAD) { - return dst_shape.total_nr_elems() * fh * fw * - static_cast<uint64_t>(src_shape[cpos] * 4) / group * 2; - } + return dst_shape.total_nr_elems() * fh * fw * static_cast<uint64_t>(src_shape[cpos]) / group * 2; } diff --git a/src/serialization/impl/schema.fbs b/src/serialization/impl/schema.fbs index be47f360..67a9b90b 100644 --- a/src/serialization/impl/schema.fbs +++ b/src/serialization/impl/schema.fbs @@ -28,6 +28,7 @@ table Blob { } table Reserved0 {} +table DeprecatedParam {} union OperatorParam { param.Empty = 1, @@ -50,7 +51,8 @@ union OperatorParam { param.ElemwiseMultiType = 18, param.PowC = 19, param.MatrixMul = 20, - param.Winograd = 21, + //Reserved for param.Winograd = 21, + DeprecatedParam = 21, param.SVD = 22, param.Reduce = 23, param.Cumsum = 24,