Browse Source

chore(winograd): remove winograd transform code

GitOrigin-RevId: 78c3cfceae
release-1.2
Megvii Engine Team 4 years ago
parent
commit
fc0fcd2f7f
47 changed files with 295 additions and 2165 deletions
  1. +0
    -10
      dnn/include/megdnn/oprs/nn.h
  2. +0
    -23
      dnn/include/megdnn/oprs/utils.h
  3. +1
    -10
      dnn/scripts/opr_param_defs.py
  4. +5
    -25
      dnn/src/arm_common/conv_bias/f16/algos.cpp
  5. +8
    -48
      dnn/src/arm_common/conv_bias/fp32/algos.cpp
  6. +6
    -23
      dnn/src/arm_common/conv_bias/int8/algos.cpp
  7. +0
    -1
      dnn/src/arm_common/conv_bias/int8/strategy_nchw44_2x3_4x4.cpp
  8. +2
    -7
      dnn/src/arm_common/conv_bias/opr_impl.cpp
  9. +1
    -2
      dnn/src/arm_common/handle.cpp
  10. +0
    -179
      dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp
  11. +0
    -28
      dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h
  12. +5
    -145
      dnn/src/common/conv_bias.cpp
  13. +20
    -132
      dnn/src/common/convolution.cpp
  14. +0
    -1
      dnn/src/common/handle_impl.h
  15. +0
    -157
      dnn/src/common/winograd_filter_preprocess.cpp
  16. +0
    -1
      dnn/src/cuda/handle_create.cpp
  17. +0
    -22
      dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp
  18. +0
    -27
      dnn/src/cuda/winograd_filter_preprocess/opr_impl.h
  19. +4
    -24
      dnn/src/fallback/conv_bias/algos.cpp
  20. +3
    -45
      dnn/src/fallback/conv_bias/opr_impl.cpp
  21. +0
    -6
      dnn/src/fallback/conv_bias/opr_impl.h
  22. +3
    -21
      dnn/src/fallback/conv_bias/winograd/winograd.h
  23. +0
    -2
      dnn/src/fallback/convolution/algos.cpp
  24. +1
    -2
      dnn/src/fallback/convolution/opr_impl.cpp
  25. +0
    -1
      dnn/src/naive/handle.cpp
  26. +0
    -234
      dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp
  27. +0
    -28
      dnn/src/naive/winograd_filter_preprocess/opr_impl.h
  28. +2
    -12
      dnn/src/x86/conv_bias/f32/winograd_algo.cpp
  29. +1
    -5
      dnn/src/x86/conv_bias/opr_impl.cpp
  30. +130
    -244
      dnn/test/arm_common/conv_bias_multi_thread.cpp
  31. +86
    -186
      dnn/test/arm_common/conv_bias_multi_thread_weight_preprocess.cpp
  32. +0
    -91
      dnn/test/arm_common/winograd_filter_preprocess.cpp
  33. +0
    -45
      dnn/test/common/conv_bias.cpp
  34. +0
    -3
      dnn/test/common/conv_bias.h
  35. +0
    -1
      dnn/test/common/opr_trait.h
  36. +12
    -59
      dnn/test/x86/conv_bias.cpp
  37. +0
    -1
      src/core/impl/graph/cg_impl.cpp
  38. +0
    -3
      src/gopt/impl/framework.cpp
  39. +0
    -206
      src/gopt/impl/weights_preprocess.cpp
  40. +0
    -32
      src/gopt/include/megbrain/gopt/weights_preprocess.h
  41. +0
    -13
      src/opr/impl/search_policy/algo_chooser.cpp
  42. +0
    -10
      src/opr/impl/tensor_manip.cpp
  43. +0
    -1
      src/opr/impl/tensor_manip.sereg.h
  44. +0
    -4
      src/opr/include/megbrain/opr/search_policy/algo_chooser.h
  45. +0
    -16
      src/opr/include/megbrain/opr/tensor_manip.h
  46. +2
    -28
      src/plugin/impl/opr_footprint.cpp
  47. +3
    -1
      src/serialization/impl/schema.fbs

+ 0
- 10
dnn/include/megdnn/oprs/nn.h View File

@@ -435,16 +435,6 @@ public:
const TensorLayout& bias, const TensorLayout& z,
const TensorLayout& dst) = 0;

/**
* \brief deduce the origin filter layout and conv_bias param after winograd
* transform, this used in fast-run to construct the origin cache-key
*/
static void deduce_winograd_origin_layout_and_param(
const Param::Format format, const size_t output_block_size,
const TensorLayout& src_layout,
const TensorLayout& winograd_filter_layout,
TensorLayout& origin_layout, Param& origin_param);

enum class BiasMode : uint32_t {
NO_BIAS = 0, //!< no bias
BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1]


+ 0
- 23
dnn/include/megdnn/oprs/utils.h View File

@@ -91,29 +91,6 @@ class MaxTensorDiff : public OperatorBase {
void check_exec(const TensorLayout& layout1,
const TensorLayout& layout2, size_t workspace_in_bytes);
};

/*!
* \brief winograd preprocess opr.
*
* for the detail \see src/fallback/conv_bias/winograd/winograd.h
*
*/
class WinogradFilterPreprocess : public OperatorBase {
DEF_OPR_PARAM(Winograd);
DEF_OPR_IMPL(WinogradFilterPreprocess, OperatorBase, 1, 1);

public:
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace) = 0;

size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&);

void deduce_layout(const TensorLayout& src, TensorLayout& dst);

protected:
void check_exec(const TensorLayout& src, const TensorLayout& dst,
size_t workspace_in_bytes);
};
} // namespace megdnn

#include "megdnn/internal/opr_header_epilogue.h"


+ 1
- 10
dnn/scripts/opr_param_defs.py View File

@@ -39,7 +39,7 @@ pdef('Axis').add_fields('int32', 'axis', 0)
'NCHW44','NCHW44_DOT',
Doc('NCHW_WINOGRAD', 'NCHW layout with weights transformed by winograd'),
Doc('NCHW88_WINOGRAD', 'NCHW88 layout with weights transformed by winograd'),
Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights transformed by winograd'),
Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights transformed by winograd'),
Doc('NCHW4_NCHW32', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'),
Doc('NCHW32_NCHW4', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'),
Doc('NCHW4_NCHW', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'),
@@ -456,15 +456,6 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0)
'layout is (K/4, M/4, 4(m), 4(k)) x (K/4, N, 4(k))'))
)

(pdef('Winograd', 'winograd param used in convbias').
add_fields(
'uint32',
Doc('output_block_size', 'output block size, detail meaning see winograd '
'in convbias, equals to the meaning of m in F(m, r)'), 0).
add_enum_alias('Format', 'MatrixMul').
add_enum_alias('ComputeMode', 'Convolution', name_field='compute_mode')
)

(pdef('SVD').
add_fields('bool',
Doc('full_matrices',


+ 5
- 25
dnn/src/arm_common/conv_bias/f16/algos.cpp View File

@@ -27,7 +27,7 @@ using namespace arm_common;
/* ======================= AlgoFP16WinogradF23 ======================== */

bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
const NCBKernSizeParam& param,
const NCBKernSizeParam& param,
AlgoSelectionStrategy /*algo_selection_strategy*/) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) {
@@ -37,12 +37,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -78,12 +73,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 4 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 5) &&
@@ -117,12 +107,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 6 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -162,12 +147,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK8)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&


+ 8
- 48
dnn/src/arm_common/conv_bias/fp32/algos.cpp View File

@@ -47,12 +47,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -86,12 +81,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 6 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -125,12 +115,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 5 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 4) &&
@@ -164,12 +149,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 4 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 5) &&
@@ -209,12 +189,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 6 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -257,12 +232,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() ==
fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK &&
(param.filter_meta.format == param::ConvBias::Format::NCHW44 ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -303,12 +273,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() ==
fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK &&
(param.filter_meta.format == param::ConvBias::Format::NCHW44 ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD &&
param.output_block_size == 6 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -350,12 +315,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF73_4x4_NCHW44::usable(
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() ==
fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK &&
(param.filter_meta.format == param::ConvBias::Format::NCHW44 ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD &&
param.output_block_size == 7 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&


+ 6
- 23
dnn/src/arm_common/conv_bias/int8/algos.cpp View File

@@ -242,14 +242,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
((param.filter_meta.format == param::ConvBias::Format::NCHW &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK8 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS16)) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -293,13 +287,8 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
.get_matmul_kern_param(param));
return is_matmul_usable &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
((param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) ||
((param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD) &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -341,14 +330,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
.get_matmul_kern_param(param);
bool is_matmul_usable = m_matmul_algo->usable(matmul_param);
return is_matmul_usable &&
((param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK8 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS16)) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW44 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&


+ 0
- 1
dnn/src/arm_common/conv_bias/int8/strategy_nchw44_2x3_4x4.cpp View File

@@ -240,7 +240,6 @@ void winograd_2x3_4x4_s8_f32_nchw44::filter(const int8_t* filter,
float* transform_mid_buf, size_t OC, size_t IC,
size_t oc_start, size_t oc_end) {
constexpr int alpha = 2 + 3 - 1;

/**
* origin: (4x3) * (3 x 3) * (3 x 4)
*/


+ 2
- 7
dnn/src/arm_common/conv_bias/opr_impl.cpp View File

@@ -290,8 +290,8 @@ ConvBiasImpl::get_all_packed_algo() {
bool ConvBiasImpl::is_matmul_quantized_prefer(
const ConvBiasImpl::NCBKernSizeParam& param) const {
fallback::ConvBiasImpl::NCBKernSizeParam conv_ncb_param(
param, 0, param::MatrixMul::Format::DEFAULT, {}, 0,
BiasMode::NO_BIAS, param::ConvBias::NonlineMode::IDENTITY);
param, {}, 0, BiasMode::NO_BIAS,
param::ConvBias::NonlineMode::IDENTITY);
conv_ncb_param.dst_type = param.bias_type;
conv_ncb_param.filter_meta.group = 1;

@@ -320,11 +320,6 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order(
auto FH = param.filter_meta.spatial[0];
auto FW = param.filter_meta.spatial[1];
//! TODO: now winograd only support fast-run
if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD ||
param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD ||
param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) {
return {AlgoCategory::WINOGRAD};
}
//! im2col
bool im2col_prefer = (IC >= 32 || OC >= 32);
//! quantized algo use matmul when direct algo is unusable


+ 1
- 2
dnn/src/arm_common/handle.cpp View File

@@ -27,7 +27,7 @@
#include "src/arm_common/type_cvt/opr_impl.h"
#include "src/arm_common/reduce/opr_impl.h"
#include "src/arm_common/conv_bias/opr_impl.h"
#include "src/arm_common/winograd_filter_preprocess/opr_impl.h"

namespace megdnn {
namespace arm_common {
@@ -50,7 +50,6 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(WarpPerspective)
MEGDNN_SPECIALIZE_CREATE_OPERATOR(TypeCvt)
MEGDNN_SPECIALIZE_CREATE_OPERATOR(Reduce)
MEGDNN_SPECIALIZE_CREATE_OPERATOR(ConvBias)
MEGDNN_SPECIALIZE_CREATE_OPERATOR(WinogradFilterPreprocess)
MEGDNN_SPECIALIZE_CREATE_OPERATOR(ConvolutionBackwardData)

#pragma GCC diagnostic push


+ 0
- 179
dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp View File

@@ -1,179 +0,0 @@
/**
* \file dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#include "src/arm_common/winograd_filter_preprocess/opr_impl.h"
#include "src/arm_common/handle.h"
#include "src/common/utils.h"
#include "src/arm_common/conv_bias/fp32/strategy.h"
#include "src/arm_common/conv_bias/int8/strategy.h"
#include "src/arm_common/conv_bias/f16/strategy.h"

#include "midout.h"
MIDOUT_DECL(megdnn_arm_common_winograd_filter_preprocess)

using namespace megdnn;
using namespace arm_common;

void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
using namespace winograd;
check_exec(src.layout, dst.layout, workspace.size);

//! NCHW44 group conv or NCHW group conv or both dense conv
size_t flt_start = 0;
size_t pack_c_size = 1;
size_t group = 1;
if (src.layout.ndim == 5) { //! {g, OC, IC, FH, FW}
flt_start = 1;
group = src.layout[0];
} else if (src.layout.ndim == 6) { //! {OC/4, IC/4, FH, FW, 4, 4}
pack_c_size = src.layout[5];
} else if (src.layout.ndim == 7) { //! {g, OC/4, IC/4, FH, FW, 4, 4}
flt_start = 1;
group = src.layout[0];
pack_c_size = src.layout[6];
}
size_t OC = src.layout[flt_start] * pack_c_size,
IC = src.layout[flt_start + 1] * pack_c_size,
FW = src.layout[flt_start + 3];
size_t m = param().output_block_size;

bool execed = false;

#define DISPATCH(_strategy, _format, ...) \
MIDOUT_BEGIN(megdnn_arm_common_winograd_filter_preprocess, \
##__VA_ARGS__) { \
if (param().format == _format) { \
for (size_t g = 0; g < group; g++) { \
auto run = [=]() { \
_strategy strategy(src.layout.dtype, src.layout.dtype, \
src.layout.dtype); \
megdnn::winograd::ConvBias<_strategy, _format>(strategy, \
1_z) \
.filter_process(src_ptr, dst_ptr, workspace_ptr, \
OC, IC); \
}; \
MEGDNN_DISPATCH_CPU_KERN_OPR(run()); \
src_ptr += src.layout.stride[0]; \
dst_ptr += dst.layout.stride[0]; \
} \
execed = true; \
} \
} \
MIDOUT_END();

if (src.layout.dtype.enumv() == DTypeEnum::Float32) {
const float* src_ptr = src.ptr<float>();
float* dst_ptr = dst.ptr<float>();
float* workspace_ptr = workspace.ptr<float>();
if (FW == 3) {
if (m == 2) {
if (pack_c_size == 1) {
DISPATCH(winograd_2x3_4x4_f, param::Winograd::Format::MK4,
0, 0);
} else if (pack_c_size == 4) {
DISPATCH(winograd_F23_mk4_f_nchw44,
param::Winograd::Format::MK4, 0, 5);
}
} else if (m == 6) {
DISPATCH(winograd_6x3_1x1_f, param::Winograd::Format::DEFAULT,
0, 1);
if (pack_c_size == 1) {
DISPATCH(winograd_6x3_4x4_f, param::Winograd::Format::MK4,
0, 2);
} else if (pack_c_size == 4) {
DISPATCH(winograd_F63_mk4_f_nchw44,
param::Winograd::Format::MK4, 0, 6);
}
} else if (m == 7) {
megdnn_assert(pack_c_size == 4, "WINOGRAD F(7,3) Only Supports NCHW44");
DISPATCH(winograd_F73_mk4_f_nchw44,
param::Winograd::Format::MK4, 0, 7);
}
} else if (FW == 4) {
if (m == 5) {
DISPATCH(winograd_5x4_1x1_f, param::Winograd::Format::DEFAULT,
0, 3);
}
} else if (FW == 5) {
if (m == 4) {
DISPATCH(winograd_4x5_1x1_f, param::Winograd::Format::DEFAULT,
0, 4);
}
}
}
if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
const dt_int8* src_ptr = src.compatible_ptr<dt_int8>();
if (param().compute_mode == param::ConvBias::ComputeMode::DEFAULT) {
dt_int16* dst_ptr = dst.compatible_ptr<dt_int16>();
dt_int16* workspace_ptr = workspace.ptr<dt_int16>();
if (FW == 3) {
if (m == 2) {
if (pack_c_size == 1) {
DISPATCH(winograd_2x3_8x8_s8,
param::Winograd::Format::MK8, 1, 0);
} else if (pack_c_size == 4) {
DISPATCH(winograd_2x3_8x8_s8_nchw44,
param::Winograd::Format::MK8, 1, 0);
}else{
megdnn_throw("only support pack_c_size = 1 or 4");
}
}
}
} else {
dt_int32* dst_ptr_tmp = dst.compatible_ptr<dt_int32>();
dt_int32* workspace_ptr_tmp = workspace.ptr<dt_int32>();
float* dst_ptr = reinterpret_cast<float*>(dst_ptr_tmp);
float* workspace_ptr = reinterpret_cast<float*>(workspace_ptr_tmp);
if (pack_c_size == 4) {
if (FW == 3) {
if (m == 2) {
DISPATCH(winograd_2x3_4x4_s8_f32_nchw44,
param::Winograd::Format::MK4, 1, 1);
}
}
} else {
megdnn_throw("only support pack_c_size == 4");
}
}
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
if (src.layout.dtype.enumv() == DTypeEnum::Float16) {
const dt_float16* src_ptr = src.ptr<dt_float16>();
dt_float16* dst_ptr = dst.ptr<dt_float16>();
dt_float16* workspace_ptr = workspace.ptr<dt_float16>();
if (FW == 3) {
if (m == 2) {
DISPATCH(winograd_2x3_4x4_f16, param::Winograd::Format::DEFAULT,
2, 0);
DISPATCH(winograd_2x3_8x8_f16, param::Winograd::Format::MK8, 2,
1);
} else if (m == 6) {
DISPATCH(winograd_6x3_1x1_f16, param::Winograd::Format::DEFAULT,
2, 2);
}
} else if (FW == 5) {
if (m == 4) {
DISPATCH(winograd_4x5_1x1_f16, param::Winograd::Format::DEFAULT,
2, 3);
}
}
}
#endif
#undef DISPATCH

megdnn_assert(execed,
"Unsupport winograd filter preprocess. m: %zu src: %s", m,
src.layout.to_string().c_str());
}

// vim: syntax=cpp.doxygen

+ 0
- 28
dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h View File

@@ -1,28 +0,0 @@
/**
* \file dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/common/utils.h"

namespace megdnn {
namespace arm_common {

class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess {
public:
using WinogradFilterPreprocess::WinogradFilterPreprocess;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
};

} // namespace arm_common
} // namespace megdnn

// vim: syntax=cpp.doxygen

+ 5
- 145
dnn/src/common/conv_bias.cpp View File

@@ -35,37 +35,11 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
const TensorLayout& bias, const TensorLayout& z,
const TensorLayout& dst, size_t workspace_in_bytes,
const PreprocessedFilter* preprocessed_filter) {
if ((param().format == param::ConvBias::Format::NCHW_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD) &&
src.dtype.category() == DTypeCategory::QUANTIZED) {
megdnn_assert(filter.dtype.enumv() == DTypeEnum::QuantizedS16 ||
//!int8 winogradf23_44 using float,QuantizedS32 take the scale
filter.dtype.enumv() == DTypeEnum::QuantizedS32);
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 ||
src.dtype.enumv() == DTypeEnum::Quantized8Asymm);
} else {
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv());
}
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv());
if (src.dtype.enumv() == DTypeEnum::QuantizedS8) {
if (bias.dtype.enumv() == DTypeEnum::QuantizedS32) {
float scale_src = src.dtype.param<dtype::QuantizedS8>().scale;
float scale_filter = 0.f;
if (param().format == param::ConvBias::Format::NCHW_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD) {
if (filter.dtype.enumv() == DTypeEnum::QuantizedS32) {
//! int8 winogradf23_44 using float,QuantizedS32 take the
//! scale
scale_filter =
filter.dtype.param<dtype::QuantizedS32>().scale;
} else {
scale_filter =
filter.dtype.param<dtype::QuantizedS16>().scale;
}
} else {
scale_filter = filter.dtype.param<dtype::QuantizedS8>().scale;
}
float scale_filter = filter.dtype.param<dtype::QuantizedS8>().scale;
float scale_bias = bias.dtype.param<dtype::QuantizedS32>().scale;
megdnn_assert(
std::abs(scale_src * scale_filter - scale_bias) < 1e-6,
@@ -77,15 +51,8 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
} else if (src.dtype.enumv() == DTypeEnum::Quantized8Asymm) {
if (bias.dtype.enumv() == DTypeEnum::QuantizedS32) {
float scale_src = src.dtype.param<dtype::Quantized8Asymm>().scale;
float scale_filter = 0.f;
if (param().format == param::ConvBias::Format::NCHW_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD) {
scale_filter = filter.dtype.param<dtype::QuantizedS16>().scale;
} else {
scale_filter =
filter.dtype.param<dtype::Quantized8Asymm>().scale;
}
float scale_filter =
filter.dtype.param<dtype::Quantized8Asymm>().scale;
float scale_bias = bias.dtype.param<dtype::QuantizedS32>().scale;
megdnn_assert(
std::abs(scale_src * scale_filter - scale_bias) < 1e-6,
@@ -115,7 +82,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
if (check_eq(bias, dst))
return ret;
if (param().format == param::ConvBias::Format::NCHW ||
param().format == param::ConvBias::Format::NCHW_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW4_NCHW) {
megdnn_assert(bias.shape[0] == 1);
megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s",
@@ -131,7 +97,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
} else if (param().format == param::ConvBias::Format::NCHW4 ||
param().format == param::ConvBias::Format::NCHW44 ||
param().format == param::ConvBias::Format::NCHW44_DOT ||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD ||
param().format == param::ConvBias::Format::NCHW32_NCHW4) {
megdnn_assert(bias.shape[0] == 1);
megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s",
@@ -140,8 +105,7 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
megdnn_assert(bias.shape[3] == 1);
megdnn_assert(bias.shape[4] == 4);
} else if (param().format == param::ConvBias::Format::NCHW8 ||
param().format == param::ConvBias::Format::NCHW88 ||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD) {
param().format == param::ConvBias::Format::NCHW88 ) {
megdnn_assert(bias.shape[0] == 1);
megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s",
bias.to_string().c_str(), dst.to_string().c_str());
@@ -175,11 +139,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
}

if (z.ndim != 0) {
megdnn_assert(param().format != param::ConvBias::Format::NCHW_WINOGRAD);
megdnn_assert(param().format !=
param::ConvBias::Format::NCHW88_WINOGRAD);
megdnn_assert(param().format !=
param::ConvBias::Format::NCHW44_WINOGRAD);
megdnn_assert(param().format != param::ConvBias::Format::NCHW4_NCHW32);
megdnn_assert(param().format != param::ConvBias::Format::NCHW32_NCHW4);
megdnn_assert(z.dtype.enumv() == dst.dtype.enumv());
@@ -187,105 +146,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
}
return ret;
}
/*!
* \brief deduce the origin filter layout and param after winograd transformed
*/
void ConvBiasForward::deduce_winograd_origin_layout_and_param(
const Param::Format format, const size_t output_block_size,
const TensorLayout& src_layout,
const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout,
Param& origin_param) {
if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD ||
format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD ||
format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
//! change NCHWxx_WINOGRAD to NCHWxx
size_t OC = 0;
size_t IC = 0;
size_t GROUP = 1;
size_t FH = winograd_filter_layout[1] - output_block_size + 1;

//! {alpha, alpha, IC, OC}
if (winograd_filter_layout.ndim == 4) {
OC = winograd_filter_layout[3];
IC = winograd_filter_layout[2];
}
//! {group, alpha, alpha, IC, OC}
else if (winograd_filter_layout.ndim == 5) {
OC = winograd_filter_layout[4];
IC = winograd_filter_layout[3];
GROUP = winograd_filter_layout[0];
}
//! {alpha, alpha, OC/f, IC/f, f, f}
else if (winograd_filter_layout.ndim == 6) {
OC = winograd_filter_layout[2] * winograd_filter_layout[5];
IC = winograd_filter_layout[3] * winograd_filter_layout[4];
}
//! {group, alpha, alpha, OC/f, IC/f, f, f}
else if (winograd_filter_layout.ndim == 7) {
OC = winograd_filter_layout[3] * winograd_filter_layout[6];
IC = winograd_filter_layout[4] * winograd_filter_layout[5];
GROUP = winograd_filter_layout[0];
}
auto origin_data_type = winograd_filter_layout.dtype;
if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) {
float scale =
origin_data_type.param<dtype::QuantizedS16>().scale;
origin_data_type = megdnn::dtype::QuantizedS8(scale);
} else {
//! In order to bring the scale of filter, the transformed
//! qint8 winograd filter computing with float dtype is Qint32
megdnn_assert(origin_data_type.enumv() ==
DTypeEnum::QuantizedS32);
float scale =
origin_data_type.param<dtype::QuantizedS32>().scale;
origin_data_type = megdnn::dtype::QuantizedS8(scale);
}
}

if (GROUP == 1) {
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
origin_layout =
TensorLayout({OC, IC, FH, FH}, origin_data_type);
} else if (format ==
megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4},
origin_data_type);
} else {
megdnn_assert(format ==
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8},
origin_data_type);
}
} else {
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
origin_layout =
TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type);
} else if (format ==
megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
origin_layout =
TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4},
origin_data_type);
} else {
megdnn_assert(format ==
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
origin_layout =
TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8},
origin_data_type);
}
}
origin_param.output_block_size = 0;
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
origin_param.format = megdnn::param::ConvBias::Format::NCHW;
} else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
origin_param.format = megdnn::param::ConvBias::Format::NCHW44;
} else {
megdnn_assert(format ==
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
origin_param.format = megdnn::param::ConvBias::Format::NCHW88;
}
}
}

template <typename T>
struct NCHWParamTrait;


+ 20
- 132
dnn/src/common/convolution.cpp View File

@@ -41,36 +41,12 @@ uint32_t spatial_getter(uint32_t filter, const Param&) {
return filter;
}

template <>
uint32_t
spatial_getter<param::ConvBias, param::ConvBias::Format::NCHW_WINOGRAD>(
uint32_t filter, const param::ConvBias& param) {
//! f = m + r - 1 -> r = f + 1 - m
return filter - param.output_block_size + 1;
}

template <>
uint32_t
spatial_getter<param::ConvBias, param::ConvBias::Format::NCHW88_WINOGRAD>(
uint32_t filter, const param::ConvBias& param) {
//! f = m + r - 1 -> r = f + 1 - m
return filter - param.output_block_size + 1;
}
template <>
uint32_t
spatial_getter<param::ConvBias, param::ConvBias::Format::NCHW44_WINOGRAD>(
uint32_t filter, const param::ConvBias& param) {
//! f = m + r - 1 -> r = f + 1 - m
return filter - param.output_block_size + 1;
}

template <typename Parameter, typename Param>
void make_canonized_filter_meta_nchw_nhwc(
size_t src_ndim, const TensorLayout& filter, const Param& param,
typename ConvolutionBase<Parameter>::CanonizedFilterMeta& ret) {
megdnn_assert(param.format == Param::Format::NCHW ||
param.format == Param::Format::NHWC ||
param.format == Param::Format::NCHW_WINOGRAD);
param.format == Param::Format::NHWC );
auto img_ndim = src_ndim - 2;
size_t flt_start, flt_spatial_start, ocpg_pos, icpg_pos;
if (param.sparse == Param::Sparse::DENSE) {
@@ -101,20 +77,6 @@ void make_canonized_filter_meta_nchw_nhwc(
flt_spatial_start = 2;
ocpg_pos = 0;
icpg_pos = 1;
} else if (param.format == Param::Format::NCHW_WINOGRAD) {
// filter should be (alphah, alphaw, ic, oc) or (alphah, alphaw, ocb,
// icb, ic_block_size, oc_block_size)
flt_spatial_start = 0;
if (filter.ndim == flt_start + 4) {
ocpg_pos = 3;
icpg_pos = 2;
} else {
megdnn_assert(filter.ndim == flt_start + 6);
ic_block_size = filter[flt_start + 4];
oc_block_size = filter[flt_start + 5];
ocpg_pos = 2;
icpg_pos = 3;
}
} else {
megdnn_assert(param.format == Param::Format::NHWC,
"invalid conv tensor format");
@@ -136,14 +98,8 @@ void make_canonized_filter_meta_nchw_nhwc(
megdnn_assert(dilation[i] > 0,
"invalid dilation on spatial dim %zu: %u", i,
dilation[i]);
if (param.format == Param::Format::NCHW_WINOGRAD) {
ret.spatial[i] =
spatial_getter<Param, Param::Format::NCHW_WINOGRAD>(
filter[i + flt_start + flt_spatial_start], param);
} else {
ret.spatial[i] = spatial_getter<Param, Param::Format::NCHW>(
filter[i + flt_start + flt_spatial_start], param);
}
ret.spatial[i] = spatial_getter<Param, Param::Format::NCHW>(
filter[i + flt_start + flt_spatial_start], param);
ret.dilated_spatial[i] = (ret.spatial[i] - 1) * dilation[i] + 1;
}
}
@@ -295,20 +251,12 @@ void make_canonized_filter_meta_nchwxx(
* FH, FW, pack_size(IC), pack_size(OC)} [group]
* {GROUP/pack_size, 1, 1, FH, FW, pack_size} [chan]
*
** NCHW88_WINOGRAD and NCHW44_WINOGRAD mode
* filter:
* {alpha, alpha, OC/pack_size, IC/pack_size, pack_size(IC),
*pack_size(OC)} [dense]
* {GROUP, alpha, alpha, OC_PER_GROUP/pack_size,
* IC_PER_GROUP/pack_size, pack_size(IC), pack_size(OC)} [group]
*
*/

megdnn_assert(param.format == Param::Format::NCHW88 ||
param.format == Param::Format::NCHW44 ||
param.format == Param::Format::NCHW44_WINOGRAD ||
param.format == Param::Format::NCHW44_DOT ||
param.format == Param::Format::NCHW88_WINOGRAD);
param.format == Param::Format::NCHW44_DOT);
size_t img_ndim = 2;
size_t flt_start = 0;
size_t flt_spatial_start = 2;
@@ -325,10 +273,6 @@ void make_canonized_filter_meta_nchwxx(
filter[filter.ndim - 1]);
ret.group = 1;
flt_start = 0;
if (param.format == Param::Format::NCHW88_WINOGRAD ||
param.format == Param::Format::NCHW44_WINOGRAD) {
flt_start = 2;
}
if (filter[filter.ndim - 2] == 2 * pack_size &&
filter[filter.ndim - 1] == 2 * pack_size) {
pack_c_size = 2 * pack_size;
@@ -339,10 +283,6 @@ void make_canonized_filter_meta_nchwxx(
ret.icpg = filter[flt_start + 1] * pack_c_size;
} else if (filter.ndim == img_ndim + 3) {
// ohwi8o
megdnn_assert(param.format != Param::Format::NCHW88_WINOGRAD,
"Hybrid nchw88 mode in not support winograd");
megdnn_assert(param.format != Param::Format::NCHW44_WINOGRAD,
"Hybrid nchw44 mode in not support winograd");
flt_start = 0;
flt_spatial_start = 1;
ret.group = 1;
@@ -357,15 +297,9 @@ void make_canonized_filter_meta_nchwxx(
megdnn_assert(param.sparse == Param::Sparse::GROUP,
"invalid convolution sparse type");
flt_start = 1;
if (param.format == Param::Format::NCHW88_WINOGRAD ||
param.format == Param::Format::NCHW44_WINOGRAD) {
flt_start = 3;
}
auto filter_oc = filter[flt_start];
auto filter_ic = filter[flt_start + 1];
if (filter_oc == 1 && filter_ic == 1 && filter.ndim == (img_ndim + 4) &&
param.format != Param::Format::NCHW88_WINOGRAD &&
param.format != Param::Format::NCHW44_WINOGRAD) {
if (filter_oc == 1 && filter_ic == 1 && filter.ndim == (img_ndim + 4)) {
// Depthwise case goihw8g
megdnn_assert(filter.ndim == img_ndim + 4,
"bad filter ndim for group convolution: "
@@ -416,17 +350,7 @@ void make_canonized_filter_meta_nchwxx(
"NCHWXX has invalid dilation on spatial dim %zu: %u, "
"require to be 1",
i, dilation[i]);
if (param.format == Param::Format::NCHW88_WINOGRAD) {
ret.spatial[i] =
spatial_getter<Param, Param::Format::NCHW88_WINOGRAD>(
filter[i + flt_start - 2], param);
} else if (param.format == Param::Format::NCHW44_WINOGRAD) {
ret.spatial[i] =
spatial_getter<Param, Param::Format::NCHW44_WINOGRAD>(
filter[i + flt_start - 2], param);
} else {
ret.spatial[i] = filter[i + flt_start + flt_spatial_start];
}
ret.spatial[i] = filter[i + flt_start + flt_spatial_start];
ret.dilated_spatial[i] = (ret.spatial[i] - 1) * dilation[i] + 1;
}
}
@@ -579,13 +503,11 @@ ConvolutionBase<Parameter>::make_canonized_filter_meta(
} else if (param().format == Param::Format::NCHW8) {
make_canonized_filter_meta_nchwx<8, Parameter>(src_ndim, filter,
param(), ret);
} else if (param().format == Param::Format::NCHW88 ||
param().format == Param::Format::NCHW88_WINOGRAD) {
} else if (param().format == Param::Format::NCHW88) {
make_canonized_filter_meta_nchwxx<8, Parameter>(src_ndim, filter,
param(), ret);
} else if (param().format == Param::Format::NCHW44 ||
param().format == Param::Format::NCHW44_DOT ||
param().format == Param::Format::NCHW44_WINOGRAD) {
param().format == Param::Format::NCHW44_DOT) {
make_canonized_filter_meta_nchwxx<4, Parameter>(src_ndim, filter,
param(), ret);
} else if (param().format == Param::Format::NCHW32 ||
@@ -597,8 +519,7 @@ ConvolutionBase<Parameter>::make_canonized_filter_meta(
param(), ret);
} else {
megdnn_assert(param().format == Param::Format::NHWC ||
param().format == Param::Format::NCHW ||
param().format == Param::Format::NCHW_WINOGRAD);
param().format == Param::Format::NCHW);
make_canonized_filter_meta_nchw_nhwc<Parameter>(src_ndim, filter,
param(), ret);
}
@@ -619,17 +540,8 @@ void ConvolutionBase<Parameter>::check_or_deduce_dtype_fwd(DType src,
} else if (src.enumv() == DTypeEnum::QuantizedS8 ||
src.enumv() == DTypeEnum::Quantized8Asymm ||
src.enumv() == DTypeEnum::Quantized4Asymm) {
//! Qint8 winograd compute with float, in order to bringing the filter
//! scale, here just use QuantizedS32 as filter type.
if (src.enumv() == DTypeEnum::QuantizedS8 &&
filter.enumv() == DTypeEnum::QuantizedS32) {
supported_dst_dtype.push_back(dtype::QuantizedS32(
src.param<dtype::QuantizedS8>().scale *
filter.param<dtype::QuantizedS32>().scale));
} else {
supported_dst_dtype.push_back(
dtype::QuantizedS32(mul_scale(src, filter)));
}
supported_dst_dtype.push_back(
dtype::QuantizedS32(mul_scale(src, filter)));
if (dst.valid() && dst.enumv() == src.enumv()) {
supported_dst_dtype.push_back(dst);
}
@@ -681,24 +593,12 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
megdnn_assert_contiguous(src);
megdnn_assert_contiguous(filter);
megdnn_assert(src.ndim >= 3_z, "%s", errmsg().c_str());
if ((param().format == Param::Format::NCHW_WINOGRAD ||
param().format == Param::Format::NCHW44_WINOGRAD) &&
src.dtype.category() == DTypeCategory::QUANTIZED) {
megdnn_assert((filter.dtype.enumv() == DTypeEnum::QuantizedS16 ||
filter.dtype.enumv() == DTypeEnum::QuantizedS32),
"%s", errmsg().c_str());
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 ||
src.dtype.enumv() == DTypeEnum::Quantized8Asymm,
"%s", errmsg().c_str());
} else {
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv(), "%s",
errmsg().c_str());
}
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv(), "%s",
errmsg().c_str());
check_or_deduce_dtype_fwd(src.dtype, filter.dtype, dst.dtype);
size_t img_dim;
if (param().format == Param::Format::NCHW ||
param().format == Param::Format::NHWC ||
param().format == Param::Format::NCHW_WINOGRAD) {
param().format == Param::Format::NHWC) {
img_dim = src.ndim - 2;
megdnn_assert(filter.ndim >= img_dim + 2 && filter.ndim <= img_dim + 6,
"%s", errmsg().c_str());
@@ -714,8 +614,6 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
param().format == Param::Format::NCHW32 ||
param().format == Param::Format::NCHW32_NCHW4 ||
param().format == Param::Format::NCHW88 ||
param().format == Param::Format::NCHW88_WINOGRAD ||
param().format == Param::Format::NCHW44_WINOGRAD ||
param().format == Param::Format::CHWN4);
img_dim = src.ndim - 3;
if ((param().format == Param::Format::NCHW88 ||
@@ -770,8 +668,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
"but got src %s, filter %s",
src.to_string().c_str(), filter.to_string().c_str());
}
if (param().format == Param::Format::NCHW88 ||
param().format == Param::Format::NCHW88_WINOGRAD) {
if (param().format == Param::Format::NCHW88) {
megdnn_assert((src.ndim == 4 && filter.ndim == 5 &&
filter[filter.ndim - 1] == 8) ||
(src.ndim == 5 &&
@@ -786,8 +683,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
src.to_string().c_str(), filter.to_string().c_str());
}
if (param().format == Param::Format::NCHW44 ||
param().format == Param::Format::NCHW44_DOT ||
param().format == Param::Format::NCHW44_WINOGRAD) {
param().format == Param::Format::NCHW44_DOT) {
//! support nchw44 filter change to 88 for int8 winograd_f23_88 using MK8 matmul
megdnn_assert((src.ndim == 4 && filter.ndim == 5 &&
filter[filter.ndim - 1] == 4) ||
@@ -820,12 +716,10 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
"currently only convolution on 2D image is supported");
auto cflt = make_canonized_filter_meta(src.ndim, filter);
if (param().format == Param::Format::NCHW ||
param().format == Param::Format::NHWC ||
param().format == Param::Format::NCHW_WINOGRAD) {
param().format == Param::Format::NHWC ) {
size_t src_or_dst_c_pos = 0;
size_t src_or_dst_spatial_start = 0;
if (param().format == Param::Format::NCHW ||
param().format == Param::Format::NCHW_WINOGRAD) {
if (param().format == Param::Format::NCHW) {
src_or_dst_c_pos = 1;
src_or_dst_spatial_start = 2;
} else {
@@ -836,10 +730,6 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
}
megdnn_assert(cflt.icpg * cflt.group == src[src_or_dst_c_pos], "%s",
errmsg().c_str());
if (param().format == Param::Format::NCHW_WINOGRAD) {
megdnn_assert(cflt.spatial[0] == cflt.spatial[1],
"NCHW_WINOGRAD only support conv with fh == fw");
}
dst.ndim = src.ndim;
dst[0] = src[0];
dst[src_or_dst_c_pos] = cflt.ocpg * cflt.group;
@@ -900,8 +790,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
dst[3] = infer_conv_shape(src[3], cflt.dilated_spatial[1],
cflt.stride[1], cflt.padding[1]);
dst[4] = 32;
} else if (param().format == Param::Format::NCHW88 ||
param().format == Param::Format::NCHW88_WINOGRAD) {
} else if (param().format == Param::Format::NCHW88 ) {
megdnn_assert(src.ndim == 5 || (src.ndim == 4 && src[1] <= 8),
"invalid src ndim for NCHW88, expected=5 or 4, got=%zu",
src.ndim);
@@ -923,8 +812,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src,
}

} else if (param().format == Param::Format::NCHW44 ||
param().format == Param::Format::NCHW44_DOT ||
param().format == Param::Format::NCHW44_WINOGRAD) {
param().format == Param::Format::NCHW44_DOT) {
megdnn_assert(src.ndim == 5 || (src.ndim == 4 && src[1] <= 4),
"invalid src ndim for NCHW44, expected=5 or 4, got=%zu",
src.ndim);


+ 0
- 1
dnn/src/common/handle_impl.h View File

@@ -189,7 +189,6 @@ private:
cb(RelayoutFormat) \
cb(TopK) \
cb(PowC) \
cb(WinogradFilterPreprocess) \
cb(LocalShareForward) \
cb(LocalShareBackwardData) \
cb(LocalShareBackwardFilter) \


+ 0
- 157
dnn/src/common/winograd_filter_preprocess.cpp View File

@@ -1,157 +0,0 @@
/**
* \file dnn/src/common/winograd_filter_preprocess.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/oprs.h"

#include <numeric>
#include "src/common/utils.h"

using namespace megdnn;
void WinogradFilterPreprocess::deduce_layout(const TensorLayout& src,
TensorLayout& dst) {
auto errmsg = [&]() {
return "invalid filter layout:" + megdnn_layout_msg(src);
};
MEGDNN_MARK_USED_VAR(errmsg);
//! NCHW88 weight layout include
//! dense{oc/8, ic/8, fh, fw, 8, 8}; group {g, oc/8, ic/8, fh, fw, 8, 8};
//! channel wise{g/8, 1, 1, fh, fw, 8}
megdnn_assert(
src.ndim == 4 || src.ndim == 5 || src.ndim == 6 || src.ndim == 7,
"%s", errmsg().c_str());
//! nchw88 channel wise conv
megdnn_assert(!(src.ndim == 6 && src[1] == 1 && src[2] == 1),
"chennel wise nchw88 can not use winograd ");
//! nchw88 group conv
size_t flt_start = 0;
size_t pack_c_size = 1;
size_t group = 1;
//! group conv
if (src.ndim == 5) {
flt_start = 1;
group = src[0];
//! nchw88 dense conv
} else if (src.ndim == 6) {
pack_c_size = src[5];
//! nchw88 group conv
} else if (src.ndim == 7) {
flt_start = 1;
group = src[0];
pack_c_size = src[6];
}
size_t OC = src[flt_start] * pack_c_size,
IC = src[flt_start + 1] * pack_c_size, FH = src[flt_start + 2],
FW = src[flt_start + 3];
size_t m = param().output_block_size;
megdnn_assert(FH == FW, "%s", errmsg().c_str());

size_t alpha = FH + m - 1;
DType dst_type = src.dtype;
if (src.dtype.category() == DTypeCategory::QUANTIZED) {
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8);
if (param().compute_mode ==
param::ConvBias::ComputeMode::DEFAULT) {
//! input int8 compute short
dst_type = dtype::QuantizedS16(
src.dtype.param<dtype::QuantizedS8>().scale);
} else {
//! input int8 compute float32
dst_type = dtype::QuantizedS32(
src.dtype.param<dtype::QuantizedS8>().scale);
}
}

if (src.ndim == 4 || src.ndim == 6) {
if (param().format == param::Winograd::Format::DEFAULT) {
dst = TensorLayout({alpha, alpha, IC, OC}, dst_type);
} else {
megdnn_assert(param().format == param::Winograd::Format::MK4 ||
param().format == param::Winograd::Format::MK8);
size_t pack_size = MatrixMulForward::pack_size(param().format);
dst = TensorLayout({alpha, alpha, OC / pack_size, IC / pack_size,
pack_size, pack_size},
dst_type);
}
} else {
megdnn_assert(src.ndim == 5 || src.ndim == 7);
if (param().format == param::Winograd::Format::DEFAULT) {
dst = TensorLayout({group, alpha, alpha, IC, OC}, dst_type);
} else {
megdnn_assert(param().format == param::Winograd::Format::MK4 ||
param().format == param::Winograd::Format::MK8);
size_t pack_size = MatrixMulForward::pack_size(param().format);
dst = TensorLayout({group, alpha, alpha, OC / pack_size,
IC / pack_size, pack_size, pack_size},
dst_type);
}
}
}

void WinogradFilterPreprocess::check_exec(const TensorLayout& src,
const TensorLayout& dst,
size_t workspace_in_bytes) {
auto errmsg = [&]() {
return megdnn_layout_msg(src) + ", " + megdnn_layout_msg(dst);
};
MEGDNN_MARK_USED_VAR(errmsg);
megdnn_assert_contiguous(src);
megdnn_assert_contiguous(dst);
//! nchwxx now only support Format MKx
if (param().format == param::Winograd::Format::DEFAULT) {
megdnn_assert(src.ndim == dst.ndim && (src.ndim == 4 || src.ndim == 5),
"%s", errmsg().c_str());
} else {
megdnn_assert(
(param().format == param::Winograd::Format::MK4 ||
param().format == param::Winograd::Format::MK8) &&
(src.ndim == dst.ndim - 2 || src.ndim == dst.ndim) &&
(src.ndim == 4 || src.ndim == 5 || src.ndim == 6 ||
src.ndim == 7),
"%s", errmsg().c_str());
}

TensorLayout dst_expected;
deduce_layout(src, dst_expected);
megdnn_assert_eq_layout(dst_expected, dst);
auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst);
megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
}

size_t WinogradFilterPreprocess::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst) {
MEGDNN_MARK_USED_VAR(dst);
DType output_compute_dtype = src.dtype;
if (src.dtype.category() == DTypeCategory::QUANTIZED) {
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 ||
src.dtype.enumv() == DTypeEnum::Quantized8Asymm);
if (param().compute_mode ==
param::ConvBias::ComputeMode::DEFAULT) {
//! input int8 compute short
output_compute_dtype = dtype::QuantizedS16(
src.dtype.param<dtype::QuantizedS8>().scale);
} else {
//! input int8 compute float32
output_compute_dtype = dtype::QuantizedS32(
src.dtype.param<dtype::QuantizedS8>().scale);
}
}

size_t FW = src[3];
if (src.ndim == 5 || src.ndim == 7) {
FW = src[4];
}

size_t pack_size = MatrixMulForward::pack_size(param().format);
size_t alpha = param().output_block_size + FW - 1;
return 2 * alpha * alpha * output_compute_dtype.size() * pack_size *
pack_size;
}

// vim: syntax=cpp.doxygen

+ 0
- 1
dnn/src/cuda/handle_create.cpp View File

@@ -72,7 +72,6 @@
#include "src/cuda/type_cvt/opr_impl.h"
#include "src/cuda/warp_affine/opr_impl.h"
#include "src/cuda/warp_perspective/opr_impl.h"
#include "src/cuda/winograd_filter_preprocess/opr_impl.h"
#include "src/cuda/local_share/opr_impl.h"
#include "src/cuda/roi_align/opr_impl.h"
#include "src/cuda/batch_conv_bias/opr_impl.h"


+ 0
- 22
dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp View File

@@ -1,22 +0,0 @@
/**
* \file dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/cuda/winograd_filter_preprocess/opr_impl.h"
#include "src/common/utils.h"

using namespace megdnn;
using namespace cuda;

void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in, _megdnn_tensor_in,
_megdnn_workspace) {
megdnn_throw("WinogradFilterPreprocess is not supported in CUDA");
}

// vim: syntax=cpp.doxygen

+ 0
- 27
dnn/src/cuda/winograd_filter_preprocess/opr_impl.h View File

@@ -1,27 +0,0 @@
/**
* \file dnn/src/cuda/winograd_filter_preprocess/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"

namespace megdnn {
namespace cuda {

class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess {
public:
using WinogradFilterPreprocess::WinogradFilterPreprocess;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
};

} // namespace cuda
} // namespace megdnn

// vim: syntax=cpp.doxygen

+ 4
- 24
dnn/src/fallback/conv_bias/algos.cpp View File

@@ -259,12 +259,7 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -329,12 +324,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK4)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -397,12 +387,7 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
.get_matmul_kern_param(param);

return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::DEFAULT)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -467,12 +452,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK8)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW &&
param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&


+ 3
- 45
dnn/src/fallback/conv_bias/opr_impl.cpp View File

@@ -342,10 +342,7 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
param().format == Param::Format::NCHW4 ||
param().format == Param::Format::NCHW44 ||
param().format == Param::Format::NCHW44_DOT ||
param().format == Param::Format::NCHW ||
param().format == Param::Format::NCHW_WINOGRAD ||
param().format == Param::Format::NCHW88_WINOGRAD ||
param().format == Param::Format::NCHW44_WINOGRAD) {
param().format == Param::Format::NCHW) {
spatial_pos = 2;
} else if (param().format == Param::Format::NHWC) {
spatial_pos = 1;
@@ -370,25 +367,7 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
"should be equal");
auto&& fm = check_layout_fwd(src, filter, dst);
auto& conv_fm = reinterpret_cast<ConvolutionImpl::CanonizedFilterMeta&>(fm);

param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT;
if (param().format == Param::Format::NCHW_WINOGRAD ||
param().format == Param::Format::NCHW88_WINOGRAD ||
param().format == Param::Format::NCHW44_WINOGRAD) {
size_t flt_start = 0;
if (param().sparse == Param::Sparse::GROUP) {
flt_start = 1;
}

if (filter.ndim == 6 + flt_start) {
if (filter[5] == 4) {
format = param::MatrixMul::Format::MK4;
} else {
megdnn_assert(filter[5] == 8);
format = param::MatrixMul::Format::MK8;
}
}
}
size_t nr_threads = static_cast<naive::HandleImpl*>(handle())
->megcore_dispatcher()
->nr_threads();
@@ -407,8 +386,6 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
nr_threads,
reinterpret_cast<const ConvolutionForward::PreprocessedFilter*>(
preprocessed_filter)},
param().output_block_size,
format,
bias.dtype,
bias.stride[0],
bias_mode,
@@ -537,11 +514,7 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order(
auto FH = param.filter_meta.spatial[0];
auto FW = param.filter_meta.spatial[1];
//! TODO: now winograd only support in fast-run
if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD ||
param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD ||
param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) {
return {AlgoCategory::WINOGRAD};
}

//! im2col + matmul
bool im2col_prefer = (IC >= 32 || OC >= 32);
//! quantized algo use matmul when direct algo is unusable
@@ -632,21 +605,6 @@ const T* ConvBiasImpl::NCBKernParam::filter(size_t group_pack_id,

break;
}
case ConvBiasImpl::Param::Format::NCHW_WINOGRAD:
case ConvBiasImpl::Param::Format::NCHW44_WINOGRAD:
case ConvBiasImpl::Param::Format::NCHW88_WINOGRAD: {
//! four format of weight layout
//! 1. {g, alpha, alpha, ocpg/8, icpg/8, 8, 8}
//! 2. {alpha, alpha, ocpg/8, icpg/8, 8, 8}
//! 3. {g, alpha, alpha, oc, ic, 8, 8}
//! 4. {alpha, alpha, oc, ic}
group_offset = pack_group_size * group_pack_id * filter_meta.icpg *
filter_meta.ocpg *
(filter_meta.spatial[0] + output_block_size - 1) *
(filter_meta.spatial[1] + output_block_size - 1) *
filter_type.size();
break;
}
default:
megdnn_assert(0, "other filter format is not support yet");
}


+ 0
- 6
dnn/src/fallback/conv_bias/opr_impl.h View File

@@ -103,19 +103,13 @@ public:
struct NCBKernSizeParam : ConvolutionImpl::NCBKernSizeParam {
NCBKernSizeParam() = default;
NCBKernSizeParam(const ConvolutionImpl::NCBKernSizeParam& param,
size_t output_block_size,
param::MatrixMul::Format winograd_matmul_format,
DType bias_type, ptrdiff_t bias_bs, BiasMode bias_mode,
Param::NonlineMode nonlineMode)
: ConvolutionImpl::NCBKernSizeParam(param),
output_block_size{output_block_size},
winograd_matmul_format{winograd_matmul_format},
bias_type{bias_type},
bias_bs{bias_bs},
bias_mode{bias_mode},
nonlineMode{nonlineMode} {}
size_t output_block_size; //!< used in winograd algo
param::MatrixMul::Format winograd_matmul_format;
DType bias_type;
//! stride for batch of bias
ptrdiff_t bias_bs;


+ 3
- 21
dnn/src/fallback/conv_bias/winograd/winograd.h View File

@@ -88,13 +88,7 @@ class ConvBias {
size_t filter_transform_buf_size = 0;
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE,
//! OC_BLOCK_SIZE)
if (param.preprocessed_filter == nullptr &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW_WINOGRAD &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW88_WINOGRAD &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW44_WINOGRAD) {
if (param.preprocessed_filter == nullptr) {
filter_transform_buf_size = Strategy::ALPHA * Strategy::ALPHA * OC *
IC * sizeof(input_filter_compute_type);
}
@@ -108,12 +102,7 @@ class ConvBias {
nullptr,
{winograd_comput_size, filter_transform_buf_size * GROUP});
} else {
megdnn_assert(param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD ||
param.filter_meta.format ==
param::ConvBias::Format::NCHW88_WINOGRAD ||
param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD);
megdnn_assert(param.preprocessed_filter != nullptr);
return WorkspaceBundle(nullptr, {winograd_comput_size});
}
}
@@ -499,7 +488,6 @@ public:
const TensorND& preprocessed_dst =
param.preprocessed_filter->tensors[0];
WorkspaceBundle bundle = get_preprocess_wbundle(param);

Strategy strategy = m_strategy;
SmallVector<NCBKern> kerns;
auto filter_process_kern =
@@ -558,13 +546,7 @@ public:
param.filter_meta.stride[1] == 1 &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
param.filter_meta.format == param::ConvBias::Format::NCHW44 ||
param.filter_meta.format ==
param::ConvBias::Format::NCHW_WINOGRAD ||
param.filter_meta.format ==
param::ConvBias::Format::NCHW88_WINOGRAD ||
param.filter_meta.format ==
param::ConvBias::Format::NCHW44_WINOGRAD));
param.filter_meta.format == param::ConvBias::Format::NCHW44));

SmallVector<NCBKern> kerns;
if (param.preprocessed_filter == nullptr &&


+ 0
- 2
dnn/src/fallback/convolution/algos.cpp View File

@@ -316,8 +316,6 @@ ConvolutionImpl::AlgoDefault::init_conv_bias_param(
mul_scale(param.src_type, param.filter_type));
}
return {param,
0,
param::MatrixMul::Format::DEFAULT,
bias_type,
0,
BiasMode::NO_BIAS,


+ 1
- 2
dnn/src/fallback/convolution/opr_impl.cpp View File

@@ -225,8 +225,7 @@ ConvolutionImpl::NCBKernSizeParam ConvolutionImpl::make_ncb_kern_size_param(
param().format == Param::Format::NCHW44_DOT ||
param().format == Param::Format::NCHW44) {
spatial_pos = 2;
} else if (param().format == Param::Format::NCHW ||
param().format == Param::Format::NCHW_WINOGRAD) {
} else if (param().format == Param::Format::NCHW) {
spatial_pos = 2;
} else if (param().format == Param::Format::NHWC) {
spatial_pos = 1;


+ 0
- 1
dnn/src/naive/handle.cpp View File

@@ -78,7 +78,6 @@
#include "src/naive/type_cvt/opr_impl.h"
#include "src/naive/warp_affine/opr_impl.h"
#include "src/naive/warp_perspective/opr_impl.h"
#include "src/naive/winograd_filter_preprocess/opr_impl.h"
#include "src/naive/remap/opr_impl.h"
#include "src/naive/fake_quant/opr_impl.h"



+ 0
- 234
dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp View File

@@ -1,234 +0,0 @@
/**
* \file dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "src/naive/winograd_filter_preprocess/opr_impl.h"
#include "src/common/utils.h"
#include "src/common/winograd/winograd_helper.h"
#include "src/naive/handle.h"

#include "midout.h"
MIDOUT_DECL(megdnn_naive_winograd_filter_preprocess)

using namespace megdnn;
using namespace naive;

/**
 * Naive (reference) implementation of Winograd filter preprocessing.
 *
 * Transforms the convolution filter tensor `src` into the Winograd domain
 * and writes the transformed filter to `dst`.  Dispatch happens on:
 *   - filter layout rank (dense vs. group conv, NCHW88/NCHW44 packing),
 *   - source dtype (f32, quantized int8, f16 when compiled in),
 *   - filter width FW and output block size m (param().output_block_size).
 * The arithmetic itself is delegated to
 * winograd::StrategyHelper<...>::filter(); this function only selects the
 * strategy and iterates over convolution groups.
 */
void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
check_exec(src.layout, dst.layout, workspace.size);
//! nchw88 group conv
// Decode the filter layout from its rank (see the branches below):
//   ndim == 4: dense NCHW filter -> the defaults below apply
//   ndim == 5: group conv, leading dim is the group count
//   ndim == 6: dense packed filter, trailing dim is the channel pack size
//   ndim == 7: group conv with packed channels (group dim + pack dim)
size_t flt_start = 0;
size_t pack_c_size = 1;
size_t group = 1;
//! group conv
if (src.layout.ndim == 5) {
flt_start = 1;
group = src.layout[0];
//! nchw88 dense conv
} else if (src.layout.ndim == 6) {
pack_c_size = src.layout[5];
//! nchw88 group conv
} else if (src.layout.ndim == 7) {
flt_start = 1;
group = src.layout[0];
pack_c_size = src.layout[6];
}
// Unpacked per-group channel counts and the spatial filter width.
size_t OC = src.layout[flt_start] * pack_c_size,
IC = src.layout[flt_start + 1] * pack_c_size,
FW = src.layout[flt_start + 3];

// Output tile size m of the Winograd algorithm F(m, FW).
size_t m = param().output_block_size;

// Set by DISPATCH_KERNEL once a matching kernel actually ran; checked at
// the end so unsupported configurations fail loudly.
bool execed = false;

// cb: if param().format matches `_format`, run the NCHW strategy helper on
// the current group's filters.  It is expanded inside the `run` lambda of
// DISPATCH_KERNEL, hence the `return` only exits the lambda.
// `interp_points` is the local vector in scope at the DISPATCH_DTYPE call
// site.
#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::ConvBias::Format::NCHW, \
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \
OC, m, FW, interp_points, src.layout.dtype, \
rescale); \
}

// Try DEFAULT then the MK4 block-packed matmul format.
#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _rescale) \
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \
param::Winograd::Format::DEFAULT, _rescale); \
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \
param::Winograd::Format::MK4, _rescale);

// Try DEFAULT then the MK8 block-packed matmul format.
#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _rescale) \
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \
param::Winograd::Format::DEFAULT, _rescale); \
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \
param::Winograd::Format::MK8, _rescale);

// Dispatch one kernel per convolution group on the CPU dispatcher; the
// capture-by-value lambda snapshots src_ptr/dst_ptr before they are
// advanced by the group stride.  Marks `execed` on success.
#define DISPATCH_KERNEL(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _kern, _rescale, ...) \
const _ctype* src_ptr = src.compatible_ptr<_ctype>(); \
_input_filter_compute_type* dst_ptr = \
dst.compatible_ptr<_input_filter_compute_type>(); \
_input_filter_compute_type* workspace_ptr = \
workspace.ptr<_input_filter_compute_type>(); \
MIDOUT_BEGIN(megdnn_naive_winograd_filter_preprocess, ##__VA_ARGS__) { \
for (size_t g = 0; g < group; g++) { \
auto run = [=]() { \
_kern(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _rescale); \
}; \
MEGDNN_DISPATCH_CPU_KERN_OPR(run()); \
src_ptr += src.layout.stride[0]; \
dst_ptr += dst.layout.stride[0]; \
} \
execed = true; \
} \
MIDOUT_END();

// Per-dtype dispatch for the unpacked NCHW path.  Note the int8 path
// computes the transform in int16/int32 and rescales by 2.0f.
#define DISPATCH_DTYPE(_midout_tag) \
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \
DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \
} \
if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { \
DISPATCH_KERNEL(dt_int8, dt_int8, dt_int16, dt_int32, \
DISPATCH_FORMAT_MK8, 2.0f, _midout_tag, 1); \
} \
MEGDNN_INC_FLOAT16(if (src.layout.dtype.enumv() == DTypeEnum::Float16) { \
DISPATCH_KERNEL(dt_float16, dt_float16, dt_float16, dt_float16, \
DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 2); \
})

// ---- Unpacked NCHW path (dense or plain group conv) ----
// Each supported (FW, m) pair has a fixed set of Winograd interpolation
// points used to build the transform matrices.
if (src.layout.ndim <= 5) {
//! dispatch_dtype with consider layout and format.
if (FW == 3) {
if (m == 2) {
std::vector<float> interp_points = {0, 1, -1};
DISPATCH_DTYPE(0);
} else if (m == 6) {
std::vector<float> interp_points = {0, 1, -1, 2, -2, 0.5, -0.5};
DISPATCH_DTYPE(1);
}
} else if (FW == 4) {
if (m == 5) {
std::vector<float> interp_points = {0, 0.5, -0.5, 1, -1, 2, -2};
DISPATCH_DTYPE(2);
}
} else if (FW == 5) {
if (m == 4) {
std::vector<float> interp_points = {0, 1, -1, 0.5, -0.5, 2, -2};
DISPATCH_DTYPE(3);
}
}
#undef cb
#undef DISPATCH_FORMAT_MK4
#undef DISPATCH_FORMAT_MK8
#undef DISPATCH_DTYPE
} else {
// ---- Packed-channel path (NCHW88 / NCHW44 filters) ----
megdnn_assert(src.layout.ndim == 6 || src.layout.ndim == 7);
// Same shape as the cb above, but instantiated for the NCHW88 layout.
#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::ConvBias::Format::NCHW88, \
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \
OC, m, FW, interp_points, src.layout.dtype, \
rescale); \
}

#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _rescale) \
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \
param::Winograd::Format::MK8, _rescale);

#define DISPATCH_DTYPE(_midout_tag) \
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \
DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 0); \
}
if (pack_c_size == 8) { //! NCHW88
if (FW == 3) {
if (m == 2) {
std::vector<float> interp_points = {0, 1, -1};
DISPATCH_DTYPE(4);
} else if (m == 6) {
std::vector<float> interp_points = {0, 1, -1, 2,
-2, 0.5, -0.5};
DISPATCH_DTYPE(5);
}
}
#undef cb
#undef DISPATCH_DTYPE
}
else if (pack_c_size == 4) { //! NCHW44
// NCHW44 variant of cb; int8 may be computed either in f32 (MK4)
// or in int16/int32 (MK8) depending on param().format.
#define cb(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _format, rescale) \
if (param().format == _format) { \
return winograd::StrategyHelper< \
_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, param::ConvBias::Format::NCHW44, \
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \
OC, m, FW, interp_points, src.layout.dtype, \
rescale); \
}

#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \
_output_compute_type, _rescale) \
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \
param::Winograd::Format::MK4, _rescale);

#define DISPATCH_DTYPE(_midout_tag) \
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \
DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \
} \
if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { \
if (param().format == param::Winograd::Format::MK4) { \
DISPATCH_KERNEL(dt_int8, dt_int8, dt_float32, dt_float32, \
DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \
} else if (param().format == param::Winograd::Format::MK8) { \
DISPATCH_KERNEL(dt_int8, dt_int8, dt_int16, dt_int32, \
DISPATCH_FORMAT_MK8, 2.0f, _midout_tag, 0); \
} \
}
if (FW == 3) {
if (m == 2) {
std::vector<float> interp_points = {0, 1, -1};
DISPATCH_DTYPE(6);
} else if (m == 6) {
std::vector<float> interp_points = {0, 1, -1, 2,
-2, 0.5, -0.5};
DISPATCH_DTYPE(7);
} else if (m == 7) {
std::vector<float> interp_points = {0, 1, -1, 2,
-2, 0.5, -0.5, 1.5};
DISPATCH_DTYPE(8);
}
}
#undef cb
#undef DISPATCH_FORMAT_MK8
#undef DISPATCH_FORMAT_MK4
#undef DISPATCH_KERNEL
#undef DISPATCH_DTYPE
}
}

// If no (dtype, format, m, FW) combination matched, reject the request
// instead of returning uninitialized output.
megdnn_assert(execed,
"Unsupport winograd filter preprocess. m: %zu src: %s", m,
src.layout.to_string().c_str());
}

// vim: syntax=cpp.doxygen

+ 0
- 28
dnn/src/naive/winograd_filter_preprocess/opr_impl.h View File

@@ -1,28 +0,0 @@
/**
* \file dnn/src/naive/winograd_filter_preprocess/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/common/utils.h"

namespace megdnn {
namespace naive {

// Naive (reference) backend operator that rewrites convolution filters into
// the Winograd transform domain.  Construction is inherited unchanged from
// the base WinogradFilterPreprocess operator; only exec() is implemented
// here (layout/dtype dispatch lives in opr_impl.cpp).
class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess {
public:
using WinogradFilterPreprocess::WinogradFilterPreprocess;
// Transform filter `src` into `dst`, using `workspace` as scratch memory.
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
};

} // namespace naive
} // namespace megdnn

// vim: syntax=cpp.doxygen

+ 2
- 12
dnn/src/x86/conv_bias/f32/winograd_algo.cpp View File

@@ -43,12 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW88_WINOGRAD &&
param.output_block_size == 6 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK8)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW88 &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&
@@ -89,12 +84,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
(param.filter_meta.format ==
param::ConvBias::Format::NCHW88_WINOGRAD &&
param.output_block_size == 2 &&
param.winograd_matmul_format ==
param::MatrixMul::Format::MK8)) &&
param.filter_meta.format == param::ConvBias::Format::NCHW88 &&
!param.filter_meta.should_flip &&
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] &&
param.filter_meta.spatial[0] == 3) &&


+ 1
- 5
dnn/src/x86/conv_bias/opr_impl.cpp View File

@@ -173,11 +173,7 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order(
auto FH = param.filter_meta.spatial[0];
auto FW = param.filter_meta.spatial[1];
//! TODO: now winograd only support fast-run
if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD ||
param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD ||
param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) {
return {AlgoCategory::WINOGRAD};
}

//! nchw88 use mkl-dnn which algo is direct
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) {
return {AlgoCategory::DIRECT, AlgoCategory::IM2COL};


+ 130
- 244
dnn/test/arm_common/conv_bias_multi_thread.cpp View File

@@ -629,6 +629,35 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_8x8x32) {

#endif

// Checks the winograd conv-bias algorithms on 3x3 filters for f32 (and f16
// when FP16 vector arithmetic is available), comparing against the checker's
// reference implementation with a per-dtype tolerance.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(3);

    Checker<ConvBiasForward> checker(handle());

    // Run every test case with one fixed dtype configuration.
    auto check_all = [&checker](const std::vector<TestArg>& test_args,
                                DType src_dtype, DType filter_dtype,
                                DType bias_dtype, DType dst_dtype,
                                const float max_err) {
        for (const auto& test_arg : test_args) {
            checker.set_param(test_arg.param)
                    .set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_epsilon(max_err)
                    .execs({test_arg.src, test_arg.filter, test_arg.bias,
                            {},
                            {}});
        }
    };
    check_all(args, dtype::Float32(), dtype::Float32(), dtype::Float32(),
              dtype::Float32(), 1e-3f);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
    checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
    check_all(args, dtype::Float16(), dtype::Float16(), dtype::Float16(),
              dtype::Float16(), 0.35f);
#endif
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args();
@@ -717,207 +746,97 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
check_winograd("1:4:32", checker, args);
}



// Legacy version of the CONV_BIAS_WINOGRAD test: it emulates the two-step
// winograd pipeline by hand (filter preprocess operator + NCHW_WINOGRAD
// conv-bias operator) and installs it as the checker's extra reference
// implementation.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3);

Checker<ConvBiasForward> checker(handle());

// Reference implementation: transform the filter (tensors[1]) into the
// winograd domain, then run the NCHW_WINOGRAD conv-bias on the result.
auto extra_impl = [](const TensorNDArray& tensors, uint32_t m,
param::ConvBias param, Handle* handle) {
megdnn_assert(param.format == param::ConvBias::Format::NCHW);
auto winograd_preprocess_opr =
handle->create_operator<WinogradFilterPreprocess>();
winograd_preprocess_opr->param().output_block_size = m;
TensorLayout filter_transform_layout;
winograd_preprocess_opr->deduce_layout(tensors[1].layout,
filter_transform_layout);
size_t winograd_preprocess_workspace_in_bytes =
winograd_preprocess_opr->get_workspace_in_bytes(
tensors[1].layout, filter_transform_layout);

auto conv_bias_opr = handle->create_operator<ConvBias>();
conv_bias_opr->param() = param;
conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD;
conv_bias_opr->param().output_block_size = m;
size_t conv_bias_workspace_in_bytes =
conv_bias_opr->get_workspace_in_bytes(
tensors[0].layout, filter_transform_layout,
tensors[2].layout, tensors[3].layout, tensors[4].layout,
nullptr);

// One malloc'ed bundle holds: 0) the transformed filter, 1) conv-bias
// workspace, 2) preprocess workspace.  Freed below after both execs.
WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
conv_bias_workspace_in_bytes,
winograd_preprocess_workspace_in_bytes});
wb.set(malloc(wb.total_size_in_bytes()));

TensorND filter_transform_tensor(wb.get(0),
std::move(filter_transform_layout));
winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
wb.get_workspace(2));
conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
tensors[3], tensors[4], nullptr,
wb.get_workspace(1));

free(wb.ptr());
};

// Exercise every arg with each requested output block size m, comparing
// the tested algo against extra_impl above.
auto run = [&checker, &extra_impl](
Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
const float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(extra_impl,
std::placeholders::_1, m,
arg.param, handle));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
}
};
run(handle(), args, {6}, dtype::Float32(), dtype::Float32(),
dtype::Float32(), dtype::Float32(), 1e-3f);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
run(handle(), args, {6}, dtype::Float16(), dtype::Float16(),
dtype::Float16(), dtype::Float16(), 0.35f);
#endif
}



TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};
std::vector<TestArg> args = get_winograd_mk_packed_args(8);
std::vector<TestArg> args_first_half(args.begin(),
args.begin() + args.size() / 2);
run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4,
1e-3f);
run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, 1e-3f);
}



TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};
std::vector<TestArg> args = get_winograd_mk_packed_args(8);
std::vector<TestArg> args_second_half(args.begin() + args.size() / 2,
args.end());
run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4,
1e-3f);
run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, 1e-3f);
}



#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

std::vector<TestArg> args = get_winograd_mk_packed_args(8);
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{},
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
0.25);
run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{},
dtype::Float16{}, 0.25);
}


#endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

@@ -933,24 +852,19 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
get_quantized_winograd_mk_packed_args(8);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f),
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args,
DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
@@ -958,7 +872,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) {
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
}
};

@@ -973,118 +886,99 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) {
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f),
run(quantized_args, dtype::QuantizedS8(2.5f),
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
dtype::QuantizedS8(60.25f),1e-3);
}

TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) {
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

float epsilon = 0.001;
#if MEGDNN_AARCH64
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
const char* matmul_name = "AARCH64_F32_MK4_4x16";
#else
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
const char* matmul_name = "ARMV7_F32_MK4_4x8";
#endif
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str()));

ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
std::vector<TestArg> quantized_args =
get_int8_nchw44_args(3, 4, false, true);
get_int8_nchw44_args(3, 4, true, true);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f),
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
run(quantized_args, dtype::QuantizedS8(0.41113496f),
dtype::QuantizedS8(0.01887994f),
dtype::QuantizedS32(0.41113496f * 0.01887994f),
dtype::QuantizedS8(0.49550694f), epsilon);
}

TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) {
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

float epsilon = 0.001;
#if MEGDNN_AARCH64
const char* matmul_name = "AARCH64_F32_MK4_4x16";
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
#else
const char* matmul_name = "ARMV7_F32_MK4_4x8";
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
#endif
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true);
ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str()));

std::vector<TestArg> quantized_args =
get_int8_nchw44_args(3, 4, false, true);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f),
dtype::QuantizedS8(0.01887994f),
dtype::QuantizedS32(0.41113496f * 0.01887994f),
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
epsilon);
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
}

TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) {
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) {
using namespace conv_bias;

Checker<ConvBiasForward> checker(handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

@@ -1096,23 +990,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
#endif
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
std::vector<TestArg> quantized_args =
get_int8_nchw44_args(3, 4, true, true);
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f),
run(quantized_args, dtype::QuantizedS8(0.41113496f),
dtype::QuantizedS8(0.01887994f),
dtype::QuantizedS32(0.41113496f * 0.01887994f),
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
epsilon);
dtype::QuantizedS8(0.49550694f), epsilon);
}







#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) {
using namespace conv_bias;
@@ -1170,7 +1056,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) {
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25,
param::MatrixMul::Format::MK8);
}

#endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
using namespace conv_bias;
@@ -1187,6 +1072,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {

check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
}

TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) {
using namespace conv_bias;


+ 86
- 186
dnn/test/arm_common/conv_bias_multi_thread_weight_preprocess.cpp View File

@@ -83,56 +83,12 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) {

Checker<ConvBiasForward> checker(handle());

auto extra_impl = [](const TensorNDArray& tensors, uint32_t m,
param::ConvBias param, Handle* handle) {
megdnn_assert(param.format == param::ConvBias::Format::NCHW44);
auto winograd_preprocess_opr =
handle->create_operator<WinogradFilterPreprocess>();
winograd_preprocess_opr->param().output_block_size = m;
winograd_preprocess_opr->param().format = param::MatrixMul::Format::MK4;
TensorLayout filter_transform_layout;
winograd_preprocess_opr->deduce_layout(tensors[1].layout,
filter_transform_layout);
size_t winograd_preprocess_workspace_in_bytes =
winograd_preprocess_opr->get_workspace_in_bytes(
tensors[1].layout, filter_transform_layout);

auto conv_bias_opr = handle->create_operator<ConvBias>();
conv_bias_opr->param() = param;
conv_bias_opr->param().format =
param::ConvBias::Format::NCHW44_WINOGRAD;
conv_bias_opr->param().output_block_size = m;
size_t conv_bias_workspace_in_bytes =
conv_bias_opr->get_workspace_in_bytes(
tensors[0].layout, filter_transform_layout,
tensors[2].layout, tensors[3].layout, tensors[4].layout,
nullptr);

WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
conv_bias_workspace_in_bytes,
winograd_preprocess_workspace_in_bytes});
wb.set(malloc(wb.total_size_in_bytes()));

TensorND filter_transform_tensor(wb.get(0),
std::move(filter_transform_layout));
winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
wb.get_workspace(2));
conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
tensors[3], tensors[4], nullptr,
wb.get_workspace(1));
free(wb.ptr());
};

auto run = [&checker, &extra_impl](
Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](
const std::vector<TestArg>& args,
DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
const float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(extra_impl,
std::placeholders::_1, m,
arg.param, handle));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
@@ -140,7 +96,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) {
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
}
};

@@ -149,7 +104,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) {
// dtype::Float32(), dtype::Float32(), 1e-2f);

//! remove this when low precision mode is ok
run(handle(), nchw44_args, {2, 6}, dtype::Float32(), dtype::Float32(),
run(nchw44_args, dtype::Float32(), dtype::Float32(),
dtype::Float32(), dtype::Float32(), 1e-3f);
}
TEST_F(ARM_COMMON_MULTI_THREADS,
@@ -158,31 +113,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};
std::vector<TestArg> args = get_winograd_mk_packed_args(8);
std::vector<TestArg> args_first_half(args.begin(),
args.begin() + args.size() / 2);
run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4,
1e-3f);
run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, 1e-3f);
}
TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) {
@@ -190,31 +138,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};
std::vector<TestArg> args = get_winograd_mk_packed_args(8);
std::vector<TestArg> args_second_half(args.begin() + args.size() / 2,
args.end());
run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4,
1e-3f);
run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
dtype::Float32{}, 1e-3f);
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS,
@@ -223,32 +164,25 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

std::vector<TestArg> args = get_winograd_mk_packed_args(8);
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{},
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
0.25);
run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{},
dtype::Float16{}, 0.25);
}
#endif
TEST_F(ARM_COMMON_MULTI_THREADS,
@@ -257,23 +191,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

@@ -289,9 +217,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
get_quantized_winograd_mk_packed_args(8);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f),
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
}
TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
@@ -299,15 +226,11 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args,
DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
@@ -315,7 +238,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
}
};

@@ -330,9 +252,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f),
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
}
TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) {
@@ -340,23 +261,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

@@ -372,9 +287,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
get_int8_nchw44_args(3, 4, false, true);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f),
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
}

TEST_F(ARM_COMMON_MULTI_THREADS,
@@ -383,23 +297,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

@@ -414,11 +322,10 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f),
run(quantized_args, dtype::QuantizedS8(0.41113496f),
dtype::QuantizedS8(0.01887994f),
dtype::QuantizedS32(0.41113496f * 0.01887994f),
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
epsilon);
dtype::QuantizedS8(0.49550694f), epsilon);
}

TEST_F(ARM_COMMON_MULTI_THREADS,
@@ -427,23 +334,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS,

Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
param::MatrixMul::Format format, float eps) {
float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(
winograd_algo_extra_impl, std::placeholders::_1, m,
arg.param, handle, format));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};

@@ -459,11 +360,10 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
get_int8_nchw44_args(3, 4, true, true);
UniformIntRNG int_rng{-50, 50};
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f),
run(quantized_args, dtype::QuantizedS8(0.41113496f),
dtype::QuantizedS8(0.01887994f),
dtype::QuantizedS32(0.41113496f * 0.01887994f),
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
epsilon);
dtype::QuantizedS8(0.49550694f), epsilon);
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) {


+ 0
- 91
dnn/test/arm_common/winograd_filter_preprocess.cpp View File

@@ -1,91 +0,0 @@
/**
* \file dnn/test/arm_common/winograd_filter_preprocess.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "test/common/checker.h"
#include "test/common/benchmarker.h"
#include "test/common/winograd_filter_preprocess.h"

#include "test/arm_common/fixture.h"

using namespace megdnn;
using namespace test;

TEST_F(ARM_COMMON, WinogradFilterPreprocessF32) {
    using namespace winograd_filter_preprocess;
    Checker<WinogradFilterPreprocess> checker(handle());

    // Gather every float32 case: default-format transforms (F(6,3), F(5,4),
    // F(4,5)) followed by the MK4-packed ones (output block 2 and 6).
    std::vector<TestArg> all_args = get_args(6, 3);
    for (auto&& extra : {get_args(5, 4), get_args(4, 5),
                         get_mk_packed_args(2, param::Winograd::Format::MK4, 4),
                         get_mk_packed_args(6, param::Winograd::Format::MK4, 4)}) {
        all_args.insert(all_args.end(), extra.begin(), extra.end());
    }

    // Both src filter and transformed output are float32; dst layout is
    // deduced by the operator (empty shape in execs).
    for (auto&& arg : all_args) {
        checker.set_param(arg.param)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .execs({arg.src, {}});
    }
}

TEST_F(ARM_COMMON, WinogradFilterPreprocessQs8) {
    using namespace winograd_filter_preprocess;
    Checker<WinogradFilterPreprocess> checker(handle());
    // Quantized int8 path: MK8-packed layout, output block size 2,
    // pack size 8. Inputs are drawn from a small uniform integer range.
    UniformIntRNG rng{-50, 50};
    checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng);
    for (auto&& arg :
         get_mk_packed_args(2, param::Winograd::Format::MK8, 8)) {
        // QuantizedS8 filter in, QuantizedS16 transformed filter out.
        checker.set_param(arg.param)
                .set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS16(2.5f))
                .execs({arg.src, {}});
    }
}

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON, WinogradFilterPreprocessF16) {
    using namespace winograd_filter_preprocess;
    Checker<WinogradFilterPreprocess> checker(handle());
    // default-format transforms: F(6,3), DEFAULT-format packed F(2,3), F(4,5)
    std::vector<TestArg> args = get_args(6, 3);
    std::vector<TestArg> args_23 =
            get_mk_packed_args(2, param::Winograd::Format::DEFAULT, 4);
    std::vector<TestArg> args45 = get_args(4, 5);

    // mk8-packed F(2,3), pack size 8
    std::vector<TestArg> args_mk8_out2 =
            get_mk_packed_args(2, param::Winograd::Format::MK8, 8);

    args.insert(args.end(), args_23.begin(), args_23.end());
    args.insert(args.end(), args45.begin(), args45.end());
    args.insert(args.end(), args_mk8_out2.begin(), args_mk8_out2.end());

    // Stack-allocated RNG (the original `new Float16PeriodicalRNG` leaked);
    // matches the stack-RNG style of the other tests in this file. The
    // checker only borrows the pointer, and `rng` outlives every exec call.
    Float16PeriodicalRNG rng(0x3c00);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Float16{})
                .set_dtype(1, dtype::Float16{})
                .execs({arg.src, {}});
    }
}

#endif

// vim: syntax=cpp.doxygen

+ 0
- 45
dnn/test/common/conv_bias.cpp View File

@@ -1152,50 +1152,6 @@ void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args,
}


//! Reference implementation used by the checker for winograd conv-bias:
//! runs the filter transform explicitly via WinogradFilterPreprocess, then
//! executes ConvBias in the corresponding *_WINOGRAD format, so the result
//! can be compared against the fused algorithm under test.
//! \param tensors checker tensor array: [src, filter, bias, z, dst]
//! \param m       winograd output block size
//! \param param   conv-bias param of the case (format must be NCHW/NCHW44)
//! \param format  matmul packing format used by the transform (e.g. MK4/MK8)
void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m,
                              param::ConvBias param, Handle* handle,
                              param::MatrixMul::Format format) {
    megdnn_assert(param.format == param::ConvBias::Format::NCHW ||
                  param.format == param::ConvBias::Format::NCHW44);
    // Step 1: deduce the transformed-filter layout and the workspace the
    // preprocess operator needs.
    auto winograd_preprocess_opr =
            handle->create_operator<WinogradFilterPreprocess>();
    winograd_preprocess_opr->param().output_block_size = m;
    winograd_preprocess_opr->param().format = format;
    winograd_preprocess_opr->param().compute_mode = param.compute_mode;
    TensorLayout filter_transform_layout;
    winograd_preprocess_opr->deduce_layout(tensors[1].layout,
                                           filter_transform_layout);
    size_t winograd_preprocess_workspace_in_bytes =
            winograd_preprocess_opr->get_workspace_in_bytes(
                    tensors[1].layout, filter_transform_layout);

    // Step 2: build the conv-bias operator that consumes the pre-transformed
    // filter; its format switches to the matching *_WINOGRAD variant.
    auto conv_bias_opr = handle->create_operator<ConvBias>();
    conv_bias_opr->param() = param;
    if (param.format == param::ConvBias::Format::NCHW) {
        conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD;
    } else {
        conv_bias_opr->param().format =
                param::ConvBias::Format::NCHW44_WINOGRAD;
    }
    conv_bias_opr->param().output_block_size = m;
    size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes(
            tensors[0].layout, filter_transform_layout, tensors[2].layout,
            tensors[3].layout, tensors[4].layout, nullptr);

    // Single bundle backing all temporaries:
    //   slot 0: transformed filter storage
    //   slot 1: conv-bias workspace
    //   slot 2: preprocess workspace
    WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
                                 conv_bias_workspace_in_bytes,
                                 winograd_preprocess_workspace_in_bytes});
    wb.set(malloc(wb.total_size_in_bytes()));

    // Step 3: transform the filter, then run the winograd conv-bias.
    TensorND filter_transform_tensor(wb.get(0),
                                     std::move(filter_transform_layout));
    winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
                                  wb.get_workspace(2));
    conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
                        tensors[3], tensors[4], nullptr, wb.get_workspace(1));
    free(wb.ptr());
};

void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle,
RNG* rng, float epsilon, DType type0, DType type1,
DType type2, DType type3, const char* algo_name) {
@@ -1388,7 +1344,6 @@ std::vector<conv_bias::TestArg> get_nchw44_conv_bias_args(
}
return args;
}

} // namespace conv_bias
} // namespace test
} // namespace megdnn


+ 0
- 3
dnn/test/common/conv_bias.h View File

@@ -94,9 +94,6 @@ void checker_conv_bias_int8x8x16(
std::vector<megdnn::test::conv_bias::TestArg> args,
megdnn::Handle* handle, const char* algo_name);

void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m,
param::ConvBias param, Handle* handle,
param::MatrixMul::Format format);
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args,
Handle* handle, RNG* rng, float epsilon,
DType type0, DType type1, DType type2,


+ 0
- 1
dnn/test/common/opr_trait.h View File

@@ -95,7 +95,6 @@ DEF(MaskConvolution, 4, true, true);
DEF(MaskPropagate, 2, true, true);
DEF(RelayoutFormat, 2, true, true);
DEF(MaxTensorDiff, 2, true, false);
DEF(WinogradFilterPreprocess, 2, true, true);
DEF(LocalShareForward, 3, true, true);
DEF(LocalShareBackwardData, 3, true, false);
DEF(LocalShareBackwardFilter, 3, true, false);


+ 12
- 59
dnn/test/x86/conv_bias.cpp View File

@@ -1814,69 +1814,22 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args();
Checker<ConvBiasForward> checker(handle());
auto extra_impl = [](const TensorNDArray& tensors, uint32_t m,
param::ConvBias param, Handle* handle) {
megdnn_assert(param.format == param::ConvBias::Format::NCHW88);
auto winograd_preprocess_opr =
handle->create_operator<WinogradFilterPreprocess>();
winograd_preprocess_opr->param().output_block_size = m;
winograd_preprocess_opr->param().format = param::MatrixMul::Format::MK8;
TensorLayout filter_transform_layout;
winograd_preprocess_opr->deduce_layout(tensors[1].layout,
filter_transform_layout);
size_t winograd_preprocess_workspace_in_bytes =
winograd_preprocess_opr->get_workspace_in_bytes(
tensors[1].layout, filter_transform_layout);

auto conv_bias_opr = handle->create_operator<ConvBias>();
conv_bias_opr->param() = param;
conv_bias_opr->param().format =
param::ConvBias::Format::NCHW88_WINOGRAD;
conv_bias_opr->param().output_block_size = m;
size_t conv_bias_workspace_in_bytes =
conv_bias_opr->get_workspace_in_bytes(
tensors[0].layout, filter_transform_layout,
tensors[2].layout, tensors[3].layout, tensors[4].layout,
nullptr);

WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
conv_bias_workspace_in_bytes,
winograd_preprocess_workspace_in_bytes});
wb.set(malloc(wb.total_size_in_bytes()));

TensorND filter_transform_tensor(wb.get(0),
std::move(filter_transform_layout));
winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
wb.get_workspace(2));
conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
tensors[3], tensors[4], nullptr,
wb.get_workspace(1));

free(wb.ptr());
};

auto run = [&checker, &extra_impl](
Handle* handle, const std::vector<TestArg>& args,
const std::vector<size_t>& out_size, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
const float eps) {
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
DType B_dtype, DType C_dtype, DType D_dtype,
const float eps) {
for (auto&& arg : args) {
for (uint32_t m : out_size) {
checker.set_extra_opr_impl(std::bind(extra_impl,
std::placeholders::_1, m,
arg.param, handle));
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
checker.set_dtype(0, A_dtype)
.set_dtype(1, B_dtype)
.set_dtype(2, C_dtype)
.set_dtype(4, D_dtype)
.set_epsilon(eps)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
};
run(handle(), args, {2, 6}, dtype::Float32(), dtype::Float32(),
dtype::Float32(), dtype::Float32(), 1e-3f);
run(args, dtype::Float32(), dtype::Float32(), dtype::Float32(),
dtype::Float32(), 1e-3f);
}

/*********************************** End winograd ************************/


+ 0
- 1
src/core/impl/graph/cg_impl.cpp View File

@@ -32,7 +32,6 @@
#include "megbrain/jit/fusion_pass.h"
#endif

#include "megbrain/gopt/weights_preprocess.h"

using namespace mgb;
using namespace cg;


+ 0
- 3
src/gopt/impl/framework.cpp View File

@@ -14,7 +14,6 @@
#include "megbrain/gopt/gtrans.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/gopt/misc.h"
#include "megbrain/gopt/weights_preprocess.h"
#include "megbrain/graph/cg.h"
#include "megbrain/graph/event.h"
#include "megbrain/graph/exc_extra_info.h"
@@ -780,8 +779,6 @@ const GraphOptimizer& GraphOptimizer::add_passes_for_optimize_options(
add_pass<FuseConvBiasZPass>();
});

cb(weight_winograd_transform,
{ add_pass<WinogradTransformReplacePass>(); });
#undef cb

if (need_param_fuse) {


+ 0
- 206
src/gopt/impl/weights_preprocess.cpp View File

@@ -1,206 +0,0 @@
/**
* \file src/gopt/impl/weights_preprocess.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#include "megbrain/gopt/weights_preprocess.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/tensor_manip.h"

#include "megbrain/utils/hash_ct.h"
#include "midout.h"

MIDOUT_DECL(megbrain_weight_preprocess)
#define MIDOUT_B(tag) \
MIDOUT_BEGIN(megbrain_weight_preprocess, midout_iv(MGB_HASH_STR(tag))) {
#define MIDOUT_E \
} \
MIDOUT_END();

using namespace mgb;
using namespace gopt;
using namespace cg;

//! Pass name reported to the graph optimizer framework (used in logs
//! and for pass identification).
const char* WinogradTransformReplacePass::name() const {
    return "winograd_transform";
}

//! Rewrite each ConvBias whose profiled best algorithm is a winograd algo:
//! insert a WinogradFilterPreprocess on the (constant) filter and switch the
//! ConvBias format to the matching *_WINOGRAD variant, so the filter
//! transform is folded out of the per-inference execution path.
//! Fix vs previous revision: corrected typo "suppport" in the assert message.
void WinogradTransformReplacePass::apply(OptState& opt) const {
    MIDOUT_B("WinogradTransformReplacePass::apply")
    auto rewriter = opt.graph().make_rewriter();
    // Track which vars are constant (immutable or param): only constant
    // filters may be pre-transformed, since the transformed filter must be
    // foldable into a persistent value.
    ConstVarPropogate cvprop{ConstVarType::IMMUTABLE_AND_PARAM};
    opt.graph().iter([&cvprop](OperatorNodeBase *opr) {
        cvprop.add_opr(opr);
    });

    // Fetch the profiled algorithm name for a ConvBias from its profile
    // cache. Returns "" when no cached profile result exists. Missing
    // bias/z inputs are represented by empty layouts with deduced dtypes,
    // mirroring how the profile key was originally built.
    auto get_algo = [](const opr::ConvBias& opr) -> std::string {
        auto&& inputs = opr.input();
        SmallVector<TensorLayout> layouts;
        mgb_assert(inputs.size() >= 2 && inputs.size() <= 4);
        auto&& mo = opr.megdnn_opr();
        for (size_t i = 0; i < 4; i++) {
            if (inputs.size() <= i) {
                if (i == 2) {
                    //! bias
                    DType dtype;
                    mo->deduce_dtype(inputs[0]->dtype(), inputs[1]->dtype(),
                                     DType{}, DType{}, dtype);
                    layouts.emplace_back(TensorShape{}, dtype);
                } else {
                    layouts.emplace_back(TensorShape{}, opr.output(0)->dtype(),
                                         opr.output(0)->format());
                }
            } else {
                layouts.emplace_back(inputs[i]->shape(), inputs[i]->dtype(),
                                     inputs[i]->format());
            }
        }
        layouts.emplace_back(opr.output(0)->shape(), opr.output(0)->dtype(),
                             opr.output(0)->format());

        AlgoChooserProfileCache& cache = opr.profile_cache();
        auto param_blob = opr.param_blob();
        AlgoChooserProfileCache::Key cache_key{layouts.data(), layouts.size(),
                                               param_blob.first,
                                               param_blob.second};
        auto&& rst = cache.get(cache_key);
        if (!rst.valid())
            return "";
        auto prof = rst.val();
        if (prof.empty())
            return "";
        return prof[0].algo;
    };
    auto on_opr = [&](OperatorNodeBase* opr) {
        auto type = opr->dyn_typeinfo();
        do {
            if (type != opr::ConvBias::typeinfo())
                break;
            auto&& conv_bias_opr = opr->cast_final_safe<opr::ConvBias>();
            auto&& inputs = conv_bias_opr.input();
            VarNodeArray new_inp;
            new_inp.reserve(inputs.size());
            for (auto i : inputs) {
                new_inp.push_back(rewriter.get_var(i));
            }
            // Bail out unless the filter (input 1) is constant.
            if (!(cvprop.is_midconst(inputs[1]) ||
                  cvprop.is_const(inputs[1]))) {
                break;
            }
            // Only proceed when the profiled algo name parses as a
            // winograd algorithm.
            auto algo_name = get_algo(conv_bias_opr);
            auto winograd_param =
                    megdnn::ConvBias::parse_winograd_name(algo_name);
            if (winograd_param == megdnn::ConvBias::INVALID_WINOGRAD_PARAM)
                break;
            mgb_assert(
                    conv_bias_opr.param().format ==
                                    megdnn::ConvBias::Param::Format::NCHW ||
                            conv_bias_opr.param().format ==
                                    megdnn::ConvBias::Param::Format::NCHW88 ||
                            conv_bias_opr.param().format ==
                                    megdnn::ConvBias::Param::Format::NCHW44,
                    "currently winograd only support NCHW and NCHW44 and "
                    "NCHW88");
            opr::ConvBiasForward::check_winograd_param_valid(
                    winograd_param, conv_bias_opr.input(0)->dtype());
            megdnn::param::Winograd winograd_preprocess_param;
            winograd_preprocess_param.format =
                    opr::ConvBiasForward::get_matmul_format(winograd_param);
            winograd_preprocess_param.output_block_size =
                    winograd_param.output_block_size;

            auto conv_bias_param = conv_bias_opr.param();
            //! If input dtype is Qint8 and matmul format is MK4, The winograd
            //! compute type is float.
            if (conv_bias_opr.input(0)->dtype().enumv() ==
                        DTypeEnum::QuantizedS8 &&
                winograd_preprocess_param.format ==
                        megdnn::param::MatrixMul::Format::MK4) {
                winograd_preprocess_param.compute_mode =
                        megdnn::param::ConvBias::ComputeMode::FLOAT32;
                conv_bias_param.compute_mode =
                        megdnn::param::ConvBias::ComputeMode::FLOAT32;
            }

            auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make(
                    new_inp[1], winograd_preprocess_param);
            mgb_assert(inputs.size() == 2 || inputs.size() == 3,
                       "input size need to be 2/3, but got: %zu",
                       inputs.size());
            SymbolVar new_conv_bias_opr;

            // Pick the *_WINOGRAD format from the src ndim: 4-dim src means
            // NCHW; 5-dim src carries the pack size in the last axis.
            if (new_inp[0]->shape().ndim == 4) {
                conv_bias_param.format =
                        megdnn::ConvBias::Param::Format::NCHW_WINOGRAD;
            } else {
                mgb_assert(new_inp[0]->shape().ndim == 5);
                size_t pack_size = new_inp[0]->shape()[4];
                if (pack_size == 8) {
                    conv_bias_param.format =
                            megdnn::ConvBias::Param::Format::NCHW88_WINOGRAD;
                } else if (pack_size == 4) {
                    conv_bias_param.format =
                            megdnn::ConvBias::Param::Format::NCHW44_WINOGRAD;
                } else {
                    mgb_assert(0, "Invalid pack size %zu in algo %s", pack_size,
                               algo_name.c_str());
                }
            }

            conv_bias_param.output_block_size =
                    winograd_param.output_block_size;
            if (inputs.size() == 2) {
                new_conv_bias_opr = opr::ConvBias::make(
                        new_inp[0], winograd_preprocess_opr.node(),
                        conv_bias_param, conv_bias_opr.execution_policy(),
                        conv_bias_opr.config());
            } else {
                new_conv_bias_opr = opr::ConvBias::make(
                        new_inp[0], winograd_preprocess_opr.node(), new_inp[2],
                        conv_bias_param, conv_bias_opr.execution_policy(),
                        conv_bias_opr.config());
            }

            // Replace all non-volatile outputs of the old opr with the
            // outputs of the rewritten one.
            auto&& origin_out = conv_bias_opr.output();
            auto&& cur_out = new_conv_bias_opr.node()->owner_opr()->output();
            mgb_assert(origin_out.size() == cur_out.size());
            for (size_t i = 0; i < origin_out.size(); i++) {
                if (!origin_out[i]->contain_flag(
                            VarNode::Flag::VOLATILE_CONTENT)) {
                    rewriter.replace_var(origin_out[i], cur_out[i], nullptr);
                }
            }
            return;
        } while (0);

        rewriter.auto_replace_outputs(opr);
    };

    opt.graph().iter(on_opr);
    rewriter.apply_inplace();
    MIDOUT_E
}

/**
* \warning WinogradTransformReplacePass implies that we run ParamFuse pass
* before(currently run ParamFuse in optimize_for_inference when dump model),
otherwise it cannot deal with \c ConvBias(x, W+1), as the node of W+1 has no
* flag PERSISTENT_DEVICE_VALUE, it's a mid-const node, we should use
* ConstVarPropogate strictly speaking.
*/
//! Run the winograd filter-transform rewrite followed by param fusion,
//! rewriting \p dest_vars in place.
void gopt::transform_vars_inplace_with_winograd(
        mgb::cg::VarNodeArray& dest_vars) {
    gopt::GraphOptimizer graph_opt;
    // Order matters: the winograd pass inserts WinogradFilterPreprocess
    // nodes, and ParamFuse then folds them into constant transformed filters.
    graph_opt.add_pass<WinogradTransformReplacePass>();
    graph_opt.add_pass<ParamFusePass>();
    graph_opt.apply_inplace(dest_vars);
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 0
- 32
src/gopt/include/megbrain/gopt/weights_preprocess.h View File

@@ -1,32 +0,0 @@
/**
* \file src/gopt/include/megbrain/gopt/weights_preprocess.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#pragma once

#include "megbrain/gopt/framework.h"

namespace mgb {
namespace gopt {

class WinogradTransformReplacePass final : public Pass {
class Impl;

public:
const char* name() const override;
void apply(OptState& opt) const override;
};

void transform_vars_inplace_with_winograd(mgb::cg::VarNodeArray& dest_vars);

} // namespace gopt
} // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 0
- 13
src/opr/impl/search_policy/algo_chooser.cpp View File

@@ -46,7 +46,6 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(

ConvTensorLayouts origin_layouts = ctx.layouts();
typename Opr::Param origin_param = ctx.mgb_opr()->param();
get_origin_param_and_layouts(ctx, origin_layouts, origin_param);
AlgoChooserProfileCache::Key cache_key{origin_layouts.data(),
origin_layouts.size(), &origin_param,
sizeof(origin_param)};
@@ -104,18 +103,6 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(
return prof_rst;
}

template <>
void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts(
const ExeContext& ctx, ConvTensorLayouts& layouts,
megdnn::ConvBias::Param& param) {
auto format = static_cast<megdnn::param::ConvBias::Format>(
ctx.megdnn_opr()->param().format);
size_t output_block_size = ctx.megdnn_opr()->param().output_block_size;
megdnn::ConvBias::deduce_winograd_origin_layout_and_param(
format, output_block_size, ctx.layouts()[0], ctx.layouts()[1],
layouts[1], param);
}

template <typename Opr>
typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile(
ExeContext& ctx, bool require_reproducible, bool enable_update) {


+ 0
- 10
src/opr/impl/tensor_manip.cpp View File

@@ -1607,15 +1607,5 @@ void RelayoutFormat::init_output_format() {
}
// f}}}
//
/* f{{{ ===================== WinogradFilterPreprocess ===================== */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WinogradFilterPreprocess);
MEGDNN_OPR_INIT1(WinogradFilterPreprocess, "winograd_filter_preprocess")
void WinogradFilterPreprocess::init_output_dtype() {
    // Let megdnn deduce the transformed-filter layout from the input filter
    // layout; only the deduced dtype is propagated to the output var here
    // (the shape is handled by the generic shape-inference machinery).
    TensorLayout dst;
    TensorLayout src{input(0)->shape(), input(0)->dtype(), input(0)->format()};
    megdnn_opr()->deduce_layout(src, dst);
    output(0)->dtype(dst.dtype);
}
// f}}}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 0
- 1
src/opr/impl/tensor_manip.sereg.h View File

@@ -184,7 +184,6 @@ namespace opr {

MGB_REG_OPR_SHALLOW_COPY(ParamPackConcat, opr_shallow_copy_param_pack_concat);
MGB_SEREG_OPR(RelayoutFormat, 1);
MGB_SEREG_OPR(WinogradFilterPreprocess, 1);
} // namespace opr

} // namespace mgb


+ 0
- 4
src/opr/include/megbrain/opr/search_policy/algo_chooser.h View File

@@ -113,10 +113,6 @@ class AlgoChooser {
//! entrance for getting algorithm according to execution strategy
static ImplAlgo get_algo(ExeContext& ctx);

static void get_origin_param_and_layouts(const ExeContext&,
ConvTensorLayouts&,
typename Opr::Param&) {}

//! get all profile result, either by retrieving cache or profiling
static AlgoChooserProfileCache::Result get_profile_result(
ExeContext& ctx, bool enable_update);


+ 0
- 16
src/opr/include/megbrain/opr/tensor_manip.h View File

@@ -635,22 +635,6 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat,
const OperatorNodeConfig &config = {});
void init_output_format() override final;
};

/*!
* \brief change conv weights layout base on winograd transform.
*
* See docs of megdnn params for more details
*/
MGB_DEFINE_OPR_CLASS(WinogradFilterPreprocess,
intl::MegDNNOprWrapperFwd<megdnn::WinogradFilterPreprocess>)
public:
WinogradFilterPreprocess(VarNode* p0, const Param& param,
const OperatorNodeConfig& config);
static SymbolVar make(SymbolVar p0, const Param& param = {},
const OperatorNodeConfig& config = {});
void init_output_dtype() override final;
};

} // opr
} // mgb



+ 2
- 28
src/plugin/impl/opr_footprint.cpp View File

@@ -171,12 +171,6 @@ uint64_t eval_conv_computation(const TensorShape& src_shape,
cpos = 1;
spatial_start = 2;
break;
case Param::Format::NCHW_WINOGRAD:
case Param::Format::NCHW44_WINOGRAD:
case Param::Format::NCHW88_WINOGRAD:
cpos = 1;
spatial_start = 0;
break;
case Param::Format::NHWC:
cpos = 3;
spatial_start = 1;
@@ -203,29 +197,9 @@ uint64_t eval_conv_computation(const TensorShape& src_shape,

uint64_t fh = static_cast<uint64_t>(filter_shape[spatial_start]);
uint64_t fw = static_cast<uint64_t>(filter_shape[spatial_start + 1]);
if (param.format == Param::Format::NCHW_WINOGRAD ||
param.format == Param::Format::NCHW44_WINOGRAD ||
param.format == Param::Format::NCHW88_WINOGRAD) {
mgb_assert(opr->same_type<opr::ConvBias>(),
"Only conv bias support WINOGRAD");
auto&& conv_bias_opr = opr->cast_final_safe<opr::ConvBias>();
uint32_t output_block_size = conv_bias_opr.param().output_block_size;
mgb_assert(fh == fw,
"NCHW_WINOGRAD, NCHW88_WINOGRAD need fw==fh, got fw: %u fh "
"%u\n",
static_cast<uint32_t>(fh), static_cast<uint32_t>(fw));
fh = fh + 1 - output_block_size;
fw = fw + 1 - output_block_size;
}
// mul and add are counted as 2 operations
if(param.format == Param::Format::NCHW88_WINOGRAD){
return dst_shape.total_nr_elems() * fh * fw *
static_cast<uint64_t>(src_shape[cpos] * 8) / group * 2;
}
if (param.format == Param::Format::NCHW44_WINOGRAD) {
return dst_shape.total_nr_elems() * fh * fw *
static_cast<uint64_t>(src_shape[cpos] * 4) / group * 2;
}
return dst_shape.total_nr_elems() * fh * fw *
static_cast<uint64_t>(src_shape[cpos]) / group * 2;
}


+ 3
- 1
src/serialization/impl/schema.fbs View File

@@ -28,6 +28,7 @@ table Blob {
}

table Reserved0 {}
table DeprecatedParam {}

union OperatorParam {
param.Empty = 1,
@@ -50,7 +51,8 @@ union OperatorParam {
param.ElemwiseMultiType = 18,
param.PowC = 19,
param.MatrixMul = 20,
param.Winograd = 21,
//Reserved for param.Winograd = 21,
DeprecatedParam = 21,
param.SVD = 22,
param.Reduce = 23,
param.Cumsum = 24,


Loading…
Cancel
Save