Browse Source

feat(dnn/fallback): add winograd weight preprocess

GitOrigin-RevId: 4741298e44
release-0.6
Megvii Engine Team 5 years ago
parent
commit
fff2cdc7bb
16 changed files with 896 additions and 869 deletions
  1. +26
    -143
      dnn/src/arm_common/conv_bias/f16/algos.cpp
  2. +4
    -65
      dnn/src/arm_common/conv_bias/f16/algos.h
  3. +41
    -252
      dnn/src/arm_common/conv_bias/fp32/algos.cpp
  4. +9
    -96
      dnn/src/arm_common/conv_bias/fp32/algos.h
  5. +16
    -108
      dnn/src/arm_common/conv_bias/int8/algos.cpp
  6. +3
    -44
      dnn/src/arm_common/conv_bias/int8/algos.h
  7. +0
    -1
      dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp
  8. +2
    -2
      dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp
  9. +19
    -35
      dnn/src/fallback/conv_bias/algos.cpp
  10. +24
    -0
      dnn/src/fallback/conv_bias/common.h
  11. +232
    -27
      dnn/src/fallback/conv_bias/winograd/winograd.h
  12. +2
    -26
      dnn/src/x86/conv_bias/f32/algos.h
  13. +10
    -69
      dnn/src/x86/conv_bias/f32/winograd_algo.cpp
  14. +17
    -0
      dnn/test/arm_common/conv_bias.cpp
  15. +459
    -0
      dnn/test/arm_common/conv_bias_multi_thread.cpp
  16. +32
    -1
      dnn/test/x86/conv_bias.cpp

+ 26
- 143
dnn/src/arm_common/conv_bias/f16/algos.cpp View File

@@ -34,11 +34,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) {
using Strategy = winograd::winograd_2x3_4x4_f16; using Strategy = winograd::winograd_2x3_4x4_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP16WinogradF23::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 1) {
winograd::winograd_2x3_4x4_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF23::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 2) {
winograd::winograd_2x3_4x4_f16 strategy(
param.src_type, param.filter_type, param.dst_type);

auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23,
winograd::winograd_2x3_4x4_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::DEFAULT);


/* ======================= AlgoFP16WinogradF45 ======================== */ /* ======================= AlgoFP16WinogradF45 ======================== */


@@ -106,11 +76,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) {
using Strategy = winograd::winograd_4x5_1x1_f16; using Strategy = winograd::winograd_4x5_1x1_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP16WinogradF45::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
winograd::winograd_4x5_1x1_f16 strategy(param.src_type, param.filter_type,
param.dst_type);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 1) {
return megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45,
winograd::winograd_4x5_1x1_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::DEFAULT);


SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF45::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 2) {
winograd::winograd_4x5_1x1_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP16WinogradF63 ======================== */ /* ======================= AlgoFP16WinogradF63 ======================== */


bool ConvBiasImpl::AlgoFP16WinogradF63::usable( bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
@@ -174,11 +116,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) {
using Strategy = winograd::winograd_6x3_1x1_f16; using Strategy = winograd::winograd_6x3_1x1_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP16WinogradF63::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
winograd::winograd_6x3_1x1_f16 strategy(param.src_type, param.filter_type,
param.dst_type);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 1) {
return megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF63::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 2) {
winograd::winograd_6x3_1x1_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63,
winograd::winograd_6x3_1x1_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::DEFAULT);


/* ======================= AlgoFP16WinogradF23_8x8 ======================== */ /* ======================= AlgoFP16WinogradF23_8x8 ======================== */


@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP16WinogradF23_8x8::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 1) {
winograd::winograd_2x3_8x8_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF23_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
winograd::winograd_2x3_8x8_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8,
winograd::winograd_2x3_8x8_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::MK8);


/*========================from Convolution=============================*/ /*========================from Convolution=============================*/




+ 4
- 65
dnn/src/arm_common/conv_bias/f16/algos.h View File

@@ -22,7 +22,6 @@ public:
AlgoFP16WinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -30,22 +29,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;

uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP16WinogradF45 final : public AlgoBase { class ConvBiasImpl::AlgoFP16WinogradF45 final : public AlgoBase {
@@ -53,7 +37,6 @@ public:
AlgoFP16WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -61,30 +44,14 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;

virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();


uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP16WinogradF63 final : public AlgoBase { class ConvBiasImpl::AlgoFP16WinogradF63 final : public AlgoBase {
public: public:
AlgoFP16WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -93,29 +60,13 @@ public:
return m_name.c_str(); return m_name.c_str();
} }


bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;

uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };
class ConvBiasImpl::AlgoFP16WinogradF23_8x8 final : public AlgoBase { class ConvBiasImpl::AlgoFP16WinogradF23_8x8 final : public AlgoBase {
public: public:
AlgoFP16WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -123,19 +74,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;

virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoF16Direct final : public AlgoBase { class ConvBiasImpl::AlgoF16Direct final : public AlgoBase {


+ 41
- 252
dnn/src/arm_common/conv_bias/fp32/algos.cpp View File

@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 1) {
winograd::winograd_2x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_4x4::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 2) {
winograd::winograd_2x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4,
winograd::winograd_2x3_4x4_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);


/* ======================= AlgoFP32WinogradF63 ======================== */ /* ======================= AlgoFP32WinogradF63 ======================== */


@@ -113,11 +83,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) {
using Strategy = winograd::winograd_6x3_1x1_f; using Strategy = winograd::winograd_6x3_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF63::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 1) {
winograd::winograd_6x3_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) {
winograd::winograd_6x3_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63,
winograd::winograd_6x3_1x1_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::DEFAULT);


/* ======================= AlgoFP32WinogradF54 ======================== */ /* ======================= AlgoFP32WinogradF54 ======================== */


@@ -182,11 +123,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) {
using Strategy = winograd::winograd_5x4_1x1_f; using Strategy = winograd::winograd_5x4_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF54::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 1) {
winograd::winograd_5x4_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_5x4_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF54::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) {
winograd::winograd_5x4_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_5x4_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54,
winograd::winograd_5x4_1x1_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::DEFAULT);


/* ======================= AlgoFP32WinogradF45 ======================== */ /* ======================= AlgoFP32WinogradF45 ======================== */


@@ -251,11 +163,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) {
using Strategy = winograd::winograd_4x5_1x1_f; using Strategy = winograd::winograd_4x5_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF45::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 1) {
winograd::winograd_4x5_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF45::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
winograd::winograd_4x5_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45,
winograd::winograd_4x5_1x1_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::DEFAULT);


/* ======================= AlgoFP32WinogradF63_4x4 ======================== */ /* ======================= AlgoFP32WinogradF63_4x4 ======================== */


@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 1) {
winograd::winograd_6x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_6x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_4x4::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 2) {
winograd::winograd_6x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_6x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4,
winograd::winograd_6x3_4x4_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);


/* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */ /* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */


@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == m_matmul_algo->packmode() ==
@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
winograd::winograd_F23_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_F23_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
winograd::winograd_F23_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_F23_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44,
winograd::winograd_F23_mk4_f_nchw44,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);


/* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */ /* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */


@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == m_matmul_algo->packmode() ==
@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
return false; return false;
} }


// Returns the workspace size reported by the Winograd helper for
// AlgoFP32WinogradF63_4x4_NCHW44.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. The result is ConvBias::get_workspace_size(param, m_matmul_algo),
// or 0 if control falls past the MIDOUT region.
size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
// NOTE(review): presumably keeps `param` "used" when the MIDOUT region is
// compiled out -- confirm against the macro definition.
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_F63_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
// A temporary MK4-format Winograd helper is constructed only to query the
// workspace size; it is configured with the same six arguments that
// dispatch_kerns() passes for this algorithm.
return megdnn::winograd::ConvBias<winograd::winograd_F63_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return 0;
}

// Returns the kernel list for AlgoFP32WinogradF63_4x4_NCHW44.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. Returns ConvBias::get_kerns(param, m_matmul_algo), or an empty
// vector if control falls past the MIDOUT region.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
// NOTE(review): presumably keeps `param` "used" when the MIDOUT region is
// compiled out -- confirm against the macro definition.
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_F63_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
// MK4-format Winograd helper configured with tile size, thread count, the
// two output-size entries and filter_meta.ocpg.
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_F63_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return {};
}
// Macro instantiation for AlgoFP32WinogradF63_4x4_NCHW44.
// NOTE(review): the macro body is not visible here. The arguments look like
// (algorithm class, Winograd strategy type, midout tag, matmul format), and
// the macro presumably emits the shared member-function definitions for the
// class -- confirm against the header that defines it.
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44,
winograd::winograd_F63_mk4_f_nchw44,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);


/* ===================== direct algo ===================== */ /* ===================== direct algo ===================== */
MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl); MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl);


+ 9
- 96
dnn/src/arm_common/conv_bias/fp32/algos.h View File

@@ -17,13 +17,11 @@


namespace megdnn { namespace megdnn {
namespace arm_common { namespace arm_common {

class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase {
public: public:
AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -31,18 +29,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase {
@@ -50,7 +37,6 @@ public:
AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -58,19 +44,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;

uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase {
@@ -78,7 +52,6 @@ public:
AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -86,19 +59,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;

uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase {
@@ -106,7 +67,6 @@ public:
AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -114,19 +74,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;

uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase {
@@ -134,7 +82,6 @@ public:
AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -142,19 +89,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;

uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


//===================== NCHW44 Winograd Support =====================// //===================== NCHW44 Winograd Support =====================//
@@ -163,7 +98,6 @@ public:
AlgoFP32WinogradF23_4x4_NCHW44( AlgoFP32WinogradF23_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -172,18 +106,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase {
@@ -191,7 +114,6 @@ public:
AlgoFP32WinogradF63_4x4_NCHW44( AlgoFP32WinogradF63_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -200,18 +122,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };
// ================================================================= // // ================================================================= //


@@ -329,4 +240,6 @@ public:
} // namespace arm_common } // namespace arm_common
} // namespace megdnn } // namespace megdnn


#undef MEGDNN_WINOGRAD_ALGO_FUN_DECLARE

// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen

+ 16
- 108
dnn/src/arm_common/conv_bias/int8/algos.cpp View File

@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>( megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
param.dst_type.enumv() == DTypeEnum::QuantizedS8; param.dst_type.enumv() == DTypeEnum::QuantizedS8;
} }


// Returns the workspace size reported by the Winograd helper for
// AlgoS8WinogradF23_8x8.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. Unlike the sibling algorithms in this file, the body is not
// wrapped in a MIDOUT region and has no fallback return.
size_t ConvBiasImpl::AlgoS8WinogradF23_8x8::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_2x3_8x8_s8 strategy(param.src_type, param.filter_type,
param.dst_type);
// A temporary MK8-format Winograd helper is constructed only to query the
// workspace size for `param` and the stored m_matmul_algo.
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
// Macro instantiation for AlgoS8WinogradF23_8x8.
// NOTE(review): the macro body is not visible here. The arguments look like
// (algorithm class, Winograd strategy type, midout tag, matmul format), and
// the macro presumably emits the shared member-function definitions for the
// class -- confirm against the header that defines it.
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8,
winograd::winograd_2x3_8x8_s8,
megdnn_arm_common_conv_bias_int8,
param::MatrixMul::Format::MK8);


// Returns the kernel list for AlgoS8WinogradF23_8x8.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. Returns ConvBias::get_kerns(param, m_matmul_algo), or an empty
// vector if control falls past the MIDOUT region.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8WinogradF23_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
// NOTE(review): presumably keeps `param` "used" when the MIDOUT region is
// compiled out -- confirm against the macro definition.
MEGDNN_MARK_USED_VAR(param);
// This region is tagged with the numeric ids (0, 2) rather than a hashed
// name as used by the other Winograd algorithms in this file.
MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 0, 2) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_2x3_8x8_s8 strategy(
param.src_type, param.filter_type, param.dst_type);
// MK8-format Winograd helper configured with tile size, thread count, the
// two output-size entries and filter_meta.ocpg.
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return {};
}
//=========================== input int8 compute float32 ========= //=========================== input int8 compute float32 =========
bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
is_matmul_usable = m_matmul_algo->usable( is_matmul_usable = m_matmul_algo->usable(
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param)); .get_matmul_kern_param(param));
return is_matmul_usable && return is_matmul_usable &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
return false; return false;
} }


// Returns the workspace size reported by the Winograd helper for
// AlgoS8CF32WinogradF23_4x4_NCHW44.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. The result is ConvBias::get_workspace_size(param, m_matmul_algo),
// or 0 if control falls past the MIDOUT region.
size_t ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
// NOTE(review): the tag string below names "AlgoS8CF32WinogradF23_4x4"
// without the "_NCHW44" suffix carried by the class name -- confirm whether
// that is intentional.
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv("arm_common_AlgoS8CF32WinogradF23_4x4::get_workspace"_hash)) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
// A temporary MK4-format Winograd helper is constructed only to query the
// workspace size for `param` and the stored m_matmul_algo.
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return 0;
}

// Returns the kernel list for AlgoS8CF32WinogradF23_4x4_NCHW44.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. Returns ConvBias::get_kerns(param, m_matmul_algo), or an empty
// vector if control falls past the MIDOUT region.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
// NOTE(review): presumably keeps `param` "used" when the MIDOUT region is
// compiled out -- confirm against the macro definition.
MEGDNN_MARK_USED_VAR(param);
// NOTE(review): the tag string below names "AlgoS8CF32WinogradF23_4x4"
// without the "_NCHW44" suffix carried by the class name -- confirm whether
// that is intentional.
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8CF32WinogradF23_4x4::dispatch_kerns"_hash)) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
// MK4-format Winograd helper configured with tile size, thread count, the
// two output-size entries and filter_meta.ocpg.
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return {};
}
// Macro instantiation for AlgoS8CF32WinogradF23_4x4_NCHW44.
// NOTE(review): the macro body is not visible here. The arguments look like
// (algorithm class, Winograd strategy type, midout tag, matmul format), and
// the macro presumably emits the shared member-function definitions for the
// class -- confirm against the header that defines it.
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44,
winograd::winograd_2x3_4x4_s8_f32_nchw44,
megdnn_arm_common_conv_bias_int8,
param::MatrixMul::Format::MK4);


/* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */ /* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
using Strategy = winograd::winograd_2x3_8x8_s8_nchw44; using Strategy = winograd::winograd_2x3_8x8_s8_nchw44;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
bool is_matmul_usable = m_matmul_algo->usable(matmul_param); bool is_matmul_usable = m_matmul_algo->usable(matmul_param);
return is_matmul_usable && return is_matmul_usable &&
@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
return false; return false;
} }


// Returns the workspace size reported by the Winograd helper for
// AlgoS8WinogradF23_8x8_NCHW44.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. The result is ConvBias::get_workspace_size(param, m_matmul_algo),
// or 0 if control falls past the MIDOUT region.
size_t ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::get_workspace"_hash)) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_2x3_8x8_s8_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
// A temporary MK8-format Winograd helper is constructed only to query the
// workspace size for `param` and the stored m_matmul_algo.
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return 0;
}
// Macro instantiation for AlgoS8WinogradF23_8x8_NCHW44.
// NOTE(review): the macro body is not visible here. The arguments look like
// (algorithm class, Winograd strategy type, midout tag, matmul format), and
// the macro presumably emits the shared member-function definitions for the
// class -- confirm against the header that defines it.
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8_NCHW44,
winograd::winograd_2x3_8x8_s8_nchw44,
megdnn_arm_common_conv_bias_int8,
param::MatrixMul::Format::MK8);


// Returns the kernel list for AlgoS8WinogradF23_8x8_NCHW44.
//
// The first argument (the fallback::ConvBiasImpl operator) is unnamed and
// not used. Returns ConvBias::get_kerns(param, m_matmul_algo), or an empty
// vector if control falls past the MIDOUT region.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns"_hash)) {
// Strategy object is built from the src / filter / dst dtypes.
winograd::winograd_2x3_8x8_s8_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
// MK8-format Winograd helper configured with tile size, thread count, the
// two output-size entries and filter_meta.ocpg.
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
// Fallback result when the region above did not return.
return {};
}
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen

+ 3
- 44
dnn/src/arm_common/conv_bias/int8/algos.h View File

@@ -201,7 +201,6 @@ public:
AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -209,20 +208,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


//=======================input int8 compute fp32 output int8============ //=======================input int8 compute fp32 output int8============
@@ -231,7 +217,6 @@ public:
AlgoS8CF32WinogradF23_4x4_NCHW44( AlgoS8CF32WinogradF23_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -240,20 +225,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


//=======================input int8 compute int16 output int8============ //=======================input int8 compute int16 output int8============
@@ -262,7 +234,6 @@ public:
AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -271,20 +242,8 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);


private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


} // namespace arm_common } // namespace arm_common


+ 0
- 1
dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp View File

@@ -14,7 +14,6 @@
#include "src/arm_common/conv_bias/int8/algos.h" #include "src/arm_common/conv_bias/int8/algos.h"
#include "src/arm_common/conv_bias/int8/direct.h" #include "src/arm_common/conv_bias/int8/direct.h"
#include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h" #include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h"
#include "src/arm_common/conv_bias/int8/strategy.h"
#include "src/arm_common/elemwise_op.h" #include "src/arm_common/elemwise_op.h"
#include "src/common/opr_delegate.h" #include "src/common/opr_delegate.h"




+ 2
- 2
dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp View File

@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
auto run = [=]() { \ auto run = [=]() { \
_strategy strategy(src.layout.dtype, src.layout.dtype, \ _strategy strategy(src.layout.dtype, src.layout.dtype, \
src.layout.dtype); \ src.layout.dtype); \
megdnn::winograd::ConvBias<_strategy, _format>( \
strategy, 1, 1, 1, 1, 1) \
megdnn::winograd::ConvBias<_strategy, _format>(strategy, \
1_z) \
.filter_process(src_ptr, dst_ptr, workspace_ptr, \ .filter_process(src_ptr, dst_ptr, workspace_ptr, \
OC, IC); \ OC, IC); \
}; \ }; \


+ 19
- 35
dnn/src/fallback/conv_bias/algos.cpp View File

@@ -242,11 +242,9 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) {
using Strategy = fallback::winograd::winograd_2x3_1x1_f; using Strategy = fallback::winograd::winograd_2x3_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_f>( fallback::winograd::winograd_2x3_1x1_f>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, p)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
param.src_type, param.filter_type, param.dst_type); param.src_type, param.filter_type, param.dst_type);


auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_f>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
fallback::winograd::winograd_2x3_1x1_f>(strategy,
UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_4x4_f, fallback::winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE,
p)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(


auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_4x4_f, fallback::winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -389,11 +381,9 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) {
using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; using Strategy = fallback::winograd::winograd_2x3_1x1_qs8;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);


return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_qs8>( fallback::winograd::winograd_2x3_1x1_qs8>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, p)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(


auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_qs8>( fallback::winograd::winograd_2x3_1x1_qs8>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
strategy, UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_8x8_qs8, fallback::winograd::winograd_2x3_8x8_qs8,
param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE,
p)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns(


auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_8x8_qs8, fallback::winograd::winograd_2x3_8x8_qs8,
param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();


+ 24
- 0
dnn/src/fallback/conv_bias/common.h View File

@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode;
break; \ break; \
} }


//! Declares the members shared by the winograd ConvBias algorithm classes.
//! Expanding it inside a class body yields:
//!   - is_reproducible(), defined inline and always returning true;
//!   - override declarations of usable(), get_workspace(), dispatch_kerns(),
//!     deduce_preprocessed_filter_layout(), get_preprocess_workspace() and
//!     dispatch_preprocess_kerns();
//!   - a `private:` section holding m_matmul_algo, m_name and m_tile_size.
//! Because the expansion ends inside `private:`, every member written after
//! the macro in the same class is private unless the access is changed again.
#define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \
bool is_reproducible() const override { return true; } \
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \
AlgoSelectionStrategy algo_selection_strategy) const override; \
size_t get_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) const override; \
virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \
const NCBKernSizeParam& param) \
const override; \
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \
const override; \
size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) \
const override; \
virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \
const override; \
\
private: \
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \
mutable std::string m_name; \
uint32_t m_tile_size;

enum class PostprocessMode : uint8_t { enum class PostprocessMode : uint8_t {
FLOAT = 0, ///< support all biasmode and no_nonlinemode FLOAT = 0, ///< support all biasmode and no_nonlinemode
NO_PROCESS, ///<support non bias and identity NO_PROCESS, ///<support non bias and identity


+ 232
- 27
dnn/src/fallback/conv_bias/winograd/winograd.h View File

@@ -88,7 +88,8 @@ class ConvBias {
size_t filter_transform_buf_size = 0; size_t filter_transform_buf_size = 0;
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE, //! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE,
//! OC_BLOCK_SIZE) //! OC_BLOCK_SIZE)
if (param.filter_meta.format !=
if (param.preprocessed_filter == nullptr &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW_WINOGRAD && param::ConvBias::Format::NCHW_WINOGRAD &&
param.filter_meta.format != param.filter_meta.format !=
param::ConvBias::Format::NCHW88_WINOGRAD && param::ConvBias::Format::NCHW88_WINOGRAD &&
@@ -150,14 +151,30 @@ class ConvBias {
transform_mid_buf_size, matmul_workspace_size}); transform_mid_buf_size, matmul_workspace_size});
} }


//! Describe the scratch memory used by the filter preprocess kernels.
//! The bundle has one entry per thread (param.nr_threads entries); every
//! entry is large enough for
//! 2 * ALPHA * ALPHA * max(IC_BLOCK_SIZE, OC_BLOCK_SIZE) values of
//! output_compute_type. The bundle is returned without a base pointer, the
//! caller is expected to attach one before reading any slot.
WorkspaceBundle get_preprocess_wbundle(const NCBKernSizeParam& param) const {
    const size_t thread_cnt = param.nr_threads;
    //! temporary buffer the filter transform works in
    const size_t mid_buf_bytes =
            2 * Strategy::ALPHA * Strategy::ALPHA *
            sizeof(output_compute_type) *
            std::max(Strategy::IC_BLOCK_SIZE, Strategy::OC_BLOCK_SIZE);
    SmallVector<size_t> per_thread_sizes(thread_cnt, mid_buf_bytes);
    return WorkspaceBundle{nullptr, per_thread_sizes};
}

public: public:
//! Get the m_unit_oc_size, according to the nr_threads and //! Get the m_unit_oc_size, according to the nr_threads and
//! output_featuremap_size. When single thread the m_unit_oc_size is set //! output_featuremap_size. When single thread the m_unit_oc_size is set
//! to 2048 heuristically. When multi-threaded, the m_unit_oc_size is set //! to 2048 heuristically. When multi-threaded, the m_unit_oc_size is set
//! according to nr_threads and out_featuremap_size
ConvBias(const Strategy& strategy, size_t unit_tile_size, size_t nr_threads,
size_t OH, size_t OW, size_t OC)
//! according to nr_threads and out_featuremap_size
ConvBias(const Strategy& strategy, size_t unit_tile_size,
const NCBKernSizeParam& param)
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} { : m_strategy{strategy}, m_unit_tile_size{unit_tile_size} {
size_t nr_threads = param.nr_threads;
size_t OC = param.filter_meta.ocpg;
size_t OH = param.osz[0];
size_t OW = param.osz[1];
if (nr_threads > 1) { if (nr_threads > 1) {
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE);
size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE); size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE);
@@ -178,12 +195,55 @@ public:
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT; m_unit_oc_size = UNIT_OC_SIZE_DEFAULT;
} }
} }
//! Construct from a strategy and a tile size only. No kernel size
//! information is consulted, so m_unit_oc_size is always set to
//! UNIT_OC_SIZE_DEFAULT.
ConvBias(const Strategy& strategy, size_t unit_tile_size)
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} {
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT;
}


size_t get_workspace_size( size_t get_workspace_size(
const NCBKernSizeParam& param, const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase* matmul_algo) const { fallback::MatrixMulImpl::AlgoBase* matmul_algo) const {
return get_wbundle(param, matmul_algo).total_size_in_bytes(); return get_wbundle(param, matmul_algo).total_size_in_bytes();
} }

size_t get_preprocess_workspace_size(
const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase*) const {
return get_preprocess_wbundle(param).total_size_in_bytes();
}

//! Deduce the layout of the preprocessed (already transformed) filter.
//! Exactly one layout is returned:
//!   - DEFAULT matmul format: {GROUP, ALPHA, ALPHA, OC, IC}
//!   - MK4 matmul format:     {GROUP, ALPHA, ALPHA, OC / 4, IC / 4, 4, 4}
//!   - MK8 matmul format:     {GROUP, ALPHA, ALPHA, OC / 8, IC / 8, 8, 8}
//! Any other matmul format trips the assertion. The element type is the
//! strategy's filter dtype, except that a quantized filter is stored as
//! Float32 for MK4 and as Int16 for MK8. The matmul algorithm argument is
//! accepted but not used.
SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    const size_t GROUP = param.filter_meta.group;
    const size_t OC = param.filter_meta.ocpg;
    const size_t IC = param.filter_meta.icpg;

    //! pick the element type the transformed filter is stored in
    DType stored_dtype = m_strategy.filter_dtype;
    const bool is_quantized =
            stored_dtype.category() == DTypeCategory::QUANTIZED;
    if (is_quantized && format == param::MatrixMul::Format::MK4) {
        stored_dtype = dtype::Float32();
    } else if (is_quantized && format == param::MatrixMul::Format::MK8) {
        stored_dtype = dtype::Int16();
    }

    SmallVector<TensorLayout> preprocessed_layouts;
    if (format == param::MatrixMul::Format::DEFAULT) {
        //! plain layout, channels are not packed
        preprocessed_layouts.push_back(
                {{GROUP, Strategy::ALPHA, Strategy::ALPHA, OC, IC},
                 stored_dtype});
        return preprocessed_layouts;
    }

    //! packed layouts: channels are grouped in blocks of 4 (MK4) or 8 (MK8)
    size_t pack = 4;
    if (format != param::MatrixMul::Format::MK4) {
        megdnn_assert(format == param::MatrixMul::Format::MK8);
        pack = 8;
    }
    preprocessed_layouts.push_back(
            {{GROUP, Strategy::ALPHA, Strategy::ALPHA, OC / pack, IC / pack,
              pack, pack},
             stored_dtype});
    return preprocessed_layouts;
}

//! Used by winograd_filter_preprocess opr //! Used by winograd_filter_preprocess opr
void filter_process(const stype* filter_ptr, void filter_process(const stype* filter_ptr,
input_filter_compute_type* filter_transform_buf, input_filter_compute_type* filter_transform_buf,
@@ -199,7 +259,6 @@ public:
const WorkspaceBundle& bundle_compute, const WorkspaceBundle& bundle_compute,
const NCBKernParam& kern_param, const NCBKernParam& kern_param,
const NCBKernIndex& ncb_index) { const NCBKernIndex& ncb_index) {
size_t compute_workspace_size_per_thread = size_t compute_workspace_size_per_thread =
bundle_compute.total_size_in_bytes(); bundle_compute.total_size_in_bytes();
size_t thread_id = ncb_index.thread_id; size_t thread_id = ncb_index.thread_id;
@@ -235,6 +294,47 @@ public:
IC, oc_start, oc_end); IC, oc_start, oc_end);
} }


//! Kernel body of the filter preprocess: transform one slice of output
//! channels of one group and write the result into the preprocessed tensor.
//!
//! \param strategy winograd strategy that performs the transform
//! \param bundle per-thread scratch buffers, indexed by thread id
//! \param preprocessed_tensor destination of the transformed filter
//! \param kern_param kernel parameters, supplies the source filter and the
//!        filter meta information
//! \param ncb_index ndrange_id[0] selects the group, ndrange_id[1] the
//!        output channel slice, thread_id the scratch buffer
static void filter_preprocess(Strategy strategy,
                              const WorkspaceBundle& bundle,
                              const TensorND& preprocessed_tensor,
                              const NCBKernParam& kern_param,
                              const NCBKernIndex& ncb_index) {
    const size_t group_id = ncb_index.ndrange_id[0];
    const size_t oc_id = ncb_index.ndrange_id[1];
    const size_t thread_id = ncb_index.thread_id;
    const size_t OC = kern_param.filter_meta.ocpg;
    const size_t IC = kern_param.filter_meta.icpg;

    //! bytes taken by the transformed filter of a single group
    const size_t bytes_per_group = Strategy::ALPHA * Strategy::ALPHA * OC *
                                   IC * sizeof(input_filter_compute_type);

    //! where the transformed filter of this group is written
    const uintptr_t dst_addr =
            reinterpret_cast<uintptr_t>(preprocessed_tensor.raw_ptr) +
            group_id * bytes_per_group;
    input_filter_compute_type* filter_transform_buf =
            reinterpret_cast<input_filter_compute_type*>(dst_addr);

    //! scratch buffer owned by the executing thread
    input_filter_compute_type* transform_mid_buf =
            reinterpret_cast<input_filter_compute_type*>(
                    reinterpret_cast<uintptr_t>(bundle.get(thread_id)));

    //! untransformed filter of this group
    const stype* filter_ptr = kern_param.filter<stype>(group_id);

    //! one work item covers 8 output channels for NCHW88, 4 for NCHW44 and
    //! a single output channel for every other format
    size_t oc_pack = 1;
    if (kern_param.filter_meta.format == param::ConvBias::Format::NCHW88) {
        oc_pack = 8;
    } else if (kern_param.filter_meta.format ==
               param::ConvBias::Format::NCHW44) {
        oc_pack = 4;
    }
    const size_t oc_start = oc_pack * oc_id;
    const size_t oc_end = oc_start + oc_pack;

    strategy.filter(filter_ptr, filter_transform_buf, transform_mid_buf, OC,
                    IC, oc_start, oc_end);
}

static void winograd_compute( static void winograd_compute(
Strategy strategy, const WorkspaceBundle& bundle_top, Strategy strategy, const WorkspaceBundle& bundle_top,
const WorkspaceBundle& bundle_compute, const WorkspaceBundle& bundle_compute,
@@ -287,15 +387,28 @@ public:
compute_workspace_size_per_thread * thread_id); compute_workspace_size_per_thread * thread_id);


//! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset //! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset
const input_filter_compute_type* filter_transform_buf =
static_cast<const input_filter_compute_type*>(
ncb_param.filter<input_filter_compute_type>(group_id));
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW ||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW44) {
const input_filter_compute_type* filter_transform_buf = nullptr;
if (nullptr != ncb_param.preprocessed_filter) {
auto preprocess_raw_ptr =
ncb_param.preprocessed_filter->tensors[0].raw_ptr;
filter_transform_buf = reinterpret_cast<input_filter_compute_type*>( filter_transform_buf = reinterpret_cast<input_filter_compute_type*>(
reinterpret_cast<uintptr_t>(bundle_top.get(1)) +
reinterpret_cast<uintptr_t>(preprocess_raw_ptr) +
group_id * filter_group_size); group_id * filter_group_size);
} else {
filter_transform_buf =
static_cast<const input_filter_compute_type*>(
ncb_param.filter<input_filter_compute_type>(
group_id));
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW ||
ncb_param.filter_meta.format ==
param::ConvBias::Format::NCHW88 ||
ncb_param.filter_meta.format ==
param::ConvBias::Format::NCHW44) {
filter_transform_buf =
reinterpret_cast<input_filter_compute_type*>(
reinterpret_cast<uintptr_t>(bundle_top.get(1)) +
group_id * filter_group_size);
}
} }
//! prepare matmul param //! prepare matmul param
matmul_param.workspace_ptr = reinterpret_cast<void*>( matmul_param.workspace_ptr = reinterpret_cast<void*>(
@@ -371,6 +484,47 @@ public:
oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit); oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit);
}; };


//! Build the kernel list that transforms the filter ahead of time.
//! Only the NCHW, NCHW88 and NCHW44 filter formats are accepted, and
//! param.preprocessed_filter must provide at least one tensor; the first
//! tensor receives the transformed filter. A single kernel is returned, it
//! is dispatched over {GROUP, oc_parallelism}. The matmul algorithm
//! argument is accepted but not used.
SmallVector<NCBKern> get_preprocess_kerns(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    megdnn_assert(
            param.filter_meta.format == param::ConvBias::Format::NCHW ||
            param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
            param.filter_meta.format == param::ConvBias::Format::NCHW44);
    megdnn_assert(param.preprocessed_filter &&
                  param.preprocessed_filter->tensors.size() > 0);

    const size_t GROUP = param.filter_meta.group;
    const size_t OC = param.filter_meta.ocpg;

    //! number of independent output channel slices per group: packed
    //! formats handle 8 (NCHW88) or 4 (NCHW44) channels per slice
    size_t oc_parallelism = OC;
    if (param.filter_meta.format == param::ConvBias::Format::NCHW88) {
        megdnn_assert(OC % 8 == 0);
        oc_parallelism = OC / 8;
    } else if (param.filter_meta.format == param::ConvBias::Format::NCHW44) {
        megdnn_assert(OC % 4 == 0);
        oc_parallelism = OC / 4;
    }

    //! NOTE(review): the kernel below keeps a reference to this tensor and
    //! is returned to the caller, so the preprocessed filter presumably has
    //! to outlive the kernel execution - confirm against the dispatcher.
    const TensorND& preprocessed_dst = param.preprocessed_filter->tensors[0];
    WorkspaceBundle bundle = get_preprocess_wbundle(param);
    Strategy strategy = m_strategy;

    auto filter_process_kern =
            [strategy, bundle, &preprocessed_dst](
                    const NCBKernParam& ncb_param,
                    const NCBKernIndex& ncb_index) mutable {
                MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
                             midout_iv("filter_preprocess"_hash)) {
                    //! attach the workspace handed over at execution time
                    bundle.set(ncb_param.workspace_ptr);
                    filter_preprocess(strategy, bundle, preprocessed_dst,
                                      ncb_param, ncb_index);
                }
                MIDOUT_END();
            };

    SmallVector<NCBKern> kerns;
    kerns.push_back({filter_process_kern, {GROUP, oc_parallelism}});
    return kerns;
}

SmallVector<NCBKern> get_kerns( SmallVector<NCBKern> get_kerns(
const NCBKernSizeParam& param, const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase* matmul_algo) { fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
@@ -386,7 +540,6 @@ public:
static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) = static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
get_matmul_kern_param(param, m_unit_oc_size); get_matmul_kern_param(param, m_unit_oc_size);


Strategy strategy = m_strategy;
size_t unit_tile_size = m_unit_tile_size; size_t unit_tile_size = m_unit_tile_size;
size_t unit_oc_size = m_unit_oc_size; size_t unit_oc_size = m_unit_oc_size;
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE);
@@ -411,20 +564,22 @@ public:
param::ConvBias::Format::NCHW44_WINOGRAD)); param::ConvBias::Format::NCHW44_WINOGRAD));


SmallVector<NCBKern> kerns; SmallVector<NCBKern> kerns;
if (param.filter_meta.format == param::ConvBias::Format::NCHW ||
param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
param.filter_meta.format == param::ConvBias::Format::NCHW44) {
//! probably a gcc bug, labmda require capturing 'this' to call
//! static member function
if (param.preprocessed_filter == nullptr &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
param.filter_meta.format == param::ConvBias::Format::NCHW44)) {
auto filter_process_kern = auto filter_process_kern =
[this, strategy, bundle_top, bundle_compute](
[strategy = m_strategy, bundle_top, bundle_compute](
const NCBKernParam& ncb_param, const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) mutable { const NCBKernIndex& ncb_index) mutable {
MEGDNN_MARK_USED_VAR(this);
bundle_top.set(ncb_param.workspace_ptr);
bundle_compute.set(bundle_top.get(0));
filter_process(strategy, bundle_top, bundle_compute,
ncb_param, std::move(ncb_index));
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
midout_iv("filter_process"_hash)) {
bundle_top.set(ncb_param.workspace_ptr);
bundle_compute.set(bundle_top.get(0));
filter_process(strategy, bundle_top, bundle_compute,
ncb_param, std::move(ncb_index));
}
MIDOUT_END();
}; };
size_t oc_parallelism = OC; size_t oc_parallelism = OC;
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { if (param.filter_meta.format == param::ConvBias::Format::NCHW88) {
@@ -438,12 +593,12 @@ public:
kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}}); kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}});
} }
auto winograd_compute_kern = auto winograd_compute_kern =
[strategy, bundle_top, bundle_compute, matmul_algo,
[strategy = m_strategy, bundle_top, bundle_compute, matmul_algo,
matmul_param, unit_tile_size, matmul_param, unit_tile_size,
unit_oc_size](const NCBKernParam& ncb_param, unit_oc_size](const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) mutable { const NCBKernIndex& ncb_index) mutable {
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0,
0) {
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
midout_iv("winograd_compute"_hash)) {
bundle_top.set(ncb_param.workspace_ptr); bundle_top.set(ncb_param.workspace_ptr);
bundle_compute.set(bundle_top.get(0)); bundle_compute.set(bundle_top.get(0));
winograd_compute(strategy, bundle_top, bundle_compute, winograd_compute(strategy, bundle_top, bundle_compute,
@@ -562,4 +717,54 @@ public:
filter_dtype(filter_dtype), \ filter_dtype(filter_dtype), \
dst_dtype(dst_dtype) {} dst_dtype(dst_dtype) {}


//! Body of one forwarding member function of a winograd algorithm class.
//! The expansion marks `param` as used, opens a midout region tagged with
//! the stringified _class and _fun names, builds a _strategy from the
//! src/filter/dst types in `param`, constructs
//! megdnn::winograd::ConvBias<_strategy, _matmul_format> from that strategy,
//! m_tile_size and `param`, and returns the result of calling
//! _fun(param, m_matmul_algo) on it.
//! It must be expanded where `param`, m_tile_size and m_matmul_algo are in
//! scope. Nothing is returned after MIDOUT_END(), so the enclosing function
//! has to supply its own fallback return statement.
#define MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, _fun, _strategy, \
_midout_flag, _matmul_format) \
MEGDNN_MARK_USED_VAR(param); \
MIDOUT_BEGIN(_midout_flag, midout_iv(#_class #_fun##_hash)) { \
_strategy strategy(param.src_type, param.filter_type, param.dst_type); \
return megdnn::winograd::ConvBias<_strategy, _matmul_format>( \
strategy, m_tile_size, param) \
._fun(param, m_matmul_algo); \
} \
MIDOUT_END();

//! Defines five member functions of ConvBiasImpl::_class, each forwarding
//! to megdnn::winograd::ConvBias<_strategy, _matmul_format> through
//! MEGDNN_WINOGRADS_ALGO_FUN_DEFINE:
//!   get_workspace                     -> get_workspace_size
//!   get_preprocess_workspace          -> get_preprocess_workspace_size
//!   deduce_preprocessed_filter_layout -> deduce_preprocessed_filter_layout
//!   dispatch_preprocess_kerns         -> get_preprocess_kerns
//!   dispatch_kerns                    -> get_kerns
//! The statement after each forwarding body (`return 0;` or `return {};`)
//! is the fallback return for the case where the midout region does not
//! return. usable() is not defined here.
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \
_matmul_format) \
size_t ConvBiasImpl::_class::get_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \
_strategy, _midout_flag, \
_matmul_format); \
return 0; \
} \
size_t ConvBiasImpl::_class::get_preprocess_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, get_preprocess_workspace_size, _strategy, \
_midout_flag, _matmul_format); \
return 0; \
} \
SmallVector<TensorLayout> \
ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, deduce_preprocessed_filter_layout, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> \
ConvBiasImpl::_class::dispatch_preprocess_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \
_strategy, _midout_flag, \
_matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
}

// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen

+ 2
- 26
dnn/src/x86/conv_bias/f32/algos.h View File

@@ -94,7 +94,6 @@ public:
AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -102,19 +101,8 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
void* type() const override; void* type() const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase {
@@ -122,7 +110,6 @@ public:
AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -130,19 +117,8 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
void* type() const override; void* type() const override;

private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
}; };


/* ===================== matmul algo ===================== */ /* ===================== matmul algo ===================== */


+ 10
- 69
dnn/src/x86/conv_bias/f32/winograd_algo.cpp View File

@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW88 || (opr->param().format == param::ConvBias::Format::NCHW88 ||
@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF63_8x8::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 1) {
winograd::winograd_nchw88_6x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}

SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) {
winograd::winograd_nchw88_6x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8,
winograd::winograd_nchw88_6x3_8x8_f,
megdnn_x86_winograd_fp32,
param::MatrixMul::Format::MK8);


/* ======================= AlgoFP32WinogradF23_8*8 ======================== */ /* ======================= AlgoFP32WinogradF23_8*8 ======================== */


@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW88 || (opr->param().format == param::ConvBias::Format::NCHW88 ||
@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
return false; return false;
} }


size_t ConvBiasImpl::AlgoFP32WinogradF23_8x8::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 1) {
winograd::winograd_nchw88_2x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_8x8,
winograd::winograd_nchw88_2x3_8x8_f,
megdnn_x86_winograd_fp32,
param::MatrixMul::Format::MK8);


SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) {
winograd::winograd_nchw88_2x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen

+ 17
- 0
dnn/test/arm_common/conv_bias.cpp View File

@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) {
} }
} }


//! Runs check_winograd with the "4:6:16" algorithm string and the MK4 matmul
//! format on the MK-packed winograd argument set, using the plain checker.
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4) {
    using namespace conv_bias;
    const auto matmul_format = param::MatrixMul::Format::MK4;
    std::vector<TestArg> cases = get_winograd_mk_packed_args();
    Checker<ConvBiasForward> checker(handle());
    check_winograd("4:6:16", checker, cases, matmul_format);
}

//! Same case as CONV_BIAS_WINOGRAD_F63_4, but the checker drives the
//! operator through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    const auto matmul_format = param::MatrixMul::Format::MK4;
    std::vector<TestArg> cases = get_winograd_mk_packed_args();
    PreprocessChecker checker(handle());
    check_winograd("4:6:16", checker, cases, matmul_format);
}

#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ #define CONV_BIAS_MATMUL_QU8_MODE(MODE) \
using namespace conv_bias; \ using namespace conv_bias; \
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \


+ 459
- 0
dnn/test/arm_common/conv_bias_multi_thread.cpp View File

@@ -783,6 +783,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) {
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4); check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4);
} }


//! Runs check_winograd with the "4:2:32" algorithm string and the MK4 matmul
//! format on the MK-packed winograd argument set, with the checker driving
//! the operator through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    const auto matmul_format = param::MatrixMul::Format::MK4;
    std::vector<TestArg> cases = get_winograd_mk_packed_args();
    PreprocessChecker checker(handle());
    check_winograd("4:2:32", checker, cases, matmul_format);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
@@ -791,6 +799,16 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
param::ConvBias::Format::NCHW44); param::ConvBias::Format::NCHW44);
} }


//! Runs check_winograd with the "4:2:32" algorithm string, the MK4 matmul
//! format and the NCHW44 conv format on the arguments produced by
//! get_nchw44_conv_bias_args({3}, 1), with the checker driving the operator
//! through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F23_4_NCHW44_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    const auto matmul_format = param::MatrixMul::Format::MK4;
    const auto conv_format = param::ConvBias::Format::NCHW44;
    std::vector<TestArg> cases = get_nchw44_conv_bias_args({3}, 1);
    PreprocessChecker checker(handle());
    check_winograd("4:2:32", checker, cases, matmul_format, conv_format);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3); std::vector<TestArg> args = get_winograd_args(3);
@@ -799,6 +817,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
check_winograd("1:6:32", checker, args); check_winograd("1:6:32", checker, args);
} }


//! Runs check_winograd with the "1:6:32" algorithm string on the arguments
//! produced by get_winograd_args(3), with the checker driving the operator
//! through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    std::vector<TestArg> cases = get_winograd_args(3);
    PreprocessChecker checker(handle());
    check_winograd("1:6:32", checker, cases);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args(); std::vector<TestArg> args = get_winograd_mk_packed_args();
@@ -807,6 +833,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
} }


//! Runs check_winograd with the "4:6:16" algorithm string and the MK4 matmul
//! format on the MK-packed winograd argument set, with the checker driving
//! the operator through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    const auto matmul_format = param::MatrixMul::Format::MK4;
    std::vector<TestArg> cases = get_winograd_mk_packed_args();
    PreprocessChecker checker(handle());
    check_winograd("4:6:16", checker, cases, matmul_format);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
@@ -815,6 +850,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
param::ConvBias::Format::NCHW44); param::ConvBias::Format::NCHW44);
} }


//! Runs check_winograd with the "4:6:16" algorithm string, the MK4 matmul
//! format and the NCHW44 conv format on the arguments produced by
//! get_nchw44_conv_bias_args({3}, 1), with the checker driving the operator
//! through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F63_4_NCHW44_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    const auto matmul_format = param::MatrixMul::Format::MK4;
    const auto conv_format = param::ConvBias::Format::NCHW44;
    std::vector<TestArg> cases = get_nchw44_conv_bias_args({3}, 1);
    PreprocessChecker checker(handle());
    check_winograd("4:6:16", checker, cases, matmul_format, conv_format);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(4); std::vector<TestArg> args = get_winograd_args(4);
@@ -823,6 +867,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
check_winograd("1:5:32", checker, args); check_winograd("1:5:32", checker, args);
} }


//! Runs check_winograd with the "1:5:32" algorithm string on the arguments
//! produced by get_winograd_args(4), with the checker driving the operator
//! through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    std::vector<TestArg> cases = get_winograd_args(4);
    PreprocessChecker checker(handle());
    check_winograd("1:5:32", checker, cases);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(5); std::vector<TestArg> args = get_winograd_args(5);
@@ -831,6 +883,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
check_winograd("1:4:32", checker, args); check_winograd("1:4:32", checker, args);
} }


//! Runs check_winograd with the "1:4:32" algorithm string on the arguments
//! produced by get_winograd_args(5), with the checker driving the operator
//! through OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    std::vector<TestArg> cases = get_winograd_args(5);
    PreprocessChecker checker(handle());
    check_winograd("1:4:32", checker, cases);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3); std::vector<TestArg> args = get_winograd_args(3);
@@ -1007,6 +1067,39 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) {
1e-3f); 1e-3f);
} }


//! Float32 MK4 winograd check on the FIRST half of
//! get_winograd_mk_packed_args(8), executed through the weight-preprocess
//! proxy with out_size values 2 and 6.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F32_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    std::vector<TestArg> all_cases = get_winograd_mk_packed_args(8);
    const auto midpoint = all_cases.begin() + all_cases.size() / 2;
    std::vector<TestArg> first_half(all_cases.begin(), midpoint);

    exec_cases(handle(), first_half, {2, 6}, dtype::Float32{},
               dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
               param::MatrixMul::Format::MK4, 1e-3f);
}

TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
using namespace conv_bias; using namespace conv_bias;


@@ -1038,6 +1131,38 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
1e-3f); 1e-3f);
} }


//! Float32 MK4 winograd check on the SECOND half of
//! get_winograd_mk_packed_args(8), executed through the weight-preprocess
//! proxy with out_size values 2 and 6.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    std::vector<TestArg> all_cases = get_winograd_mk_packed_args(8);
    const auto midpoint = all_cases.begin() + all_cases.size() / 2;
    std::vector<TestArg> second_half(midpoint, all_cases.end());

    exec_cases(handle(), second_half, {2, 6}, dtype::Float32{},
               dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
               param::MatrixMul::Format::MK4, 1e-3f);
}

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
using namespace conv_bias; using namespace conv_bias;
@@ -1070,6 +1195,40 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
0.25); 0.25);
} }

//! Float16 MK8 winograd check over get_winograd_mk_packed_args(8), executed
//! through the weight-preprocess proxy with out_size 2.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F16_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
                          const std::vector<size_t>& out_size, DType A_dtype,
                          DType B_dtype, DType C_dtype, DType D_dtype,
                          param::MatrixMul::Format format, float eps) {
        for (auto&& arg : args) {
            for (uint32_t m : out_size) {
                checker.set_extra_opr_impl(std::bind(
                        winograd_algo_extra_impl, std::placeholders::_1, m,
                        arg.param, handle, format));
                checker.set_dtype(0, A_dtype)
                        .set_dtype(1, B_dtype)
                        .set_dtype(2, C_dtype)
                        .set_dtype(4, D_dtype)
                        .set_epsilon(eps)
                        .set_param(arg.param)
                        .execs({arg.src, arg.filter, arg.bias, {}, {}});
            }
        }
    };

    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    //! The RNG lives on the stack so it is released when the test ends; the
    //! previous heap allocation was never freed. Sibling tests in this file
    //! hand stack RNG addresses to set_rng the same way.
    Float16PeriodicalRNG rng(0x3c00);
    checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng);
    run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{},
        dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
        0.25);
}
#endif #endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
using namespace conv_bias; using namespace conv_bias;
@@ -1281,6 +1440,223 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
epsilon); epsilon);
} }


//! Quantized int8 MK8 winograd check ("WINOGRAD:<matmul>:8:2:32") over
//! get_quantized_winograd_mk_packed_args(8), executed through the
//! weight-preprocess proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    //! The matmul kernel named in the algorithm string depends on the target.
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_algo = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    const std::string algo_name = ssprintf("WINOGRAD:%s:8:2:32", matmul_algo);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases =
            get_quantized_winograd_mk_packed_args(8);
    UniformIntRNG int_rng{-50, 50};
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    exec_cases(handle(), quantized_cases, {2}, dtype::QuantizedS8(2.5f),
               dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
               dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8,
               1e-3);
}

//! Quantized int8 MK8 winograd check ("WINOGRAD_NCHW44:<matmul>:8:2:32")
//! over get_int8_nchw44_args(3, 4), executed through the weight-preprocess
//! proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    //! The matmul kernel named in the algorithm string depends on the target.
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_algo = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    const std::string algo_name =
            ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_algo);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases = get_int8_nchw44_args(3, 4);
    UniformIntRNG int_rng{-50, 50};
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    exec_cases(handle(), quantized_cases, {2}, dtype::QuantizedS8(2.5f),
               dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
               dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8,
               1e-3);
}

//! Quantized int8 MK8 winograd check ("WINOGRAD_NCHW44:<matmul>:8:2:32")
//! over get_int8_nchw44_args(3, 4, false, true), executed through the
//! weight-preprocess proxy.
//! NOTE(review): the trailing `true` presumably selects group mode (per the
//! test name) — confirm against get_int8_nchw44_args.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    //! The matmul kernel named in the algorithm string depends on the target.
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_algo = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    const std::string algo_name =
            ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_algo);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases =
            get_int8_nchw44_args(3, 4, false, true);
    UniformIntRNG int_rng{-50, 50};
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    exec_cases(handle(), quantized_cases, {2}, dtype::QuantizedS8(2.5f),
               dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
               dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8,
               1e-3);
}

//! Quantized int8 winograd check using an F32 MK4 matmul kernel
//! ("WINOGRAD_NCHW44:<matmul>:4:2:32") over get_int8_nchw44_args(3, 4, true),
//! executed through the weight-preprocess proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    float max_error = 0.001;
    //! The matmul kernel named in the algorithm string depends on the target.
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_F32_MK4_4x16";
#else
    const char* matmul_algo = "ARMV7_F32_MK4_4x8";
#endif
    const std::string algo_name =
            ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_algo);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases = get_int8_nchw44_args(3, 4, true);
    UniformIntRNG int_rng{-50, 50};
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    exec_cases(handle(), quantized_cases, {2},
               dtype::QuantizedS8(0.41113496f),
               dtype::QuantizedS8(0.01887994f),
               dtype::QuantizedS32(0.41113496f * 0.01887994f),
               dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
               max_error);
}

//! Quantized int8 winograd check using an F32 MK4 matmul kernel
//! ("WINOGRAD_NCHW44:<matmul>:4:2:32") over
//! get_int8_nchw44_args(3, 4, true, true), executed through the
//! weight-preprocess proxy.
//! NOTE(review): unlike its sibling tests, this test name has no `CONV_BIAS_`
//! prefix; it is kept as-is because test filters may depend on it.
TEST_F(ARM_COMMON_MULTI_THREADS,
       WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());

    //! For each (case, out_size) pair: install winograd_algo_extra_impl as
    //! the extra operator implementation, configure the dtypes of tensors
    //! 0/1/2/4, the epsilon and the param, then execute.
    auto exec_cases = [&checker](Handle* dnn_handle,
                                 const std::vector<TestArg>& cases,
                                 const std::vector<size_t>& out_sizes,
                                 DType dtype0, DType dtype1, DType dtype2,
                                 DType dtype4,
                                 param::MatrixMul::Format matmul_format,
                                 float tolerance) {
        for (const auto& test_case : cases) {
            for (uint32_t out_size : out_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, out_size,
                                  test_case.param, dnn_handle, matmul_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    float max_error = 0.001;
    //! The matmul kernel named in the algorithm string depends on the target.
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_F32_MK4_4x16";
#else
    const char* matmul_algo = "ARMV7_F32_MK4_4x8";
#endif
    const std::string algo_name =
            ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_algo);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases =
            get_int8_nchw44_args(3, 4, true, true);
    UniformIntRNG int_rng{-50, 50};
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    exec_cases(handle(), quantized_cases, {2},
               dtype::QuantizedS8(0.41113496f),
               dtype::QuantizedS8(0.01887994f),
               dtype::QuantizedS32(0.41113496f * 0.01887994f),
               dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
               max_error);
}

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) {
using namespace conv_bias; using namespace conv_bias;
@@ -1338,6 +1714,72 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) {
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25,
param::MatrixMul::Format::MK8); param::MatrixMul::Format::MK8);
} }
//! fp16 winograd check "1:2:32" over get_winograd_mk_packed_args(), executed
//! through the weight-preprocess proxy. No RNG is supplied (null pointer) and
//! the tolerance passed to check_winograd_fp16 is 0.08.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args();
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! nullptr instead of the NULL macro: type-safe null pointer constant.
    check_winograd_fp16("1:2:32", checker, args, nullptr, 0.08);
}
//! fp16 winograd check "1:4:32" on the FIRST half of get_winograd_args(5),
//! executed through the weight-preprocess proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_F45_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(5);
    std::vector<TestArg> args_head_half(args.begin(),
                                        args.begin() + args.size() / 2);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! The RNG lives on the stack so it is released when the test ends; the
    //! previous heap allocation was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:4:32", checker, args_head_half, &rng, 0.25);
}
//! fp16 winograd check "1:4:32" on the SECOND half of get_winograd_args(5),
//! executed through the weight-preprocess proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_F45_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(5);
    std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
                                        args.end());
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! The RNG lives on the stack so it is released when the test ends; the
    //! previous heap allocation was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:4:32", checker, args_back_half, &rng, 0.25);
}
//! FIXME: This test may fail when run as part of
//! `ARM_COMMON.CONV_BIAS_WINOGRAD*`, but it passes when run as a single
//! testcase.
//!
//! fp16 winograd check "1:6:32" over get_winograd_args(3), executed through
//! the weight-preprocess proxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(3);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! The RNG lives on the stack so it is released when the test ends; the
    //! previous heap allocation was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:6:32", checker, args, &rng, 0.3);
}
//! fp16 MK8 winograd check "8:2:32" on the FIRST half of
//! get_winograd_mk_packed_args(8), executed through the weight-preprocess
//! proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_8x8_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    std::vector<TestArg> args_head_half(args.begin(),
                                        args.begin() + args.size() / 2);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! The RNG lives on the stack so it is released when the test ends; the
    //! previous heap allocation was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("8:2:32", checker, args_head_half, &rng, 0.25,
                        param::MatrixMul::Format::MK8);
}
//! fp16 MK8 winograd check "8:2:32" on the SECOND half of
//! get_winograd_mk_packed_args(8), executed through the weight-preprocess
//! proxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_8x8_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
                                        args.end());
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! The RNG lives on the stack so it is released when the test ends; the
    //! previous heap allocation was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("8:2:32", checker, args_back_half, &rng, 0.25,
                        param::MatrixMul::Format::MK8);
}
#endif #endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
using namespace conv_bias; using namespace conv_bias;
@@ -1354,6 +1796,23 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {


check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
} }
//! Quantized int8 MK8 winograd check "8:2:32" over
//! get_quantized_winograd_mk_packed_args(8), executed through the
//! weight-preprocess proxy with integer inputs drawn from UniformIntRNG{-50, 50}.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> quantized_cases =
            get_quantized_winograd_mk_packed_args(8);
    PreprocessChecker checker(handle());
    UniformIntRNG int_rng{-50, 50};

    //! Quantized dtypes for tensors 0, 1, 2 and 4.
    checker.set_dtype(0, dtype::QuantizedS8(2.5f));
    checker.set_dtype(1, dtype::QuantizedS8(2.5f));
    checker.set_dtype(2, dtype::QuantizedS32(6.25f));
    checker.set_dtype(4, dtype::QuantizedS8(60.25f));

    //! The same generator feeds tensors 0, 1 and 2.
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    check_winograd("8:2:32", checker, quantized_cases,
                   param::MatrixMul::Format::MK8);
}


void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle, void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
RNG* rng, float epsilon, DType type0, DType type1, RNG* rng, float epsilon, DType type0, DType type1,


+ 32
- 1
dnn/test/x86/conv_bias.cpp View File

@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() {
TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{}); TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{});
//! bias //! bias
args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8}, args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8},
TensorShape{oc, ic, 3, 3, 8, 8}, TensorShape{2, oc, i, i, 8});
TensorShape{oc, ic, 3, 3, 8, 8},
TensorShape{2, oc, i, i, 8});


/*cur_param.sparse = param::ConvBias::Sparse::GROUP; /*cur_param.sparse = param::ConvBias::Sparse::GROUP;
args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8}, args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8},
@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) {
} }
} }


//! Executes every case from get_winograd_mk_nchw88_args() through the
//! weight-preprocess proxy, with a ConvBiasAlgoChecker for
//! "WINOGRAD:X86_F32MK8_8X8:8:6" installed as the before-exec callback.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());
    const std::string algo_name = ssprintf("WINOGRAD:X86_F32MK8_8X8:8:6");
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> nchw88_cases = get_winograd_mk_nchw88_args();
    for (const auto& test_case : nchw88_cases) {
        checker.set_param(test_case.param);
        checker.execs({test_case.src,
                       test_case.filter,
                       test_case.bias,
                       {},
                       {}});
    }
}

TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) { TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); std::vector<TestArg> args = get_winograd_mk_nchw88_args();
@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
} }
} }


//! Executes every case from get_winograd_mk_nchw88_args() through the
//! weight-preprocess proxy, with a ConvBiasAlgoChecker for
//! "WINOGRAD:X86_F32MK8_8X8:8:2" installed as the before-exec callback.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    PreprocessChecker checker(handle());
    const std::string algo_name = ssprintf("WINOGRAD:X86_F32MK8_8X8:8:2");
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> nchw88_cases = get_winograd_mk_nchw88_args();
    for (const auto& test_case : nchw88_cases) {
        checker.set_param(test_case.param);
        checker.execs({test_case.src,
                       test_case.filter,
                       test_case.bias,
                       {},
                       {}});
    }
}

TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); std::vector<TestArg> args = get_winograd_mk_nchw88_args();


Loading…
Cancel
Save