GitOrigin-RevId: 4741298e44
release-0.6
@@ -34,11 +34,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
     MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) {
         using Strategy = winograd::winograd_2x3_4x4_f16;
         Strategy strategy(param.src_type, param.filter_type, param.dst_type);
-        auto&& matmul_param =
-                megdnn::winograd::ConvBias<Strategy>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
-                        .get_matmul_kern_param(param);
+        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
+                                      strategy, m_tile_size, param)
+                                      .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                (opr->param().format == param::ConvBias::Format::NCHW ||
                 (opr->param().format ==
@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP16WinogradF23::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 1) {
-        winograd::winograd_2x3_4x4_f16 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f16>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP16WinogradF23::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 2) {
-        winograd::winograd_2x3_4x4_f16 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f16>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23,
+                                    winograd::winograd_2x3_4x4_f16,
+                                    megdnn_arm_common_winograd_fp16,
+                                    param::MatrixMul::Format::DEFAULT);
 /* ======================= AlgoFP16WinogradF45 ======================== */
@@ -106,11 +76,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
     MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) {
         using Strategy = winograd::winograd_4x5_1x1_f16;
         Strategy strategy(param.src_type, param.filter_type, param.dst_type);
-        auto&& matmul_param =
-                megdnn::winograd::ConvBias<Strategy>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
-                        .get_matmul_kern_param(param);
+        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
+                                      strategy, m_tile_size, param)
+                                      .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                (opr->param().format == param::ConvBias::Format::NCHW ||
                 (opr->param().format ==
@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP16WinogradF45::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    winograd::winograd_4x5_1x1_f16 strategy(param.src_type, param.filter_type,
-                                            param.dst_type);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 1) {
-        return megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f16>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45,
+                                    winograd::winograd_4x5_1x1_f16,
+                                    megdnn_arm_common_winograd_fp16,
+                                    param::MatrixMul::Format::DEFAULT);
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP16WinogradF45::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 2) {
-        winograd::winograd_4x5_1x1_f16 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f16>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
 /* ======================= AlgoFP16WinogradF63 ======================== */
 bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
@@ -174,11 +116,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
     MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) {
         using Strategy = winograd::winograd_6x3_1x1_f16;
         Strategy strategy(param.src_type, param.filter_type, param.dst_type);
-        auto&& matmul_param =
-                megdnn::winograd::ConvBias<Strategy>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
-                        .get_matmul_kern_param(param);
+        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
+                                      strategy, m_tile_size, param)
+                                      .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                (opr->param().format == param::ConvBias::Format::NCHW ||
                 (opr->param().format ==
@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP16WinogradF63::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    winograd::winograd_6x3_1x1_f16 strategy(param.src_type, param.filter_type,
-                                            param.dst_type);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 1) {
-        return megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f16>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP16WinogradF63::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 2) {
-        winograd::winograd_6x3_1x1_f16 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f16>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63,
+                                    winograd::winograd_6x3_1x1_f16,
+                                    megdnn_arm_common_winograd_fp16,
+                                    param::MatrixMul::Format::DEFAULT);
 /* ======================= AlgoFP16WinogradF23_8x8 ======================== */
@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
         auto&& matmul_param =
                 megdnn::winograd::ConvBias<Strategy,
                                            param::MatrixMul::Format::MK8>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
+                        strategy, m_tile_size, param)
                         .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP16WinogradF23_8x8::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 1) {
-        winograd::winograd_2x3_8x8_f16 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
-                                          param::MatrixMul::Format::MK8>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP16WinogradF23_8x8::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
-        winograd::winograd_2x3_8x8_f16 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
-                                           param::MatrixMul::Format::MK8>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8,
+                                    winograd::winograd_2x3_8x8_f16,
+                                    megdnn_arm_common_winograd_fp16,
+                                    param::MatrixMul::Format::MK8);
 /*========================from Convolution=============================*/
@@ -22,7 +22,6 @@ public:
     AlgoFP16WinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                         uint32_t tile_size)
             : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -30,22 +29,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-    static std::vector<fallback::MatrixMulImpl::Algorithm*>
-    get_avaiable_matmul_algos(const NCBKernSizeParam& param);
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP16WinogradF45 final : public AlgoBase {
@@ -53,7 +37,6 @@ public:
     AlgoFP16WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                         uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -61,30 +44,14 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-    static std::vector<fallback::MatrixMulImpl::Algorithm*>
-    get_avaiable_matmul_algos(const NCBKernSizeParam& param);
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
-    uint32_t m_tile_size;
 };
 class ConvBiasImpl::AlgoFP16WinogradF63 final : public AlgoBase {
 public:
     AlgoFP16WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                         uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -93,29 +60,13 @@ public:
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-    static std::vector<fallback::MatrixMulImpl::Algorithm*>
-    get_avaiable_matmul_algos(const NCBKernSizeParam& param);
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP16WinogradF23_8x8 final : public AlgoBase {
 public:
     AlgoFP16WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                             uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -123,19 +74,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoF16Direct final : public AlgoBase {
@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
         auto&& matmul_param =
                 megdnn::winograd::ConvBias<Strategy,
                                            param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
+                        strategy, m_tile_size, param)
                         .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 1) {
-        winograd::winograd_2x3_4x4_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f,
-                                          param::MatrixMul::Format::MK4>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF23_4x4::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 2) {
-        winograd::winograd_2x3_4x4_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f,
-                                           param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4,
+                                    winograd::winograd_2x3_4x4_f,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::MK4);
 /* ======================= AlgoFP32WinogradF63 ======================== */
@@ -113,11 +83,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
     MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) {
         using Strategy = winograd::winograd_6x3_1x1_f;
         Strategy strategy(param.src_type, param.filter_type, param.dst_type);
-        auto&& matmul_param =
-                megdnn::winograd::ConvBias<Strategy>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
-                        .get_matmul_kern_param(param);
+        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
+                                      strategy, m_tile_size, param)
+                                      .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                (opr->param().format == param::ConvBias::Format::NCHW ||
                 (opr->param().format ==
@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF63::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 1) {
-        winograd::winograd_6x3_1x1_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF63::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) {
-        winograd::winograd_6x3_1x1_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63,
+                                    winograd::winograd_6x3_1x1_f,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::DEFAULT);
 /* ======================= AlgoFP32WinogradF54 ======================== */
@@ -182,11 +123,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
     MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) {
         using Strategy = winograd::winograd_5x4_1x1_f;
         Strategy strategy(param.src_type, param.filter_type, param.dst_type);
-        auto&& matmul_param =
-                megdnn::winograd::ConvBias<Strategy>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
-                        .get_matmul_kern_param(param);
+        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
+                                      strategy, m_tile_size, param)
+                                      .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                (opr->param().format == param::ConvBias::Format::NCHW ||
                 (opr->param().format ==
@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF54::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 1) {
-        winograd::winograd_5x4_1x1_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_5x4_1x1_f>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF54::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) {
-        winograd::winograd_5x4_1x1_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_5x4_1x1_f>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54,
+                                    winograd::winograd_5x4_1x1_f,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::DEFAULT);
 /* ======================= AlgoFP32WinogradF45 ======================== */
@@ -251,11 +163,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
     MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) {
         using Strategy = winograd::winograd_4x5_1x1_f;
         Strategy strategy(param.src_type, param.filter_type, param.dst_type);
-        auto&& matmul_param =
-                megdnn::winograd::ConvBias<Strategy>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
-                        .get_matmul_kern_param(param);
+        auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
+                                      strategy, m_tile_size, param)
+                                      .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                (opr->param().format == param::ConvBias::Format::NCHW ||
                 (opr->param().format ==
@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF45::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 1) {
-        winograd::winograd_4x5_1x1_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF45::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
-        winograd::winograd_4x5_1x1_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45,
+                                    winograd::winograd_4x5_1x1_f,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::DEFAULT);
 /* ======================= AlgoFP32WinogradF63_4x4 ======================== */
@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
         auto&& matmul_param =
                 megdnn::winograd::ConvBias<Strategy,
                                            param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
+                        strategy, m_tile_size, param)
                         .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 1) {
-        winograd::winograd_6x3_4x4_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_6x3_4x4_f,
-                                          param::MatrixMul::Format::MK4>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF63_4x4::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 2) {
-        winograd::winograd_6x3_4x4_f strategy(param.src_type, param.filter_type,
-                                              param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_6x3_4x4_f,
-                                           param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4,
+                                    winograd::winograd_6x3_4x4_f,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::MK4);
 /* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */
@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
         auto&& matmul_param =
                 megdnn::winograd::ConvBias<Strategy,
                                            param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
+                        strategy, m_tile_size, param)
                         .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                m_matmul_algo->packmode() ==
@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
-                 midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
-        winograd::winograd_F23_mk4_f_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_F23_mk4_f_nchw44,
-                                          param::MatrixMul::Format::MK4>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
-                 midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
-        winograd::winograd_F23_mk4_f_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_F23_mk4_f_nchw44,
-                                           param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44,
+                                    winograd::winograd_F23_mk4_f_nchw44,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::MK4);
 /* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */
@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
         auto&& matmul_param =
                 megdnn::winograd::ConvBias<Strategy,
                                            param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg)
+                        strategy, m_tile_size, param)
                         .get_matmul_kern_param(param);
         return m_matmul_algo->usable(matmul_param) &&
                m_matmul_algo->packmode() ==
@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
-                 midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
-        winograd::winograd_F63_mk4_f_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_F63_mk4_f_nchw44,
-                                          param::MatrixMul::Format::MK4>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
-                 midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
-        winograd::winograd_F63_mk4_f_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_F63_mk4_f_nchw44,
-                                           param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44,
+                                    winograd::winograd_F63_mk4_f_nchw44,
+                                    megdnn_arm_common_winograd_fp32,
+                                    param::MatrixMul::Format::MK4);
 /* ===================== direct algo ===================== */
 MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl);
@@ -17,13 +17,11 @@
 namespace megdnn {
 namespace arm_common {
 class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase {
 public:
     AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                             uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -31,18 +29,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase {
@@ -50,7 +37,6 @@ public:
     AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                         uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -58,19 +44,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase {
@@ -78,7 +52,6 @@ public:
     AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                             uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -86,19 +59,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase {
@@ -106,7 +67,6 @@ public:
     AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                         uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -114,19 +74,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase {
@@ -134,7 +82,6 @@ public:
     AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                         uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -142,19 +89,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 //===================== NCHW44 Winograd Support =====================//
@@ -163,7 +98,6 @@ public:
     AlgoFP32WinogradF23_4x4_NCHW44(
             fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -172,18 +106,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase {
@@ -191,7 +114,6 @@ public:
     AlgoFP32WinogradF63_4x4_NCHW44(
             fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
-    bool is_reproducible() const override { return true; }
     const char* name() const override {
         if (m_name.empty()) {
             m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
@@ -200,18 +122,7 @@ public:
         }
         return m_name.c_str();
     }
-    bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(fallback::ConvBiasImpl*,
-                         const NCBKernSizeParam& param) const override;
-    virtual SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl* opr,
-            const NCBKernSizeParam& param) const override;
-private:
-    fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
-    mutable std::string m_name;
-    uint32_t m_tile_size;
+    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
 };
 // ================================================================= //
@@ -329,4 +240,6 @@ public:
 } // namespace arm_common
 } // namespace megdnn
+#undef MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
 // vim: syntax=cpp.doxygen
@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
     Strategy strategy(param.src_type, param.filter_type, param.dst_type);
     auto&& matmul_param =
             megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
-                    strategy, m_tile_size, param.nr_threads, param.osz[0],
-                    param.osz[1], param.filter_meta.ocpg)
+                    strategy, m_tile_size, param)
                     .get_matmul_kern_param(param);
     return m_matmul_algo->usable(matmul_param) &&
            m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
            param.dst_type.enumv() == DTypeEnum::QuantizedS8;
 }
-size_t ConvBiasImpl::AlgoS8WinogradF23_8x8::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    winograd::winograd_2x3_8x8_s8 strategy(param.src_type, param.filter_type,
-                                           param.dst_type);
-    return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8,
-                                      param::MatrixMul::Format::MK8>(
-                   strategy, m_tile_size, param.nr_threads, param.osz[0],
-                   param.osz[1], param.filter_meta.ocpg)
-            .get_workspace_size(param, m_matmul_algo);
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8,
+                                    winograd::winograd_2x3_8x8_s8,
+                                    megdnn_arm_common_conv_bias_int8,
+                                    param::MatrixMul::Format::MK8);
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoS8WinogradF23_8x8::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 0, 2) {
-        winograd::winograd_2x3_8x8_s8 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8,
-                                           param::MatrixMul::Format::MK8>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
 //=========================== input int8 compute float32 =========
 bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
         fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
     is_matmul_usable = m_matmul_algo->usable(
             megdnn::winograd::ConvBias<Strategy,
                                        param::MatrixMul::Format::MK4>(
-                    strategy, m_tile_size, param.nr_threads, param.osz[0],
-                    param.osz[1], param.filter_meta.ocpg)
+                    strategy, m_tile_size, param)
                    .get_matmul_kern_param(param));
     return is_matmul_usable &&
            m_matmul_algo->packmode() == PackMode::NO_PACK &&
@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(
-            megdnn_arm_common_conv_bias_int8,
-            midout_iv("arm_common_AlgoS8CF32WinogradF23_4x4::get_workspace"_hash)) {
-        winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
-                                          param::MatrixMul::Format::MK4>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MEGDNN_MARK_USED_VAR(param);
-    MIDOUT_BEGIN(
-            megdnn_arm_common_conv_bias_int8,
-            midout_iv(
-                    "arm_common_AlgoS8CF32WinogradF23_4x4::dispatch_kerns"_hash)) {
-        winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
-                                           param::MatrixMul::Format::MK4>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44,
+                                    winograd::winograd_2x3_4x4_s8_f32_nchw44,
+                                    megdnn_arm_common_conv_bias_int8,
+                                    param::MatrixMul::Format::MK4);
 /* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
 bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
     using Strategy = winograd::winograd_2x3_8x8_s8_nchw44;
     Strategy strategy(param.src_type, param.filter_type, param.dst_type);
     auto&& matmul_param =
-            megdnn::winograd::ConvBias<Strategy,
-                                       param::MatrixMul::Format::MK8>(
-                    strategy, m_tile_size, param.nr_threads, param.osz[0],
-                    param.osz[1], param.filter_meta.ocpg)
+            megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
+                    strategy, m_tile_size, param)
                    .get_matmul_kern_param(param);
     bool is_matmul_usable = m_matmul_algo->usable(matmul_param);
     return is_matmul_usable &&
@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
     return false;
 }
-size_t ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::get_workspace(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(
-            megdnn_arm_common_conv_bias_int8,
-            midout_iv(
-                    "arm_common_AlgoS8WinogradF23_8x8_NCHW44::get_workspace"_hash)) {
-        winograd::winograd_2x3_8x8_s8_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
-                                          param::MatrixMul::Format::MK8>(
-                       strategy, m_tile_size, param.nr_threads, param.osz[0],
-                       param.osz[1], param.filter_meta.ocpg)
-                .get_workspace_size(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return 0;
-}
+MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8_NCHW44,
+                                    winograd::winograd_2x3_8x8_s8_nchw44,
+                                    megdnn_arm_common_conv_bias_int8,
+                                    param::MatrixMul::Format::MK8);
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns(
-        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(
-            megdnn_arm_common_conv_bias_int8,
-            midout_iv(
-                    "arm_common_AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns"_hash)) {
-        winograd::winograd_2x3_8x8_s8_nchw44 strategy(
-                param.src_type, param.filter_type, param.dst_type);
-        auto winograd_impl =
-                megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
-                                           param::MatrixMul::Format::MK8>(
-                        strategy, m_tile_size, param.nr_threads, param.osz[0],
-                        param.osz[1], param.filter_meta.ocpg);
-        return winograd_impl.get_kerns(param, m_matmul_algo);
-    }
-    MIDOUT_END();
-    return {};
-}
 // vim: syntax=cpp.doxygen
@@ -201,7 +201,6 @@ public: | |||||
AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | ||||
uint32_t tile_size) | uint32_t tile_size) | ||||
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | ||||
bool is_reproducible() const override { return true; } | |||||
const char* name() const override { | const char* name() const override { | ||||
if (m_name.empty()) { | if (m_name.empty()) { | ||||
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | ||||
@@ -209,20 +208,7 @@ public: | |||||
} | } | ||||
return m_name.c_str(); | return m_name.c_str(); | ||||
} | } | ||||
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||||
size_t get_workspace(fallback::ConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
virtual SmallVector<NCBKern> dispatch_kerns( | |||||
fallback::ConvBiasImpl* opr, | |||||
const NCBKernSizeParam& param) const override; | |||||
static std::vector<fallback::MatrixMulImpl::Algorithm*> | |||||
get_avaiable_matmul_algos(const NCBKernSizeParam& param); | |||||
private: | |||||
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; | |||||
mutable std::string m_name; | |||||
uint32_t m_tile_size; | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE(); | |||||
}; | }; | ||||
//=======================input int8 compute fp32 output int8============ | //=======================input int8 compute fp32 output int8============ | ||||
@@ -231,7 +217,6 @@ public: | |||||
AlgoS8CF32WinogradF23_4x4_NCHW44( | AlgoS8CF32WinogradF23_4x4_NCHW44( | ||||
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) | fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) | ||||
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | ||||
bool is_reproducible() const override { return true; } | |||||
const char* name() const override { | const char* name() const override { | ||||
if (m_name.empty()) { | if (m_name.empty()) { | ||||
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | ||||
@@ -240,20 +225,7 @@ public: | |||||
} | } | ||||
return m_name.c_str(); | return m_name.c_str(); | ||||
} | } | ||||
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||||
size_t get_workspace(fallback::ConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
virtual SmallVector<NCBKern> dispatch_kerns( | |||||
fallback::ConvBiasImpl* opr, | |||||
const NCBKernSizeParam& param) const override; | |||||
static std::vector<fallback::MatrixMulImpl::Algorithm*> | |||||
get_avaiable_matmul_algos(const NCBKernSizeParam& param); | |||||
private: | |||||
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; | |||||
mutable std::string m_name; | |||||
uint32_t m_tile_size; | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE(); | |||||
}; | }; | ||||
//=======================input int8 compute int16 output int8============ | //=======================input int8 compute int16 output int8============ | ||||
@@ -262,7 +234,6 @@ public: | |||||
AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | ||||
uint32_t tile_size) | uint32_t tile_size) | ||||
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | ||||
bool is_reproducible() const override { return true; } | |||||
const char* name() const override { | const char* name() const override { | ||||
if (m_name.empty()) { | if (m_name.empty()) { | ||||
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | ||||
@@ -271,20 +242,8 @@ public: | |||||
} | } | ||||
return m_name.c_str(); | return m_name.c_str(); | ||||
} | } | ||||
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||||
size_t get_workspace(fallback::ConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
virtual SmallVector<NCBKern> dispatch_kerns( | |||||
fallback::ConvBiasImpl* opr, | |||||
const NCBKernSizeParam& param) const override; | |||||
static std::vector<fallback::MatrixMulImpl::Algorithm*> | |||||
get_avaiable_matmul_algos(const NCBKernSizeParam& param); | |||||
private: | |||||
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; | |||||
mutable std::string m_name; | |||||
uint32_t m_tile_size; | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE(); | |||||
}; | }; | ||||
} // namespace arm_common | } // namespace arm_common | ||||
@@ -14,7 +14,6 @@ | |||||
#include "src/arm_common/conv_bias/int8/algos.h" | #include "src/arm_common/conv_bias/int8/algos.h" | ||||
#include "src/arm_common/conv_bias/int8/direct.h" | #include "src/arm_common/conv_bias/int8/direct.h" | ||||
#include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h" | #include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h" | ||||
#include "src/arm_common/conv_bias/int8/strategy.h" | |||||
#include "src/arm_common/elemwise_op.h" | #include "src/arm_common/elemwise_op.h" | ||||
#include "src/common/opr_delegate.h" | #include "src/common/opr_delegate.h" | ||||
@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, | |||||
auto run = [=]() { \ | auto run = [=]() { \ | ||||
_strategy strategy(src.layout.dtype, src.layout.dtype, \ | _strategy strategy(src.layout.dtype, src.layout.dtype, \ | ||||
src.layout.dtype); \ | src.layout.dtype); \ | ||||
megdnn::winograd::ConvBias<_strategy, _format>( \ | |||||
strategy, 1, 1, 1, 1, 1) \ | |||||
megdnn::winograd::ConvBias<_strategy, _format>(strategy, \ | |||||
1_z) \ | |||||
.filter_process(src_ptr, dst_ptr, workspace_ptr, \ | .filter_process(src_ptr, dst_ptr, workspace_ptr, \ | ||||
OC, IC); \ | OC, IC); \ | ||||
}; \ | }; \ | ||||
@@ -242,11 +242,9 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( | |||||
MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { | ||||
using Strategy = fallback::winograd::winograd_2x3_1x1_f; | using Strategy = fallback::winograd::winograd_2x3_1x1_f; | ||||
Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
auto&& matmul_param = | |||||
megdnn::winograd::ConvBias<Strategy>( | |||||
strategy, UNIT_TILE_SIZE, param.nr_threads, | |||||
param.osz[0], param.osz[1], param.filter_meta.ocpg) | |||||
.get_matmul_kern_param(param); | |||||
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>( | |||||
strategy, UNIT_TILE_SIZE, param) | |||||
.get_matmul_kern_param(param); | |||||
return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
(opr->param().format == param::ConvBias::Format::NCHW || | (opr->param().format == param::ConvBias::Format::NCHW || | ||||
(opr->param().format == | (opr->param().format == | ||||
@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( | |||||
p.src_type, p.filter_type, p.dst_type); | p.src_type, p.filter_type, p.dst_type); | ||||
return megdnn::winograd::ConvBias< | return megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_1x1_f>( | fallback::winograd::winograd_2x3_1x1_f>( | ||||
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], | |||||
p.osz[1], p.filter_meta.ocpg) | |||||
strategy, UNIT_TILE_SIZE, p) | |||||
.get_workspace_size(p, m_matmul_algo); | .get_workspace_size(p, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( | |||||
param.src_type, param.filter_type, param.dst_type); | param.src_type, param.filter_type, param.dst_type); | ||||
auto winograd_impl = megdnn::winograd::ConvBias< | auto winograd_impl = megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_1x1_f>( | |||||
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg); | |||||
fallback::winograd::winograd_2x3_1x1_f>(strategy, | |||||
UNIT_TILE_SIZE, param); | |||||
return winograd_impl.get_kerns(param, m_matmul_algo); | return winograd_impl.get_kerns(param, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( | |||||
auto&& matmul_param = | auto&& matmul_param = | ||||
megdnn::winograd::ConvBias<Strategy, | megdnn::winograd::ConvBias<Strategy, | ||||
param::MatrixMul::Format::MK4>( | param::MatrixMul::Format::MK4>( | ||||
strategy, UNIT_TILE_SIZE, param.nr_threads, | |||||
param.osz[0], param.osz[1], param.filter_meta.ocpg) | |||||
strategy, UNIT_TILE_SIZE, param) | |||||
.get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
(opr->param().format == param::ConvBias::Format::NCHW || | (opr->param().format == param::ConvBias::Format::NCHW || | ||||
@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( | |||||
p.src_type, p.filter_type, p.dst_type); | p.src_type, p.filter_type, p.dst_type); | ||||
return megdnn::winograd::ConvBias< | return megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_4x4_f, | fallback::winograd::winograd_2x3_4x4_f, | ||||
param::MatrixMul::Format::MK4>( | |||||
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], | |||||
p.osz[1], p.filter_meta.ocpg) | |||||
param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE, | |||||
p) | |||||
.get_workspace_size(p, m_matmul_algo); | .get_workspace_size(p, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( | |||||
auto winograd_impl = megdnn::winograd::ConvBias< | auto winograd_impl = megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_4x4_f, | fallback::winograd::winograd_2x3_4x4_f, | ||||
param::MatrixMul::Format::MK4>( | |||||
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg); | |||||
param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE, param); | |||||
return winograd_impl.get_kerns(param, m_matmul_algo); | return winograd_impl.get_kerns(param, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -389,11 +381,9 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( | |||||
MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { | MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { | ||||
using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; | using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; | ||||
Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
auto&& matmul_param = | |||||
megdnn::winograd::ConvBias<Strategy>( | |||||
strategy, UNIT_TILE_SIZE, param.nr_threads, | |||||
param.osz[0], param.osz[1], param.filter_meta.ocpg) | |||||
.get_matmul_kern_param(param); | |||||
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>( | |||||
strategy, UNIT_TILE_SIZE, param) | |||||
.get_matmul_kern_param(param); | |||||
return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
(opr->param().format == param::ConvBias::Format::NCHW || | (opr->param().format == param::ConvBias::Format::NCHW || | ||||
@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( | |||||
p.src_type, p.filter_type, p.dst_type); | p.src_type, p.filter_type, p.dst_type); | ||||
return megdnn::winograd::ConvBias< | return megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_1x1_qs8>( | fallback::winograd::winograd_2x3_1x1_qs8>( | ||||
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], | |||||
p.osz[1], p.filter_meta.ocpg) | |||||
strategy, UNIT_TILE_SIZE, p) | |||||
.get_workspace_size(p, m_matmul_algo); | .get_workspace_size(p, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( | |||||
auto winograd_impl = megdnn::winograd::ConvBias< | auto winograd_impl = megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_1x1_qs8>( | fallback::winograd::winograd_2x3_1x1_qs8>( | ||||
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg); | |||||
strategy, UNIT_TILE_SIZE, param); | |||||
return winograd_impl.get_kerns(param, m_matmul_algo); | return winograd_impl.get_kerns(param, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( | |||||
auto&& matmul_param = | auto&& matmul_param = | ||||
megdnn::winograd::ConvBias<Strategy, | megdnn::winograd::ConvBias<Strategy, | ||||
param::MatrixMul::Format::MK8>( | param::MatrixMul::Format::MK8>( | ||||
strategy, UNIT_TILE_SIZE, param.nr_threads, | |||||
param.osz[0], param.osz[1], param.filter_meta.ocpg) | |||||
strategy, UNIT_TILE_SIZE, param) | |||||
.get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
(opr->param().format == param::ConvBias::Format::NCHW || | (opr->param().format == param::ConvBias::Format::NCHW || | ||||
@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( | |||||
p.src_type, p.filter_type, p.dst_type); | p.src_type, p.filter_type, p.dst_type); | ||||
return megdnn::winograd::ConvBias< | return megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_8x8_qs8, | fallback::winograd::winograd_2x3_8x8_qs8, | ||||
param::MatrixMul::Format::MK8>( | |||||
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], | |||||
p.osz[1], p.filter_meta.ocpg) | |||||
param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE, | |||||
p) | |||||
.get_workspace_size(p, m_matmul_algo); | .get_workspace_size(p, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns( | |||||
auto winograd_impl = megdnn::winograd::ConvBias< | auto winograd_impl = megdnn::winograd::ConvBias< | ||||
fallback::winograd::winograd_2x3_8x8_qs8, | fallback::winograd::winograd_2x3_8x8_qs8, | ||||
param::MatrixMul::Format::MK8>( | |||||
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg); | |||||
param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE, param); | |||||
return winograd_impl.get_kerns(param, m_matmul_algo); | return winograd_impl.get_kerns(param, m_matmul_algo); | ||||
} | } | ||||
MIDOUT_END(); | MIDOUT_END(); | ||||
@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode; | |||||
break; \ | break; \ | ||||
} | } | ||||
#define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \ | |||||
bool is_reproducible() const override { return true; } \ | |||||
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \ | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; \ | |||||
size_t get_workspace(fallback::ConvBiasImpl*, \ | |||||
const NCBKernSizeParam& param) const override; \ | |||||
virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \ | |||||
const NCBKernSizeParam& param) \ | |||||
const override; \ | |||||
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \ | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \ | |||||
const override; \ | |||||
size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \ | |||||
const NCBKernSizeParam& param) \ | |||||
const override; \ | |||||
virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \ | |||||
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \ | |||||
const override; \ | |||||
\ | |||||
private: \ | |||||
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \ | |||||
mutable std::string m_name; \ | |||||
uint32_t m_tile_size; | |||||
enum class PostprocessMode : uint8_t { | enum class PostprocessMode : uint8_t { | ||||
FLOAT = 0, ///< support all biasmode and no_nonlinemode | FLOAT = 0, ///< support all biasmode and no_nonlinemode | ||||
NO_PROCESS, ///< support no bias and identity | NO_PROCESS, ///< support no bias and identity | ||||
@@ -88,7 +88,8 @@ class ConvBias { | |||||
size_t filter_transform_buf_size = 0; | size_t filter_transform_buf_size = 0; | ||||
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE, | //! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE, | ||||
//! OC_BLOCK_SIZE) | //! OC_BLOCK_SIZE) | ||||
if (param.filter_meta.format != | |||||
if (param.preprocessed_filter == nullptr && | |||||
param.filter_meta.format != | |||||
param::ConvBias::Format::NCHW_WINOGRAD && | param::ConvBias::Format::NCHW_WINOGRAD && | ||||
param.filter_meta.format != | param.filter_meta.format != | ||||
param::ConvBias::Format::NCHW88_WINOGRAD && | param::ConvBias::Format::NCHW88_WINOGRAD && | ||||
@@ -150,14 +151,30 @@ class ConvBias { | |||||
transform_mid_buf_size, matmul_workspace_size}); | transform_mid_buf_size, matmul_workspace_size}); | ||||
} | } | ||||
WorkspaceBundle get_preprocess_wbundle( | |||||
const NCBKernSizeParam& param) const { | |||||
//! used for internal temporary storage | |||||
size_t transform_mid_buf_size = | |||||
2 * Strategy::ALPHA * Strategy::ALPHA * | |||||
sizeof(output_compute_type) * | |||||
std::max(Strategy::IC_BLOCK_SIZE, Strategy::OC_BLOCK_SIZE); | |||||
size_t nr_threads = param.nr_threads; | |||||
SmallVector<size_t> space_vec(nr_threads, transform_mid_buf_size); | |||||
return WorkspaceBundle{nullptr, space_vec}; | |||||
} | |||||
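For intuition, a rough worked example of the per-thread scratch sizing in get_preprocess_wbundle above; the numbers are hypothetical and assume an F(2,3) strategy with ALPHA = 4, IC_BLOCK_SIZE = OC_BLOCK_SIZE = 4 and a 4-byte output_compute_type:

    // hypothetical F(2,3) MK4 float32 numbers, mirroring get_preprocess_wbundle()
    #include <cstddef>
    constexpr std::size_t ALPHA = 4, BLOCK = 4, ELEM = sizeof(float);
    constexpr std::size_t transform_mid_buf_size =
            2 * ALPHA * ALPHA * ELEM * BLOCK;                  // 512 bytes
    static_assert(transform_mid_buf_size == 512, "scratch per thread");
    // with nr_threads == 4 the bundle holds four such buffers, 2048 bytes total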
public: | public: | ||||
//! Get the m_unit_oc_size, according to the nr_threads and | //! Get the m_unit_oc_size, according to the nr_threads and | ||||
//! output_featuremap_size. When single-threaded the m_unit_oc_size is set | //! output_featuremap_size. When single-threaded the m_unit_oc_size is set | ||||
//! to 2048 heuristically; when multi-threaded, the m_unit_oc_size is set | //! to 2048 heuristically; when multi-threaded, the m_unit_oc_size is set | ||||
//! according to nr_threads and out_featuremap_size | |||||
ConvBias(const Strategy& strategy, size_t unit_tile_size, size_t nr_threads, | |||||
size_t OH, size_t OW, size_t OC) | |||||
//! according to nr_threads and out_featuremap_size | |||||
ConvBias(const Strategy& strategy, size_t unit_tile_size, | |||||
const NCBKernSizeParam& param) | |||||
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} { | : m_strategy{strategy}, m_unit_tile_size{unit_tile_size} { | ||||
size_t nr_threads = param.nr_threads; | |||||
size_t OC = param.filter_meta.ocpg; | |||||
size_t OH = param.osz[0]; | |||||
size_t OW = param.osz[1]; | |||||
if (nr_threads > 1) { | if (nr_threads > 1) { | ||||
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); | size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); | ||||
size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE); | size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE); | ||||
@@ -178,12 +195,55 @@ public: | |||||
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT; | m_unit_oc_size = UNIT_OC_SIZE_DEFAULT; | ||||
} | } | ||||
} | } | ||||
ConvBias(const Strategy& strategy, size_t unit_tile_size) | |||||
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} { | |||||
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT; | |||||
} | |||||
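A minimal sketch of the two construction paths now available (the call sites below are hypothetical; the strategy, format and constant names mirror ones used elsewhere in this diff). The param-based overload derives nr_threads, OH, OW and OC from the NCBKernSizeParam, while the two-argument overload (the one the filter-preprocess opr calls with (strategy, 1_z)) skips the tiling heuristic and keeps UNIT_OC_SIZE_DEFAULT:

    // hypothetical call sites inside the fallback conv_bias code
    using Strategy = fallback::winograd::winograd_2x3_4x4_f;
    Strategy strategy(param.src_type, param.filter_type, param.dst_type);
    // full path: output sizes and thread count come from the kern-size param
    megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4> conv(
            strategy, UNIT_TILE_SIZE, param);
    // filter-preprocess-only path: no output sizes needed
    megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4> prep(
            strategy, 1_z);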
size_t get_workspace_size( | size_t get_workspace_size( | ||||
const NCBKernSizeParam& param, | const NCBKernSizeParam& param, | ||||
fallback::MatrixMulImpl::AlgoBase* matmul_algo) const { | fallback::MatrixMulImpl::AlgoBase* matmul_algo) const { | ||||
return get_wbundle(param, matmul_algo).total_size_in_bytes(); | return get_wbundle(param, matmul_algo).total_size_in_bytes(); | ||||
} | } | ||||
size_t get_preprocess_workspace_size( | |||||
const NCBKernSizeParam& param, | |||||
fallback::MatrixMulImpl::AlgoBase*) const { | |||||
return get_preprocess_wbundle(param).total_size_in_bytes(); | |||||
} | |||||
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( | |||||
const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) { | |||||
size_t OC = param.filter_meta.ocpg; | |||||
size_t IC = param.filter_meta.icpg; | |||||
size_t GROUP = param.filter_meta.group; | |||||
SmallVector<TensorLayout> preprocessed_layouts; | |||||
DType dtype = m_strategy.filter_dtype; | |||||
if (dtype.category() == DTypeCategory::QUANTIZED) { | |||||
if (format == param::MatrixMul::Format::MK4) { | |||||
dtype = dtype::Float32(); | |||||
} else if (format == param::MatrixMul::Format::MK8) { | |||||
dtype = dtype::Int16(); | |||||
} | |||||
} | |||||
if (format == param::MatrixMul::Format::DEFAULT) { | |||||
preprocessed_layouts.push_back( | |||||
{{GROUP, Strategy::ALPHA, Strategy::ALPHA, OC, IC}, dtype}); | |||||
} else if (format == param::MatrixMul::Format::MK4) { | |||||
preprocessed_layouts.push_back( | |||||
{{GROUP, Strategy::ALPHA, Strategy::ALPHA, OC / 4, IC / 4, | |||||
4, 4}, | |||||
dtype}); | |||||
} else { | |||||
megdnn_assert(format == param::MatrixMul::Format::MK8); | |||||
preprocessed_layouts.push_back( | |||||
{{GROUP, Strategy::ALPHA, Strategy::ALPHA, OC / 8, IC / 8, | |||||
8, 8}, | |||||
dtype}); | |||||
} | |||||
return preprocessed_layouts; | |||||
} | |||||
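For a concrete, hypothetical case (GROUP = 1, OC = IC = 8, and an F(2,3) strategy, so ALPHA = 4), the layouts deduced above come out as:

    DEFAULT : {1, 4, 4, 8, 8}        filter dtype unchanged
    MK4     : {1, 4, 4, 2, 2, 4, 4}  dtype forced to Float32 when the filter is quantized
    MK8     : {1, 4, 4, 1, 1, 8, 8}  dtype forced to Int16 when the filter is quantized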
//! Used by winograd_filter_preprocess opr | //! Used by winograd_filter_preprocess opr | ||||
void filter_process(const stype* filter_ptr, | void filter_process(const stype* filter_ptr, | ||||
input_filter_compute_type* filter_transform_buf, | input_filter_compute_type* filter_transform_buf, | ||||
@@ -199,7 +259,6 @@ public: | |||||
const WorkspaceBundle& bundle_compute, | const WorkspaceBundle& bundle_compute, | ||||
const NCBKernParam& kern_param, | const NCBKernParam& kern_param, | ||||
const NCBKernIndex& ncb_index) { | const NCBKernIndex& ncb_index) { | ||||
size_t compute_workspace_size_per_thread = | size_t compute_workspace_size_per_thread = | ||||
bundle_compute.total_size_in_bytes(); | bundle_compute.total_size_in_bytes(); | ||||
size_t thread_id = ncb_index.thread_id; | size_t thread_id = ncb_index.thread_id; | ||||
@@ -235,6 +294,47 @@ public: | |||||
IC, oc_start, oc_end); | IC, oc_start, oc_end); | ||||
} | } | ||||
static void filter_preprocess(Strategy strategy, | |||||
const WorkspaceBundle& bundle, | |||||
const TensorND& preprocessed_tensor, | |||||
const NCBKernParam& kern_param, | |||||
const NCBKernIndex& ncb_index) { | |||||
size_t thread_id = ncb_index.thread_id; | |||||
size_t oc_id = ncb_index.ndrange_id[1]; | |||||
size_t group_id = ncb_index.ndrange_id[0]; | |||||
size_t OC = kern_param.filter_meta.ocpg; | |||||
size_t IC = kern_param.filter_meta.icpg; | |||||
size_t filter_group_size = Strategy::ALPHA * Strategy::ALPHA * OC * IC * | |||||
sizeof(input_filter_compute_type); | |||||
//! Filter trans dst ptr | |||||
input_filter_compute_type* filter_transform_buf = | |||||
reinterpret_cast<input_filter_compute_type*>( | |||||
reinterpret_cast<uintptr_t>( | |||||
preprocessed_tensor.raw_ptr) + | |||||
group_id * filter_group_size); | |||||
//! Filter trans src ptr | |||||
input_filter_compute_type* transform_mid_buf = | |||||
reinterpret_cast<input_filter_compute_type*>( | |||||
reinterpret_cast<uintptr_t>(bundle.get(thread_id))); | |||||
const stype* filter_ptr = kern_param.filter<stype>(group_id); | |||||
size_t oc_start, oc_end; | |||||
if (kern_param.filter_meta.format == param::ConvBias::Format::NCHW88) { | |||||
oc_start = 8 * oc_id; | |||||
oc_end = oc_start + 8; | |||||
} else if (kern_param.filter_meta.format == | |||||
param::ConvBias::Format::NCHW44) { | |||||
oc_start = 4 * oc_id; | |||||
oc_end = oc_start + 4; | |||||
} else { | |||||
oc_start = oc_id; | |||||
oc_end = oc_id + 1; | |||||
} | |||||
strategy.filter(filter_ptr, filter_transform_buf, transform_mid_buf, OC, | |||||
IC, oc_start, oc_end); | |||||
} | |||||
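As a small worked example of the per-kern output-channel range above (numbers hypothetical): with oc_id = 3 the kern transforms channels [24, 32) under NCHW88, channels [12, 16) under NCHW44, and only channel [3, 4) in the default per-channel case.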
static void winograd_compute( | static void winograd_compute( | ||||
Strategy strategy, const WorkspaceBundle& bundle_top, | Strategy strategy, const WorkspaceBundle& bundle_top, | ||||
const WorkspaceBundle& bundle_compute, | const WorkspaceBundle& bundle_compute, | ||||
@@ -287,15 +387,28 @@ public: | |||||
compute_workspace_size_per_thread * thread_id); | compute_workspace_size_per_thread * thread_id); | ||||
//! NCHW88_WINOGRAD and NCHW_WINOGRAD use the same offset | //! NCHW88_WINOGRAD and NCHW_WINOGRAD use the same offset | ||||
const input_filter_compute_type* filter_transform_buf = | |||||
static_cast<const input_filter_compute_type*>( | |||||
ncb_param.filter<input_filter_compute_type>(group_id)); | |||||
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW44) { | |||||
const input_filter_compute_type* filter_transform_buf = nullptr; | |||||
if (nullptr != ncb_param.preprocessed_filter) { | |||||
auto preprocess_raw_ptr = | |||||
ncb_param.preprocessed_filter->tensors[0].raw_ptr; | |||||
filter_transform_buf = reinterpret_cast<input_filter_compute_type*>( | filter_transform_buf = reinterpret_cast<input_filter_compute_type*>( | ||||
reinterpret_cast<uintptr_t>(bundle_top.get(1)) + | |||||
reinterpret_cast<uintptr_t>(preprocess_raw_ptr) + | |||||
group_id * filter_group_size); | group_id * filter_group_size); | ||||
} else { | |||||
filter_transform_buf = | |||||
static_cast<const input_filter_compute_type*>( | |||||
ncb_param.filter<input_filter_compute_type>( | |||||
group_id)); | |||||
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
ncb_param.filter_meta.format == | |||||
param::ConvBias::Format::NCHW88 || | |||||
ncb_param.filter_meta.format == | |||||
param::ConvBias::Format::NCHW44) { | |||||
filter_transform_buf = | |||||
reinterpret_cast<input_filter_compute_type*>( | |||||
reinterpret_cast<uintptr_t>(bundle_top.get(1)) + | |||||
group_id * filter_group_size); | |||||
} | |||||
} | } | ||||
//! prepare matmul param | //! prepare matmul param | ||||
matmul_param.workspace_ptr = reinterpret_cast<void*>( | matmul_param.workspace_ptr = reinterpret_cast<void*>( | ||||
@@ -371,6 +484,47 @@ public: | |||||
oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit); | oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit); | ||||
}; | }; | ||||
SmallVector<NCBKern> get_preprocess_kerns( | |||||
const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) { | |||||
megdnn_assert( | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW44); | |||||
megdnn_assert(param.preprocessed_filter && | |||||
param.preprocessed_filter->tensors.size() > 0); | |||||
size_t OC = param.filter_meta.ocpg; | |||||
size_t GROUP = param.filter_meta.group; | |||||
const TensorND& preprocessed_dst = | |||||
param.preprocessed_filter->tensors[0]; | |||||
WorkspaceBundle bundle = get_preprocess_wbundle(param); | |||||
Strategy strategy = m_strategy; | |||||
SmallVector<NCBKern> kerns; | |||||
auto filter_process_kern = | |||||
[strategy, bundle, &preprocessed_dst]( | |||||
const NCBKernParam& ncb_param, | |||||
const NCBKernIndex& ncb_index) mutable { | |||||
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | |||||
midout_iv("filter_preprocess"_hash)) { | |||||
bundle.set(ncb_param.workspace_ptr); | |||||
filter_preprocess(strategy, bundle, preprocessed_dst, | |||||
ncb_param, ncb_index); | |||||
} | |||||
MIDOUT_END(); | |||||
}; | |||||
size_t oc_parallelism = OC; | |||||
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { | |||||
megdnn_assert(OC % 8 == 0); | |||||
oc_parallelism = OC / 8; | |||||
} else if (param.filter_meta.format == | |||||
param::ConvBias::Format::NCHW44) { | |||||
megdnn_assert(OC % 4 == 0); | |||||
oc_parallelism = OC / 4; | |||||
} | |||||
kerns.push_back({filter_process_kern, {GROUP, oc_parallelism}}); | |||||
return kerns; | |||||
} | |||||
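The single preprocess kern above is dispatched over an ndrange of {GROUP, oc_parallelism}. As a hypothetical example, GROUP = 2 and OC = 32 in NCHW44 gives {2, 8}, i.e. 16 independent filter-transform invocations, while plain NCHW with the same shapes gives {2, 32}.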
SmallVector<NCBKern> get_kerns( | SmallVector<NCBKern> get_kerns( | ||||
const NCBKernSizeParam& param, | const NCBKernSizeParam& param, | ||||
fallback::MatrixMulImpl::AlgoBase* matmul_algo) { | fallback::MatrixMulImpl::AlgoBase* matmul_algo) { | ||||
@@ -386,7 +540,6 @@ public: | |||||
static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) = | static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) = | ||||
get_matmul_kern_param(param, m_unit_oc_size); | get_matmul_kern_param(param, m_unit_oc_size); | ||||
Strategy strategy = m_strategy; | |||||
size_t unit_tile_size = m_unit_tile_size; | size_t unit_tile_size = m_unit_tile_size; | ||||
size_t unit_oc_size = m_unit_oc_size; | size_t unit_oc_size = m_unit_oc_size; | ||||
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); | size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); | ||||
@@ -411,20 +564,22 @@ public: | |||||
param::ConvBias::Format::NCHW44_WINOGRAD)); | param::ConvBias::Format::NCHW44_WINOGRAD)); | ||||
SmallVector<NCBKern> kerns; | SmallVector<NCBKern> kerns; | ||||
if (param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW44) { | |||||
//! probably a gcc bug: a lambda requires capturing 'this' to call | |||||
//! static member function | |||||
if (param.preprocessed_filter == nullptr && | |||||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||||
param.filter_meta.format == param::ConvBias::Format::NCHW44)) { | |||||
auto filter_process_kern = | auto filter_process_kern = | ||||
[this, strategy, bundle_top, bundle_compute]( | |||||
[strategy = m_strategy, bundle_top, bundle_compute]( | |||||
const NCBKernParam& ncb_param, | const NCBKernParam& ncb_param, | ||||
const NCBKernIndex& ncb_index) mutable { | const NCBKernIndex& ncb_index) mutable { | ||||
MEGDNN_MARK_USED_VAR(this); | |||||
bundle_top.set(ncb_param.workspace_ptr); | |||||
bundle_compute.set(bundle_top.get(0)); | |||||
filter_process(strategy, bundle_top, bundle_compute, | |||||
ncb_param, std::move(ncb_index)); | |||||
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | |||||
midout_iv("filter_process"_hash)) { | |||||
bundle_top.set(ncb_param.workspace_ptr); | |||||
bundle_compute.set(bundle_top.get(0)); | |||||
filter_process(strategy, bundle_top, bundle_compute, | |||||
ncb_param, std::move(ncb_index)); | |||||
} | |||||
MIDOUT_END(); | |||||
}; | }; | ||||
size_t oc_parallelism = OC; | size_t oc_parallelism = OC; | ||||
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { | if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { | ||||
@@ -438,12 +593,12 @@ public: | |||||
kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}}); | kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}}); | ||||
} | } | ||||
auto winograd_compute_kern = | auto winograd_compute_kern = | ||||
[strategy, bundle_top, bundle_compute, matmul_algo, | |||||
[strategy = m_strategy, bundle_top, bundle_compute, matmul_algo, | |||||
matmul_param, unit_tile_size, | matmul_param, unit_tile_size, | ||||
unit_oc_size](const NCBKernParam& ncb_param, | unit_oc_size](const NCBKernParam& ncb_param, | ||||
const NCBKernIndex& ncb_index) mutable { | const NCBKernIndex& ncb_index) mutable { | ||||
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, | |||||
0) { | |||||
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, | |||||
midout_iv("winograd_compute"_hash)) { | |||||
bundle_top.set(ncb_param.workspace_ptr); | bundle_top.set(ncb_param.workspace_ptr); | ||||
bundle_compute.set(bundle_top.get(0)); | bundle_compute.set(bundle_top.get(0)); | ||||
winograd_compute(strategy, bundle_top, bundle_compute, | winograd_compute(strategy, bundle_top, bundle_compute, | ||||
@@ -562,4 +717,54 @@ public: | |||||
filter_dtype(filter_dtype), \ | filter_dtype(filter_dtype), \ | ||||
dst_dtype(dst_dtype) {} | dst_dtype(dst_dtype) {} | ||||
#define MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, _fun, _strategy, \ | |||||
_midout_flag, _matmul_format) \ | |||||
MEGDNN_MARK_USED_VAR(param); \ | |||||
MIDOUT_BEGIN(_midout_flag, midout_iv(#_class #_fun##_hash)) { \ | |||||
_strategy strategy(param.src_type, param.filter_type, param.dst_type); \ | |||||
return megdnn::winograd::ConvBias<_strategy, _matmul_format>( \ | |||||
strategy, m_tile_size, param) \ | |||||
._fun(param, m_matmul_algo); \ | |||||
} \ | |||||
MIDOUT_END(); | |||||
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \ | |||||
_matmul_format) \ | |||||
size_t ConvBiasImpl::_class::get_workspace( \ | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \ | |||||
_strategy, _midout_flag, \ | |||||
_matmul_format); \ | |||||
return 0; \ | |||||
} \ | |||||
size_t ConvBiasImpl::_class::get_preprocess_workspace( \ | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ | |||||
_class, get_preprocess_workspace_size, _strategy, \ | |||||
_midout_flag, _matmul_format); \ | |||||
return 0; \ | |||||
} \ | |||||
SmallVector<TensorLayout> \ | |||||
ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \ | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ | |||||
_class, deduce_preprocessed_filter_layout, _strategy, \ | |||||
_midout_flag, _matmul_format); \ | |||||
return {}; \ | |||||
} \ | |||||
SmallVector<ConvBiasImpl::NCBKern> \ | |||||
ConvBiasImpl::_class::dispatch_preprocess_kerns( \ | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \ | |||||
_strategy, _midout_flag, \ | |||||
_matmul_format); \ | |||||
return {}; \ | |||||
} \ | |||||
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \ | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ | |||||
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \ | |||||
_midout_flag, _matmul_format); \ | |||||
return {}; \ | |||||
} | |||||
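A minimal sketch of how the two macros pair up (the class name below is hypothetical; real uses appear elsewhere in this diff, e.g. AlgoFP32WinogradF63_8x8). MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() goes in the algorithm class body and supplies the overriding member declarations plus the m_matmul_algo / m_name / m_tile_size fields; MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(...) in the matching source file expands the five forwarding definitions:

    // hypothetical pairing; only the class name is invented
    class ConvBiasImpl::AlgoFooWinogradF23 final : public AlgoBase {
    public:
        AlgoFooWinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                           uint32_t tile_size)
                : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
        const char* name() const override { return "FOO_WINOGRAD_F23"; }
        MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
    };

    MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFooWinogradF23,
                                        fallback::winograd::winograd_2x3_1x1_f,
                                        megdnn_fallback_winograd,
                                        param::MatrixMul::Format::DEFAULT);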
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -94,7 +94,6 @@ public: | |||||
AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | ||||
uint32_t tile_size) | uint32_t tile_size) | ||||
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | ||||
bool is_reproducible() const override { return true; } | |||||
const char* name() const override { | const char* name() const override { | ||||
if (m_name.empty()) { | if (m_name.empty()) { | ||||
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | ||||
@@ -102,19 +101,8 @@ public: | |||||
} | } | ||||
return m_name.c_str(); | return m_name.c_str(); | ||||
} | } | ||||
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||||
size_t get_workspace(fallback::ConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
virtual SmallVector<NCBKern> dispatch_kerns( | |||||
fallback::ConvBiasImpl* opr, | |||||
const NCBKernSizeParam& param) const override; | |||||
void* type() const override; | void* type() const override; | ||||
private: | |||||
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; | |||||
mutable std::string m_name; | |||||
uint32_t m_tile_size; | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE(); | |||||
}; | }; | ||||
class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase { | class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase { | ||||
@@ -122,7 +110,6 @@ public: | |||||
AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, | ||||
uint32_t tile_size) | uint32_t tile_size) | ||||
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} | ||||
bool is_reproducible() const override { return true; } | |||||
const char* name() const override { | const char* name() const override { | ||||
if (m_name.empty()) { | if (m_name.empty()) { | ||||
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( | ||||
@@ -130,19 +117,8 @@ public: | |||||
} | } | ||||
return m_name.c_str(); | return m_name.c_str(); | ||||
} | } | ||||
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||||
size_t get_workspace(fallback::ConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
virtual SmallVector<NCBKern> dispatch_kerns( | |||||
fallback::ConvBiasImpl* opr, | |||||
const NCBKernSizeParam& param) const override; | |||||
void* type() const override; | void* type() const override; | ||||
private: | |||||
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; | |||||
mutable std::string m_name; | |||||
uint32_t m_tile_size; | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE(); | |||||
}; | }; | ||||
/* ===================== matmul algo ===================== */ | /* ===================== matmul algo ===================== */ | ||||
@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( | |||||
auto&& matmul_param = | auto&& matmul_param = | ||||
megdnn::winograd::ConvBias<Strategy, | megdnn::winograd::ConvBias<Strategy, | ||||
param::MatrixMul::Format::MK8>( | param::MatrixMul::Format::MK8>( | ||||
strategy, m_tile_size, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg) | |||||
strategy, m_tile_size, param) | |||||
.get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
(opr->param().format == param::ConvBias::Format::NCHW88 || | (opr->param().format == param::ConvBias::Format::NCHW88 || | ||||
@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( | |||||
return false; | return false; | ||||
} | } | ||||
size_t ConvBiasImpl::AlgoFP32WinogradF63_8x8::get_workspace( | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 1) { | |||||
winograd::winograd_nchw88_6x3_8x8_f strategy( | |||||
param.src_type, param.filter_type, param.dst_type); | |||||
return megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f, | |||||
param::MatrixMul::Format::MK8>( | |||||
strategy, m_tile_size, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg) | |||||
.get_workspace_size(param, m_matmul_algo); | |||||
} | |||||
MIDOUT_END(); | |||||
return 0; | |||||
} | |||||
SmallVector<ConvBiasImpl::NCBKern> | |||||
ConvBiasImpl::AlgoFP32WinogradF63_8x8::dispatch_kerns( | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) { | |||||
winograd::winograd_nchw88_6x3_8x8_f strategy( | |||||
param.src_type, param.filter_type, param.dst_type); | |||||
auto winograd_impl = | |||||
megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f, | |||||
param::MatrixMul::Format::MK8>( | |||||
strategy, m_tile_size, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg); | |||||
return winograd_impl.get_kerns(param, m_matmul_algo); | |||||
} | |||||
MIDOUT_END(); | |||||
return {}; | |||||
} | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8, | |||||
winograd::winograd_nchw88_6x3_8x8_f, | |||||
megdnn_x86_winograd_fp32, | |||||
param::MatrixMul::Format::MK8); | |||||
/* ======================= AlgoFP32WinogradF23_8*8 ======================== */ | /* ======================= AlgoFP32WinogradF23_8*8 ======================== */ | ||||
@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( | |||||
auto&& matmul_param = | auto&& matmul_param = | ||||
megdnn::winograd::ConvBias<Strategy, | megdnn::winograd::ConvBias<Strategy, | ||||
param::MatrixMul::Format::MK8>( | param::MatrixMul::Format::MK8>( | ||||
strategy, m_tile_size, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg) | |||||
strategy, m_tile_size, param) | |||||
.get_matmul_kern_param(param); | .get_matmul_kern_param(param); | ||||
return m_matmul_algo->usable(matmul_param) && | return m_matmul_algo->usable(matmul_param) && | ||||
(opr->param().format == param::ConvBias::Format::NCHW88 || | (opr->param().format == param::ConvBias::Format::NCHW88 || | ||||
@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( | |||||
return false; | return false; | ||||
} | } | ||||
size_t ConvBiasImpl::AlgoFP32WinogradF23_8x8::get_workspace( | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 1) { | |||||
winograd::winograd_nchw88_2x3_8x8_f strategy( | |||||
param.src_type, param.filter_type, param.dst_type); | |||||
return megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f, | |||||
param::MatrixMul::Format::MK8>( | |||||
strategy, m_tile_size, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg) | |||||
.get_workspace_size(param, m_matmul_algo); | |||||
} | |||||
MIDOUT_END(); | |||||
return 0; | |||||
} | |||||
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_8x8, | |||||
winograd::winograd_nchw88_2x3_8x8_f, | |||||
megdnn_x86_winograd_fp32, | |||||
param::MatrixMul::Format::MK8); | |||||
SmallVector<ConvBiasImpl::NCBKern> | |||||
ConvBiasImpl::AlgoFP32WinogradF23_8x8::dispatch_kerns( | |||||
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) { | |||||
winograd::winograd_nchw88_2x3_8x8_f strategy( | |||||
param.src_type, param.filter_type, param.dst_type); | |||||
auto winograd_impl = | |||||
megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f, | |||||
param::MatrixMul::Format::MK8>( | |||||
strategy, m_tile_size, param.nr_threads, param.osz[0], | |||||
param.osz[1], param.filter_meta.ocpg); | |||||
return winograd_impl.get_kerns(param, m_matmul_algo); | |||||
} | |||||
MIDOUT_END(); | |||||
return {}; | |||||
} | |||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) { | |||||
} | } | ||||
} | } | ||||
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | |||||
Checker<ConvBiasForward> checker(handle()); | |||||
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); | |||||
} | |||||
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); | |||||
} | |||||
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ | #define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ | ||||
using namespace conv_bias; \ | using namespace conv_bias; \ | ||||
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ | std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ | ||||
@@ -783,6 +783,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) { | |||||
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4); | check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); | std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); | ||||
@@ -791,6 +799,16 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) { | |||||
param::ConvBias::Format::NCHW44); | param::ConvBias::Format::NCHW44); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_F23_4_NCHW44_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4, | |||||
param::ConvBias::Format::NCHW44); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_args(3); | std::vector<TestArg> args = get_winograd_args(3); | ||||
@@ -799,6 +817,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) { | |||||
check_winograd("1:6:32", checker, args); | check_winograd("1:6:32", checker, args); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_args(3); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("1:6:32", checker, args); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | std::vector<TestArg> args = get_winograd_mk_packed_args(); | ||||
@@ -807,6 +833,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) { | |||||
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); | check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); | std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); | ||||
@@ -815,6 +850,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) { | |||||
param::ConvBias::Format::NCHW44); | param::ConvBias::Format::NCHW44); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4, | |||||
param::ConvBias::Format::NCHW44); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_args(4); | std::vector<TestArg> args = get_winograd_args(4); | ||||
@@ -823,6 +867,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) { | |||||
check_winograd("1:5:32", checker, args); | check_winograd("1:5:32", checker, args); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_args(4); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("1:5:32", checker, args); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_args(5); | std::vector<TestArg> args = get_winograd_args(5); | ||||
@@ -831,6 +883,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { | |||||
check_winograd("1:4:32", checker, args); | check_winograd("1:4:32", checker, args); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_args(5); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd("1:4:32", checker, args); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_args(3); | std::vector<TestArg> args = get_winograd_args(3); | ||||
@@ -1007,6 +1067,39 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) { | |||||
1e-3f); | 1e-3f); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_MK_PACKED_F32_1_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||||
std::vector<TestArg> args_first_half(args.begin(), | |||||
args.begin() + args.size() / 2); | |||||
run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, | |||||
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, | |||||
1e-3f); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
@@ -1038,6 +1131,38 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { | |||||
1e-3f); | 1e-3f); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||||
std::vector<TestArg> args_second_half(args.begin() + args.size() / 2, | |||||
args.end()); | |||||
run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, | |||||
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, | |||||
1e-3f); | |||||
} | |||||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
@@ -1070,6 +1195,40 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { | |||||
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, | dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, | ||||
0.25); | 0.25); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_MK_PACKED_F16_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||||
checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); | |||||
run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{}, | |||||
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, | |||||
0.25); | |||||
} | |||||
#endif | #endif | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
@@ -1281,6 +1440,223 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
epsilon); | epsilon); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_MK_PACKED_INT8_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
#if MEGDNN_AARCH64 | |||||
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; | |||||
#else | |||||
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; | |||||
#endif | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD:%s:8:2:32", matmul_name).c_str())); | |||||
std::vector<TestArg> quantized_args = | |||||
get_quantized_winograd_mk_packed_args(8); | |||||
UniformIntRNG int_rng{-50, 50}; | |||||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
#if MEGDNN_AARCH64 | |||||
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; | |||||
#else | |||||
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; | |||||
#endif | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str())); | |||||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4); | |||||
UniformIntRNG int_rng{-50, 50}; | |||||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
#if MEGDNN_AARCH64 | |||||
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; | |||||
#else | |||||
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; | |||||
#endif | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str())); | |||||
std::vector<TestArg> quantized_args = | |||||
get_int8_nchw44_args(3, 4, false, true); | |||||
UniformIntRNG int_rng{-50, 50}; | |||||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
float epsilon = 0.001; | |||||
#if MEGDNN_AARCH64 | |||||
const char* matmul_name = "AARCH64_F32_MK4_4x16"; | |||||
#else | |||||
const char* matmul_name = "ARMV7_F32_MK4_4x8"; | |||||
#endif | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); | |||||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true); | |||||
UniformIntRNG int_rng{-50, 50}; | |||||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), | |||||
dtype::QuantizedS8(0.01887994f), | |||||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||||
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, | |||||
epsilon); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||||
const std::vector<size_t>& out_size, DType A_dtype, | |||||
DType B_dtype, DType C_dtype, DType D_dtype, | |||||
param::MatrixMul::Format format, float eps) { | |||||
for (auto&& arg : args) { | |||||
for (uint32_t m : out_size) { | |||||
checker.set_extra_opr_impl(std::bind( | |||||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||||
arg.param, handle, format)); | |||||
checker.set_dtype(0, A_dtype) | |||||
.set_dtype(1, B_dtype) | |||||
.set_dtype(2, C_dtype) | |||||
.set_dtype(4, D_dtype) | |||||
.set_epsilon(eps) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
}; | |||||
float epsilon = 0.001; | |||||
#if MEGDNN_AARCH64 | |||||
const char* matmul_name = "AARCH64_F32_MK4_4x16"; | |||||
#else | |||||
const char* matmul_name = "ARMV7_F32_MK4_4x8"; | |||||
#endif | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); | |||||
std::vector<TestArg> quantized_args = | |||||
get_int8_nchw44_args(3, 4, true, true); | |||||
UniformIntRNG int_rng{-50, 50}; | |||||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), | |||||
dtype::QuantizedS8(0.01887994f), | |||||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||||
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, | |||||
epsilon); | |||||
} | |||||
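
The same `run` lambda is re-declared verbatim in each of the *_WEIGHT_PREPROCESS tests added above. A minimal sketch of a file-local helper that could replace it is shown below; the helper name is hypothetical, and it assumes this test file's existing utilities (Checker, OprWeightPreprocessProxy, conv_bias::TestArg, winograd_algo_extra_impl) are already in scope.

static void run_winograd_weight_preprocess_check(
        Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>&
                checker,
        Handle* handle, const std::vector<conv_bias::TestArg>& args,
        const std::vector<size_t>& out_size, DType A_dtype, DType B_dtype,
        DType C_dtype, DType D_dtype, param::MatrixMul::Format format,
        float eps) {
    for (auto&& arg : args) {
        for (uint32_t m : out_size) {
            //! compare against the reference winograd implementation built
            //! with the same output block size m and matmul format
            checker.set_extra_opr_impl(std::bind(
                    winograd_algo_extra_impl, std::placeholders::_1, m,
                    arg.param, handle, format));
            checker.set_dtype(0, A_dtype)
                    .set_dtype(1, B_dtype)
                    .set_dtype(2, C_dtype)
                    .set_dtype(4, D_dtype)
                    .set_epsilon(eps)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, {}, {}});
        }
    }
}

Each test body would then construct its checker, set the RNGs and the algorithm-name callback as it already does, and call the helper instead of defining the lambda.
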
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
@@ -1338,6 +1714,72 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) { | |||||
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, | check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, | ||||
param::MatrixMul::Format::MK8); | param::MatrixMul::Format::MK8); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
check_winograd_fp16("1:2:32", checker, args, NULL, 0.08); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_F16_F45_1_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_args(5); | |||||
std::vector<TestArg> args_head_half(args.begin(), | |||||
args.begin() + args.size() / 2); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
//! fp16 range -1.0 ~ 1.0 | |||||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||||
check_winograd_fp16("1:4:32", checker, args_head_half, rng, 0.25); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_F16_F45_2_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_args(5); | |||||
std::vector<TestArg> args_back_half(args.begin() + args.size() / 2, | |||||
args.end()); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
//! fp16 range -1.0 ~ 1.0 | |||||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||||
check_winograd_fp16("1:4:32", checker, args_back_half, rng, 0.25); | |||||
} | |||||
//! FIXME: This test may fail when run together with the other | |||||
//! `ARM_COMMON.CONV_BIAS_WINOGRAD*` cases, but it passes when run alone | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F63_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_args(3); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
//! fp16 range -1.0 ~ 1.0 | |||||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||||
check_winograd_fp16("1:6:32", checker, args, rng, 0.3); | |||||
} | |||||
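
If the interaction mentioned in the FIXME above shows up, the case can presumably be isolated with gtest's standard filter flag (test binary name omitted), e.g. `--gtest_filter=ARM_COMMON_MULTI_THREADS.CONV_BIAS_WINOGRAD_F16_F63_WEIGHT_PREPROCESS`.
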
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_F16_8x8_1_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||||
std::vector<TestArg> args_head_half(args.begin(), | |||||
args.begin() + args.size() / 2); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||||
check_winograd_fp16("8:2:32", checker, args_head_half, rng, 0.25, | |||||
param::MatrixMul::Format::MK8); | |||||
} | |||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_F16_8x8_2_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||||
std::vector<TestArg> args_back_half(args.begin() + args.size() / 2, | |||||
args.end()); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||||
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, | |||||
param::MatrixMul::Format::MK8); | |||||
} | |||||
#endif | #endif | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { | TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
@@ -1354,6 +1796,23 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { | |||||
check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); | check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); | ||||
} | } | ||||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||||
CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_quantized_winograd_mk_packed_args(8); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
UniformIntRNG rng{-50, 50}; | |||||
checker.set_dtype(0, dtype::QuantizedS8(2.5f)) | |||||
.set_dtype(1, dtype::QuantizedS8(2.5f)) | |||||
.set_dtype(2, dtype::QuantizedS32(6.25f)) | |||||
.set_dtype(4, dtype::QuantizedS8(60.25f)) | |||||
.set_rng(0, &rng) | |||||
.set_rng(1, &rng) | |||||
.set_rng(2, &rng); | |||||
check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); | |||||
} | |||||
void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle, | void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle, | ||||
RNG* rng, float epsilon, DType type0, DType type1, | RNG* rng, float epsilon, DType type0, DType type1, | ||||
@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() { | |||||
TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{}); | TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{}); | ||||
//! bias | //! bias | ||||
args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8}, | args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8}, | ||||
TensorShape{oc, ic, 3, 3, 8, 8}, TensorShape{2, oc, i, i, 8}); | |||||
TensorShape{oc, ic, 3, 3, 8, 8}, | |||||
TensorShape{2, oc, i, i, 8}); | |||||
/*cur_param.sparse = param::ConvBias::Sparse::GROUP; | /*cur_param.sparse = param::ConvBias::Sparse::GROUP; | ||||
args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8}, | args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8}, | ||||
@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) { | |||||
} | } | ||||
} | } | ||||
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD:X86_F32MK8_8X8:8:6").c_str())); | |||||
for (auto&& arg : args) { | |||||
checker.set_param(arg.param).execs( | |||||
{arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) { | TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | ||||
@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) { | |||||
} | } | ||||
} | } | ||||
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23_WEIGHT_PREPROCESS) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | |||||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||||
handle()); | |||||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||||
ssprintf("WINOGRAD:X86_F32MK8_8X8:8:2").c_str())); | |||||
for (auto&& arg : args) { | |||||
checker.set_param(arg.param).execs( | |||||
{arg.src, arg.filter, arg.bias, {}, {}}); | |||||
} | |||||
} | |||||
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { | TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | ||||