GitOrigin-RevId: 78c3cfceae
release-1.2
@@ -435,16 +435,6 @@ public: | |||
const TensorLayout& bias, const TensorLayout& z, | |||
const TensorLayout& dst) = 0; | |||
/** | |||
* \brief deduce the origin (pre-transform) filter layout and conv_bias param | |||
* from a winograd-transformed filter layout; this is used in fast-run to | |||
* construct the origin cache-key | |||
* | |||
* \param format conv_bias format the transformed filter is stored in | |||
* \param output_block_size winograd output block size (m in F(m, r)) | |||
* \param src_layout layout of the convolution input | |||
* \param winograd_filter_layout layout of the transformed filter | |||
* \param[out] origin_layout receives the deduced origin filter layout | |||
* \param[out] origin_param receives the adjusted conv_bias param | |||
*/ | |||
static void deduce_winograd_origin_layout_and_param( | |||
const Param::Format format, const size_t output_block_size, | |||
const TensorLayout& src_layout, | |||
const TensorLayout& winograd_filter_layout, | |||
TensorLayout& origin_layout, Param& origin_param); | |||
enum class BiasMode : uint32_t { | |||
NO_BIAS = 0, //!< no bias | |||
BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1] | |||
@@ -91,29 +91,6 @@ class MaxTensorDiff : public OperatorBase { | |||
void check_exec(const TensorLayout& layout1, | |||
const TensorLayout& layout2, size_t workspace_in_bytes); | |||
}; | |||
/*! | |||
* \brief winograd preprocess opr. | |||
* | |||
* Abstract operator interface: exec() is pure virtual in this class, so a | |||
* subclass has to supply the actual filter transform. | |||
* | |||
* for the detail \see src/fallback/conv_bias/winograd/winograd.h | |||
* | |||
*/ | |||
class WinogradFilterPreprocess : public OperatorBase { | |||
//! NOTE(review): presumably binds param::Winograd as this operator's param type -- confirm against DEF_OPR_PARAM | |||
DEF_OPR_PARAM(Winograd); | |||
//! NOTE(review): the trailing "1, 1" presumably declares one input and one output tensor -- confirm against DEF_OPR_IMPL | |||
DEF_OPR_IMPL(WinogradFilterPreprocess, OperatorBase, 1, 1); | |||
public: | |||
//! run the preprocess: reads \p src, writes \p dst; the unnamed third argument is the workspace | |||
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||
_megdnn_workspace) = 0; | |||
//! workspace size in bytes for the two given layouts (presumably src then dst -- confirm against the definition) | |||
size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&); | |||
//! fill \p dst from \p src (only \p dst is writable here) | |||
void deduce_layout(const TensorLayout& src, TensorLayout& dst); | |||
protected: | |||
//! argument check for exec(); takes the two layouts and the workspace size the caller provides | |||
void check_exec(const TensorLayout& src, const TensorLayout& dst, | |||
size_t workspace_in_bytes); | |||
}; | |||
} // namespace megdnn | |||
#include "megdnn/internal/opr_header_epilogue.h" | |||
@@ -39,7 +39,7 @@ pdef('Axis').add_fields('int32', 'axis', 0) | |||
'NCHW44','NCHW44_DOT', | |||
Doc('NCHW_WINOGRAD', 'NCHW layout with weights tranformed by winograd'), | |||
Doc('NCHW88_WINOGRAD', 'NCHW88 layout with weights tranformed by winograd'), | |||
Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights tranformed by winograd'), | |||
Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights tranformed by winograd'), | |||
Doc('NCHW4_NCHW32', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), | |||
Doc('NCHW32_NCHW4', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), | |||
Doc('NCHW4_NCHW', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), | |||
@@ -456,15 +456,6 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) | |||
'layout is (K/4, M/4, 4(m), 4(k)) x (K/4, N, 4(k))')) | |||
) | |||
(pdef('Winograd', 'winograd param used in convbias'). | |||
add_fields( | |||
'uint32', | |||
Doc('output_block_size', 'output block size, detail meaning see winograd ' | |||
'in convbias, equals to the meaning of m in F(m, r)'), 0). | |||
add_enum_alias('Format', 'MatrixMul'). | |||
add_enum_alias('ComputeMode', 'Convolution', name_field='compute_mode') | |||
) | |||
(pdef('SVD'). | |||
add_fields('bool', | |||
Doc('full_matrices', | |||
@@ -27,7 +27,7 @@ using namespace arm_common; | |||
/* ======================= AlgoFP16WinogradF23 ======================== */ | |||
bool ConvBiasImpl::AlgoFP16WinogradF23::usable( | |||
const NCBKernSizeParam& param, | |||
const NCBKernSizeParam& param, | |||
AlgoSelectionStrategy /*algo_selection_strategy*/) const { | |||
MEGDNN_MARK_USED_VAR(param); | |||
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { | |||
@@ -37,12 +37,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -78,12 +73,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 4 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 5) && | |||
@@ -117,12 +107,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 6 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -162,12 +147,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == PackMode::NO_PACK && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK8)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -47,12 +47,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == PackMode::NO_PACK && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -86,12 +81,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 6 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -125,12 +115,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 5 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 4) && | |||
@@ -164,12 +149,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 4 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 5) && | |||
@@ -209,12 +189,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == PackMode::NO_PACK && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 6 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -257,12 +232,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == | |||
fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -303,12 +273,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == | |||
fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD && | |||
param.output_block_size == 6 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -350,12 +315,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF73_4x4_NCHW44::usable( | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == | |||
fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD && | |||
param.output_block_size == 7 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -242,14 +242,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
m_matmul_algo->packmode() == PackMode::NO_PACK && | |||
((param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -293,13 +287,8 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||
.get_matmul_kern_param(param)); | |||
return is_matmul_usable && | |||
m_matmul_algo->packmode() == PackMode::NO_PACK && | |||
((param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | |||
((param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD) && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -341,14 +330,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( | |||
.get_matmul_kern_param(param); | |||
bool is_matmul_usable = m_matmul_algo->usable(matmul_param); | |||
return is_matmul_usable && | |||
((param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW44 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -240,7 +240,6 @@ void winograd_2x3_4x4_s8_f32_nchw44::filter(const int8_t* filter, | |||
float* transform_mid_buf, size_t OC, size_t IC, | |||
size_t oc_start, size_t oc_end) { | |||
constexpr int alpha = 2 + 3 - 1; | |||
/** | |||
* origin: (4x3) * (3 x 3) * (3 x 4) | |||
*/ | |||
@@ -290,8 +290,8 @@ ConvBiasImpl::get_all_packed_algo() { | |||
bool ConvBiasImpl::is_matmul_quantized_prefer( | |||
const ConvBiasImpl::NCBKernSizeParam& param) const { | |||
fallback::ConvBiasImpl::NCBKernSizeParam conv_ncb_param( | |||
param, 0, param::MatrixMul::Format::DEFAULT, {}, 0, | |||
BiasMode::NO_BIAS, param::ConvBias::NonlineMode::IDENTITY); | |||
param, {}, 0, BiasMode::NO_BIAS, | |||
param::ConvBias::NonlineMode::IDENTITY); | |||
conv_ncb_param.dst_type = param.bias_type; | |||
conv_ncb_param.filter_meta.group = 1; | |||
@@ -320,11 +320,6 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order( | |||
auto FH = param.filter_meta.spatial[0]; | |||
auto FW = param.filter_meta.spatial[1]; | |||
//! TODO: now winograd only support fast-run | |||
if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) { | |||
return {AlgoCategory::WINOGRAD}; | |||
} | |||
//! im2col | |||
bool im2col_prefer = (IC >= 32 || OC >= 32); | |||
//! quantized algo use matmul when direct algo is unusable | |||
@@ -27,7 +27,7 @@ | |||
#include "src/arm_common/type_cvt/opr_impl.h" | |||
#include "src/arm_common/reduce/opr_impl.h" | |||
#include "src/arm_common/conv_bias/opr_impl.h" | |||
#include "src/arm_common/winograd_filter_preprocess/opr_impl.h" | |||
namespace megdnn { | |||
namespace arm_common { | |||
@@ -50,7 +50,6 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(WarpPerspective) | |||
MEGDNN_SPECIALIZE_CREATE_OPERATOR(TypeCvt) | |||
MEGDNN_SPECIALIZE_CREATE_OPERATOR(Reduce) | |||
MEGDNN_SPECIALIZE_CREATE_OPERATOR(ConvBias) | |||
MEGDNN_SPECIALIZE_CREATE_OPERATOR(WinogradFilterPreprocess) | |||
MEGDNN_SPECIALIZE_CREATE_OPERATOR(ConvolutionBackwardData) | |||
#pragma GCC diagnostic push | |||
@@ -1,179 +0,0 @@ | |||
/** | |||
* \file dnn/src/arm_common/winograd_filter_preprocess/opr_impl.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "src/arm_common/winograd_filter_preprocess/opr_impl.h" | |||
#include "src/arm_common/handle.h" | |||
#include "src/common/utils.h" | |||
#include "src/arm_common/conv_bias/fp32/strategy.h" | |||
#include "src/arm_common/conv_bias/int8/strategy.h" | |||
#include "src/arm_common/conv_bias/f16/strategy.h" | |||
#include "midout.h" | |||
MIDOUT_DECL(megdnn_arm_common_winograd_filter_preprocess) | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
//! Transform the filter tensor \p src into \p dst by running the filter_process | |||
//! step of a winograd strategy. The strategy is chosen from the filter dtype, | |||
//! the filter width FW, the output block size m, the channel pack size and | |||
//! param().format. If no combination matches, the final megdnn_assert fires. | |||
void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, | |||
_megdnn_tensor_out dst, | |||
_megdnn_workspace workspace) { | |||
using namespace winograd; | |||
check_exec(src.layout, dst.layout, workspace.size); | |||
//! NCHW44 group conv or NCHW group conv or both dense conv | |||
//! flt_start is the index of the OC dimension (1 when a leading group | |||
//! dimension is present); any other ndim keeps the defaults below | |||
size_t flt_start = 0; | |||
size_t pack_c_size = 1; | |||
size_t group = 1; | |||
if (src.layout.ndim == 5) { //! {g, OC, IC, FH, FW} | |||
flt_start = 1; | |||
group = src.layout[0]; | |||
} else if (src.layout.ndim == 6) { //! {OC/4, IC/4, FH, FW, 4, 4} | |||
pack_c_size = src.layout[5]; | |||
} else if (src.layout.ndim == 7) { //! {g, OC/4, IC/4, FH, FW, 4, 4} | |||
flt_start = 1; | |||
group = src.layout[0]; | |||
pack_c_size = src.layout[6]; | |||
} | |||
//! multiply the packed channel counts back up to full OC / IC | |||
size_t OC = src.layout[flt_start] * pack_c_size, | |||
IC = src.layout[flt_start + 1] * pack_c_size, | |||
FW = src.layout[flt_start + 3]; | |||
size_t m = param().output_block_size; | |||
//! set by DISPATCH once a strategy matching param().format has been issued | |||
bool execed = false; | |||
//! DISPATCH(_strategy, _format, midout tags...): when param().format equals | |||
//! _format, issue one kernel per group that constructs _strategy and calls | |||
//! filter_process(src_ptr, dst_ptr, workspace_ptr, OC, IC). The lambda | |||
//! captures by value, so each kernel keeps the pointer values of its own | |||
//! group; src_ptr / dst_ptr are then advanced by stride[0] for the next | |||
//! group. It expects src_ptr, dst_ptr and workspace_ptr to be declared in the | |||
//! enclosing scope. | |||
#define DISPATCH(_strategy, _format, ...) \ | |||
MIDOUT_BEGIN(megdnn_arm_common_winograd_filter_preprocess, \ | |||
##__VA_ARGS__) { \ | |||
if (param().format == _format) { \ | |||
for (size_t g = 0; g < group; g++) { \ | |||
auto run = [=]() { \ | |||
_strategy strategy(src.layout.dtype, src.layout.dtype, \ | |||
src.layout.dtype); \ | |||
megdnn::winograd::ConvBias<_strategy, _format>(strategy, \ | |||
1_z) \ | |||
.filter_process(src_ptr, dst_ptr, workspace_ptr, \ | |||
OC, IC); \ | |||
}; \ | |||
MEGDNN_DISPATCH_CPU_KERN_OPR(run()); \ | |||
src_ptr += src.layout.stride[0]; \ | |||
dst_ptr += dst.layout.stride[0]; \ | |||
} \ | |||
execed = true; \ | |||
} \ | |||
} \ | |||
MIDOUT_END(); | |||
//! ---- float32 filters ---- | |||
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { | |||
const float* src_ptr = src.ptr<float>(); | |||
float* dst_ptr = dst.ptr<float>(); | |||
float* workspace_ptr = workspace.ptr<float>(); | |||
if (FW == 3) { | |||
if (m == 2) { | |||
if (pack_c_size == 1) { | |||
DISPATCH(winograd_2x3_4x4_f, param::Winograd::Format::MK4, | |||
0, 0); | |||
} else if (pack_c_size == 4) { | |||
DISPATCH(winograd_F23_mk4_f_nchw44, | |||
param::Winograd::Format::MK4, 0, 5); | |||
} | |||
} else if (m == 6) { | |||
//! the DEFAULT-format strategy is tried for any pack_c_size; the | |||
//! MK4 strategies below are selected by pack_c_size | |||
DISPATCH(winograd_6x3_1x1_f, param::Winograd::Format::DEFAULT, | |||
0, 1); | |||
if (pack_c_size == 1) { | |||
DISPATCH(winograd_6x3_4x4_f, param::Winograd::Format::MK4, | |||
0, 2); | |||
} else if (pack_c_size == 4) { | |||
DISPATCH(winograd_F63_mk4_f_nchw44, | |||
param::Winograd::Format::MK4, 0, 6); | |||
} | |||
} else if (m == 7) { | |||
megdnn_assert(pack_c_size == 4, "WINOGRAD F(7,3) Only Supports NCHW44"); | |||
DISPATCH(winograd_F73_mk4_f_nchw44, | |||
param::Winograd::Format::MK4, 0, 7); | |||
} | |||
} else if (FW == 4) { | |||
if (m == 5) { | |||
DISPATCH(winograd_5x4_1x1_f, param::Winograd::Format::DEFAULT, | |||
0, 3); | |||
} | |||
} else if (FW == 5) { | |||
if (m == 4) { | |||
DISPATCH(winograd_4x5_1x1_f, param::Winograd::Format::DEFAULT, | |||
0, 4); | |||
} | |||
} | |||
} | |||
//! ---- quantized int8 filters ---- | |||
if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
const dt_int8* src_ptr = src.compatible_ptr<dt_int8>(); | |||
if (param().compute_mode == param::ConvBias::ComputeMode::DEFAULT) { | |||
//! DEFAULT compute mode: the transformed filter is written as int16 | |||
dt_int16* dst_ptr = dst.compatible_ptr<dt_int16>(); | |||
dt_int16* workspace_ptr = workspace.ptr<dt_int16>(); | |||
if (FW == 3) { | |||
if (m == 2) { | |||
//! NOTE(review): both variants below pass the same midout tag | |||
//! (1, 0) -- confirm that is intentional | |||
if (pack_c_size == 1) { | |||
DISPATCH(winograd_2x3_8x8_s8, | |||
param::Winograd::Format::MK8, 1, 0); | |||
} else if (pack_c_size == 4) { | |||
DISPATCH(winograd_2x3_8x8_s8_nchw44, | |||
param::Winograd::Format::MK8, 1, 0); | |||
}else{ | |||
megdnn_throw("only support pack_c_size = 1 or 4"); | |||
} | |||
} | |||
} | |||
} else { | |||
//! other compute modes: dst and workspace are fetched as int32 | |||
//! pointers and then reinterpreted as float for the strategy | |||
dt_int32* dst_ptr_tmp = dst.compatible_ptr<dt_int32>(); | |||
dt_int32* workspace_ptr_tmp = workspace.ptr<dt_int32>(); | |||
float* dst_ptr = reinterpret_cast<float*>(dst_ptr_tmp); | |||
float* workspace_ptr = reinterpret_cast<float*>(workspace_ptr_tmp); | |||
if (pack_c_size == 4) { | |||
if (FW == 3) { | |||
if (m == 2) { | |||
DISPATCH(winograd_2x3_4x4_s8_f32_nchw44, | |||
param::Winograd::Format::MK4, 1, 1); | |||
} | |||
} | |||
} else { | |||
megdnn_throw("only support pack_c_size == 4"); | |||
} | |||
} | |||
} | |||
//! ---- float16 filters: only compiled when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is set ---- | |||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
if (src.layout.dtype.enumv() == DTypeEnum::Float16) { | |||
const dt_float16* src_ptr = src.ptr<dt_float16>(); | |||
dt_float16* dst_ptr = dst.ptr<dt_float16>(); | |||
dt_float16* workspace_ptr = workspace.ptr<dt_float16>(); | |||
if (FW == 3) { | |||
if (m == 2) { | |||
//! the two dispatches differ in format (DEFAULT vs MK8), so at | |||
//! most one of them matches param().format | |||
DISPATCH(winograd_2x3_4x4_f16, param::Winograd::Format::DEFAULT, | |||
2, 0); | |||
DISPATCH(winograd_2x3_8x8_f16, param::Winograd::Format::MK8, 2, | |||
1); | |||
} else if (m == 6) { | |||
DISPATCH(winograd_6x3_1x1_f16, param::Winograd::Format::DEFAULT, | |||
2, 2); | |||
} | |||
} else if (FW == 5) { | |||
if (m == 4) { | |||
DISPATCH(winograd_4x5_1x1_f16, param::Winograd::Format::DEFAULT, | |||
2, 3); | |||
} | |||
} | |||
} | |||
#endif | |||
#undef DISPATCH | |||
//! no DISPATCH matched the dtype / FW / m / format combination | |||
megdnn_assert(execed, | |||
"Unsupport winograd filter preprocess. m: %zu src: %s", m, | |||
src.layout.to_string().c_str()); | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -1,28 +0,0 @@ | |||
/** | |||
* \file dnn/src/arm_common/winograd_filter_preprocess/opr_impl.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#pragma once | |||
#include "megdnn/oprs.h" | |||
#include "src/common/utils.h" | |||
namespace megdnn { | |||
namespace arm_common { | |||
//! arm_common implementation of the WinogradFilterPreprocess operator; it | |||
//! only overrides exec(), everything else comes from the base class | |||
class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess { | |||
public: | |||
//! reuse the base class constructors | |||
using WinogradFilterPreprocess::WinogradFilterPreprocess; | |||
//! transform the filter \p src into \p dst using \p workspace as scratch | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||
_megdnn_workspace workspace) override; | |||
}; | |||
} // namespace arm_common | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -35,37 +35,11 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
const TensorLayout& bias, const TensorLayout& z, | |||
const TensorLayout& dst, size_t workspace_in_bytes, | |||
const PreprocessedFilter* preprocessed_filter) { | |||
if ((param().format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD) && | |||
src.dtype.category() == DTypeCategory::QUANTIZED) { | |||
megdnn_assert(filter.dtype.enumv() == DTypeEnum::QuantizedS16 || | |||
//!int8 winogradf23_44 using float,QuantizedS32 take the scale | |||
filter.dtype.enumv() == DTypeEnum::QuantizedS32); | |||
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||
src.dtype.enumv() == DTypeEnum::Quantized8Asymm); | |||
} else { | |||
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv()); | |||
} | |||
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv()); | |||
if (src.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
if (bias.dtype.enumv() == DTypeEnum::QuantizedS32) { | |||
float scale_src = src.dtype.param<dtype::QuantizedS8>().scale; | |||
float scale_filter = 0.f; | |||
if (param().format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
if (filter.dtype.enumv() == DTypeEnum::QuantizedS32) { | |||
//! int8 winogradf23_44 using float,QuantizedS32 take the | |||
//! scale | |||
scale_filter = | |||
filter.dtype.param<dtype::QuantizedS32>().scale; | |||
} else { | |||
scale_filter = | |||
filter.dtype.param<dtype::QuantizedS16>().scale; | |||
} | |||
} else { | |||
scale_filter = filter.dtype.param<dtype::QuantizedS8>().scale; | |||
} | |||
float scale_filter = filter.dtype.param<dtype::QuantizedS8>().scale; | |||
float scale_bias = bias.dtype.param<dtype::QuantizedS32>().scale; | |||
megdnn_assert( | |||
std::abs(scale_src * scale_filter - scale_bias) < 1e-6, | |||
@@ -77,15 +51,8 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
} else if (src.dtype.enumv() == DTypeEnum::Quantized8Asymm) { | |||
if (bias.dtype.enumv() == DTypeEnum::QuantizedS32) { | |||
float scale_src = src.dtype.param<dtype::Quantized8Asymm>().scale; | |||
float scale_filter = 0.f; | |||
if (param().format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
scale_filter = filter.dtype.param<dtype::QuantizedS16>().scale; | |||
} else { | |||
scale_filter = | |||
filter.dtype.param<dtype::Quantized8Asymm>().scale; | |||
} | |||
float scale_filter = | |||
filter.dtype.param<dtype::Quantized8Asymm>().scale; | |||
float scale_bias = bias.dtype.param<dtype::QuantizedS32>().scale; | |||
megdnn_assert( | |||
std::abs(scale_src * scale_filter - scale_bias) < 1e-6, | |||
@@ -115,7 +82,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
if (check_eq(bias, dst)) | |||
return ret; | |||
if (param().format == param::ConvBias::Format::NCHW || | |||
param().format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW4_NCHW) { | |||
megdnn_assert(bias.shape[0] == 1); | |||
megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s", | |||
@@ -131,7 +97,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
} else if (param().format == param::ConvBias::Format::NCHW4 || | |||
param().format == param::ConvBias::Format::NCHW44 || | |||
param().format == param::ConvBias::Format::NCHW44_DOT || | |||
param().format == param::ConvBias::Format::NCHW44_WINOGRAD || | |||
param().format == param::ConvBias::Format::NCHW32_NCHW4) { | |||
megdnn_assert(bias.shape[0] == 1); | |||
megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s", | |||
@@ -140,8 +105,7 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
megdnn_assert(bias.shape[3] == 1); | |||
megdnn_assert(bias.shape[4] == 4); | |||
} else if (param().format == param::ConvBias::Format::NCHW8 || | |||
param().format == param::ConvBias::Format::NCHW88 || | |||
param().format == param::ConvBias::Format::NCHW88_WINOGRAD) { | |||
param().format == param::ConvBias::Format::NCHW88 ) { | |||
megdnn_assert(bias.shape[0] == 1); | |||
megdnn_assert(bias.shape[1] == dst.shape[1], "bias:%s, dst:%s", | |||
bias.to_string().c_str(), dst.to_string().c_str()); | |||
@@ -175,11 +139,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
} | |||
if (z.ndim != 0) { | |||
megdnn_assert(param().format != param::ConvBias::Format::NCHW_WINOGRAD); | |||
megdnn_assert(param().format != | |||
param::ConvBias::Format::NCHW88_WINOGRAD); | |||
megdnn_assert(param().format != | |||
param::ConvBias::Format::NCHW44_WINOGRAD); | |||
megdnn_assert(param().format != param::ConvBias::Format::NCHW4_NCHW32); | |||
megdnn_assert(param().format != param::ConvBias::Format::NCHW32_NCHW4); | |||
megdnn_assert(z.dtype.enumv() == dst.dtype.enumv()); | |||
@@ -187,105 +146,6 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||
} | |||
return ret; | |||
} | |||
/*! | |||
* \brief deduce the origin filter layout and param after winograd transformed | |||
* | |||
* Only acts when \p format is NCHW_WINOGRAD, NCHW44_WINOGRAD or | |||
* NCHW88_WINOGRAD; for any other format both outputs are left untouched. | |||
* | |||
* \param format format of the transformed filter | |||
* \param output_block_size winograd output block size, used to recover FH | |||
* \param src_layout input layout; only its dtype is inspected | |||
* \param winograd_filter_layout layout of the transformed filter | |||
* \param[out] origin_layout receives the deduced origin filter layout | |||
* \param[out] origin_param output_block_size is reset to 0 and format is | |||
* mapped back to the matching non-winograd format | |||
*/ | |||
void ConvBiasForward::deduce_winograd_origin_layout_and_param( | |||
const Param::Format format, const size_t output_block_size, | |||
const TensorLayout& src_layout, | |||
const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout, | |||
Param& origin_param) { | |||
if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD || | |||
format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD || | |||
format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
//! change NCHWxx_WINOGRAD to NCHWxx | |||
//! OC and IC stay 0 if ndim is none of 4, 5, 6, 7 | |||
size_t OC = 0; | |||
size_t IC = 0; | |||
size_t GROUP = 1; | |||
//! index 1 holds alpha in every layout handled below, so this is | |||
//! alpha - m + 1; the same FH is used for both spatial dims | |||
size_t FH = winograd_filter_layout[1] - output_block_size + 1; | |||
//! {alpha, alpha, IC, OC} | |||
if (winograd_filter_layout.ndim == 4) { | |||
OC = winograd_filter_layout[3]; | |||
IC = winograd_filter_layout[2]; | |||
} | |||
//! {group, alpha, alpha, IC, OC} | |||
else if (winograd_filter_layout.ndim == 5) { | |||
OC = winograd_filter_layout[4]; | |||
IC = winograd_filter_layout[3]; | |||
GROUP = winograd_filter_layout[0]; | |||
} | |||
//! {alpha, alpha, OC/f, IC/f, f, f} | |||
else if (winograd_filter_layout.ndim == 6) { | |||
OC = winograd_filter_layout[2] * winograd_filter_layout[5]; | |||
IC = winograd_filter_layout[3] * winograd_filter_layout[4]; | |||
} | |||
//! {group, alpha, alpha, OC/f, IC/f, f, f} | |||
else if (winograd_filter_layout.ndim == 7) { | |||
OC = winograd_filter_layout[3] * winograd_filter_layout[6]; | |||
IC = winograd_filter_layout[4] * winograd_filter_layout[5]; | |||
GROUP = winograd_filter_layout[0]; | |||
} | |||
//! the dtype is kept as-is unless the input is QuantizedS8, in which case | |||
//! it is mapped back to QuantizedS8 carrying the same scale | |||
auto origin_data_type = winograd_filter_layout.dtype; | |||
if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) { | |||
float scale = | |||
origin_data_type.param<dtype::QuantizedS16>().scale; | |||
origin_data_type = megdnn::dtype::QuantizedS8(scale); | |||
} else { | |||
//! In order to carry the scale of the filter, the transformed | |||
//! qint8 winograd filter that computes in float has dtype QuantizedS32 | |||
megdnn_assert(origin_data_type.enumv() == | |||
DTypeEnum::QuantizedS32); | |||
float scale = | |||
origin_data_type.param<dtype::QuantizedS32>().scale; | |||
origin_data_type = megdnn::dtype::QuantizedS8(scale); | |||
} | |||
} | |||
//! dense filter: no leading group dimension in the origin layout | |||
if (GROUP == 1) { | |||
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
origin_layout = | |||
TensorLayout({OC, IC, FH, FH}, origin_data_type); | |||
} else if (format == | |||
megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4}, | |||
origin_data_type); | |||
} else { | |||
megdnn_assert(format == | |||
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||
origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8}, | |||
origin_data_type); | |||
} | |||
} else { | |||
//! grouped filter: same shapes with GROUP prepended | |||
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
origin_layout = | |||
TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type); | |||
} else if (format == | |||
megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
origin_layout = | |||
TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4}, | |||
origin_data_type); | |||
} else { | |||
megdnn_assert(format == | |||
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||
origin_layout = | |||
TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8}, | |||
origin_data_type); | |||
} | |||
} | |||
//! reset the block size and map the format back to the non-winograd one | |||
origin_param.output_block_size = 0; | |||
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||
origin_param.format = megdnn::param::ConvBias::Format::NCHW; | |||
} else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
origin_param.format = megdnn::param::ConvBias::Format::NCHW44; | |||
} else { | |||
megdnn_assert(format == | |||
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||
origin_param.format = megdnn::param::ConvBias::Format::NCHW88; | |||
} | |||
} | |||
} | |||
template <typename T> | |||
struct NCHWParamTrait; | |||
@@ -41,36 +41,12 @@ uint32_t spatial_getter(uint32_t filter, const Param&) { | |||
return filter; | |||
} | |||
template <> | |||
uint32_t | |||
spatial_getter<param::ConvBias, param::ConvBias::Format::NCHW_WINOGRAD>( | |||
uint32_t filter, const param::ConvBias& param) { | |||
//! f = m + r - 1 -> r = f + 1 - m | |||
return filter - param.output_block_size + 1; | |||
} | |||
template <> | |||
uint32_t | |||
spatial_getter<param::ConvBias, param::ConvBias::Format::NCHW88_WINOGRAD>( | |||
uint32_t filter, const param::ConvBias& param) { | |||
//! f = m + r - 1 -> r = f + 1 - m | |||
return filter - param.output_block_size + 1; | |||
} | |||
template <> | |||
uint32_t | |||
spatial_getter<param::ConvBias, param::ConvBias::Format::NCHW44_WINOGRAD>( | |||
uint32_t filter, const param::ConvBias& param) { | |||
//! f = m + r - 1 -> r = f + 1 - m | |||
return filter - param.output_block_size + 1; | |||
} | |||
template <typename Parameter, typename Param> | |||
void make_canonized_filter_meta_nchw_nhwc( | |||
size_t src_ndim, const TensorLayout& filter, const Param& param, | |||
typename ConvolutionBase<Parameter>::CanonizedFilterMeta& ret) { | |||
megdnn_assert(param.format == Param::Format::NCHW || | |||
param.format == Param::Format::NHWC || | |||
param.format == Param::Format::NCHW_WINOGRAD); | |||
param.format == Param::Format::NHWC ); | |||
auto img_ndim = src_ndim - 2; | |||
size_t flt_start, flt_spatial_start, ocpg_pos, icpg_pos; | |||
if (param.sparse == Param::Sparse::DENSE) { | |||
@@ -101,20 +77,6 @@ void make_canonized_filter_meta_nchw_nhwc( | |||
flt_spatial_start = 2; | |||
ocpg_pos = 0; | |||
icpg_pos = 1; | |||
} else if (param.format == Param::Format::NCHW_WINOGRAD) { | |||
// filter should be (alphah, alphaw, ic, oc) or (alphah, alphaw, ocb, | |||
// icb, ic_block_size, oc_block_size) | |||
flt_spatial_start = 0; | |||
if (filter.ndim == flt_start + 4) { | |||
ocpg_pos = 3; | |||
icpg_pos = 2; | |||
} else { | |||
megdnn_assert(filter.ndim == flt_start + 6); | |||
ic_block_size = filter[flt_start + 4]; | |||
oc_block_size = filter[flt_start + 5]; | |||
ocpg_pos = 2; | |||
icpg_pos = 3; | |||
} | |||
} else { | |||
megdnn_assert(param.format == Param::Format::NHWC, | |||
"invalid conv tensor format"); | |||
@@ -136,14 +98,8 @@ void make_canonized_filter_meta_nchw_nhwc( | |||
megdnn_assert(dilation[i] > 0, | |||
"invalid dilation on spatial dim %zu: %u", i, | |||
dilation[i]); | |||
if (param.format == Param::Format::NCHW_WINOGRAD) { | |||
ret.spatial[i] = | |||
spatial_getter<Param, Param::Format::NCHW_WINOGRAD>( | |||
filter[i + flt_start + flt_spatial_start], param); | |||
} else { | |||
ret.spatial[i] = spatial_getter<Param, Param::Format::NCHW>( | |||
filter[i + flt_start + flt_spatial_start], param); | |||
} | |||
ret.spatial[i] = spatial_getter<Param, Param::Format::NCHW>( | |||
filter[i + flt_start + flt_spatial_start], param); | |||
ret.dilated_spatial[i] = (ret.spatial[i] - 1) * dilation[i] + 1; | |||
} | |||
} | |||
@@ -295,20 +251,12 @@ void make_canonized_filter_meta_nchwxx( | |||
* FH, FW, pack_size(IC), pack_size(OC)} [group] | |||
* {GROUP/pack_size, 1, 1, FH, FW, pack_size} [chan] | |||
* | |||
** NCHW88_WINOGRAD and NCHW44_WINOGRAD mode | |||
* filter: | |||
* {alpha, alpha, OC/pack_size, IC/pack_size, pack_size(IC), | |||
*pack_size(OC)} [dense] | |||
* {GROUP, alpha, alpha, OC_PER_GROUP/pack_size, | |||
* IC_PER_GROUP/pack_size, pack_size(IC), pack_size(OC)} [group] | |||
* | |||
*/ | |||
megdnn_assert(param.format == Param::Format::NCHW88 || | |||
param.format == Param::Format::NCHW44 || | |||
param.format == Param::Format::NCHW44_WINOGRAD || | |||
param.format == Param::Format::NCHW44_DOT || | |||
param.format == Param::Format::NCHW88_WINOGRAD); | |||
param.format == Param::Format::NCHW44_DOT); | |||
size_t img_ndim = 2; | |||
size_t flt_start = 0; | |||
size_t flt_spatial_start = 2; | |||
@@ -325,10 +273,6 @@ void make_canonized_filter_meta_nchwxx( | |||
filter[filter.ndim - 1]); | |||
ret.group = 1; | |||
flt_start = 0; | |||
if (param.format == Param::Format::NCHW88_WINOGRAD || | |||
param.format == Param::Format::NCHW44_WINOGRAD) { | |||
flt_start = 2; | |||
} | |||
if (filter[filter.ndim - 2] == 2 * pack_size && | |||
filter[filter.ndim - 1] == 2 * pack_size) { | |||
pack_c_size = 2 * pack_size; | |||
@@ -339,10 +283,6 @@ void make_canonized_filter_meta_nchwxx( | |||
ret.icpg = filter[flt_start + 1] * pack_c_size; | |||
} else if (filter.ndim == img_ndim + 3) { | |||
// ohwi8o | |||
megdnn_assert(param.format != Param::Format::NCHW88_WINOGRAD, | |||
"Hybrid nchw88 mode in not support winograd"); | |||
megdnn_assert(param.format != Param::Format::NCHW44_WINOGRAD, | |||
"Hybrid nchw44 mode in not support winograd"); | |||
flt_start = 0; | |||
flt_spatial_start = 1; | |||
ret.group = 1; | |||
@@ -357,15 +297,9 @@ void make_canonized_filter_meta_nchwxx( | |||
megdnn_assert(param.sparse == Param::Sparse::GROUP, | |||
"invalid convolution sparse type"); | |||
flt_start = 1; | |||
if (param.format == Param::Format::NCHW88_WINOGRAD || | |||
param.format == Param::Format::NCHW44_WINOGRAD) { | |||
flt_start = 3; | |||
} | |||
auto filter_oc = filter[flt_start]; | |||
auto filter_ic = filter[flt_start + 1]; | |||
if (filter_oc == 1 && filter_ic == 1 && filter.ndim == (img_ndim + 4) && | |||
param.format != Param::Format::NCHW88_WINOGRAD && | |||
param.format != Param::Format::NCHW44_WINOGRAD) { | |||
if (filter_oc == 1 && filter_ic == 1 && filter.ndim == (img_ndim + 4)) { | |||
// Depthwise case goihw8g | |||
megdnn_assert(filter.ndim == img_ndim + 4, | |||
"bad filter ndim for group convolution: " | |||
@@ -416,17 +350,7 @@ void make_canonized_filter_meta_nchwxx( | |||
"NCHWXX has invalid dilation on spatial dim %zu: %u, " | |||
"require to be 1", | |||
i, dilation[i]); | |||
if (param.format == Param::Format::NCHW88_WINOGRAD) { | |||
ret.spatial[i] = | |||
spatial_getter<Param, Param::Format::NCHW88_WINOGRAD>( | |||
filter[i + flt_start - 2], param); | |||
} else if (param.format == Param::Format::NCHW44_WINOGRAD) { | |||
ret.spatial[i] = | |||
spatial_getter<Param, Param::Format::NCHW44_WINOGRAD>( | |||
filter[i + flt_start - 2], param); | |||
} else { | |||
ret.spatial[i] = filter[i + flt_start + flt_spatial_start]; | |||
} | |||
ret.spatial[i] = filter[i + flt_start + flt_spatial_start]; | |||
ret.dilated_spatial[i] = (ret.spatial[i] - 1) * dilation[i] + 1; | |||
} | |||
} | |||
@@ -579,13 +503,11 @@ ConvolutionBase<Parameter>::make_canonized_filter_meta( | |||
} else if (param().format == Param::Format::NCHW8) { | |||
make_canonized_filter_meta_nchwx<8, Parameter>(src_ndim, filter, | |||
param(), ret); | |||
} else if (param().format == Param::Format::NCHW88 || | |||
param().format == Param::Format::NCHW88_WINOGRAD) { | |||
} else if (param().format == Param::Format::NCHW88) { | |||
make_canonized_filter_meta_nchwxx<8, Parameter>(src_ndim, filter, | |||
param(), ret); | |||
} else if (param().format == Param::Format::NCHW44 || | |||
param().format == Param::Format::NCHW44_DOT || | |||
param().format == Param::Format::NCHW44_WINOGRAD) { | |||
param().format == Param::Format::NCHW44_DOT) { | |||
make_canonized_filter_meta_nchwxx<4, Parameter>(src_ndim, filter, | |||
param(), ret); | |||
} else if (param().format == Param::Format::NCHW32 || | |||
@@ -597,8 +519,7 @@ ConvolutionBase<Parameter>::make_canonized_filter_meta( | |||
param(), ret); | |||
} else { | |||
megdnn_assert(param().format == Param::Format::NHWC || | |||
param().format == Param::Format::NCHW || | |||
param().format == Param::Format::NCHW_WINOGRAD); | |||
param().format == Param::Format::NCHW); | |||
make_canonized_filter_meta_nchw_nhwc<Parameter>(src_ndim, filter, | |||
param(), ret); | |||
} | |||
@@ -619,17 +540,8 @@ void ConvolutionBase<Parameter>::check_or_deduce_dtype_fwd(DType src, | |||
} else if (src.enumv() == DTypeEnum::QuantizedS8 || | |||
src.enumv() == DTypeEnum::Quantized8Asymm || | |||
src.enumv() == DTypeEnum::Quantized4Asymm) { | |||
//! Qint8 winograd compute with float, in order to bringing the filter | |||
//! scale, here just use QuantizedS32 as filter type. | |||
if (src.enumv() == DTypeEnum::QuantizedS8 && | |||
filter.enumv() == DTypeEnum::QuantizedS32) { | |||
supported_dst_dtype.push_back(dtype::QuantizedS32( | |||
src.param<dtype::QuantizedS8>().scale * | |||
filter.param<dtype::QuantizedS32>().scale)); | |||
} else { | |||
supported_dst_dtype.push_back( | |||
dtype::QuantizedS32(mul_scale(src, filter))); | |||
} | |||
supported_dst_dtype.push_back( | |||
dtype::QuantizedS32(mul_scale(src, filter))); | |||
if (dst.valid() && dst.enumv() == src.enumv()) { | |||
supported_dst_dtype.push_back(dst); | |||
} | |||
@@ -681,24 +593,12 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
megdnn_assert_contiguous(src); | |||
megdnn_assert_contiguous(filter); | |||
megdnn_assert(src.ndim >= 3_z, "%s", errmsg().c_str()); | |||
if ((param().format == Param::Format::NCHW_WINOGRAD || | |||
param().format == Param::Format::NCHW44_WINOGRAD) && | |||
src.dtype.category() == DTypeCategory::QUANTIZED) { | |||
megdnn_assert((filter.dtype.enumv() == DTypeEnum::QuantizedS16 || | |||
filter.dtype.enumv() == DTypeEnum::QuantizedS32), | |||
"%s", errmsg().c_str()); | |||
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||
src.dtype.enumv() == DTypeEnum::Quantized8Asymm, | |||
"%s", errmsg().c_str()); | |||
} else { | |||
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv(), "%s", | |||
errmsg().c_str()); | |||
} | |||
megdnn_assert(src.dtype.enumv() == filter.dtype.enumv(), "%s", | |||
errmsg().c_str()); | |||
check_or_deduce_dtype_fwd(src.dtype, filter.dtype, dst.dtype); | |||
size_t img_dim; | |||
if (param().format == Param::Format::NCHW || | |||
param().format == Param::Format::NHWC || | |||
param().format == Param::Format::NCHW_WINOGRAD) { | |||
param().format == Param::Format::NHWC) { | |||
img_dim = src.ndim - 2; | |||
megdnn_assert(filter.ndim >= img_dim + 2 && filter.ndim <= img_dim + 6, | |||
"%s", errmsg().c_str()); | |||
@@ -714,8 +614,6 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
param().format == Param::Format::NCHW32 || | |||
param().format == Param::Format::NCHW32_NCHW4 || | |||
param().format == Param::Format::NCHW88 || | |||
param().format == Param::Format::NCHW88_WINOGRAD || | |||
param().format == Param::Format::NCHW44_WINOGRAD || | |||
param().format == Param::Format::CHWN4); | |||
img_dim = src.ndim - 3; | |||
if ((param().format == Param::Format::NCHW88 || | |||
@@ -770,8 +668,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
"but got src %s, filter %s", | |||
src.to_string().c_str(), filter.to_string().c_str()); | |||
} | |||
if (param().format == Param::Format::NCHW88 || | |||
param().format == Param::Format::NCHW88_WINOGRAD) { | |||
if (param().format == Param::Format::NCHW88) { | |||
megdnn_assert((src.ndim == 4 && filter.ndim == 5 && | |||
filter[filter.ndim - 1] == 8) || | |||
(src.ndim == 5 && | |||
@@ -786,8 +683,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
src.to_string().c_str(), filter.to_string().c_str()); | |||
} | |||
if (param().format == Param::Format::NCHW44 || | |||
param().format == Param::Format::NCHW44_DOT || | |||
param().format == Param::Format::NCHW44_WINOGRAD) { | |||
param().format == Param::Format::NCHW44_DOT) { | |||
//! support nchw44 filter changed to 88 for int8 winograd f23_88 using MK8 matmul
megdnn_assert((src.ndim == 4 && filter.ndim == 5 && | |||
filter[filter.ndim - 1] == 4) || | |||
@@ -820,12 +716,10 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
"currently only convolution on 2D image is supported"); | |||
auto cflt = make_canonized_filter_meta(src.ndim, filter); | |||
if (param().format == Param::Format::NCHW || | |||
param().format == Param::Format::NHWC || | |||
param().format == Param::Format::NCHW_WINOGRAD) { | |||
param().format == Param::Format::NHWC ) { | |||
size_t src_or_dst_c_pos = 0; | |||
size_t src_or_dst_spatial_start = 0; | |||
if (param().format == Param::Format::NCHW || | |||
param().format == Param::Format::NCHW_WINOGRAD) { | |||
if (param().format == Param::Format::NCHW) { | |||
src_or_dst_c_pos = 1; | |||
src_or_dst_spatial_start = 2; | |||
} else { | |||
@@ -836,10 +730,6 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
} | |||
megdnn_assert(cflt.icpg * cflt.group == src[src_or_dst_c_pos], "%s", | |||
errmsg().c_str()); | |||
if (param().format == Param::Format::NCHW_WINOGRAD) { | |||
megdnn_assert(cflt.spatial[0] == cflt.spatial[1], | |||
"NCHW_WINOGRAD only support conv with fh == fw"); | |||
} | |||
dst.ndim = src.ndim; | |||
dst[0] = src[0]; | |||
dst[src_or_dst_c_pos] = cflt.ocpg * cflt.group; | |||
@@ -900,8 +790,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
dst[3] = infer_conv_shape(src[3], cflt.dilated_spatial[1], | |||
cflt.stride[1], cflt.padding[1]); | |||
dst[4] = 32; | |||
} else if (param().format == Param::Format::NCHW88 || | |||
param().format == Param::Format::NCHW88_WINOGRAD) { | |||
} else if (param().format == Param::Format::NCHW88 ) { | |||
megdnn_assert(src.ndim == 5 || (src.ndim == 4 && src[1] <= 8), | |||
"invalid src ndim for NCHW88, expected=5 or 4, got=%zu", | |||
src.ndim); | |||
@@ -923,8 +812,7 @@ ConvolutionBase<Parameter>::deduce_layout_fwd(const TensorLayout& src, | |||
} | |||
} else if (param().format == Param::Format::NCHW44 || | |||
param().format == Param::Format::NCHW44_DOT || | |||
param().format == Param::Format::NCHW44_WINOGRAD) { | |||
param().format == Param::Format::NCHW44_DOT) { | |||
megdnn_assert(src.ndim == 5 || (src.ndim == 4 && src[1] <= 4), | |||
"invalid src ndim for NCHW44, expected=5 or 4, got=%zu", | |||
src.ndim); | |||
@@ -189,7 +189,6 @@ private: | |||
cb(RelayoutFormat) \ | |||
cb(TopK) \ | |||
cb(PowC) \ | |||
cb(WinogradFilterPreprocess) \ | |||
cb(LocalShareForward) \ | |||
cb(LocalShareBackwardData) \ | |||
cb(LocalShareBackwardFilter) \ | |||
@@ -1,157 +0,0 @@ | |||
/** | |||
* \file dnn/src/common/winograd_filter_preprocess.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ | |||
#include "megdnn/oprs.h" | |||
#include <numeric> | |||
#include "src/common/utils.h" | |||
using namespace megdnn; | |||
void WinogradFilterPreprocess::deduce_layout(const TensorLayout& src, | |||
TensorLayout& dst) { | |||
auto errmsg = [&]() { | |||
return "invalid filter layout:" + megdnn_layout_msg(src); | |||
}; | |||
MEGDNN_MARK_USED_VAR(errmsg); | |||
//! NCHW88 weight layout include | |||
//! dense{oc/8, ic/8, fh, fw, 8, 8}; group {g, oc/8, ic/8, fh, fw, 8, 8}; | |||
//! channel wise{g/8, 1, 1, fh, fw, 8} | |||
megdnn_assert( | |||
src.ndim == 4 || src.ndim == 5 || src.ndim == 6 || src.ndim == 7, | |||
"%s", errmsg().c_str()); | |||
//! nchw88 channel wise conv | |||
megdnn_assert(!(src.ndim == 6 && src[1] == 1 && src[2] == 1), | |||
"chennel wise nchw88 can not use winograd "); | |||
//! nchw88 group conv | |||
size_t flt_start = 0; | |||
size_t pack_c_size = 1; | |||
size_t group = 1; | |||
//! group conv | |||
if (src.ndim == 5) { | |||
flt_start = 1; | |||
group = src[0]; | |||
//! nchw88 dense conv | |||
} else if (src.ndim == 6) { | |||
pack_c_size = src[5]; | |||
//! nchw88 group conv | |||
} else if (src.ndim == 7) { | |||
flt_start = 1; | |||
group = src[0]; | |||
pack_c_size = src[6]; | |||
} | |||
size_t OC = src[flt_start] * pack_c_size, | |||
IC = src[flt_start + 1] * pack_c_size, FH = src[flt_start + 2], | |||
FW = src[flt_start + 3]; | |||
size_t m = param().output_block_size; | |||
megdnn_assert(FH == FW, "%s", errmsg().c_str()); | |||
size_t alpha = FH + m - 1; | |||
DType dst_type = src.dtype; | |||
if (src.dtype.category() == DTypeCategory::QUANTIZED) { | |||
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8); | |||
if (param().compute_mode == | |||
param::ConvBias::ComputeMode::DEFAULT) { | |||
//! input int8 compute short | |||
dst_type = dtype::QuantizedS16( | |||
src.dtype.param<dtype::QuantizedS8>().scale); | |||
} else { | |||
//! input int8 compute float32 | |||
dst_type = dtype::QuantizedS32( | |||
src.dtype.param<dtype::QuantizedS8>().scale); | |||
} | |||
} | |||
if (src.ndim == 4 || src.ndim == 6) { | |||
if (param().format == param::Winograd::Format::DEFAULT) { | |||
dst = TensorLayout({alpha, alpha, IC, OC}, dst_type); | |||
} else { | |||
megdnn_assert(param().format == param::Winograd::Format::MK4 || | |||
param().format == param::Winograd::Format::MK8); | |||
size_t pack_size = MatrixMulForward::pack_size(param().format); | |||
dst = TensorLayout({alpha, alpha, OC / pack_size, IC / pack_size, | |||
pack_size, pack_size}, | |||
dst_type); | |||
} | |||
} else { | |||
megdnn_assert(src.ndim == 5 || src.ndim == 7); | |||
if (param().format == param::Winograd::Format::DEFAULT) { | |||
dst = TensorLayout({group, alpha, alpha, IC, OC}, dst_type); | |||
} else { | |||
megdnn_assert(param().format == param::Winograd::Format::MK4 || | |||
param().format == param::Winograd::Format::MK8); | |||
size_t pack_size = MatrixMulForward::pack_size(param().format); | |||
dst = TensorLayout({group, alpha, alpha, OC / pack_size, | |||
IC / pack_size, pack_size, pack_size}, | |||
dst_type); | |||
} | |||
} | |||
} | |||
void WinogradFilterPreprocess::check_exec(const TensorLayout& src, | |||
const TensorLayout& dst, | |||
size_t workspace_in_bytes) { | |||
auto errmsg = [&]() { | |||
return megdnn_layout_msg(src) + ", " + megdnn_layout_msg(dst); | |||
}; | |||
MEGDNN_MARK_USED_VAR(errmsg); | |||
megdnn_assert_contiguous(src); | |||
megdnn_assert_contiguous(dst); | |||
//! nchwxx now only support Format MKx | |||
if (param().format == param::Winograd::Format::DEFAULT) { | |||
megdnn_assert(src.ndim == dst.ndim && (src.ndim == 4 || src.ndim == 5), | |||
"%s", errmsg().c_str()); | |||
} else { | |||
megdnn_assert( | |||
(param().format == param::Winograd::Format::MK4 || | |||
param().format == param::Winograd::Format::MK8) && | |||
(src.ndim == dst.ndim - 2 || src.ndim == dst.ndim) && | |||
(src.ndim == 4 || src.ndim == 5 || src.ndim == 6 || | |||
src.ndim == 7), | |||
"%s", errmsg().c_str()); | |||
} | |||
TensorLayout dst_expected; | |||
deduce_layout(src, dst_expected); | |||
megdnn_assert_eq_layout(dst_expected, dst); | |||
auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst); | |||
megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | |||
} | |||
size_t WinogradFilterPreprocess::get_workspace_in_bytes( | |||
const TensorLayout& src, const TensorLayout& dst) { | |||
MEGDNN_MARK_USED_VAR(dst); | |||
DType output_compute_dtype = src.dtype; | |||
if (src.dtype.category() == DTypeCategory::QUANTIZED) { | |||
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||
src.dtype.enumv() == DTypeEnum::Quantized8Asymm); | |||
if (param().compute_mode == | |||
param::ConvBias::ComputeMode::DEFAULT) { | |||
//! input int8 compute short | |||
output_compute_dtype = dtype::QuantizedS16( | |||
src.dtype.param<dtype::QuantizedS8>().scale); | |||
} else { | |||
//! input int8 compute float32 | |||
output_compute_dtype = dtype::QuantizedS32( | |||
src.dtype.param<dtype::QuantizedS8>().scale); | |||
} | |||
} | |||
size_t FW = src[3]; | |||
if (src.ndim == 5 || src.ndim == 7) { | |||
FW = src[4]; | |||
} | |||
size_t pack_size = MatrixMulForward::pack_size(param().format); | |||
size_t alpha = param().output_block_size + FW - 1; | |||
return 2 * alpha * alpha * output_compute_dtype.size() * pack_size * | |||
pack_size; | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -72,7 +72,6 @@ | |||
#include "src/cuda/type_cvt/opr_impl.h" | |||
#include "src/cuda/warp_affine/opr_impl.h" | |||
#include "src/cuda/warp_perspective/opr_impl.h" | |||
#include "src/cuda/winograd_filter_preprocess/opr_impl.h" | |||
#include "src/cuda/local_share/opr_impl.h" | |||
#include "src/cuda/roi_align/opr_impl.h" | |||
#include "src/cuda/batch_conv_bias/opr_impl.h" | |||
@@ -1,22 +0,0 @@ | |||
/** | |||
* \file dnn/src/cuda/winograd_filter_preprocess/opr_impl.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ | |||
#include "src/cuda/winograd_filter_preprocess/opr_impl.h" | |||
#include "src/common/utils.h" | |||
using namespace megdnn; | |||
using namespace cuda; | |||
void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in, _megdnn_tensor_in, | |||
_megdnn_workspace) { | |||
megdnn_throw("WinogradFilterPreprocess is not supported in CUDA"); | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -1,27 +0,0 @@ | |||
/** | |||
* \file dnn/src/cuda/winograd_filter_preprocess/opr_impl.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ | |||
#pragma once | |||
#include "megdnn/oprs.h" | |||
namespace megdnn { | |||
namespace cuda { | |||
class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess { | |||
public: | |||
using WinogradFilterPreprocess::WinogradFilterPreprocess; | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||
_megdnn_workspace workspace) override; | |||
}; | |||
} // namespace cuda | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -259,12 +259,7 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( | |||
strategy, UNIT_TILE_SIZE, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -329,12 +324,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( | |||
strategy, UNIT_TILE_SIZE, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK4)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -397,12 +387,7 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::DEFAULT)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -467,12 +452,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( | |||
strategy, UNIT_TILE_SIZE, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK8)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW && | |||
param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -342,10 +342,7 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( | |||
param().format == Param::Format::NCHW4 || | |||
param().format == Param::Format::NCHW44 || | |||
param().format == Param::Format::NCHW44_DOT || | |||
param().format == Param::Format::NCHW || | |||
param().format == Param::Format::NCHW_WINOGRAD || | |||
param().format == Param::Format::NCHW88_WINOGRAD || | |||
param().format == Param::Format::NCHW44_WINOGRAD) { | |||
param().format == Param::Format::NCHW) { | |||
spatial_pos = 2; | |||
} else if (param().format == Param::Format::NHWC) { | |||
spatial_pos = 1; | |||
@@ -370,25 +367,7 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( | |||
"should be equal"); | |||
auto&& fm = check_layout_fwd(src, filter, dst); | |||
auto& conv_fm = reinterpret_cast<ConvolutionImpl::CanonizedFilterMeta&>(fm); | |||
param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT; | |||
if (param().format == Param::Format::NCHW_WINOGRAD || | |||
param().format == Param::Format::NCHW88_WINOGRAD || | |||
param().format == Param::Format::NCHW44_WINOGRAD) { | |||
size_t flt_start = 0; | |||
if (param().sparse == Param::Sparse::GROUP) { | |||
flt_start = 1; | |||
} | |||
if (filter.ndim == 6 + flt_start) { | |||
if (filter[5] == 4) { | |||
format = param::MatrixMul::Format::MK4; | |||
} else { | |||
megdnn_assert(filter[5] == 8); | |||
format = param::MatrixMul::Format::MK8; | |||
} | |||
} | |||
} | |||
size_t nr_threads = static_cast<naive::HandleImpl*>(handle()) | |||
->megcore_dispatcher() | |||
->nr_threads(); | |||
@@ -407,8 +386,6 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( | |||
nr_threads, | |||
reinterpret_cast<const ConvolutionForward::PreprocessedFilter*>( | |||
preprocessed_filter)}, | |||
param().output_block_size, | |||
format, | |||
bias.dtype, | |||
bias.stride[0], | |||
bias_mode, | |||
@@ -537,11 +514,7 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order( | |||
auto FH = param.filter_meta.spatial[0]; | |||
auto FW = param.filter_meta.spatial[1]; | |||
//! TODO: now winograd only support in fast-run | |||
if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) { | |||
return {AlgoCategory::WINOGRAD}; | |||
} | |||
//! im2col + matmul | |||
bool im2col_prefer = (IC >= 32 || OC >= 32); | |||
//! quantized algo use matmul when direct algo is unusable | |||
@@ -632,21 +605,6 @@ const T* ConvBiasImpl::NCBKernParam::filter(size_t group_pack_id, | |||
break; | |||
} | |||
case ConvBiasImpl::Param::Format::NCHW_WINOGRAD: | |||
case ConvBiasImpl::Param::Format::NCHW44_WINOGRAD: | |||
case ConvBiasImpl::Param::Format::NCHW88_WINOGRAD: { | |||
//! four format of weight layout | |||
//! 1. {g, alpha, alpha, ocpg/8, icpg/8, 8, 8} | |||
//! 2. {alpha, alpha, ocpg/8, icpg/8, 8, 8} | |||
//! 3. {g, alpha, alpha, oc, ic, 8, 8} | |||
//! 4. {alpha, alpha, oc, ic} | |||
group_offset = pack_group_size * group_pack_id * filter_meta.icpg * | |||
filter_meta.ocpg * | |||
(filter_meta.spatial[0] + output_block_size - 1) * | |||
(filter_meta.spatial[1] + output_block_size - 1) * | |||
filter_type.size(); | |||
break; | |||
} | |||
default: | |||
megdnn_assert(0, "other filter format is not support yet"); | |||
} | |||
@@ -103,19 +103,13 @@ public: | |||
struct NCBKernSizeParam : ConvolutionImpl::NCBKernSizeParam { | |||
NCBKernSizeParam() = default; | |||
NCBKernSizeParam(const ConvolutionImpl::NCBKernSizeParam& param, | |||
size_t output_block_size, | |||
param::MatrixMul::Format winograd_matmul_format, | |||
DType bias_type, ptrdiff_t bias_bs, BiasMode bias_mode, | |||
Param::NonlineMode nonlineMode) | |||
: ConvolutionImpl::NCBKernSizeParam(param), | |||
output_block_size{output_block_size}, | |||
winograd_matmul_format{winograd_matmul_format}, | |||
bias_type{bias_type}, | |||
bias_bs{bias_bs}, | |||
bias_mode{bias_mode}, | |||
nonlineMode{nonlineMode} {} | |||
size_t output_block_size; //!< used in winograd algo | |||
param::MatrixMul::Format winograd_matmul_format; | |||
DType bias_type; | |||
//! stride for batch of bias | |||
ptrdiff_t bias_bs; | |||
@@ -88,13 +88,7 @@ class ConvBias { | |||
size_t filter_transform_buf_size = 0; | |||
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE, | |||
//! OC_BLOCK_SIZE) | |||
if (param.preprocessed_filter == nullptr && | |||
param.filter_meta.format != | |||
param::ConvBias::Format::NCHW_WINOGRAD && | |||
param.filter_meta.format != | |||
param::ConvBias::Format::NCHW88_WINOGRAD && | |||
param.filter_meta.format != | |||
param::ConvBias::Format::NCHW44_WINOGRAD) { | |||
if (param.preprocessed_filter == nullptr) { | |||
filter_transform_buf_size = Strategy::ALPHA * Strategy::ALPHA * OC * | |||
IC * sizeof(input_filter_compute_type); | |||
} | |||
@@ -108,12 +102,7 @@ class ConvBias { | |||
nullptr, | |||
{winograd_comput_size, filter_transform_buf_size * GROUP}); | |||
} else { | |||
megdnn_assert(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD || | |||
param.filter_meta.format == | |||
param::ConvBias::Format::NCHW88_WINOGRAD || | |||
param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD); | |||
megdnn_assert(param.preprocessed_filter != nullptr); | |||
return WorkspaceBundle(nullptr, {winograd_comput_size}); | |||
} | |||
} | |||
@@ -499,7 +488,6 @@ public: | |||
const TensorND& preprocessed_dst = | |||
param.preprocessed_filter->tensors[0]; | |||
WorkspaceBundle bundle = get_preprocess_wbundle(param); | |||
Strategy strategy = m_strategy; | |||
SmallVector<NCBKern> kerns; | |||
auto filter_process_kern = | |||
@@ -558,13 +546,7 @@ public: | |||
param.filter_meta.stride[1] == 1 && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44 || | |||
param.filter_meta.format == | |||
param::ConvBias::Format::NCHW_WINOGRAD || | |||
param.filter_meta.format == | |||
param::ConvBias::Format::NCHW88_WINOGRAD || | |||
param.filter_meta.format == | |||
param::ConvBias::Format::NCHW44_WINOGRAD)); | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44)); | |||
SmallVector<NCBKern> kerns; | |||
if (param.preprocessed_filter == nullptr && | |||
@@ -316,8 +316,6 @@ ConvolutionImpl::AlgoDefault::init_conv_bias_param( | |||
mul_scale(param.src_type, param.filter_type)); | |||
} | |||
return {param, | |||
0, | |||
param::MatrixMul::Format::DEFAULT, | |||
bias_type, | |||
0, | |||
BiasMode::NO_BIAS, | |||
@@ -225,8 +225,7 @@ ConvolutionImpl::NCBKernSizeParam ConvolutionImpl::make_ncb_kern_size_param( | |||
param().format == Param::Format::NCHW44_DOT || | |||
param().format == Param::Format::NCHW44) { | |||
spatial_pos = 2; | |||
} else if (param().format == Param::Format::NCHW || | |||
param().format == Param::Format::NCHW_WINOGRAD) { | |||
} else if (param().format == Param::Format::NCHW) { | |||
spatial_pos = 2; | |||
} else if (param().format == Param::Format::NHWC) { | |||
spatial_pos = 1; | |||
@@ -78,7 +78,6 @@ | |||
#include "src/naive/type_cvt/opr_impl.h" | |||
#include "src/naive/warp_affine/opr_impl.h" | |||
#include "src/naive/warp_perspective/opr_impl.h" | |||
#include "src/naive/winograd_filter_preprocess/opr_impl.h" | |||
#include "src/naive/remap/opr_impl.h" | |||
#include "src/naive/fake_quant/opr_impl.h" | |||
@@ -1,234 +0,0 @@ | |||
/** | |||
* \file dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/naive/winograd_filter_preprocess/opr_impl.h" | |||
#include "src/common/utils.h" | |||
#include "src/common/winograd/winograd_helper.h" | |||
#include "src/naive/handle.h" | |||
#include "midout.h" | |||
MIDOUT_DECL(megdnn_naive_winograd_filter_preprocess) | |||
using namespace megdnn; | |||
using namespace naive; | |||
/*!
 * \brief Dispatch the winograd filter transform from \p src into \p dst.
 *
 * Calls check_exec() on the layouts and workspace size, derives the group
 * count, channel pack size, OC, IC and FW from the rank and extents of
 * src.layout, and then picks a winograd::StrategyHelper<...>::filter
 * instantiation from the source dtype, param().format, FW and
 * param().output_block_size. One kernel per group is submitted through
 * MEGDNN_DISPATCH_CPU_KERN_OPR. If no combination matched, the trailing
 * megdnn_assert on `execed` fires.
 */
void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, | |||
_megdnn_tensor_out dst, | |||
_megdnn_workspace workspace) { | |||
check_exec(src.layout, dst.layout, workspace.size); | |||
//! defaults, kept when src.layout.ndim is none of 5/6/7: no leading group axis, pack size 1, one group | |||
size_t flt_start = 0; | |||
size_t pack_c_size = 1; | |||
size_t group = 1; | |||
//! group conv | |||
if (src.layout.ndim == 5) { | |||
flt_start = 1; | |||
group = src.layout[0]; | |||
//! nchw88 dense conv | |||
} else if (src.layout.ndim == 6) { | |||
pack_c_size = src.layout[5]; | |||
//! nchw88 group conv | |||
} else if (src.layout.ndim == 7) { | |||
flt_start = 1; | |||
group = src.layout[0]; | |||
pack_c_size = src.layout[6]; | |||
} | |||
//! OC and IC are the extents at flt_start and flt_start + 1 scaled by the
//! pack size; FW is the extent at flt_start + 3
size_t OC = src.layout[flt_start] * pack_c_size, | |||
IC = src.layout[flt_start + 1] * pack_c_size, | |||
FW = src.layout[flt_start + 3]; | |||
size_t m = param().output_block_size; | |||
//! set to true by DISPATCH_KERNEL; checked by the assert at the end
bool execed = false; | |||
//! cb: if param().format equals _format, call the NCHW StrategyHelper filter.
//! The `return` leaves the `run` lambda that DISPATCH_KERNEL wraps around the
//! expansion, not exec() itself.
#define cb(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _format, rescale) \ | |||
if (param().format == _format) { \ | |||
return winograd::StrategyHelper< \ | |||
_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, param::ConvBias::Format::NCHW, \ | |||
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ | |||
OC, m, FW, interp_points, src.layout.dtype, \ | |||
rescale); \ | |||
} | |||
//! DISPATCH_FORMAT_MK4 / DISPATCH_FORMAT_MK8: try Winograd format DEFAULT,
//! then MK4 (resp. MK8), via cb
#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _rescale) \ | |||
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ | |||
param::Winograd::Format::DEFAULT, _rescale); \ | |||
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ | |||
param::Winograd::Format::MK4, _rescale); | |||
#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _rescale) \ | |||
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ | |||
param::Winograd::Format::DEFAULT, _rescale); \ | |||
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ | |||
param::Winograd::Format::MK8, _rescale); | |||
//! DISPATCH_KERNEL: fetch typed src/dst/workspace pointers, then for each
//! group dispatch one `run` lambda (capturing by value) and advance the src and
//! dst pointers by the respective layout's stride[0]. It stays defined for the
//! NCHW88/NCHW44 branches below and is only #undef'd at the end of the NCHW44
//! branch.
#define DISPATCH_KERNEL(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _kern, _rescale, ...) \ | |||
const _ctype* src_ptr = src.compatible_ptr<_ctype>(); \ | |||
_input_filter_compute_type* dst_ptr = \ | |||
dst.compatible_ptr<_input_filter_compute_type>(); \ | |||
_input_filter_compute_type* workspace_ptr = \ | |||
workspace.ptr<_input_filter_compute_type>(); \ | |||
MIDOUT_BEGIN(megdnn_naive_winograd_filter_preprocess, ##__VA_ARGS__) { \ | |||
for (size_t g = 0; g < group; g++) { \ | |||
auto run = [=]() { \ | |||
_kern(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _rescale); \ | |||
}; \ | |||
MEGDNN_DISPATCH_CPU_KERN_OPR(run()); \ | |||
src_ptr += src.layout.stride[0]; \ | |||
dst_ptr += dst.layout.stride[0]; \ | |||
} \ | |||
execed = true; \ | |||
} \ | |||
MIDOUT_END(); | |||
//! DISPATCH_DTYPE (ndim <= 5 variant): Float32 uses the MK4 format list,
//! QuantizedS8 and Float16 use the MK8 format list
#define DISPATCH_DTYPE(_midout_tag) \ | |||
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \ | |||
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ | |||
DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \ | |||
} \ | |||
if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { \ | |||
DISPATCH_KERNEL(dt_int8, dt_int8, dt_int16, dt_int32, \ | |||
DISPATCH_FORMAT_MK8, 2.0f, _midout_tag, 1); \ | |||
} \ | |||
MEGDNN_INC_FLOAT16(if (src.layout.dtype.enumv() == DTypeEnum::Float16) { \ | |||
DISPATCH_KERNEL(dt_float16, dt_float16, dt_float16, dt_float16, \ | |||
DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 2); \ | |||
}) | |||
if (src.layout.ndim <= 5) { | |||
//! dispatch on dtype; the (FW, m) pair selects the interpolation points | |||
if (FW == 3) { | |||
if (m == 2) { | |||
std::vector<float> interp_points = {0, 1, -1}; | |||
DISPATCH_DTYPE(0); | |||
} else if (m == 6) { | |||
std::vector<float> interp_points = {0, 1, -1, 2, -2, 0.5, -0.5}; | |||
DISPATCH_DTYPE(1); | |||
} | |||
} else if (FW == 4) { | |||
if (m == 5) { | |||
std::vector<float> interp_points = {0, 0.5, -0.5, 1, -1, 2, -2}; | |||
DISPATCH_DTYPE(2); | |||
} | |||
} else if (FW == 5) { | |||
if (m == 4) { | |||
std::vector<float> interp_points = {0, 1, -1, 0.5, -0.5, 2, -2}; | |||
DISPATCH_DTYPE(3); | |||
} | |||
} | |||
#undef cb | |||
#undef DISPATCH_FORMAT_MK4 | |||
#undef DISPATCH_FORMAT_MK8 | |||
#undef DISPATCH_DTYPE | |||
} else { | |||
megdnn_assert(src.layout.ndim == 6 || src.layout.ndim == 7); | |||
//! cb redefined with ConvBias format NCHW88; only Winograd format MK8 and
//! dtype Float32 are dispatched in this variant
#define cb(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _format, rescale) \ | |||
if (param().format == _format) { \ | |||
return winograd::StrategyHelper< \ | |||
_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, param::ConvBias::Format::NCHW88, \ | |||
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ | |||
OC, m, FW, interp_points, src.layout.dtype, \ | |||
rescale); \ | |||
} | |||
#define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _rescale) \ | |||
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ | |||
param::Winograd::Format::MK8, _rescale); | |||
#define DISPATCH_DTYPE(_midout_tag) \ | |||
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \ | |||
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ | |||
DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 0); \ | |||
} | |||
if (pack_c_size == 8) { //! NCHW88 | |||
if (FW == 3) { | |||
if (m == 2) { | |||
std::vector<float> interp_points = {0, 1, -1}; | |||
DISPATCH_DTYPE(4); | |||
} else if (m == 6) { | |||
std::vector<float> interp_points = {0, 1, -1, 2, | |||
-2, 0.5, -0.5}; | |||
DISPATCH_DTYPE(5); | |||
} | |||
} | |||
#undef cb | |||
#undef DISPATCH_DTYPE | |||
} | |||
else if (pack_c_size == 4) { //! NCHW44 | |||
//! cb redefined with ConvBias format NCHW44. DISPATCH_FORMAT_MK8 from the
//! NCHW88 section above is still defined at this point and is reused by the
//! QuantizedS8/MK8 path below, where it expands to this cb.
#define cb(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _format, rescale) \ | |||
if (param().format == _format) { \ | |||
return winograd::StrategyHelper< \ | |||
_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, param::ConvBias::Format::NCHW44, \ | |||
_format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ | |||
OC, m, FW, interp_points, src.layout.dtype, \ | |||
rescale); \ | |||
} | |||
#define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \ | |||
_output_compute_type, _rescale) \ | |||
cb(_ctype, _dst_type, _input_filter_compute_type, _output_compute_type, \ | |||
param::Winograd::Format::MK4, _rescale); | |||
//! QuantizedS8 is split by Winograd format: MK4 transforms into float32,
//! MK8 transforms into int16 with rescale 2.0f
#define DISPATCH_DTYPE(_midout_tag) \ | |||
if (src.layout.dtype.enumv() == DTypeEnum::Float32) { \ | |||
DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ | |||
DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \ | |||
} \ | |||
if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { \ | |||
if (param().format == param::Winograd::Format::MK4) { \ | |||
DISPATCH_KERNEL(dt_int8, dt_int8, dt_float32, dt_float32, \ | |||
DISPATCH_FORMAT_MK4, 1.0f, _midout_tag, 0); \ | |||
} else if (param().format == param::Winograd::Format::MK8) { \ | |||
DISPATCH_KERNEL(dt_int8, dt_int8, dt_int16, dt_int32, \ | |||
DISPATCH_FORMAT_MK8, 2.0f, _midout_tag, 0); \ | |||
} \ | |||
} | |||
if (FW == 3) { | |||
if (m == 2) { | |||
std::vector<float> interp_points = {0, 1, -1}; | |||
DISPATCH_DTYPE(6); | |||
} else if (m == 6) { | |||
std::vector<float> interp_points = {0, 1, -1, 2, | |||
-2, 0.5, -0.5}; | |||
DISPATCH_DTYPE(7); | |||
} else if (m == 7) { | |||
std::vector<float> interp_points = {0, 1, -1, 2, | |||
-2, 0.5, -0.5, 1.5}; | |||
DISPATCH_DTYPE(8); | |||
} | |||
} | |||
#undef cb | |||
#undef DISPATCH_FORMAT_MK8 | |||
#undef DISPATCH_FORMAT_MK4 | |||
#undef DISPATCH_KERNEL | |||
#undef DISPATCH_DTYPE | |||
} | |||
} | |||
//! no (layout, dtype, format, FW, m) combination reached DISPATCH_KERNEL
megdnn_assert(execed, | |||
"Unsupport winograd filter preprocess. m: %zu src: %s", m, | |||
src.layout.to_string().c_str()); | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -1,28 +0,0 @@ | |||
/** | |||
* \file dnn/src/naive/winograd_filter_preprocess/opr_impl.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#pragma once | |||
#include "megdnn/oprs.h" | |||
#include "src/common/utils.h" | |||
namespace megdnn { | |||
namespace naive { | |||
/*!
 * \brief Implementation of WinogradFilterPreprocess in the megdnn::naive
 * namespace.
 *
 * Inherits the base-class constructors and overrides exec().
 */
class WinogradFilterPreprocessImpl : public WinogradFilterPreprocess { | |||
public: | |||
//! reuse the constructors of WinogradFilterPreprocess
using WinogradFilterPreprocess::WinogradFilterPreprocess; | |||
//! override of the pure virtual WinogradFilterPreprocess::exec; takes the
//! input tensor \p src, the output tensor \p dst and a workspace
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||
_megdnn_workspace workspace) override; | |||
}; | |||
} // namespace naive | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -43,12 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW88_WINOGRAD && | |||
param.output_block_size == 6 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK8)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW88 && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -89,12 +84,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( | |||
strategy, m_tile_size, param) | |||
.get_matmul_kern_param(param); | |||
return m_matmul_algo->usable(matmul_param) && | |||
(param.filter_meta.format == param::ConvBias::Format::NCHW88 || | |||
(param.filter_meta.format == | |||
param::ConvBias::Format::NCHW88_WINOGRAD && | |||
param.output_block_size == 2 && | |||
param.winograd_matmul_format == | |||
param::MatrixMul::Format::MK8)) && | |||
param.filter_meta.format == param::ConvBias::Format::NCHW88 && | |||
!param.filter_meta.should_flip && | |||
(param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && | |||
param.filter_meta.spatial[0] == 3) && | |||
@@ -173,11 +173,7 @@ SmallVector<AlgoCategory> ConvBiasImpl::suggest_algo_category_order( | |||
auto FH = param.filter_meta.spatial[0]; | |||
auto FW = param.filter_meta.spatial[1]; | |||
//! TODO: now winograd only support fast-run | |||
if (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD || | |||
param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD) { | |||
return {AlgoCategory::WINOGRAD}; | |||
} | |||
//! nchw88 use mkl-dnn which algo is direct | |||
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { | |||
return {AlgoCategory::DIRECT, AlgoCategory::IM2COL}; | |||
@@ -629,6 +629,35 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_8x8x32) { | |||
#endif | |||
//! Runs ConvBiasForward through the checker on every case returned by
//! get_winograd_args(3): once with Float32 tensors and, when the target
//! provides FP16 vector arithmetic, once more with Float16 tensors.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(3);
    Checker<ConvBiasForward> checker(handle());

    //! Set the dtypes of tensor slots 0, 1, 2 and 4 (positions in the execs
    //! list below) plus the comparison tolerance, then execute each case.
    auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
                          DType B_dtype, DType C_dtype, DType D_dtype,
                          const float eps) {
        for (auto&& arg : args) {
            checker.set_dtype(0, A_dtype)
                    .set_dtype(1, B_dtype)
                    .set_dtype(2, C_dtype)
                    .set_dtype(4, D_dtype)
                    .set_epsilon(eps)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, {}, {}});
        }
    };

    run(args, dtype::Float32(), dtype::Float32(), dtype::Float32(),
        dtype::Float32(), 1e-3f);

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    //! Keep the RNG on the stack and hand its address to the checker, as the
    //! int8 tests in this file do with UniformIntRNG; the previous
    //! `new Float16PeriodicalRNG(...)` was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng);
    run(args, dtype::Float16(), dtype::Float16(), dtype::Float16(),
        dtype::Float16(), 0.35f);
#endif
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) { | |||
using namespace conv_bias; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(); | |||
@@ -717,207 +746,97 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { | |||
check_winograd("1:4:32", checker, args); | |||
} | |||
//! Checks ConvBiasForward on the get_winograd_args(3) cases against an extra
//! implementation that first transforms the filter with
//! WinogradFilterPreprocess and then runs ConvBias in NCHW_WINOGRAD format.
//! Float32 always runs; Float16 runs when the target provides FP16 vector
//! arithmetic.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(3);
    Checker<ConvBiasForward> checker(handle());

    //! Extra implementation: preprocess tensors[1] (the filter) with output
    //! block size m, then execute conv_bias on the transformed filter.
    auto extra_impl = [](const TensorNDArray& tensors, uint32_t m,
                         param::ConvBias param, Handle* handle) {
        megdnn_assert(param.format == param::ConvBias::Format::NCHW);

        auto winograd_preprocess_opr =
                handle->create_operator<WinogradFilterPreprocess>();
        winograd_preprocess_opr->param().output_block_size = m;
        TensorLayout filter_transform_layout;
        winograd_preprocess_opr->deduce_layout(tensors[1].layout,
                                               filter_transform_layout);
        size_t winograd_preprocess_workspace_in_bytes =
                winograd_preprocess_opr->get_workspace_in_bytes(
                        tensors[1].layout, filter_transform_layout);

        auto conv_bias_opr = handle->create_operator<ConvBias>();
        conv_bias_opr->param() = param;
        conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD;
        conv_bias_opr->param().output_block_size = m;
        size_t conv_bias_workspace_in_bytes =
                conv_bias_opr->get_workspace_in_bytes(
                        tensors[0].layout, filter_transform_layout,
                        tensors[2].layout, tensors[3].layout, tensors[4].layout,
                        nullptr);

        //! One allocation backs three regions: the transformed filter (0),
        //! the conv_bias workspace (1) and the preprocess workspace (2).
        WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
                                     conv_bias_workspace_in_bytes,
                                     winograd_preprocess_workspace_in_bytes});
        wb.set(malloc(wb.total_size_in_bytes()));

        TensorND filter_transform_tensor(wb.get(0),
                                         std::move(filter_transform_layout));
        winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
                                      wb.get_workspace(2));
        conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
                            tensors[3], tensors[4], nullptr,
                            wb.get_workspace(1));
        free(wb.ptr());
    };

    //! For every case and every output block size, bind extra_impl to the
    //! case's param, configure dtypes and tolerance, and execute.
    auto run = [&checker, &extra_impl](
                       Handle* handle, const std::vector<TestArg>& args,
                       const std::vector<size_t>& out_size, DType A_dtype,
                       DType B_dtype, DType C_dtype, DType D_dtype,
                       const float eps) {
        for (auto&& arg : args) {
            for (uint32_t m : out_size) {
                checker.set_extra_opr_impl(std::bind(extra_impl,
                                                     std::placeholders::_1, m,
                                                     arg.param, handle));
                checker.set_dtype(0, A_dtype)
                        .set_dtype(1, B_dtype)
                        .set_dtype(2, C_dtype)
                        .set_dtype(4, D_dtype)
                        .set_epsilon(eps)
                        .set_param(arg.param)
                        .execs({arg.src, arg.filter, arg.bias, {}, {}});
            }
        }
    };

    run(handle(), args, {6}, dtype::Float32(), dtype::Float32(),
        dtype::Float32(), dtype::Float32(), 1e-3f);

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    //! Keep the RNG on the stack and pass its address; the previous
    //! `new Float16PeriodicalRNG(...)` was never freed.
    Float16PeriodicalRNG rng(0x3c00);
    checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng);
    run(handle(), args, {6}, dtype::Float16(), dtype::Float16(),
        dtype::Float16(), dtype::Float16(), 0.35f);
#endif
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||
std::vector<TestArg> args_first_half(args.begin(), | |||
args.begin() + args.size() / 2); | |||
run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, | |||
1e-3f); | |||
run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, 1e-3f); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||
std::vector<TestArg> args_second_half(args.begin() + args.size() / 2, | |||
args.end()); | |||
run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, | |||
1e-3f); | |||
run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, 1e-3f); | |||
} | |||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||
checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); | |||
run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{}, | |||
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, | |||
0.25); | |||
run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{}, | |||
dtype::Float16{}, 0.25); | |||
} | |||
#endif | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
@@ -933,24 +852,19 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { | |||
get_quantized_winograd_mk_packed_args(8); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, | |||
DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
@@ -958,7 +872,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) { | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
} | |||
}; | |||
@@ -973,118 +886,99 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) { | |||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||
run(quantized_args, dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||
dtype::QuantizedS8(60.25f),1e-3); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) { | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
float epsilon = 0.001; | |||
#if MEGDNN_AARCH64 | |||
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; | |||
const char* matmul_name = "AARCH64_F32_MK4_4x16"; | |||
#else | |||
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; | |||
const char* matmul_name = "ARMV7_F32_MK4_4x8"; | |||
#endif | |||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||
ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str())); | |||
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); | |||
std::vector<TestArg> quantized_args = | |||
get_int8_nchw44_args(3, 4, false, true); | |||
get_int8_nchw44_args(3, 4, true, true); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||
run(quantized_args, dtype::QuantizedS8(0.41113496f), | |||
dtype::QuantizedS8(0.01887994f), | |||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||
dtype::QuantizedS8(0.49550694f), epsilon); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) { | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
float epsilon = 0.001; | |||
#if MEGDNN_AARCH64 | |||
const char* matmul_name = "AARCH64_F32_MK4_4x16"; | |||
const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8"; | |||
#else | |||
const char* matmul_name = "ARMV7_F32_MK4_4x8"; | |||
const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8"; | |||
#endif | |||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); | |||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true); | |||
ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str())); | |||
std::vector<TestArg> quantized_args = | |||
get_int8_nchw44_args(3, 4, false, true); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), | |||
dtype::QuantizedS8(0.01887994f), | |||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, | |||
epsilon); | |||
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) { | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) { | |||
using namespace conv_bias; | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
@@ -1096,23 +990,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
#endif | |||
checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>( | |||
ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str())); | |||
std::vector<TestArg> quantized_args = | |||
get_int8_nchw44_args(3, 4, true, true); | |||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), | |||
run(quantized_args, dtype::QuantizedS8(0.41113496f), | |||
dtype::QuantizedS8(0.01887994f), | |||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, | |||
epsilon); | |||
dtype::QuantizedS8(0.49550694f), epsilon); | |||
} | |||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) { | |||
using namespace conv_bias; | |||
@@ -1170,7 +1056,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) { | |||
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, | |||
param::MatrixMul::Format::MK8); | |||
} | |||
#endif | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { | |||
using namespace conv_bias; | |||
@@ -1187,6 +1072,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { | |||
check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) { | |||
using namespace conv_bias; | |||
@@ -83,56 +83,12 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) { | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto extra_impl = [](const TensorNDArray& tensors, uint32_t m, | |||
param::ConvBias param, Handle* handle) { | |||
megdnn_assert(param.format == param::ConvBias::Format::NCHW44); | |||
auto winograd_preprocess_opr = | |||
handle->create_operator<WinogradFilterPreprocess>(); | |||
winograd_preprocess_opr->param().output_block_size = m; | |||
winograd_preprocess_opr->param().format = param::MatrixMul::Format::MK4; | |||
TensorLayout filter_transform_layout; | |||
winograd_preprocess_opr->deduce_layout(tensors[1].layout, | |||
filter_transform_layout); | |||
size_t winograd_preprocess_workspace_in_bytes = | |||
winograd_preprocess_opr->get_workspace_in_bytes( | |||
tensors[1].layout, filter_transform_layout); | |||
auto conv_bias_opr = handle->create_operator<ConvBias>(); | |||
conv_bias_opr->param() = param; | |||
conv_bias_opr->param().format = | |||
param::ConvBias::Format::NCHW44_WINOGRAD; | |||
conv_bias_opr->param().output_block_size = m; | |||
size_t conv_bias_workspace_in_bytes = | |||
conv_bias_opr->get_workspace_in_bytes( | |||
tensors[0].layout, filter_transform_layout, | |||
tensors[2].layout, tensors[3].layout, tensors[4].layout, | |||
nullptr); | |||
WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), | |||
conv_bias_workspace_in_bytes, | |||
winograd_preprocess_workspace_in_bytes}); | |||
wb.set(malloc(wb.total_size_in_bytes())); | |||
TensorND filter_transform_tensor(wb.get(0), | |||
std::move(filter_transform_layout)); | |||
winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, | |||
wb.get_workspace(2)); | |||
conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], | |||
tensors[3], tensors[4], nullptr, | |||
wb.get_workspace(1)); | |||
free(wb.ptr()); | |||
}; | |||
auto run = [&checker, &extra_impl]( | |||
Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker]( | |||
const std::vector<TestArg>& args, | |||
DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
const float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind(extra_impl, | |||
std::placeholders::_1, m, | |||
arg.param, handle)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
@@ -140,7 +96,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) { | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
} | |||
}; | |||
@@ -149,7 +104,7 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_PREPROCESS_NCHW44) { | |||
// dtype::Float32(), dtype::Float32(), 1e-2f); | |||
//! remove this when low precision mode is ok | |||
run(handle(), nchw44_args, {2, 6}, dtype::Float32(), dtype::Float32(), | |||
run(nchw44_args, dtype::Float32(), dtype::Float32(), | |||
dtype::Float32(), dtype::Float32(), 1e-3f); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
@@ -158,31 +113,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||
std::vector<TestArg> args_first_half(args.begin(), | |||
args.begin() + args.size() / 2); | |||
run(handle(), args_first_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, | |||
1e-3f); | |||
run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, 1e-3f); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) { | |||
@@ -190,31 +138,24 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||
std::vector<TestArg> args_second_half(args.begin() + args.size() / 2, | |||
args.end()); | |||
run(handle(), args_second_half, {2, 6}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4, | |||
1e-3f); | |||
run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, | |||
dtype::Float32{}, 1e-3f); | |||
} | |||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
@@ -223,32 +164,25 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
std::vector<TestArg> args = get_winograd_mk_packed_args(8); | |||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||
checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng); | |||
run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{}, | |||
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, | |||
0.25); | |||
run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{}, | |||
dtype::Float16{}, 0.25); | |||
} | |||
#endif | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
@@ -257,23 +191,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
@@ -289,9 +217,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
get_quantized_winograd_mk_packed_args(8); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) { | |||
@@ -299,15 +226,11 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, | |||
DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
@@ -315,7 +238,6 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
} | |||
}; | |||
@@ -330,9 +252,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) { | |||
@@ -340,23 +261,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
@@ -372,9 +287,8 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
get_int8_nchw44_args(3, 4, false, true); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f), | |||
dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3); | |||
run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f), | |||
dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
@@ -383,23 +297,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
@@ -414,11 +322,10 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), | |||
run(quantized_args, dtype::QuantizedS8(0.41113496f), | |||
dtype::QuantizedS8(0.01887994f), | |||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, | |||
epsilon); | |||
dtype::QuantizedS8(0.49550694f), epsilon); | |||
} | |||
TEST_F(ARM_COMMON_MULTI_THREADS, | |||
@@ -427,23 +334,17 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker( | |||
handle()); | |||
auto run = [&checker](Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
param::MatrixMul::Format format, float eps) { | |||
float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind( | |||
winograd_algo_extra_impl, std::placeholders::_1, m, | |||
arg.param, handle, format)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
@@ -459,11 +360,10 @@ TEST_F(ARM_COMMON_MULTI_THREADS, | |||
get_int8_nchw44_args(3, 4, true, true); | |||
UniformIntRNG int_rng{-50, 50}; | |||
checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng); | |||
run(handle(), quantized_args, {2}, dtype::QuantizedS8(0.41113496f), | |||
run(quantized_args, dtype::QuantizedS8(0.41113496f), | |||
dtype::QuantizedS8(0.01887994f), | |||
dtype::QuantizedS32(0.41113496f * 0.01887994f), | |||
dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4, | |||
epsilon); | |||
dtype::QuantizedS8(0.49550694f), epsilon); | |||
} | |||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) { | |||
@@ -1,91 +0,0 @@ | |||
/** | |||
* \file dnn/test/arm_common/winograd_filter_preprocess.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "test/common/checker.h" | |||
#include "test/common/benchmarker.h" | |||
#include "test/common/winograd_filter_preprocess.h" | |||
#include "test/arm_common/fixture.h" | |||
using namespace megdnn; | |||
using namespace test; | |||
// Runs the WinogradFilterPreprocess checker with Float32 on both tensors over | |||
// one combined argument list: three get_args() sets plus two MK4-packed sets. | |||
TEST_F(ARM_COMMON, WinogradFilterPreprocessF32) { | |||
using namespace winograd_filter_preprocess; | |||
Checker<WinogradFilterPreprocess> checker(handle()); | |||
// default (non-packed) cases: get_args(6, 3), get_args(5, 4), get_args(4, 5) | |||
// NOTE(review): the two integers look like (output block size, kernel size) | |||
// -- confirm against test/common/winograd_filter_preprocess.h | |||
std::vector<TestArg> args = get_args(6, 3); | |||
std::vector<TestArg> args54 = get_args(5, 4); | |||
std::vector<TestArg> args45 = get_args(4, 5); | |||
// mk4: packed cases requested with Format::MK4; first argument is 2 and 6 | |||
std::vector<TestArg> args_mk4_out2 = | |||
get_mk_packed_args(2, param::Winograd::Format::MK4, 4); | |||
std::vector<TestArg> args_mk4_out6 = | |||
get_mk_packed_args(6, param::Winograd::Format::MK4, 4); | |||
// append every extra set to `args` so a single loop covers all cases | |||
args.insert(args.end(), args54.begin(), args54.end()); | |||
args.insert(args.end(), args45.begin(), args45.end()); | |||
args.insert(args.end(), args_mk4_out2.begin(), args_mk4_out2.end()); | |||
args.insert(args.end(), args_mk4_out6.begin(), args_mk4_out6.end()); | |||
for (auto&& arg : args) { | |||
// second shape is left empty; presumably the checker deduces the | |||
// output layout -- TODO confirm | |||
checker.set_param(arg.param) | |||
.set_dtype(0, dtype::Float32{}) | |||
.set_dtype(1, dtype::Float32{}) | |||
.execs({arg.src, {}}); | |||
} | |||
} | |||
// Runs the WinogradFilterPreprocess checker on MK8-packed arguments with a | |||
// QuantizedS8 tensor at index 0 and a QuantizedS16 tensor at index 1. | |||
TEST_F(ARM_COMMON, WinogradFilterPreprocessQs8) { | |||
using namespace winograd_filter_preprocess; | |||
std::vector<TestArg> args = | |||
get_mk_packed_args(2, param::Winograd::Format::MK8, 8); | |||
Checker<WinogradFilterPreprocess> checker(handle()); | |||
// integer RNG constructed with {-50, 50}, registered for indices 0, 1 and 2 | |||
// NOTE(review): execs() below only passes two tensors, so the index-2 | |||
// registration looks unused -- confirm | |||
UniformIntRNG rng{-50, 50}; | |||
checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng); | |||
for (auto&& arg : args) { | |||
// both quantized dtypes are built with the same 2.5f argument | |||
checker.set_param(arg.param) | |||
.set_dtype(0, dtype::QuantizedS8(2.5f)) | |||
.set_dtype(1, dtype::QuantizedS16(2.5f)) | |||
.execs({arg.src, {}}); | |||
} | |||
} | |||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | |||
// Runs the WinogradFilterPreprocess checker with Float16 on both tensors over | |||
// a combined list of default-format and MK8-packed arguments. | |||
TEST_F(ARM_COMMON, WinogradFilterPreprocessF16) { | |||
using namespace winograd_filter_preprocess; | |||
Checker<WinogradFilterPreprocess> checker(handle()); | |||
// default | |||
std::vector<TestArg> args = get_args(6, 3); | |||
// requested through the packed-args helper but with Format::DEFAULT | |||
std::vector<TestArg> args_23 = | |||
get_mk_packed_args(2, param::Winograd::Format::DEFAULT, 4); | |||
std::vector<TestArg> args45 = get_args(4, 5); | |||
// mk8 | |||
std::vector<TestArg> args_mk8_out2 = | |||
get_mk_packed_args(2, param::Winograd::Format::MK8, 8); | |||
// append every extra set to `args` so a single loop covers all cases | |||
args.insert(args.end(), args_23.begin(), args_23.end()); | |||
args.insert(args.end(), args45.begin(), args45.end()); | |||
args.insert(args.end(), args_mk8_out2.begin(), args_mk8_out2.end()); | |||
// NOTE(review): allocated with `new` and never deleted in this test body; | |||
// unless the checker takes ownership this leaks -- confirm | |||
Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00); | |||
for (auto&& arg : args) { | |||
// the RNG is only registered for index 0 | |||
checker.set_param(arg.param) | |||
.set_rng(0, rng) | |||
.set_dtype(0, dtype::Float16{}) | |||
.set_dtype(1, dtype::Float16{}) | |||
.execs({arg.src, {}}); | |||
} | |||
} | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -1152,50 +1152,6 @@ void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args, | |||
} | |||
/*! | |||
 * \brief run ConvBias through an explicit two-step winograd path: transform | |||
 * tensors[1] with WinogradFilterPreprocess, then execute ConvBias in the | |||
 * matching *_WINOGRAD format on the transformed filter. | |||
 * | |||
 * \param tensors indices 0..4 are used; index 1 is the filter that gets | |||
 * transformed (presumably src, filter, bias, z, dst -- confirm with caller) | |||
 * \param m value written to output_block_size of both operators | |||
 * \param param ConvBias param; its format must be NCHW or NCHW44 | |||
 * \param handle handle used to create both operators | |||
 * \param format format assigned to the preprocess operator's param | |||
 */ | |||
void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m, | |||
param::ConvBias param, Handle* handle, | |||
param::MatrixMul::Format format) { | |||
megdnn_assert(param.format == param::ConvBias::Format::NCHW || | |||
param.format == param::ConvBias::Format::NCHW44); | |||
// step 1: configure the filter preprocess operator | |||
auto winograd_preprocess_opr = | |||
handle->create_operator<WinogradFilterPreprocess>(); | |||
winograd_preprocess_opr->param().output_block_size = m; | |||
winograd_preprocess_opr->param().format = format; | |||
winograd_preprocess_opr->param().compute_mode = param.compute_mode; | |||
// layout of the transformed filter is deduced from the original filter | |||
TensorLayout filter_transform_layout; | |||
winograd_preprocess_opr->deduce_layout(tensors[1].layout, | |||
filter_transform_layout); | |||
size_t winograd_preprocess_workspace_in_bytes = | |||
winograd_preprocess_opr->get_workspace_in_bytes( | |||
tensors[1].layout, filter_transform_layout); | |||
// step 2: configure ConvBias with the winograd variant of the input format | |||
auto conv_bias_opr = handle->create_operator<ConvBias>(); | |||
conv_bias_opr->param() = param; | |||
if (param.format == param::ConvBias::Format::NCHW) { | |||
conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD; | |||
} else { | |||
conv_bias_opr->param().format = | |||
param::ConvBias::Format::NCHW44_WINOGRAD; | |||
} | |||
conv_bias_opr->param().output_block_size = m; | |||
size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes( | |||
tensors[0].layout, filter_transform_layout, tensors[2].layout, | |||
tensors[3].layout, tensors[4].layout, nullptr); | |||
// one allocation split into three slots: | |||
// 0 = transformed filter, 1 = ConvBias workspace, 2 = preprocess workspace | |||
WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), | |||
conv_bias_workspace_in_bytes, | |||
winograd_preprocess_workspace_in_bytes}); | |||
// NOTE(review): malloc result is not checked, and the buffer is only | |||
// released by the free() at the end, so an early exit would leak it | |||
wb.set(malloc(wb.total_size_in_bytes())); | |||
TensorND filter_transform_tensor(wb.get(0), | |||
std::move(filter_transform_layout)); | |||
// transform the filter, then run ConvBias on the transformed filter | |||
winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, | |||
wb.get_workspace(2)); | |||
conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], | |||
tensors[3], tensors[4], nullptr, wb.get_workspace(1)); | |||
free(wb.ptr()); | |||
}; | |||
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle, | |||
RNG* rng, float epsilon, DType type0, DType type1, | |||
DType type2, DType type3, const char* algo_name) { | |||
@@ -1388,7 +1344,6 @@ std::vector<conv_bias::TestArg> get_nchw44_conv_bias_args( | |||
} | |||
return args; | |||
} | |||
} // namespace conv_bias | |||
} // namespace test | |||
} // namespace megdnn | |||
@@ -94,9 +94,6 @@ void checker_conv_bias_int8x8x16( | |||
std::vector<megdnn::test::conv_bias::TestArg> args, | |||
megdnn::Handle* handle, const char* algo_name); | |||
void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m, | |||
param::ConvBias param, Handle* handle, | |||
param::MatrixMul::Format format); | |||
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, | |||
Handle* handle, RNG* rng, float epsilon, | |||
DType type0, DType type1, DType type2, | |||
@@ -95,7 +95,6 @@ DEF(MaskConvolution, 4, true, true); | |||
DEF(MaskPropagate, 2, true, true); | |||
DEF(RelayoutFormat, 2, true, true); | |||
DEF(MaxTensorDiff, 2, true, false); | |||
DEF(WinogradFilterPreprocess, 2, true, true); | |||
DEF(LocalShareForward, 3, true, true); | |||
DEF(LocalShareBackwardData, 3, true, false); | |||
DEF(LocalShareBackwardFilter, 3, true, false); | |||
@@ -1814,69 +1814,22 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { | |||
using namespace conv_bias; | |||
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); | |||
Checker<ConvBiasForward> checker(handle()); | |||
auto extra_impl = [](const TensorNDArray& tensors, uint32_t m, | |||
param::ConvBias param, Handle* handle) { | |||
megdnn_assert(param.format == param::ConvBias::Format::NCHW88); | |||
auto winograd_preprocess_opr = | |||
handle->create_operator<WinogradFilterPreprocess>(); | |||
winograd_preprocess_opr->param().output_block_size = m; | |||
winograd_preprocess_opr->param().format = param::MatrixMul::Format::MK8; | |||
TensorLayout filter_transform_layout; | |||
winograd_preprocess_opr->deduce_layout(tensors[1].layout, | |||
filter_transform_layout); | |||
size_t winograd_preprocess_workspace_in_bytes = | |||
winograd_preprocess_opr->get_workspace_in_bytes( | |||
tensors[1].layout, filter_transform_layout); | |||
auto conv_bias_opr = handle->create_operator<ConvBias>(); | |||
conv_bias_opr->param() = param; | |||
conv_bias_opr->param().format = | |||
param::ConvBias::Format::NCHW88_WINOGRAD; | |||
conv_bias_opr->param().output_block_size = m; | |||
size_t conv_bias_workspace_in_bytes = | |||
conv_bias_opr->get_workspace_in_bytes( | |||
tensors[0].layout, filter_transform_layout, | |||
tensors[2].layout, tensors[3].layout, tensors[4].layout, | |||
nullptr); | |||
WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(), | |||
conv_bias_workspace_in_bytes, | |||
winograd_preprocess_workspace_in_bytes}); | |||
wb.set(malloc(wb.total_size_in_bytes())); | |||
TensorND filter_transform_tensor(wb.get(0), | |||
std::move(filter_transform_layout)); | |||
winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor, | |||
wb.get_workspace(2)); | |||
conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2], | |||
tensors[3], tensors[4], nullptr, | |||
wb.get_workspace(1)); | |||
free(wb.ptr()); | |||
}; | |||
auto run = [&checker, &extra_impl]( | |||
Handle* handle, const std::vector<TestArg>& args, | |||
const std::vector<size_t>& out_size, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
const float eps) { | |||
auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype, | |||
DType B_dtype, DType C_dtype, DType D_dtype, | |||
const float eps) { | |||
for (auto&& arg : args) { | |||
for (uint32_t m : out_size) { | |||
checker.set_extra_opr_impl(std::bind(extra_impl, | |||
std::placeholders::_1, m, | |||
arg.param, handle)); | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
checker.set_dtype(0, A_dtype) | |||
.set_dtype(1, B_dtype) | |||
.set_dtype(2, C_dtype) | |||
.set_dtype(4, D_dtype) | |||
.set_epsilon(eps) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
} | |||
}; | |||
run(handle(), args, {2, 6}, dtype::Float32(), dtype::Float32(), | |||
dtype::Float32(), dtype::Float32(), 1e-3f); | |||
run(args, dtype::Float32(), dtype::Float32(), dtype::Float32(), | |||
dtype::Float32(), 1e-3f); | |||
} | |||
/*********************************** End winograd ************************/ | |||
@@ -32,7 +32,6 @@ | |||
#include "megbrain/jit/fusion_pass.h" | |||
#endif | |||
#include "megbrain/gopt/weights_preprocess.h" | |||
using namespace mgb; | |||
using namespace cg; | |||
@@ -14,7 +14,6 @@ | |||
#include "megbrain/gopt/gtrans.h" | |||
#include "megbrain/gopt/inference.h" | |||
#include "megbrain/gopt/misc.h" | |||
#include "megbrain/gopt/weights_preprocess.h" | |||
#include "megbrain/graph/cg.h" | |||
#include "megbrain/graph/event.h" | |||
#include "megbrain/graph/exc_extra_info.h" | |||
@@ -780,8 +779,6 @@ const GraphOptimizer& GraphOptimizer::add_passes_for_optimize_options( | |||
add_pass<FuseConvBiasZPass>(); | |||
}); | |||
cb(weight_winograd_transform, | |||
{ add_pass<WinogradTransformReplacePass>(); }); | |||
#undef cb | |||
if (need_param_fuse) { | |||
@@ -1,206 +0,0 @@ | |||
/** | |||
* \file src/gopt/impl/weights_preprocess.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "megbrain/gopt/weights_preprocess.h" | |||
#include "megbrain/gopt/inference.h" | |||
#include "megbrain/opr/dnn/convolution.h" | |||
#include "megbrain/opr/tensor_manip.h" | |||
#include "megbrain/utils/hash_ct.h" | |||
#include "midout.h" | |||
//! Declares the midout tag megbrain_weight_preprocess used by the two helper macros below. | |||
MIDOUT_DECL(megbrain_weight_preprocess) | |||
//! MIDOUT_B(tag): opens a MIDOUT_BEGIN region for megbrain_weight_preprocess, keyed by | |||
//! midout_iv(MGB_HASH_STR(tag)); it leaves a brace open that MIDOUT_E must close. | |||
#define MIDOUT_B(tag) \ | |||
MIDOUT_BEGIN(megbrain_weight_preprocess, midout_iv(MGB_HASH_STR(tag))) { | |||
//! MIDOUT_E: closes the brace opened by MIDOUT_B and ends the region with MIDOUT_END(). | |||
#define MIDOUT_E \ | |||
} \ | |||
MIDOUT_END(); | |||
using namespace mgb; | |||
using namespace gopt; | |||
using namespace cg; | |||
//! Returns the name of this pass: the string literal "winograd_transform". | |||
const char* WinogradTransformReplacePass::name() const { | |||
return "winograd_transform"; | |||
} | |||
//! Rewrites the graph held by \p opt: every ConvBias operator whose filter input is | |||
//! (mid-)const and whose cached profiling result names a winograd algorithm is re-created | |||
//! with its filter routed through WinogradFilterPreprocess and its param format switched to | |||
//! the matching *_WINOGRAD format. Every other operator is forwarded unchanged through | |||
//! rewriter.auto_replace_outputs(). | |||
void WinogradTransformReplacePass::apply(OptState& opt) const { | |||
MIDOUT_B("WinogradTransformReplacePass::apply") | |||
auto rewriter = opt.graph().make_rewriter(); | |||
//! Feed every operator to the const-var propagator first, so that is_const() / | |||
//! is_midconst() can be queried on the filter inputs below. | |||
ConstVarPropogate cvprop{ConstVarType::IMMUTABLE_AND_PARAM}; | |||
opt.graph().iter([&cvprop](OperatorNodeBase *opr) { | |||
cvprop.add_opr(opr); | |||
}); | |||
//! Looks up the profile cache of \p opr and returns the algo name of the first cached | |||
//! entry, or an empty string when the cache has no valid, non-empty record. | |||
auto get_algo = [](const opr::ConvBias& opr) -> std::string { | |||
auto&& inputs = opr.input(); | |||
SmallVector<TensorLayout> layouts; | |||
mgb_assert(inputs.size() >= 2 && inputs.size() <= 4); | |||
auto&& mo = opr.megdnn_opr(); | |||
//! The cache key always holds four input layouts followed by the output layout; | |||
//! input slots that the opr does not have get an empty-shape placeholder. | |||
for (size_t i = 0; i < 4; i++) { | |||
if (inputs.size() <= i) { | |||
if (i == 2) { | |||
//! bias | |||
DType dtype; | |||
mo->deduce_dtype(inputs[0]->dtype(), inputs[1]->dtype(), | |||
DType{}, DType{}, dtype); | |||
layouts.emplace_back(TensorShape{}, dtype); | |||
} else { | |||
//! any other missing slot: placeholder with the dtype and format of output 0 | |||
layouts.emplace_back(TensorShape{}, opr.output(0)->dtype(), | |||
opr.output(0)->format()); | |||
} | |||
} else { | |||
layouts.emplace_back(inputs[i]->shape(), inputs[i]->dtype(), | |||
inputs[i]->format()); | |||
} | |||
} | |||
layouts.emplace_back(opr.output(0)->shape(), opr.output(0)->dtype(), | |||
opr.output(0)->format()); | |||
AlgoChooserProfileCache& cache = opr.profile_cache(); | |||
auto param_blob = opr.param_blob(); | |||
//! The key is built from the five layouts plus the raw param blob (pointer, size). | |||
AlgoChooserProfileCache::Key cache_key{layouts.data(), layouts.size(), | |||
param_blob.first, | |||
param_blob.second}; | |||
auto&& rst = cache.get(cache_key); | |||
if (!rst.valid()) | |||
return ""; | |||
auto prof = rst.val(); | |||
if (prof.empty()) | |||
return ""; | |||
return prof[0].algo; | |||
}; | |||
//! Per-operator callback. Each `break` inside the do/while(0) leaves the winograd path | |||
//! and falls through to the generic auto_replace_outputs() at the bottom. | |||
auto on_opr = [&](OperatorNodeBase* opr) { | |||
auto type = opr->dyn_typeinfo(); | |||
do { | |||
if (type != opr::ConvBias::typeinfo()) | |||
break; | |||
auto&& conv_bias_opr = opr->cast_final_safe<opr::ConvBias>(); | |||
auto&& inputs = conv_bias_opr.input(); | |||
//! Map the original inputs to their (possibly already rewritten) counterparts. | |||
VarNodeArray new_inp; | |||
new_inp.reserve(inputs.size()); | |||
for (auto i : inputs) { | |||
new_inp.push_back(rewriter.get_var(i)); | |||
} | |||
//! Input 1 is later handed to WinogradFilterPreprocess; it is only transformed | |||
//! when the propagator reports it as const or mid-const. | |||
if (!(cvprop.is_midconst(inputs[1]) || | |||
cvprop.is_const(inputs[1]))) { | |||
break; | |||
} | |||
auto algo_name = get_algo(conv_bias_opr); | |||
auto winograd_param = | |||
megdnn::ConvBias::parse_winograd_name(algo_name); | |||
//! An algo name that does not parse to a valid winograd param leaves the opr untouched. | |||
if (winograd_param == megdnn::ConvBias::INVALID_WINOGRAD_PARAM) | |||
break; | |||
mgb_assert( | |||
conv_bias_opr.param().format == | |||
megdnn::ConvBias::Param::Format::NCHW || | |||
conv_bias_opr.param().format == | |||
megdnn::ConvBias::Param::Format::NCHW88 || | |||
conv_bias_opr.param().format == | |||
megdnn::ConvBias::Param::Format::NCHW44, | |||
"currently winograd only suppport NCHW and NCHW44 and " | |||
"NCHW88"); | |||
opr::ConvBiasForward::check_winograd_param_valid( | |||
winograd_param, conv_bias_opr.input(0)->dtype()); | |||
//! Param of the filter preprocess opr: matmul format and output block size are | |||
//! both taken from the parsed winograd param. | |||
megdnn::param::Winograd winograd_preprocess_param; | |||
winograd_preprocess_param.format = | |||
opr::ConvBiasForward::get_matmul_format(winograd_param); | |||
winograd_preprocess_param.output_block_size = | |||
winograd_param.output_block_size; | |||
//! Work on a copy of the ConvBias param; the original opr is not modified. | |||
auto conv_bias_param = conv_bias_opr.param(); | |||
//! If input dtype is Qint8 and matmul format is MK4, The winograd | |||
//! compute type is float. | |||
if (conv_bias_opr.input(0)->dtype().enumv() == | |||
DTypeEnum::QuantizedS8 && | |||
winograd_preprocess_param.format == | |||
megdnn::param::MatrixMul::Format::MK4) { | |||
winograd_preprocess_param.compute_mode = | |||
megdnn::param::ConvBias::ComputeMode::FLOAT32; | |||
conv_bias_param.compute_mode = | |||
megdnn::param::ConvBias::ComputeMode::FLOAT32; | |||
} | |||
auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make( | |||
new_inp[1], winograd_preprocess_param); | |||
//! Only the 2-input and 3-input forms are rebuilt below; a fourth input trips this assert. | |||
mgb_assert(inputs.size() == 2 || inputs.size() == 3, | |||
"input size need to be 2/3, but got: %zu", | |||
inputs.size()); | |||
SymbolVar new_conv_bias_opr; | |||
//! Choose the winograd format from input 0: a 4-dim shape selects NCHW_WINOGRAD, a | |||
//! 5-dim shape selects NCHW88_WINOGRAD / NCHW44_WINOGRAD by its last dim (8 or 4). | |||
if (new_inp[0]->shape().ndim == 4) { | |||
conv_bias_param.format = | |||
megdnn::ConvBias::Param::Format::NCHW_WINOGRAD; | |||
} else { | |||
mgb_assert(new_inp[0]->shape().ndim == 5); | |||
size_t pack_size = new_inp[0]->shape()[4]; | |||
if (pack_size == 8) { | |||
conv_bias_param.format = | |||
megdnn::ConvBias::Param::Format::NCHW88_WINOGRAD; | |||
} else if (pack_size == 4) { | |||
conv_bias_param.format = | |||
megdnn::ConvBias::Param::Format::NCHW44_WINOGRAD; | |||
} else { | |||
mgb_assert(0, "Invalid pack size %zu in algo %s", pack_size, | |||
algo_name.c_str()); | |||
} | |||
} | |||
conv_bias_param.output_block_size = | |||
winograd_param.output_block_size; | |||
//! Re-create the ConvBias with the preprocessed filter, keeping the original | |||
//! execution policy and config; the bias (input 2) is forwarded when present. | |||
if (inputs.size() == 2) { | |||
new_conv_bias_opr = opr::ConvBias::make( | |||
new_inp[0], winograd_preprocess_opr.node(), | |||
conv_bias_param, conv_bias_opr.execution_policy(), | |||
conv_bias_opr.config()); | |||
} else { | |||
new_conv_bias_opr = opr::ConvBias::make( | |||
new_inp[0], winograd_preprocess_opr.node(), new_inp[2], | |||
conv_bias_param, conv_bias_opr.execution_policy(), | |||
conv_bias_opr.config()); | |||
} | |||
auto&& origin_out = conv_bias_opr.output(); | |||
auto&& cur_out = new_conv_bias_opr.node()->owner_opr()->output(); | |||
mgb_assert(origin_out.size() == cur_out.size()); | |||
//! Redirect each original output to the new opr, skipping outputs flagged VOLATILE_CONTENT. | |||
for (size_t i = 0; i < origin_out.size(); i++) { | |||
if (!origin_out[i]->contain_flag( | |||
VarNode::Flag::VOLATILE_CONTENT)) { | |||
rewriter.replace_var(origin_out[i], cur_out[i], nullptr); | |||
} | |||
} | |||
return; | |||
} while (0); | |||
rewriter.auto_replace_outputs(opr); | |||
}; | |||
opt.graph().iter(on_opr); | |||
rewriter.apply_inplace(); | |||
MIDOUT_E | |||
} | |||
/** | |||
* \brief run WinogradTransformReplacePass followed by ParamFusePass on | |||
* \p dest_vars through GraphOptimizer::apply_inplace. | |||
* | |||
* \warning WinogradTransformReplacePass assumes that the ParamFuse pass has | |||
* already been run (currently ParamFuse runs in optimize_for_inference when | |||
* dumping a model); otherwise it cannot deal with \c ConvBias(x, W+1): the | |||
* node W+1 has no PERSISTENT_DEVICE_VALUE flag, it is a mid-const node, so | |||
* strictly speaking ConstVarPropogate should be used. | |||
*/ | |||
void gopt::transform_vars_inplace_with_winograd( | |||
mgb::cg::VarNodeArray& dest_vars) { | |||
gopt::GraphOptimizer optimizer; | |||
optimizer.add_pass<WinogradTransformReplacePass>(); | |||
optimizer.add_pass<ParamFusePass>(); | |||
optimizer.apply_inplace(dest_vars); | |||
} | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -1,32 +0,0 @@ | |||
/** | |||
* \file src/gopt/include/megbrain/gopt/weights_preprocess.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#pragma once | |||
#include "megbrain/gopt/framework.h" | |||
namespace mgb { | |||
namespace gopt { | |||
//! Graph optimizer pass derived from Pass; it overrides name() and apply(). | |||
//! Impl is only forward-declared in this header. | |||
class WinogradTransformReplacePass final : public Pass { | |||
class Impl; | |||
public: | |||
//! name of this pass | |||
const char* name() const override; | |||
//! run the pass on the given optimizer state | |||
void apply(OptState& opt) const override; | |||
}; | |||
//! Runs a GraphOptimizer holding WinogradTransformReplacePass and then ParamFusePass over | |||
//! \p dest_vars via apply_inplace. | |||
void transform_vars_inplace_with_winograd(mgb::cg::VarNodeArray& dest_vars); | |||
} // namespace gopt | |||
} // namespace mgb | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -46,7 +46,6 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | |||
ConvTensorLayouts origin_layouts = ctx.layouts(); | |||
typename Opr::Param origin_param = ctx.mgb_opr()->param(); | |||
get_origin_param_and_layouts(ctx, origin_layouts, origin_param); | |||
AlgoChooserProfileCache::Key cache_key{origin_layouts.data(), | |||
origin_layouts.size(), &origin_param, | |||
sizeof(origin_param)}; | |||
@@ -104,18 +103,6 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | |||
return prof_rst; | |||
} | |||
//! ConvBias specialization: reads the format and output_block_size from the megdnn opr's | |||
//! param and passes them, together with ctx.layouts()[0] (src) and ctx.layouts()[1] | |||
//! (winograd filter), to deduce_winograd_origin_layout_and_param, which writes its results | |||
//! into layouts[1] and \p param. | |||
template <> | |||
void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts( | |||
const ExeContext& ctx, ConvTensorLayouts& layouts, | |||
megdnn::ConvBias::Param& param) { | |||
auto format = static_cast<megdnn::param::ConvBias::Format>( | |||
ctx.megdnn_opr()->param().format); | |||
size_t output_block_size = ctx.megdnn_opr()->param().output_block_size; | |||
megdnn::ConvBias::deduce_winograd_origin_layout_and_param( | |||
format, output_block_size, ctx.layouts()[0], ctx.layouts()[1], | |||
layouts[1], param); | |||
} | |||
template <typename Opr> | |||
typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile( | |||
ExeContext& ctx, bool require_reproducible, bool enable_update) { | |||
@@ -1607,15 +1607,5 @@ void RelayoutFormat::init_output_format() { | |||
} | |||
// f}}} | |||
// | |||
/* f{{{ ===================== WinogradFilterPreprocess ===================== */ | |||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WinogradFilterPreprocess); | |||
MEGDNN_OPR_INIT1(WinogradFilterPreprocess, "winograd_filter_preprocess") | |||
//! Sets the dtype of output 0: builds a src layout from input 0's shape, dtype and format, | |||
//! lets the megdnn opr deduce the dst layout from it, and copies dst.dtype to the output. | |||
void WinogradFilterPreprocess::init_output_dtype() { | |||
TensorLayout dst; | |||
TensorLayout src{input(0)->shape(), input(0)->dtype(), input(0)->format()}; | |||
megdnn_opr()->deduce_layout(src, dst); | |||
output(0)->dtype(dst.dtype); | |||
} | |||
// f}}} | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -184,7 +184,6 @@ namespace opr { | |||
MGB_REG_OPR_SHALLOW_COPY(ParamPackConcat, opr_shallow_copy_param_pack_concat); | |||
MGB_SEREG_OPR(RelayoutFormat, 1); | |||
MGB_SEREG_OPR(WinogradFilterPreprocess, 1); | |||
} // namespace opr | |||
} // namespace mgb | |||
@@ -113,10 +113,6 @@ class AlgoChooser { | |||
//! entrance for getting algorithm according to execution strategy | |||
static ImplAlgo get_algo(ExeContext& ctx); | |||
//! Generic version with an empty body: the layouts and the param passed in are left untouched. | |||
static void get_origin_param_and_layouts(const ExeContext&, | |||
ConvTensorLayouts&, | |||
typename Opr::Param&) {} | |||
//! get all profile result, either by retrieving cache or profiling | |||
static AlgoChooserProfileCache::Result get_profile_result( | |||
ExeContext& ctx, bool enable_update); | |||
@@ -635,22 +635,6 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat, | |||
const OperatorNodeConfig &config = {}); | |||
void init_output_format() override final; | |||
}; | |||
/*! | |||
* \brief change the layout of conv weights based on the winograd transform. | |||
* | |||
* See docs of megdnn params for more details | |||
*/ | |||
MGB_DEFINE_OPR_CLASS(WinogradFilterPreprocess, | |||
intl::MegDNNOprWrapperFwd<megdnn::WinogradFilterPreprocess>) | |||
public: | |||
//! \p p0 is the single input var of this opr | |||
WinogradFilterPreprocess(VarNode* p0, const Param& param, | |||
const OperatorNodeConfig& config); | |||
//! factory counterpart of the constructor; \p param and \p config default to {} | |||
static SymbolVar make(SymbolVar p0, const Param& param = {}, | |||
const OperatorNodeConfig& config = {}); | |||
//! overrides the base hook that initializes the output dtype | |||
void init_output_dtype() override final; | |||
}; | |||
} // opr | |||
} // mgb | |||
@@ -171,12 +171,6 @@ uint64_t eval_conv_computation(const TensorShape& src_shape, | |||
cpos = 1; | |||
spatial_start = 2; | |||
break; | |||
case Param::Format::NCHW_WINOGRAD: | |||
case Param::Format::NCHW44_WINOGRAD: | |||
case Param::Format::NCHW88_WINOGRAD: | |||
cpos = 1; | |||
spatial_start = 0; | |||
break; | |||
case Param::Format::NHWC: | |||
cpos = 3; | |||
spatial_start = 1; | |||
@@ -203,29 +197,9 @@ uint64_t eval_conv_computation(const TensorShape& src_shape, | |||
uint64_t fh = static_cast<uint64_t>(filter_shape[spatial_start]); | |||
uint64_t fw = static_cast<uint64_t>(filter_shape[spatial_start + 1]); | |||
if (param.format == Param::Format::NCHW_WINOGRAD || | |||
param.format == Param::Format::NCHW44_WINOGRAD || | |||
param.format == Param::Format::NCHW88_WINOGRAD) { | |||
mgb_assert(opr->same_type<opr::ConvBias>(), | |||
"Only conv bias support WINOGRAD"); | |||
auto&& conv_bias_opr = opr->cast_final_safe<opr::ConvBias>(); | |||
uint32_t output_block_size = conv_bias_opr.param().output_block_size; | |||
mgb_assert(fh == fw, | |||
"NCHW_WINOGRAD, NCHW88_WINOGRAD need fw==fh, got fw: %u fh " | |||
"%u\n", | |||
static_cast<uint32_t>(fh), static_cast<uint32_t>(fw)); | |||
fh = fh + 1 - output_block_size; | |||
fw = fw + 1 - output_block_size; | |||
} | |||
// mul and add are counted as 2 operations | |||
if(param.format == Param::Format::NCHW88_WINOGRAD){ | |||
return dst_shape.total_nr_elems() * fh * fw * | |||
static_cast<uint64_t>(src_shape[cpos] * 8) / group * 2; | |||
} | |||
if (param.format == Param::Format::NCHW44_WINOGRAD) { | |||
return dst_shape.total_nr_elems() * fh * fw * | |||
static_cast<uint64_t>(src_shape[cpos] * 4) / group * 2; | |||
} | |||
return dst_shape.total_nr_elems() * fh * fw * | |||
static_cast<uint64_t>(src_shape[cpos]) / group * 2; | |||
} | |||
@@ -28,6 +28,7 @@ table Blob { | |||
} | |||
table Reserved0 {} | |||
table DeprecatedParam {} | |||
union OperatorParam { | |||
param.Empty = 1, | |||
@@ -50,7 +51,8 @@ union OperatorParam { | |||
param.ElemwiseMultiType = 18, | |||
param.PowC = 19, | |||
param.MatrixMul = 20, | |||
param.Winograd = 21, | |||
//Reserved for param.Winograd = 21, | |||
DeprecatedParam = 21, | |||
param.SVD = 22, | |||
param.Reduce = 23, | |||
param.Cumsum = 24, | |||