@@ -351,6 +351,12 @@ public:
            const TensorLayout& bias, const TensorLayout& z,
            const TensorLayout& dst) = 0;
    static void deduce_winograd_origin_layout_and_param(
            const Param::Format format, const size_t output_block_size,
            const TensorLayout& src_layout,
            const TensorLayout& winograd_filter_layout,
            TensorLayout& origin_layout, Param& origin_param);
    enum class BiasMode : uint32_t {
        NO_BIAS = 0,             //!< no bias
        BROADCAST_CHANNEL_BIAS,  //!< broadcast channel bias, [1, c, 1, 1]
@@ -285,6 +285,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
    bool is_matmul_usable = false;
    using Strategy = winograd::winograd_2x3_4x4_s8_f32_nchw44;
    using PackMode = fallback::MatrixMulImpl::AlgoBase::PackMode;
    Strategy strategy(param.src_type, param.filter_type, param.dst_type);
    is_matmul_usable = m_matmul_algo->usable(
            megdnn::winograd::ConvBias<Strategy,
@@ -293,6 +294,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
                            param.osz[1], param.filter_meta.ocpg)
                    .get_matmul_kern_param(param));
    return is_matmul_usable &&
           m_matmul_algo->packmode() == PackMode::NO_PACK &&
           ((opr->param().format == param::ConvBias::Format::NCHW44 &&
             param.filter_type.enumv() == DTypeEnum::QuantizedS8) ||
            ((opr->param().format ==
@@ -308,8 +310,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
           (param.filter_meta.dilation[0] ==
                    param.filter_meta.dilation[1] &&
            param.filter_meta.dilation[0] == 1) &&
           (param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 ||
            param.compute_mode == param::ConvBias::ComputeMode::DEFAULT) &&
           param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 &&
           param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
           param.bias_type.enumv() == DTypeEnum::QuantizedS32 &&
           param.dst_type.enumv() == DTypeEnum::QuantizedS8;
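The notable tightening above is that ComputeMode::DEFAULT is no longer accepted: this algo now requires the compute mode to be set to FLOAT32 explicitly. A minimal sketch (not from the patch; scales and the function name are arbitrary illustrations) of a parameter/dtype combination that still satisfies the check:

    // Illustrative only: a qint8 NCHW44 conv_bias configuration accepted by the
    // S8CF32 winograd F(2, 3) algo after this change.
    static void example_s8cf32_winograd_f23_nchw44_setup() {
        using namespace megdnn;
        param::ConvBias conv_param;
        conv_param.format = param::ConvBias::Format::NCHW44;
        // DEFAULT is rejected now; FLOAT32 must be requested explicitly.
        conv_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        conv_param.stride_h = conv_param.stride_w = 1;
        conv_param.dilate_h = conv_param.dilate_w = 1;
        // dtypes checked by usable(): qint8 src/filter, qint32 bias, qint8 dst
        DType src_dt = dtype::QuantizedS8(0.5f);
        DType filter_dt = dtype::QuantizedS8(0.3f);
        DType bias_dt = dtype::QuantizedS32(0.5f * 0.3f);
        DType dst_dt = dtype::QuantizedS8(0.7f);
    }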
@@ -164,6 +164,105 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
    }
    return ret;
}
/*!
 * \brief deduce the origin filter layout and param from the winograd-transformed
 * filter layout
 */
void ConvBiasForward::deduce_winograd_origin_layout_and_param(
        const Param::Format format, const size_t output_block_size,
        const TensorLayout& src_layout,
        const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout,
        Param& origin_param) {
    if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD ||
        format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD ||
        format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
        //! change NCHWxx_WINOGRAD to NCHWxx
        size_t OC = 0;
        size_t IC = 0;
        size_t GROUP = 1;
        size_t FH = winograd_filter_layout[1] - output_block_size + 1;
        //! {alpha, alpha, IC, OC}
        if (winograd_filter_layout.ndim == 4) {
            OC = winograd_filter_layout[3];
            IC = winograd_filter_layout[2];
        }
        //! {group, alpha, alpha, IC, OC}
        else if (winograd_filter_layout.ndim == 5) {
            OC = winograd_filter_layout[4];
            IC = winograd_filter_layout[3];
            GROUP = winograd_filter_layout[0];
        }
        //! {alpha, alpha, OC/f, IC/f, f, f}
        else if (winograd_filter_layout.ndim == 6) {
            OC = winograd_filter_layout[2] * winograd_filter_layout[5];
            IC = winograd_filter_layout[3] * winograd_filter_layout[4];
        }
        //! {group, alpha, alpha, OC/f, IC/f, f, f}
        else if (winograd_filter_layout.ndim == 7) {
            OC = winograd_filter_layout[3] * winograd_filter_layout[6];
            IC = winograd_filter_layout[4] * winograd_filter_layout[5];
            GROUP = winograd_filter_layout[0];
        }
        auto origin_data_type = winograd_filter_layout.dtype;
        if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
            if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) {
                float scale =
                        origin_data_type.param<dtype::QuantizedS16>().scale;
                origin_data_type = megdnn::dtype::QuantizedS8(scale);
            } else {
                //! To carry the scale of the filter, the qint8 winograd
                //! filter transformed for float computation is stored as
                //! QuantizedS32
                megdnn_assert(origin_data_type.enumv() ==
                              DTypeEnum::QuantizedS32);
                float scale =
                        origin_data_type.param<dtype::QuantizedS32>().scale;
                origin_data_type = megdnn::dtype::QuantizedS8(scale);
            }
        }
        if (GROUP == 1) {
            if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
                origin_layout =
                        TensorLayout({OC, IC, FH, FH}, origin_data_type);
            } else if (format ==
                       megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
                origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4},
                                             origin_data_type);
            } else {
                megdnn_assert(format ==
                              megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
                origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8},
                                             origin_data_type);
            }
        } else {
            if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
                origin_layout =
                        TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type);
            } else if (format ==
                       megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
                origin_layout =
                        TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4},
                                     origin_data_type);
            } else {
                megdnn_assert(format ==
                              megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
                origin_layout =
                        TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8},
                                     origin_data_type);
            }
        }
        origin_param.output_block_size = 0;
        if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
            origin_param.format = megdnn::param::ConvBias::Format::NCHW;
        } else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
            origin_param.format = megdnn::param::ConvBias::Format::NCHW44;
        } else {
            megdnn_assert(format ==
                          megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
            origin_param.format = megdnn::param::ConvBias::Format::NCHW88;
        }
    }
}
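As a worked illustration of the mapping above (a sketch, not part of the patch; it assumes megdnn/oprs.h is included, and IC = 16, OC = 32 are arbitrary): for an NCHW_WINOGRAD filter produced with output_block_size = 2 from a 3x3 convolution, alpha = 2 + 3 - 1 = 4, so the transformed filter is {4, 4, IC, OC} and the deduced origin layout is the plain {OC, IC, 3, 3} filter with the param reset to NCHW.

    static void example_deduce_origin_from_winograd_filter() {
        using namespace megdnn;
        // {alpha, alpha, IC, OC} = {4, 4, 16, 32} for F(2x2, 3x3), float32 compute
        TensorLayout winograd_filter({4, 4, 16, 32}, dtype::Float32());
        TensorLayout src({1, 16, 56, 56}, dtype::Float32());  // NCHW src
        TensorLayout origin_layout;
        param::ConvBias origin_param;
        ConvBiasForward::deduce_winograd_origin_layout_and_param(
                param::ConvBias::Format::NCHW_WINOGRAD, /* output_block_size */ 2,
                src, winograd_filter, origin_layout, origin_param);
        // origin_layout is now {OC, IC, FH, FH} = {32, 16, 3, 3},
        // origin_param.format is NCHW and origin_param.output_block_size is 0.
    }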
template <typename T>
struct NCHWParamTrait;
@@ -103,18 +103,17 @@ void WinogradTransformReplacePass::apply(OptState& opt) const {
        winograd_preprocess_param.output_block_size =
                winograd_param.output_block_size;
        size_t pack_c_size = 1;
        if (new_inp[0]->shape().ndim == 5) {
            pack_c_size = new_inp[0]->layout().shape[4];
        }
        auto conv_bias_param = conv_bias_opr.param();
        //! If the input dtype is QuantizedS8 and the matmul format is MK4,
        //! the winograd compute type is float32.
        if (conv_bias_opr.input(0)->dtype().enumv() ==
                    DTypeEnum::QuantizedS8 &&
            pack_c_size == 4 &&
            winograd_preprocess_param.format ==
                    megdnn::param::MatrixMul::Format::MK4) {
            winograd_preprocess_param.compute_mode =
                    megdnn::param::ConvBias::ComputeMode::FLOAT32;
            conv_bias_param.compute_mode =
                    megdnn::param::ConvBias::ComputeMode::FLOAT32;
        }
        auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make(
@@ -124,7 +123,6 @@ void WinogradTransformReplacePass::apply(OptState& opt) const {
                inputs.size());
        SymbolVar new_conv_bias_opr;
        auto conv_bias_param = conv_bias_opr.param();
        if (new_inp[0]->shape().ndim == 4) {
            conv_bias_param.format =
                    megdnn::ConvBias::Param::Format::NCHW_WINOGRAD;
@@ -562,6 +562,10 @@ class AlgoChooser {
        }
    }
    static void get_origin_param_and_layouts(const ExeContext&,
                                             ConvTensorLayouts&,
                                             typename Opr::Param&) {}
    //! get all profile result, either by retrieving cache or profiling
    static AlgoChooserProfileCache::Result get_profile_result(
            ExeContext& ctx, bool enable_update);
@@ -600,10 +604,14 @@ template <typename Opr>
AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(
        ExeContext& ctx, bool enable_update) {
    AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache();
    auto param_blob = ctx.mgb_opr()->param_blob();
    AlgoChooserProfileCache::Key cache_key{ctx.layouts().data(),
                                           ctx.layouts().size(),
                                           param_blob.first, param_blob.second};
    ConvTensorLayouts origin_layouts = ctx.layouts();
    typename Opr::Param origin_param = ctx.mgb_opr()->param();
    get_origin_param_and_layouts(ctx, origin_layouts, origin_param);
    AlgoChooserProfileCache::Key cache_key{origin_layouts.data(),
                                           origin_layouts.size(), &origin_param,
                                           sizeof(origin_param)};
    {
        auto&& rst = cache.get(cache_key);
        if (rst.valid())
@@ -658,6 +666,23 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(
    return prof_rst;
}
template <>
void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts(
        const ExeContext& ctx, ConvTensorLayouts& layouts,
        megdnn::ConvBias::Param& param) {
    auto format = static_cast<megdnn::param::ConvBias::Format>(
            ctx.megdnn_opr()->param().format);
    size_t output_block_size = ctx.megdnn_opr()->param().output_block_size;
    //! start from the current filter layout so that non-winograd formats,
    //! which deduce_winograd_origin_layout_and_param leaves untouched, keep a
    //! valid filter layout instead of a default-constructed one
    TensorLayout origin_layout = ctx.layouts()[1];
    megdnn::ConvBias::deduce_winograd_origin_layout_and_param(
            format, output_block_size, ctx.layouts()[0], ctx.layouts()[1],
            origin_layout, param);
    for (size_t i = 0; i < ctx.layouts().size(); i++) {
        layouts[i] = ctx.layouts()[i];
    }
    layouts[1] = origin_layout;
}
template <typename Opr>
typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile(
        ExeContext& ctx, bool require_reproducible, bool enable_update) {
@@ -724,6 +749,18 @@ void AlgoChooser<megdnn::ConvBias>::ExeContext::
            ConvBiasForward::get_matmul_format(winograd_param);
    winograd_preprocess_opr->param().output_block_size =
            winograd_param.output_block_size;
    //! When the filter dtype is QuantizedS8 and the matmul format is MK4,
    //! the winograd compute type is float32.
    if (m_layouts[1].dtype.enumv() == DTypeEnum::QuantizedS8 &&
        param.opr_param.format == megdnn::ConvBias::Param::Format::NCHW44) {
        if (winograd_preprocess_opr->param().format ==
            megdnn::param::MatrixMul::Format::MK4) {
            winograd_preprocess_opr->param().compute_mode =
                    ConvBias::Param::ComputeMode::FLOAT32;
            param.opr_param.compute_mode =
                    ConvBias::Param::ComputeMode::FLOAT32;
        }
    }
    TensorLayout filter_transform_layout;
    winograd_preprocess_opr->deduce_layout(m_layouts[1],
                                           filter_transform_layout);
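The block above must stay in sync with the compute-mode adjustment made in WinogradTransformReplacePass::apply earlier in this change, otherwise the algorithm profiled here could differ from the one the transformed graph executes. A hedged sketch of a shared predicate both call sites could use (the helper name is hypothetical, not part of the patch; the 4-channel packing check stays with each caller, as pack_c_size == 4 in the pass and Format::NCHW44 here):

    // qint8 data fed through an MK4 winograd matmul must be computed in float32
    static inline bool qint8_mk4_needs_float32_compute(
            megdnn::DTypeEnum src_dtype_enum,
            megdnn::param::MatrixMul::Format matmul_format) {
        return src_dtype_enum == megdnn::DTypeEnum::QuantizedS8 &&
               matmul_format == megdnn::param::MatrixMul::Format::MK4;
    }

Each site would then set both the preprocess param's and the conv_bias param's compute_mode to FLOAT32 when the predicate holds.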