@@ -351,6 +351,12 @@ public: | |||||
const TensorLayout& bias, const TensorLayout& z, | const TensorLayout& bias, const TensorLayout& z, | ||||
const TensorLayout& dst) = 0; | const TensorLayout& dst) = 0; | ||||
static void deduce_winograd_origin_layout_and_param( | |||||
const Param::Format format, const size_t output_block_size, | |||||
const TensorLayout& src_layout, | |||||
const TensorLayout& winograd_filter_layout, | |||||
TensorLayout& origin_layout, Param& origin_param); | |||||
enum class BiasMode : uint32_t { | enum class BiasMode : uint32_t { | ||||
NO_BIAS = 0, //!< no bias | NO_BIAS = 0, //!< no bias | ||||
BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1] | BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1] | ||||
@@ -285,6 +285,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||||
bool is_matmul_usable = false; | bool is_matmul_usable = false; | ||||
using Strategy = winograd::winograd_2x3_4x4_s8_f32_nchw44; | using Strategy = winograd::winograd_2x3_4x4_s8_f32_nchw44; | ||||
using PackMode = fallback::MatrixMulImpl::AlgoBase::PackMode; | |||||
Strategy strategy(param.src_type, param.filter_type, param.dst_type); | Strategy strategy(param.src_type, param.filter_type, param.dst_type); | ||||
is_matmul_usable = m_matmul_algo->usable( | is_matmul_usable = m_matmul_algo->usable( | ||||
megdnn::winograd::ConvBias<Strategy, | megdnn::winograd::ConvBias<Strategy, | ||||
@@ -293,6 +294,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||||
param.osz[1], param.filter_meta.ocpg) | param.osz[1], param.filter_meta.ocpg) | ||||
.get_matmul_kern_param(param)); | .get_matmul_kern_param(param)); | ||||
return is_matmul_usable && | return is_matmul_usable && | ||||
m_matmul_algo->packmode() == PackMode::NO_PACK && | |||||
((opr->param().format == param::ConvBias::Format::NCHW44 && | ((opr->param().format == param::ConvBias::Format::NCHW44 && | ||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | param.filter_type.enumv() == DTypeEnum::QuantizedS8) || | ||||
((opr->param().format == | ((opr->param().format == | ||||
@@ -308,8 +310,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( | |||||
(param.filter_meta.dilation[0] == | (param.filter_meta.dilation[0] == | ||||
param.filter_meta.dilation[1] && | param.filter_meta.dilation[1] && | ||||
param.filter_meta.dilation[0] == 1) && | param.filter_meta.dilation[0] == 1) && | ||||
(param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 || | |||||
param.compute_mode == param::ConvBias::ComputeMode::DEFAULT) && | |||||
param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 && | |||||
param.src_type.enumv() == DTypeEnum::QuantizedS8 && | param.src_type.enumv() == DTypeEnum::QuantizedS8 && | ||||
param.bias_type.enumv() == DTypeEnum::QuantizedS32 && | param.bias_type.enumv() == DTypeEnum::QuantizedS32 && | ||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8; | param.dst_type.enumv() == DTypeEnum::QuantizedS8; | ||||
@@ -164,6 +164,105 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec( | |||||
} | } | ||||
return ret; | return ret; | ||||
} | } | ||||
/*! | |||||
* \brief deduce the origin filter layout and param after winograd transformed | |||||
*/ | |||||
void ConvBiasForward::deduce_winograd_origin_layout_and_param( | |||||
const Param::Format format, const size_t output_block_size, | |||||
const TensorLayout& src_layout, | |||||
const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout, | |||||
Param& origin_param) { | |||||
if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD || | |||||
format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD || | |||||
format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||||
//! change NCHWxx_WINOGRAD to NCHWxx | |||||
size_t OC = 0; | |||||
size_t IC = 0; | |||||
size_t GROUP = 1; | |||||
size_t FH = winograd_filter_layout[1] - output_block_size + 1; | |||||
//! {alpha, alpha, IC, OC} | |||||
if (winograd_filter_layout.ndim == 4) { | |||||
OC = winograd_filter_layout[3]; | |||||
IC = winograd_filter_layout[2]; | |||||
} | |||||
//! {group, alpha, alpha, IC, OC} | |||||
else if (winograd_filter_layout.ndim == 5) { | |||||
OC = winograd_filter_layout[4]; | |||||
IC = winograd_filter_layout[3]; | |||||
GROUP = winograd_filter_layout[0]; | |||||
} | |||||
//! {alpha, alpha, OC/f, IC/f, f, f} | |||||
else if (winograd_filter_layout.ndim == 6) { | |||||
OC = winograd_filter_layout[2] * winograd_filter_layout[5]; | |||||
IC = winograd_filter_layout[3] * winograd_filter_layout[4]; | |||||
} | |||||
//! {group, alpha, alpha, OC/f, IC/f, f, f} | |||||
else if (winograd_filter_layout.ndim == 7) { | |||||
OC = winograd_filter_layout[3] * winograd_filter_layout[6]; | |||||
IC = winograd_filter_layout[4] * winograd_filter_layout[5]; | |||||
GROUP = winograd_filter_layout[0]; | |||||
} | |||||
auto origin_data_type = winograd_filter_layout.dtype; | |||||
if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||||
if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) { | |||||
float scale = | |||||
origin_data_type.param<dtype::QuantizedS16>().scale; | |||||
origin_data_type = megdnn::dtype::QuantizedS8(scale); | |||||
} else { | |||||
//! In order to braing the sacle of filter, the transformed | |||||
//! qint8 winograd filter computing with float dtype is Qint32 | |||||
megdnn_assert(origin_data_type.enumv() == | |||||
DTypeEnum::QuantizedS32); | |||||
float scale = | |||||
origin_data_type.param<dtype::QuantizedS32>().scale; | |||||
origin_data_type = megdnn::dtype::QuantizedS8(scale); | |||||
} | |||||
} | |||||
if (GROUP == 1) { | |||||
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||||
origin_layout = | |||||
TensorLayout({OC, IC, FH, FH}, origin_data_type); | |||||
} else if (format == | |||||
megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||||
origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4}, | |||||
origin_data_type); | |||||
} else { | |||||
megdnn_assert(format == | |||||
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||||
origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8}, | |||||
origin_data_type); | |||||
} | |||||
} else { | |||||
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||||
origin_layout = | |||||
TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type); | |||||
} else if (format == | |||||
megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||||
origin_layout = | |||||
TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4}, | |||||
origin_data_type); | |||||
} else { | |||||
megdnn_assert(format == | |||||
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||||
origin_layout = | |||||
TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8}, | |||||
origin_data_type); | |||||
} | |||||
} | |||||
origin_param.output_block_size = 0; | |||||
if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) { | |||||
origin_param.format = megdnn::param::ConvBias::Format::NCHW; | |||||
} else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) { | |||||
origin_param.format = megdnn::param::ConvBias::Format::NCHW44; | |||||
} else { | |||||
megdnn_assert(format == | |||||
megdnn::param::ConvBias::Format::NCHW88_WINOGRAD); | |||||
origin_param.format = megdnn::param::ConvBias::Format::NCHW88; | |||||
} | |||||
} | |||||
} | |||||
template <typename T> | template <typename T> | ||||
struct NCHWParamTrait; | struct NCHWParamTrait; | ||||
@@ -103,18 +103,17 @@ void WinogradTransformReplacePass::apply(OptState& opt) const { | |||||
winograd_preprocess_param.output_block_size = | winograd_preprocess_param.output_block_size = | ||||
winograd_param.output_block_size; | winograd_param.output_block_size; | ||||
size_t pack_c_size = 1; | |||||
if (new_inp[0]->shape().ndim == 5) { | |||||
pack_c_size = new_inp[0]->layout().shape[4]; | |||||
} | |||||
auto conv_bias_param = conv_bias_opr.param(); | |||||
//! If input dtype is Qint8 and matmul format is MK4, The winograd | |||||
//! compute type is float. | |||||
if (conv_bias_opr.input(0)->dtype().enumv() == | if (conv_bias_opr.input(0)->dtype().enumv() == | ||||
DTypeEnum::QuantizedS8 && | DTypeEnum::QuantizedS8 && | ||||
pack_c_size == 4 && | |||||
winograd_preprocess_param.format == | winograd_preprocess_param.format == | ||||
megdnn::param::MatrixMul::Format::MK4) { | megdnn::param::MatrixMul::Format::MK4) { | ||||
winograd_preprocess_param.compute_mode = | winograd_preprocess_param.compute_mode = | ||||
megdnn::param::ConvBias::ComputeMode::FLOAT32; | megdnn::param::ConvBias::ComputeMode::FLOAT32; | ||||
conv_bias_param.compute_mode = | |||||
megdnn::param::ConvBias::ComputeMode::FLOAT32; | |||||
} | } | ||||
auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make( | auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make( | ||||
@@ -124,7 +123,6 @@ void WinogradTransformReplacePass::apply(OptState& opt) const { | |||||
inputs.size()); | inputs.size()); | ||||
SymbolVar new_conv_bias_opr; | SymbolVar new_conv_bias_opr; | ||||
auto conv_bias_param = conv_bias_opr.param(); | |||||
if (new_inp[0]->shape().ndim == 4) { | if (new_inp[0]->shape().ndim == 4) { | ||||
conv_bias_param.format = | conv_bias_param.format = | ||||
megdnn::ConvBias::Param::Format::NCHW_WINOGRAD; | megdnn::ConvBias::Param::Format::NCHW_WINOGRAD; | ||||
@@ -562,6 +562,10 @@ class AlgoChooser { | |||||
} | } | ||||
} | } | ||||
static void get_origin_param_and_layouts(const ExeContext&, | |||||
ConvTensorLayouts&, | |||||
typename Opr::Param&) {} | |||||
//! get all profile result, either by retrieving cache or profiling | //! get all profile result, either by retrieving cache or profiling | ||||
static AlgoChooserProfileCache::Result get_profile_result( | static AlgoChooserProfileCache::Result get_profile_result( | ||||
ExeContext& ctx, bool enable_update); | ExeContext& ctx, bool enable_update); | ||||
@@ -600,10 +604,14 @@ template <typename Opr> | |||||
AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | ||||
ExeContext& ctx, bool enable_update) { | ExeContext& ctx, bool enable_update) { | ||||
AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache(); | AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache(); | ||||
auto param_blob = ctx.mgb_opr()->param_blob(); | |||||
AlgoChooserProfileCache::Key cache_key{ctx.layouts().data(), | |||||
ctx.layouts().size(), | |||||
param_blob.first, param_blob.second}; | |||||
ConvTensorLayouts origin_layouts = ctx.layouts(); | |||||
typename Opr::Param origin_param = ctx.mgb_opr()->param(); | |||||
get_origin_param_and_layouts(ctx, origin_layouts, origin_param); | |||||
AlgoChooserProfileCache::Key cache_key{origin_layouts.data(), | |||||
origin_layouts.size(), &origin_param, | |||||
sizeof(origin_param)}; | |||||
{ | { | ||||
auto&& rst = cache.get(cache_key); | auto&& rst = cache.get(cache_key); | ||||
if (rst.valid()) | if (rst.valid()) | ||||
@@ -658,6 +666,23 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result( | |||||
return prof_rst; | return prof_rst; | ||||
} | } | ||||
template <> | |||||
void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts( | |||||
const ExeContext& ctx, ConvTensorLayouts& layouts, | |||||
megdnn::ConvBias::Param& param) { | |||||
auto format = static_cast<megdnn::param::ConvBias::Format>( | |||||
ctx.megdnn_opr()->param().format); | |||||
size_t output_block_size = ctx.megdnn_opr()->param().output_block_size; | |||||
TensorLayout origin_layout; | |||||
megdnn::ConvBias::deduce_winograd_origin_layout_and_param( | |||||
format, output_block_size, ctx.layouts()[0], ctx.layouts()[1], | |||||
origin_layout, param); | |||||
for (size_t i = 0; i < ctx.layouts().size(); i++) { | |||||
layouts[i] = ctx.layouts()[i]; | |||||
} | |||||
layouts[1] = origin_layout; | |||||
} | |||||
template <typename Opr> | template <typename Opr> | ||||
typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile( | typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile( | ||||
ExeContext& ctx, bool require_reproducible, bool enable_update) { | ExeContext& ctx, bool require_reproducible, bool enable_update) { | ||||
@@ -724,6 +749,18 @@ void AlgoChooser<megdnn::ConvBias>::ExeContext:: | |||||
ConvBiasForward::get_matmul_format(winograd_param); | ConvBiasForward::get_matmul_format(winograd_param); | ||||
winograd_preprocess_opr->param().output_block_size = | winograd_preprocess_opr->param().output_block_size = | ||||
winograd_param.output_block_size; | winograd_param.output_block_size; | ||||
//! When filter input is qint8 and Matmul format is MK4, the winograd | |||||
//! compute type is float | |||||
if (m_layouts[1].dtype.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.opr_param.format == megdnn::ConvBias::Param::Format::NCHW44) { | |||||
if (winograd_preprocess_opr->param().format == | |||||
megdnn::param::MatrixMul::Format::MK4){ | |||||
winograd_preprocess_opr->param().compute_mode = | |||||
ConvBias::Param::ComputeMode::FLOAT32; | |||||
param.opr_param.compute_mode = | |||||
ConvBias::Param::ComputeMode::FLOAT32; | |||||
} | |||||
} | |||||
TensorLayout filter_transform_layout; | TensorLayout filter_transform_layout; | ||||
winograd_preprocess_opr->deduce_layout(m_layouts[1], | winograd_preprocess_opr->deduce_layout(m_layouts[1], | ||||
filter_transform_layout); | filter_transform_layout); | ||||