@@ -0,0 +1,247 @@ | |||
/** | |||
* \file dnn/src/x86/conv_bias/int8/algo_usable_preferred.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/x86/conv_bias/int8/algo_usable_preferred.h" | |||
#include "src/x86/utils.h" | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
#include <mkldnn.hpp> | |||
#endif | |||
#include <cstring> | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
using namespace dnnl; | |||
#endif | |||
using namespace megdnn; | |||
using namespace x86; | |||
namespace megdnn { | |||
namespace x86 { | |||
bool chanwise_avx2_stride1_qint8_usable( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
auto FH = fm.spatial[0]; | |||
bool aviliable = | |||
(param.bias_mode != BiasMode::BIAS) && | |||
((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) && | |||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||
fm.dilation[1] == 1 && (FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||
fm.stride[0] == 1 && fm.stride[1] == 1 && (fm.icpg == 1) && | |||
(fm.ocpg == 1) && is_supported(SIMDType::AVX2); | |||
return aviliable; | |||
} | |||
bool chanwise_avx2_stride1_qint8_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
MEGDNN_MARK_USED_VAR(param); | |||
return true; | |||
} | |||
bool chanwise_avx2_stride1_qint8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
return chanwise_avx2_stride1_qint8_usable(param) && | |||
chanwise_avx2_stride1_qint8_preferred(param); | |||
} | |||
bool chanwise_avx2_stride2_qint8_usable( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
auto FH = fm.spatial[0]; | |||
bool aviliable = | |||
(param.bias_mode != BiasMode::BIAS) && | |||
((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) && | |||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||
fm.dilation[1] == 1 && (FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||
fm.stride[0] == 2 && fm.stride[1] == 2 && (fm.icpg == 1) && | |||
(fm.ocpg == 1) && is_supported(SIMDType::AVX2); | |||
return aviliable; | |||
} | |||
bool chanwise_avx2_stride2_qint8_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
MEGDNN_MARK_USED_VAR(param); | |||
return true; | |||
} | |||
bool chanwise_avx2_stride2_qint8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
return chanwise_avx2_stride2_qint8_usable(param) && | |||
chanwise_avx2_stride2_qint8_preferred(param); | |||
} | |||
bool direct_avx2_stride1_int8_usable( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
auto FH = fm.spatial[0]; | |||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||
param.bias_mode == BiasMode::NO_BIAS && | |||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||
fm.dilation[1] == 1 && | |||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||
fm.stride[0] == 1 && fm.stride[1] == 1 && | |||
is_supported(SIMDType::AVX2); | |||
return aviliable; | |||
} | |||
bool direct_avx2_stride1_int8_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
auto IC = fm.icpg; | |||
auto OC = fm.ocpg; | |||
auto is_preferred = true; | |||
if (IC > 128 && OC > 128) | |||
is_preferred = false; | |||
return is_preferred; | |||
} | |||
bool direct_avx2_stride1_int8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
return direct_avx2_stride1_int8_usable(param) && | |||
direct_avx2_stride1_int8_preferred(param); | |||
} | |||
bool direct_avx2_stride2_int8_usable( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
auto FH = fm.spatial[0]; | |||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||
param.bias_mode == BiasMode::NO_BIAS && | |||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||
fm.dilation[1] == 1 && | |||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||
fm.stride[0] == 2 && fm.stride[1] == 2 && | |||
is_supported(SIMDType::AVX2); | |||
return aviliable; | |||
} | |||
bool direct_avx2_stride2_int8_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
auto IC = fm.icpg; | |||
auto OC = fm.ocpg; | |||
auto is_preferred = false; | |||
if (IC <= 31 && OC <= 31) | |||
is_preferred = true; | |||
return is_preferred; | |||
} | |||
bool direct_avx2_stride2_int8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
return direct_avx2_stride2_int8_usable(param) && | |||
direct_avx2_stride2_int8_preferred(param); | |||
} | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
bool mkldnn_qint8_usable(const ConvBiasImpl::NCBKernSizeParam& param) {
    //! Whether the mkl-dnn int8 direct conv can run on \p param.
    auto&& fm = param.filter_meta;
    const bool src_ok = param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
                        param.src_type.enumv() == DTypeEnum::Int8;
    const bool dst_ok = param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
                        param.dst_type.enumv() == DTypeEnum::Int32;
    //! NCHW, no dilation, cross-correlation only, no bias / nonlinearity
    return src_ok && dst_ok && fm.format == param::ConvBias::Format::NCHW &&
           fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
           fm.dilation[1] == 1 && !fm.should_flip &&
           param.bias_mode == BiasMode::NO_BIAS &&
           param.nonlineMode == NonlineMode::IDENTITY;
}

bool mkldnn_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam& param) {
    //! mkl-dnn int8 kernels only pay off on VNNI-capable CPUs
    MEGDNN_MARK_USED_VAR(param);
    return is_supported(SIMDType::VNNI);
}

bool mkldnn_qint8_usable_preferred(
        const ConvBiasImpl::NCBKernSizeParam& param) {
    return mkldnn_qint8_usable(param) && mkldnn_qint8_preferred(param);
}
bool mkldnn_matmul_qint8_usable(const ConvBiasImpl::NCBKernSizeParam& param) {
    //! Whether the mkl-dnn matmul-based int8 conv can run on \p param.
    auto&& fm = param.filter_meta;
    const bool src_ok = param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
                        param.src_type.enumv() == DTypeEnum::Int8;
    const bool dst_ok = param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
                        param.dst_type.enumv() == DTypeEnum::Int32;
    return src_ok && dst_ok && fm.format == param::ConvBias::Format::NCHW &&
           fm.spatial_ndim == 2 && fm.group == 1 && fm.dilation[0] == 1 &&
           fm.dilation[1] == 1 && param.bias_mode == BiasMode::NO_BIAS &&
           param.nonlineMode == NonlineMode::IDENTITY &&
           //! The matmul opr is only used in single thread
           //! TODO:support the no pack matmul algo in fallback im2col + matmul
           param.nr_threads == 1_z;
}

bool mkldnn_matmul_qint8_preferred(
        const ConvBiasImpl::NCBKernSizeParam& param) {
    auto&& fm = param.filter_meta;
    megdnn_assert_internal(fm.group == 1 && fm.dilation[0] == 1 &&
                           fm.dilation[1] == 1);
    //! single channel conv should never use matrix mul
    const bool multi_channel = fm.ocpg != 1 && fm.icpg != 1;
    return multi_channel && is_supported(SIMDType::VNNI);
}

bool mkldnn_matmul_qint8_usable_preferred(
        const ConvBiasImpl::NCBKernSizeParam& param) {
    return mkldnn_matmul_qint8_usable(param) &&
           mkldnn_matmul_qint8_preferred(param);
}
#endif | |||
} // namespace x86 | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,56 @@ | |||
/** | |||
* \file dnn/src/x86/conv_bias/int8/algo_usable_preferred.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#pragma once | |||
#include "src/common/utils.h" | |||
#include "src/x86/conv_bias/opr_impl.h" | |||
namespace megdnn { | |||
namespace x86 { | |||
bool chanwise_avx2_stride1_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool chanwise_avx2_stride1_qint8_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
bool chanwise_avx2_stride1_qint8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
bool chanwise_avx2_stride2_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool chanwise_avx2_stride2_qint8_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
bool chanwise_avx2_stride2_qint8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
bool direct_avx2_stride1_int8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool direct_avx2_stride1_int8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool direct_avx2_stride1_int8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
bool direct_avx2_stride2_int8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool direct_avx2_stride2_int8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool direct_avx2_stride2_int8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
bool mkldnn_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool mkldnn_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool mkldnn_qint8_usable_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool mkldnn_matmul_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool mkldnn_matmul_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||
bool mkldnn_matmul_qint8_usable_preferred( | |||
const ConvBiasImpl::NCBKernSizeParam&); | |||
#endif | |||
} // namespace x86 | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -14,6 +14,7 @@ | |||
#include "src/common/opr_delegate.h" | |||
#include "src/common/utils.h" | |||
#include "src/fallback/convolution/img2col_helper.h" | |||
#include "src/x86/conv_bias/int8/algo_usable_preferred.h" | |||
#include "src/x86/conv_bias/int8/avx2_chanwise_stride1.h" | |||
#include "src/x86/conv_bias/int8/avx2_chanwise_stride2.h" | |||
#include "src/x86/conv_bias/int8/avx2_direct_conv_stride1.h" | |||
@@ -37,25 +38,7 @@ using namespace x86; | |||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable(
        FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    //! Delegate to the shared predicate in algo_usable_preferred.h so the
    //! usability check and the selection heuristics cannot drift apart.
    //! (The stripped diff left the old inline body followed by an
    //! unreachable second return; only the delegating form is intended.)
    return chanwise_avx2_stride1_qint8_usable(param);
}
WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle( | |||
@@ -94,28 +77,15 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_kimpls( | |||
return avx2_chanwise_stride1::get_kimpls(param, bundle); | |||
} | |||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::is_preferred(
        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
    //! shared heuristic, kept next to the matching usable() predicate
    return chanwise_avx2_stride1_qint8_preferred(param);
}
bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable(
        FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    //! Delegate to the shared predicate in algo_usable_preferred.h so the
    //! usability check and the selection heuristics cannot drift apart.
    //! (The stripped diff left the old inline body followed by an
    //! unreachable second return; only the delegating form is intended.)
    return chanwise_avx2_stride2_qint8_usable(param);
}
WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle( | |||
@@ -154,28 +124,15 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_kimpls( | |||
return avx2_chanwise_stride2::get_kimpls(param, bundle); | |||
} | |||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::is_preferred(
        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
    //! shared heuristic, kept next to the matching usable() predicate
    return chanwise_avx2_stride2_qint8_preferred(param);
}
bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable(
        FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
        AlgoSelectionStrategy /*algo_selection_strategy*/) const {
    //! Delegate to the shared predicate in algo_usable_preferred.h so the
    //! usability check and the selection heuristics cannot drift apart.
    //! (The stripped diff left the old inline body followed by an
    //! unreachable second return; only the delegating form is intended.)
    return direct_avx2_stride1_int8_usable(param);
}
WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle( | |||
@@ -224,19 +181,75 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls( | |||
return direct_conv_avx2_stride1::get_kimpls(param, bundle); | |||
} | |||
bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::is_preferred(
        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
    //! shared heuristic, kept next to the matching usable() predicate
    return direct_avx2_stride1_int8_preferred(param);
}
/* ===================== avx2 int8 stride 2 ===================== */ | |||
bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( | |||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||
AlgoSelectionStrategy) const { | |||
return direct_avx2_stride2_int8_usable(param); | |||
} | |||
WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle( | |||
const NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
size_t N = param.n; | |||
size_t IC = fm.icpg; | |||
size_t OC = fm.ocpg; | |||
size_t IH = param.isz[0]; | |||
size_t IW = param.isz[1]; | |||
size_t OH = param.osz[0]; | |||
size_t OW = param.osz[1]; | |||
size_t FH = fm.spatial[0]; | |||
size_t FW = fm.spatial[1]; | |||
size_t GROUP = fm.group; | |||
size_t IC_STEP = 2, OC_STEP = 4; | |||
size_t pad_h = fm.padding[0]; | |||
size_t pad_w = fm.padding[1]; | |||
size_t src_size = 0, filter_size = 0; | |||
//! pack filter, pack src | |||
filter_size = GROUP * round_up(OC, OC_STEP) * round_up(IC, IC_STEP) * FH * | |||
FW * sizeof(int16_t); | |||
//! avx256 iw max offset 32, caused by w_remain < 16 | |||
src_size = N * GROUP * div_ceil(IC, IC_STEP) * (IH + 2 * pad_h) * | |||
(IW + 2 * pad_w) * 2 * sizeof(int8_t) + | |||
32; | |||
bool need_post_process = param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||
if (need_post_process) { | |||
size_t dst_tmp = N * GROUP * OC * OW * OH * sizeof(int32_t); | |||
return WorkspaceBundle(nullptr, {src_size, filter_size, dst_tmp}); | |||
} else { | |||
return WorkspaceBundle(nullptr, {src_size, filter_size}); | |||
} | |||
} | |||
size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( | |||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||
return get_bundle(param).total_size_in_bytes(); | |||
} | |||
SmallVector<fallback::ConvBiasImpl::NCBKern> | |||
ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls( | |||
const NCBKernSizeParam& param) const { | |||
auto bundle = get_bundle(param); | |||
return direct_conv_avx2_stride2::get_kimpls(param, bundle); | |||
} | |||
bool ConvBiasImpl::AlgoAVX2DirectConvStride2::is_preferred( | |||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||
return direct_avx2_stride2_int8_preferred(param); | |||
} | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*,
                                           const NCBKernSizeParam& param,
                                           AlgoSelectionStrategy) const {
    //! Delegate to the shared predicate in algo_usable_preferred.h so the
    //! usability check and the selection heuristics cannot drift apart.
    //! (The stripped diff left the old inline body followed by an
    //! unreachable second return; only the delegating form is intended.)
    return mkldnn_qint8_usable(param);
}
WorkspaceBundle ConvBiasImpl::AlgoMkldnnQint8::get_bundle( | |||
@@ -412,39 +425,25 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32( | |||
stream_mkldnn.wait(); | |||
} | |||
} | |||
#undef REORDER_MEMORY | |||
#endif | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
bool ConvBiasImpl::AlgoMkldnnQint8::is_preferred(
        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
    //! shared heuristic, kept next to the matching usable() predicate
    return mkldnn_qint8_preferred(param);
}
/* ===================== mkldnn qint8 matmul algo ===================== */ | |||
bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*,
                                                 const NCBKernSizeParam& param,
                                                 AlgoSelectionStrategy) const {
    //! Delegate to the shared predicate in algo_usable_preferred.h so the
    //! usability check and the selection heuristics cannot drift apart.
    //! (The stripped diff left the old inline body followed by an
    //! unreachable second return; only the delegating form is intended.)
    return mkldnn_matmul_qint8_usable(param);
}
bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred(
        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
    //! Delegate to the shared heuristic (which keeps the group/dilation
    //! assertion and the "single channel conv should never use matmul" rule).
    //! (The stripped diff left the old inline body followed by an
    //! unreachable second return; only the delegating form is intended.)
    return mkldnn_matmul_qint8_preferred(param);
}
WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle( | |||
const NCBKernSizeParam& param) { | |||
UNPACK_CONV_F32_NCB_KERN_SIZES(param); | |||
@@ -473,6 +472,7 @@ WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle( | |||
} | |||
return {nullptr, {part0, part1, part2}}; | |||
} | |||
MatrixMul* ConvBiasImpl::AlgoMkldnnMatmulQint8::get_matmul_opr() { | |||
static CpuOprDelegationStorage<> storage; | |||
return storage.get<MatrixMul>(); | |||
@@ -553,76 +553,5 @@ void ConvBiasImpl::AlgoMkldnnMatmulQint8::kern_mkldnn_matmul_s8x8x32( | |||
} | |||
#endif | |||
/* ===================== avx2 int8 stride 2 ===================== */ | |||
bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( | |||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||
AlgoSelectionStrategy) const { | |||
auto&& fm = param.filter_meta; | |||
auto FH = fm.spatial[0]; | |||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||
param.bias_mode == BiasMode::NO_BIAS && | |||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||
fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | |||
fm.dilation[0] == 1 && fm.dilation[1] == 1 && | |||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||
fm.stride[0] == 2 && fm.stride[1] == 2 && | |||
is_supported(SIMDType::AVX2); | |||
return aviliable; | |||
} | |||
WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle( | |||
const NCBKernSizeParam& param) { | |||
auto&& fm = param.filter_meta; | |||
size_t N = param.n; | |||
size_t IC = fm.icpg; | |||
size_t OC = fm.ocpg; | |||
size_t IH = param.isz[0]; | |||
size_t IW = param.isz[1]; | |||
size_t OH = param.osz[0]; | |||
size_t OW = param.osz[1]; | |||
size_t FH = fm.spatial[0]; | |||
size_t FW = fm.spatial[1]; | |||
size_t GROUP = fm.group; | |||
size_t IC_STEP = 2, OC_STEP = 4; | |||
size_t pad_h = fm.padding[0]; | |||
size_t pad_w = fm.padding[1]; | |||
size_t src_size = 0, filter_size = 0; | |||
//! pack filter, pack src | |||
filter_size = GROUP * round_up(OC, OC_STEP) * round_up(IC, IC_STEP) * FH * | |||
FW * sizeof(int16_t); | |||
//! avx256 iw max offset 32, caused by w_remain < 16 | |||
src_size = N * GROUP * div_ceil(IC, IC_STEP) * (IH + 2 * pad_h) * | |||
(IW + 2 * pad_w) * 2 * sizeof(int8_t) + | |||
32; | |||
bool need_post_process = param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||
if (need_post_process) { | |||
size_t dst_tmp = N * GROUP * OC * OW * OH * sizeof(int32_t); | |||
return WorkspaceBundle(nullptr, {src_size, filter_size, dst_tmp}); | |||
} else { | |||
return WorkspaceBundle(nullptr, {src_size, filter_size}); | |||
} | |||
} | |||
size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( | |||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||
return get_bundle(param).total_size_in_bytes(); | |||
} | |||
SmallVector<fallback::ConvBiasImpl::NCBKern> | |||
ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls( | |||
const NCBKernSizeParam& param) const { | |||
auto bundle = get_bundle(param); | |||
return direct_conv_avx2_stride2::get_kimpls(param, bundle); | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -35,6 +35,8 @@ public: | |||
return get_kimpls(param); | |||
} | |||
void* type() const override; | |||
bool is_preferred(FallbackConvBiasImpl*, | |||
const NCBKernSizeParam& param) const override; | |||
}; | |||
/* ===================== avx2 stride2 chanwise algo ===================== */ | |||
@@ -57,6 +59,8 @@ public: | |||
return get_kimpls(param); | |||
} | |||
void* type() const override; | |||
bool is_preferred(FallbackConvBiasImpl*, | |||
const NCBKernSizeParam& param) const override; | |||
}; | |||
/* ===================== avx2 stride1 direct algo ===================== */ | |||
@@ -79,6 +83,32 @@ public: | |||
return get_kimpls(param); | |||
} | |||
void* type() const override; | |||
bool is_preferred(FallbackConvBiasImpl*, | |||
const NCBKernSizeParam& param) const override; | |||
}; | |||
/* ================== avx2 int8 direct conv stride2 algo ================== */ | |||
class ConvBiasImpl::AlgoAVX2DirectConvStride2 final : public AlgoBase {
    //! build the per-group kernel list for \p param
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
    //! workspace layout: packed src, packed filter[, int32 staging dst]
    static WorkspaceBundle get_bundle(const NCBKernSizeParam& param);

public:
    bool is_reproducible() const override { return true; }
    const char* name() const override {
        return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2";
    }
    bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;
    size_t get_workspace(FallbackConvBiasImpl* opr,
                         const NCBKernSizeParam& param) const override;
    SmallVector<NCBKern> dispatch_kerns(
            fallback::ConvBiasImpl*,
            const NCBKernSizeParam& param) const override {
        return get_kimpls(param);
    }
    void* type() const override;
    bool is_preferred(FallbackConvBiasImpl*,
                      const NCBKernSizeParam& param) const override;
};
#if MEGDNN_X86_WITH_MKL_DNN | |||
@@ -117,6 +147,8 @@ public: | |||
return {{kern, {group, n, 1_z}}}; | |||
} | |||
void* type() const override; | |||
bool is_preferred(FallbackConvBiasImpl*, | |||
const NCBKernSizeParam& param) const override; | |||
}; | |||
/* ===================== mkldnn qint8 matmul algo ===================== */ | |||
class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { | |||
@@ -148,27 +180,7 @@ public: | |||
void* type() const override; | |||
}; | |||
#endif | |||
/* ================== avx2 int8 direct conv stride2 algo ================== */ | |||
class ConvBiasImpl::AlgoAVX2DirectConvStride2 final : public AlgoBase { | |||
SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const; | |||
static WorkspaceBundle get_bundle(const NCBKernSizeParam& param); | |||
public: | |||
bool is_reproducible() const override { return true; } | |||
const char* name() const override { | |||
return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2"; | |||
} | |||
bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||
size_t get_workspace(FallbackConvBiasImpl* opr, | |||
const NCBKernSizeParam& param) const override; | |||
SmallVector<NCBKern> dispatch_kerns( | |||
fallback::ConvBiasImpl*, | |||
const NCBKernSizeParam& param) const override { | |||
return get_kimpls(param); | |||
} | |||
void* type() const override; | |||
}; | |||
} // namespace x86 | |||
} // namespace megdnn | |||
@@ -16,6 +16,7 @@ | |||
#include "src/common/metahelper.h" | |||
#include "src/common/opr_delegate.h" | |||
#include "src/x86/conv_bias/f32/algos.h" | |||
#include "src/x86/conv_bias/int8/algo_usable_preferred.h" | |||
#include "src/x86/conv_bias/int8/algos.h" | |||
#include "src/x86/matrix_mul/opr_impl.h" | |||
@@ -94,12 +95,6 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { | |||
public: | |||
AlgoPack() { | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
//! Create the mkldnn algo | |||
all_algos.emplace_back(&mkldnn_conv_fp32); | |||
all_algos.emplace_back(&mkldnn_matmul_qint8); | |||
all_algos.emplace_back(&mkldnn_qint8); | |||
#endif | |||
all_algos.emplace_back(&stride1_direct_large_group); | |||
all_algos.emplace_back(&stride1_direct_small_group); | |||
all_algos.emplace_back(&stride2_direct_large_group); | |||
@@ -110,6 +105,14 @@ public: | |||
all_algos.emplace_back(&avx2_stride2_chanwsie_qint8); | |||
all_algos.emplace_back(&matmul); | |||
//! preference to use mkldnn algo on VNNI devices | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
//! Create the mkldnn algo | |||
all_algos.emplace_back(&mkldnn_conv_fp32); | |||
all_algos.emplace_back(&mkldnn_matmul_qint8); | |||
all_algos.emplace_back(&mkldnn_qint8); | |||
#endif | |||
static CpuOprDelegationStorage<> storage; | |||
auto matmul_opr = storage.get<MatrixMul>(); | |||
auto&& matmul_algos = | |||
@@ -159,4 +162,25 @@ const char* ConvBiasImpl::get_algorithm_set_name() const { | |||
return "X0"; | |||
} | |||
bool ConvBiasImpl::is_matmul_quantized_prefer( | |||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||
bool conv_direct_chanwise_mkldnn_usable = true; | |||
if (param.dst_type.enumv() == DTypeEnum::QuantizedS8 || | |||
param.dst_type.enumv() == DTypeEnum::QuantizedS32) { | |||
conv_direct_chanwise_mkldnn_usable = | |||
chanwise_avx2_stride1_qint8_usable_preferred(param) || | |||
chanwise_avx2_stride2_qint8_usable_preferred(param) || | |||
direct_avx2_stride1_int8_usable_preferred(param) || | |||
direct_avx2_stride2_int8_usable_preferred(param); | |||
} | |||
#if MEGDNN_X86_WITH_MKL_DNN | |||
conv_direct_chanwise_mkldnn_usable = | |||
conv_direct_chanwise_mkldnn_usable || | |||
mkldnn_qint8_usable_preferred(param) || | |||
mkldnn_matmul_qint8_usable_preferred(param); | |||
#endif | |||
return !conv_direct_chanwise_mkldnn_usable; | |||
} | |||
// vim: syntax=cpp.doxygen |
@@ -53,6 +53,9 @@ public: | |||
size_t& IW2, size_t& OH2, size_t& OW2); | |||
const char* get_algorithm_set_name() const override; | |||
bool is_matmul_quantized_prefer( | |||
const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; | |||
}; | |||
} // namespace x86 | |||