@@ -0,0 +1,247 @@ | |||||
/** | |||||
* \file dnn/src/x86/conv_bias/int8/algo_usable_preferred.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||||
*/ | |||||
#include "src/x86/conv_bias/int8/algo_usable_preferred.h" | |||||
#include "src/x86/utils.h" | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
#include <mkldnn.hpp> | |||||
#endif | |||||
#include <cstring> | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
using namespace dnnl; | |||||
#endif | |||||
using namespace megdnn; | |||||
using namespace x86; | |||||
namespace megdnn { | |||||
namespace x86 { | |||||
bool chanwise_avx2_stride1_qint8_usable( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = | |||||
(param.bias_mode != BiasMode::BIAS) && | |||||
((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) && | |||||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||||
fm.dilation[1] == 1 && (FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||||
fm.stride[0] == 1 && fm.stride[1] == 1 && (fm.icpg == 1) && | |||||
(fm.ocpg == 1) && is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
} | |||||
bool chanwise_avx2_stride1_qint8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
return true; | |||||
} | |||||
bool chanwise_avx2_stride1_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
return chanwise_avx2_stride1_qint8_usable(param) && | |||||
chanwise_avx2_stride1_qint8_preferred(param); | |||||
} | |||||
bool chanwise_avx2_stride2_qint8_usable( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = | |||||
(param.bias_mode != BiasMode::BIAS) && | |||||
((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) && | |||||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||||
fm.dilation[1] == 1 && (FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||||
fm.stride[0] == 2 && fm.stride[1] == 2 && (fm.icpg == 1) && | |||||
(fm.ocpg == 1) && is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
} | |||||
bool chanwise_avx2_stride2_qint8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
return true; | |||||
} | |||||
bool chanwise_avx2_stride2_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
return chanwise_avx2_stride2_qint8_usable(param) && | |||||
chanwise_avx2_stride2_qint8_preferred(param); | |||||
} | |||||
bool direct_avx2_stride1_int8_usable( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||||
param.bias_mode == BiasMode::NO_BIAS && | |||||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||||
fm.dilation[1] == 1 && | |||||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||||
fm.stride[0] == 1 && fm.stride[1] == 1 && | |||||
is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
} | |||||
bool direct_avx2_stride1_int8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
auto IC = fm.icpg; | |||||
auto OC = fm.ocpg; | |||||
auto is_preferred = true; | |||||
if (IC > 128 && OC > 128) | |||||
is_preferred = false; | |||||
return is_preferred; | |||||
} | |||||
bool direct_avx2_stride1_int8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
return direct_avx2_stride1_int8_usable(param) && | |||||
direct_avx2_stride1_int8_preferred(param); | |||||
} | |||||
bool direct_avx2_stride2_int8_usable( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||||
param.bias_mode == BiasMode::NO_BIAS && | |||||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||||
fm.format == ConvBiasImpl::Param::Format::NCHW && | |||||
fm.spatial_ndim == 2 && fm.dilation[0] == 1 && | |||||
fm.dilation[1] == 1 && | |||||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||||
fm.stride[0] == 2 && fm.stride[1] == 2 && | |||||
is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
} | |||||
bool direct_avx2_stride2_int8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
auto IC = fm.icpg; | |||||
auto OC = fm.ocpg; | |||||
auto is_preferred = false; | |||||
if (IC <= 31 && OC <= 31) | |||||
is_preferred = true; | |||||
return is_preferred; | |||||
} | |||||
bool direct_avx2_stride2_int8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
return direct_avx2_stride2_int8_usable(param) && | |||||
direct_avx2_stride2_int8_preferred(param); | |||||
} | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
bool mkldnn_qint8_usable(const ConvBiasImpl::NCBKernSizeParam& param) {
    auto&& fm = param.filter_meta;
    //! src may be quantized or plain int8; dst must be a 32-bit accumulator
    const bool src_ok = param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
                        param.src_type.enumv() == DTypeEnum::Int8;
    const bool dst_ok = param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
                        param.dst_type.enumv() == DTypeEnum::Int32;
    //! NCHW, no dilation, cross-correlation only (no filter flipping)
    const bool filter_ok = fm.format == param::ConvBias::Format::NCHW &&
                           fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
                           fm.dilation[1] == 1 && !fm.should_flip;
    return src_ok && dst_ok && filter_ok &&
           param.bias_mode == BiasMode::NO_BIAS &&
           param.nonlineMode == NonlineMode::IDENTITY;
}
bool mkldnn_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
MEGDNN_MARK_USED_VAR(param); | |||||
return is_supported(SIMDType::VNNI); | |||||
} | |||||
bool mkldnn_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
return mkldnn_qint8_usable(param) && mkldnn_qint8_preferred(param); | |||||
} | |||||
bool mkldnn_matmul_qint8_usable(const ConvBiasImpl::NCBKernSizeParam& param) {
    auto&& fm = param.filter_meta;
    //! src may be quantized or plain int8; dst must be a 32-bit accumulator
    const bool src_ok = param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
                        param.src_type.enumv() == DTypeEnum::Int8;
    const bool dst_ok = param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
                        param.dst_type.enumv() == DTypeEnum::Int32;
    //! NCHW, single group, no dilation, no bias, no nonlinearity
    const bool conv_ok = fm.format == param::ConvBias::Format::NCHW &&
                         fm.spatial_ndim == 2 && fm.group == 1 &&
                         fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
                         param.bias_mode == BiasMode::NO_BIAS &&
                         param.nonlineMode == NonlineMode::IDENTITY;
    //! The matmul opr is only used in single thread
    //! TODO:support the no pack matmul algo in fallback im2col + matmul
    return src_ok && dst_ok && conv_ok && param.nr_threads == 1_z;
}
bool mkldnn_matmul_qint8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
auto is_preferred = true; | |||||
auto&& fm = param.filter_meta; | |||||
megdnn_assert_internal(fm.group == 1 && fm.dilation[0] == 1 && | |||||
fm.dilation[1] == 1); | |||||
// single channel conv should never use matrix mul | |||||
if (fm.ocpg == 1 || fm.icpg == 1) | |||||
is_preferred = false; | |||||
return is_preferred && is_supported(SIMDType::VNNI); | |||||
} | |||||
bool mkldnn_matmul_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
return mkldnn_matmul_qint8_usable(param) && | |||||
mkldnn_matmul_qint8_preferred(param); | |||||
} | |||||
#endif | |||||
} // namespace x86 | |||||
} // namespace megdnn | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,56 @@ | |||||
/** | |||||
* \file dnn/src/x86/conv_bias/int8/algo_usable_preferred.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. | |||||
*/ | |||||
#pragma once | |||||
#include "src/common/utils.h" | |||||
#include "src/x86/conv_bias/opr_impl.h" | |||||
namespace megdnn { | |||||
namespace x86 { | |||||
bool chanwise_avx2_stride1_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool chanwise_avx2_stride1_qint8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool chanwise_avx2_stride1_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool chanwise_avx2_stride2_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool chanwise_avx2_stride2_qint8_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool chanwise_avx2_stride2_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool direct_avx2_stride1_int8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool direct_avx2_stride1_int8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool direct_avx2_stride1_int8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool direct_avx2_stride2_int8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool direct_avx2_stride2_int8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool direct_avx2_stride2_int8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
bool mkldnn_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool mkldnn_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool mkldnn_qint8_usable_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool mkldnn_matmul_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool mkldnn_matmul_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam&); | |||||
bool mkldnn_matmul_qint8_usable_preferred( | |||||
const ConvBiasImpl::NCBKernSizeParam&); | |||||
#endif | |||||
} // namespace x86 | |||||
} // namespace megdnn | |||||
// vim: syntax=cpp.doxygen |
@@ -14,6 +14,7 @@ | |||||
#include "src/common/opr_delegate.h" | #include "src/common/opr_delegate.h" | ||||
#include "src/common/utils.h" | #include "src/common/utils.h" | ||||
#include "src/fallback/convolution/img2col_helper.h" | #include "src/fallback/convolution/img2col_helper.h" | ||||
#include "src/x86/conv_bias/int8/algo_usable_preferred.h" | |||||
#include "src/x86/conv_bias/int8/avx2_chanwise_stride1.h" | #include "src/x86/conv_bias/int8/avx2_chanwise_stride1.h" | ||||
#include "src/x86/conv_bias/int8/avx2_chanwise_stride2.h" | #include "src/x86/conv_bias/int8/avx2_chanwise_stride2.h" | ||||
#include "src/x86/conv_bias/int8/avx2_direct_conv_stride1.h" | #include "src/x86/conv_bias/int8/avx2_direct_conv_stride1.h" | ||||
@@ -37,25 +38,7 @@ using namespace x86; | |||||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable( | bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable( | ||||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = | |||||
(param.bias_mode != BiasMode::BIAS) && | |||||
((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) && | |||||
fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | |||||
fm.dilation[0] == 1 && fm.dilation[1] == 1 && | |||||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && fm.stride[0] == 1 && | |||||
fm.stride[1] == 1 && (fm.icpg == 1) && (fm.ocpg == 1) && | |||||
is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
return chanwise_avx2_stride1_qint8_usable(param); | |||||
} | } | ||||
WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle( | WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle( | ||||
@@ -94,28 +77,15 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_kimpls( | |||||
return avx2_chanwise_stride1::get_kimpls(param, bundle); | return avx2_chanwise_stride1::get_kimpls(param, bundle); | ||||
} | } | ||||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::is_preferred( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return chanwise_avx2_stride1_qint8_preferred(param); | |||||
} | |||||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable( | bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable( | ||||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = | |||||
(param.bias_mode != BiasMode::BIAS) && | |||||
((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) && | |||||
fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | |||||
fm.dilation[0] == 1 && fm.dilation[1] == 1 && | |||||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && fm.stride[0] == 2 && | |||||
fm.stride[1] == 2 && (fm.icpg == 1) && (fm.ocpg == 1) && | |||||
is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
return chanwise_avx2_stride2_qint8_usable(param); | |||||
} | } | ||||
WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle( | WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle( | ||||
@@ -154,28 +124,15 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_kimpls( | |||||
return avx2_chanwise_stride2::get_kimpls(param, bundle); | return avx2_chanwise_stride2::get_kimpls(param, bundle); | ||||
} | } | ||||
bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::is_preferred( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return chanwise_avx2_stride2_qint8_preferred(param); | |||||
} | |||||
bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable( | bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable( | ||||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy /*algo_selection_strategy*/) const { | AlgoSelectionStrategy /*algo_selection_strategy*/) const { | ||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||||
param.bias_mode == BiasMode::NO_BIAS && | |||||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||||
fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | |||||
fm.dilation[0] == 1 && fm.dilation[1] == 1 && | |||||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||||
fm.stride[0] == 1 && fm.stride[1] == 1 && | |||||
is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
return direct_avx2_stride1_int8_usable(param); | |||||
} | } | ||||
WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle( | WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle( | ||||
@@ -224,19 +181,75 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls( | |||||
return direct_conv_avx2_stride1::get_kimpls(param, bundle); | return direct_conv_avx2_stride1::get_kimpls(param, bundle); | ||||
} | } | ||||
bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::is_preferred( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return direct_avx2_stride1_int8_preferred(param); | |||||
} | |||||
/* ===================== avx2 int8 stride 2 ===================== */ | |||||
bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( | |||||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy) const { | |||||
return direct_avx2_stride2_int8_usable(param); | |||||
} | |||||
WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle( | |||||
const NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
size_t N = param.n; | |||||
size_t IC = fm.icpg; | |||||
size_t OC = fm.ocpg; | |||||
size_t IH = param.isz[0]; | |||||
size_t IW = param.isz[1]; | |||||
size_t OH = param.osz[0]; | |||||
size_t OW = param.osz[1]; | |||||
size_t FH = fm.spatial[0]; | |||||
size_t FW = fm.spatial[1]; | |||||
size_t GROUP = fm.group; | |||||
size_t IC_STEP = 2, OC_STEP = 4; | |||||
size_t pad_h = fm.padding[0]; | |||||
size_t pad_w = fm.padding[1]; | |||||
size_t src_size = 0, filter_size = 0; | |||||
//! pack filter, pack src | |||||
filter_size = GROUP * round_up(OC, OC_STEP) * round_up(IC, IC_STEP) * FH * | |||||
FW * sizeof(int16_t); | |||||
//! avx256 iw max offset 32, caused by w_remain < 16 | |||||
src_size = N * GROUP * div_ceil(IC, IC_STEP) * (IH + 2 * pad_h) * | |||||
(IW + 2 * pad_w) * 2 * sizeof(int8_t) + | |||||
32; | |||||
bool need_post_process = param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||||
if (need_post_process) { | |||||
size_t dst_tmp = N * GROUP * OC * OW * OH * sizeof(int32_t); | |||||
return WorkspaceBundle(nullptr, {src_size, filter_size, dst_tmp}); | |||||
} else { | |||||
return WorkspaceBundle(nullptr, {src_size, filter_size}); | |||||
} | |||||
} | |||||
size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return get_bundle(param).total_size_in_bytes(); | |||||
} | |||||
SmallVector<fallback::ConvBiasImpl::NCBKern> | |||||
ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls( | |||||
const NCBKernSizeParam& param) const { | |||||
auto bundle = get_bundle(param); | |||||
return direct_conv_avx2_stride2::get_kimpls(param, bundle); | |||||
} | |||||
bool ConvBiasImpl::AlgoAVX2DirectConvStride2::is_preferred( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return direct_avx2_stride2_int8_preferred(param); | |||||
} | |||||
#if MEGDNN_X86_WITH_MKL_DNN | #if MEGDNN_X86_WITH_MKL_DNN | ||||
bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, | bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, | ||||
const NCBKernSizeParam& param, | const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
auto&& fm = param.filter_meta; | |||||
return (param.src_type.enumv() == DTypeEnum::QuantizedS8 || | |||||
param.src_type.enumv() == DTypeEnum::Int8) && | |||||
(param.dst_type.enumv() == DTypeEnum::QuantizedS32 || | |||||
param.dst_type.enumv() == DTypeEnum::Int32) && | |||||
fm.format == param::ConvBias::Format::NCHW && fm.spatial_ndim == 2 && | |||||
fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | |||||
param.bias_mode == BiasMode::NO_BIAS && | |||||
param.nonlineMode == NonlineMode::IDENTITY; | |||||
return mkldnn_qint8_usable(param); | |||||
} | } | ||||
WorkspaceBundle ConvBiasImpl::AlgoMkldnnQint8::get_bundle( | WorkspaceBundle ConvBiasImpl::AlgoMkldnnQint8::get_bundle( | ||||
@@ -412,39 +425,25 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32( | |||||
stream_mkldnn.wait(); | stream_mkldnn.wait(); | ||||
} | } | ||||
} | } | ||||
#undef REORDER_MEMORY | #undef REORDER_MEMORY | ||||
#endif | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
bool ConvBiasImpl::AlgoMkldnnQint8::is_preferred( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return mkldnn_qint8_preferred(param); | |||||
} | |||||
/* ===================== mkldnn qint8 matmul algo ===================== */ | /* ===================== mkldnn qint8 matmul algo ===================== */ | ||||
bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, | bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, | ||||
const NCBKernSizeParam& param, | const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
auto&& fm = param.filter_meta; | |||||
return (param.src_type.enumv() == DTypeEnum::QuantizedS8 || | |||||
param.src_type.enumv() == DTypeEnum::Int8) && | |||||
(param.dst_type.enumv() == DTypeEnum::QuantizedS32 || | |||||
param.dst_type.enumv() == DTypeEnum::Int32) && | |||||
fm.format == param::ConvBias::Format::NCHW && fm.spatial_ndim == 2 && | |||||
fm.group == 1 && fm.dilation[0] == 1 && fm.dilation[1] == 1 && | |||||
param.bias_mode == BiasMode::NO_BIAS && | |||||
param.nonlineMode == NonlineMode::IDENTITY && | |||||
//! The matmul opr is only used in single thread | |||||
//! TODO:support the no pack matmul algo in fallback im2col + matmul | |||||
param.nr_threads == 1_z; | |||||
return mkldnn_matmul_qint8_usable(param); | |||||
} | } | ||||
bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred( | bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred( | ||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | ||||
auto&& fm = param.filter_meta; | |||||
megdnn_assert_internal(fm.group == 1 && fm.dilation[0] == 1 && | |||||
fm.dilation[1] == 1); | |||||
// single channel conv should never use matrix mul | |||||
if (fm.ocpg == 1 || fm.icpg == 1) | |||||
return false; | |||||
return true; | |||||
return mkldnn_matmul_qint8_preferred(param); | |||||
} | } | ||||
WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle( | WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle( | ||||
const NCBKernSizeParam& param) { | const NCBKernSizeParam& param) { | ||||
UNPACK_CONV_F32_NCB_KERN_SIZES(param); | UNPACK_CONV_F32_NCB_KERN_SIZES(param); | ||||
@@ -473,6 +472,7 @@ WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle( | |||||
} | } | ||||
return {nullptr, {part0, part1, part2}}; | return {nullptr, {part0, part1, part2}}; | ||||
} | } | ||||
MatrixMul* ConvBiasImpl::AlgoMkldnnMatmulQint8::get_matmul_opr() { | MatrixMul* ConvBiasImpl::AlgoMkldnnMatmulQint8::get_matmul_opr() { | ||||
static CpuOprDelegationStorage<> storage; | static CpuOprDelegationStorage<> storage; | ||||
return storage.get<MatrixMul>(); | return storage.get<MatrixMul>(); | ||||
@@ -553,76 +553,5 @@ void ConvBiasImpl::AlgoMkldnnMatmulQint8::kern_mkldnn_matmul_s8x8x32( | |||||
} | } | ||||
#endif | #endif | ||||
/* ===================== avx2 int8 stride 2 ===================== */ | |||||
bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( | |||||
FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy) const { | |||||
auto&& fm = param.filter_meta; | |||||
auto FH = fm.spatial[0]; | |||||
bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS8) || | |||||
(((param.src_type.enumv() == DTypeEnum::Int8 && | |||||
param.filter_type.enumv() == DTypeEnum::Int8 && | |||||
param.dst_type.enumv() == DTypeEnum::Int32) || | |||||
(param.src_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.filter_type.enumv() == DTypeEnum::QuantizedS8 && | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32)) && | |||||
param.bias_mode == BiasMode::NO_BIAS && | |||||
param.nonlineMode == NonlineMode::IDENTITY)) && | |||||
fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && | |||||
fm.dilation[0] == 1 && fm.dilation[1] == 1 && | |||||
(FH == 2 || FH == 3 || FH == 5 || FH == 7) && | |||||
fm.stride[0] == 2 && fm.stride[1] == 2 && | |||||
is_supported(SIMDType::AVX2); | |||||
return aviliable; | |||||
} | |||||
WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle( | |||||
const NCBKernSizeParam& param) { | |||||
auto&& fm = param.filter_meta; | |||||
size_t N = param.n; | |||||
size_t IC = fm.icpg; | |||||
size_t OC = fm.ocpg; | |||||
size_t IH = param.isz[0]; | |||||
size_t IW = param.isz[1]; | |||||
size_t OH = param.osz[0]; | |||||
size_t OW = param.osz[1]; | |||||
size_t FH = fm.spatial[0]; | |||||
size_t FW = fm.spatial[1]; | |||||
size_t GROUP = fm.group; | |||||
size_t IC_STEP = 2, OC_STEP = 4; | |||||
size_t pad_h = fm.padding[0]; | |||||
size_t pad_w = fm.padding[1]; | |||||
size_t src_size = 0, filter_size = 0; | |||||
//! pack filter, pack src | |||||
filter_size = GROUP * round_up(OC, OC_STEP) * round_up(IC, IC_STEP) * FH * | |||||
FW * sizeof(int16_t); | |||||
//! avx256 iw max offset 32, caused by w_remain < 16 | |||||
src_size = N * GROUP * div_ceil(IC, IC_STEP) * (IH + 2 * pad_h) * | |||||
(IW + 2 * pad_w) * 2 * sizeof(int8_t) + | |||||
32; | |||||
bool need_post_process = param.dst_type.enumv() == DTypeEnum::QuantizedS8; | |||||
if (need_post_process) { | |||||
size_t dst_tmp = N * GROUP * OC * OW * OH * sizeof(int32_t); | |||||
return WorkspaceBundle(nullptr, {src_size, filter_size, dst_tmp}); | |||||
} else { | |||||
return WorkspaceBundle(nullptr, {src_size, filter_size}); | |||||
} | |||||
} | |||||
size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( | |||||
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { | |||||
return get_bundle(param).total_size_in_bytes(); | |||||
} | |||||
SmallVector<fallback::ConvBiasImpl::NCBKern> | |||||
ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls( | |||||
const NCBKernSizeParam& param) const { | |||||
auto bundle = get_bundle(param); | |||||
return direct_conv_avx2_stride2::get_kimpls(param, bundle); | |||||
} | |||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -35,6 +35,8 @@ public: | |||||
return get_kimpls(param); | return get_kimpls(param); | ||||
} | } | ||||
void* type() const override; | void* type() const override; | ||||
bool is_preferred(FallbackConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
}; | }; | ||||
/* ===================== avx2 stride2 chanwise algo ===================== */ | /* ===================== avx2 stride2 chanwise algo ===================== */ | ||||
@@ -57,6 +59,8 @@ public: | |||||
return get_kimpls(param); | return get_kimpls(param); | ||||
} | } | ||||
void* type() const override; | void* type() const override; | ||||
bool is_preferred(FallbackConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
}; | }; | ||||
/* ===================== avx2 stride1 direct algo ===================== */ | /* ===================== avx2 stride1 direct algo ===================== */ | ||||
@@ -79,6 +83,32 @@ public: | |||||
return get_kimpls(param); | return get_kimpls(param); | ||||
} | } | ||||
void* type() const override; | void* type() const override; | ||||
bool is_preferred(FallbackConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
}; | |||||
/* ================== avx2 int8 direct conv stride2 algo ================== */ | |||||
class ConvBiasImpl::AlgoAVX2DirectConvStride2 final : public AlgoBase { | |||||
SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const; | |||||
static WorkspaceBundle get_bundle(const NCBKernSizeParam& param); | |||||
public: | |||||
bool is_reproducible() const override { return true; } | |||||
const char* name() const override { | |||||
return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2"; | |||||
} | |||||
bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, | |||||
AlgoSelectionStrategy algo_selection_strategy) const override; | |||||
size_t get_workspace(FallbackConvBiasImpl* opr, | |||||
const NCBKernSizeParam& param) const override; | |||||
SmallVector<NCBKern> dispatch_kerns( | |||||
fallback::ConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override { | |||||
return get_kimpls(param); | |||||
} | |||||
void* type() const override; | |||||
bool is_preferred(FallbackConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
}; | }; | ||||
#if MEGDNN_X86_WITH_MKL_DNN | #if MEGDNN_X86_WITH_MKL_DNN | ||||
@@ -117,6 +147,8 @@ public: | |||||
return {{kern, {group, n, 1_z}}}; | return {{kern, {group, n, 1_z}}}; | ||||
} | } | ||||
void* type() const override; | void* type() const override; | ||||
bool is_preferred(FallbackConvBiasImpl*, | |||||
const NCBKernSizeParam& param) const override; | |||||
}; | }; | ||||
/* ===================== mkldnn qint8 matmul algo ===================== */ | /* ===================== mkldnn qint8 matmul algo ===================== */ | ||||
class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { | class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { | ||||
@@ -148,27 +180,7 @@ public: | |||||
void* type() const override; | void* type() const override; | ||||
}; | }; | ||||
#endif | #endif | ||||
/* ================== avx2 int8 direct conv stride2 algo ================== */
//! NOTE(review): an identical class with the same name is declared earlier
//! in this file (with an extra is_preferred() member); presumably this copy
//! is the one being removed/relocated by the change — confirm only one
//! declaration remains, otherwise it is a redefinition error.
class ConvBiasImpl::AlgoAVX2DirectConvStride2 final : public AlgoBase {
    //! Build the kernel task list for the given problem size.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
    //! Describe the scratch-workspace layout the kernels need.
    static WorkspaceBundle get_bundle(const NCBKernSizeParam& param);

public:
    //! Marks this algorithm as reproducible (same input -> same output).
    bool is_reproducible() const override { return true; }
    const char* name() const override {
        return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2";
    }
    //! Whether this algo can handle \p param at all; defined elsewhere.
    bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;
    size_t get_workspace(FallbackConvBiasImpl* opr,
                         const NCBKernSizeParam& param) const override;
    //! Dispatch simply forwards to get_kimpls(); no extra setup here.
    SmallVector<NCBKern> dispatch_kerns(
            fallback::ConvBiasImpl*,
            const NCBKernSizeParam& param) const override {
        return get_kimpls(param);
    }
    void* type() const override;
};
} // namespace x86 | } // namespace x86 | ||||
} // namespace megdnn | } // namespace megdnn | ||||
@@ -16,6 +16,7 @@ | |||||
#include "src/common/metahelper.h" | #include "src/common/metahelper.h" | ||||
#include "src/common/opr_delegate.h" | #include "src/common/opr_delegate.h" | ||||
#include "src/x86/conv_bias/f32/algos.h" | #include "src/x86/conv_bias/f32/algos.h" | ||||
#include "src/x86/conv_bias/int8/algo_usable_preferred.h" | |||||
#include "src/x86/conv_bias/int8/algos.h" | #include "src/x86/conv_bias/int8/algos.h" | ||||
#include "src/x86/matrix_mul/opr_impl.h" | #include "src/x86/matrix_mul/opr_impl.h" | ||||
@@ -94,12 +95,6 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { | |||||
public: | public: | ||||
AlgoPack() { | AlgoPack() { | ||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
//! Create the mkldnn algo | |||||
all_algos.emplace_back(&mkldnn_conv_fp32); | |||||
all_algos.emplace_back(&mkldnn_matmul_qint8); | |||||
all_algos.emplace_back(&mkldnn_qint8); | |||||
#endif | |||||
all_algos.emplace_back(&stride1_direct_large_group); | all_algos.emplace_back(&stride1_direct_large_group); | ||||
all_algos.emplace_back(&stride1_direct_small_group); | all_algos.emplace_back(&stride1_direct_small_group); | ||||
all_algos.emplace_back(&stride2_direct_large_group); | all_algos.emplace_back(&stride2_direct_large_group); | ||||
@@ -110,6 +105,14 @@ public: | |||||
all_algos.emplace_back(&avx2_stride2_chanwsie_qint8); | all_algos.emplace_back(&avx2_stride2_chanwsie_qint8); | ||||
all_algos.emplace_back(&matmul); | all_algos.emplace_back(&matmul); | ||||
//! preference to use mkldnn algo on VNNI devices | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
//! Create the mkldnn algo | |||||
all_algos.emplace_back(&mkldnn_conv_fp32); | |||||
all_algos.emplace_back(&mkldnn_matmul_qint8); | |||||
all_algos.emplace_back(&mkldnn_qint8); | |||||
#endif | |||||
static CpuOprDelegationStorage<> storage; | static CpuOprDelegationStorage<> storage; | ||||
auto matmul_opr = storage.get<MatrixMul>(); | auto matmul_opr = storage.get<MatrixMul>(); | ||||
auto&& matmul_algos = | auto&& matmul_algos = | ||||
@@ -159,4 +162,25 @@ const char* ConvBiasImpl::get_algorithm_set_name() const { | |||||
return "X0"; | return "X0"; | ||||
} | } | ||||
bool ConvBiasImpl::is_matmul_quantized_prefer( | |||||
const ConvBiasImpl::NCBKernSizeParam& param) { | |||||
bool conv_direct_chanwise_mkldnn_usable = true; | |||||
if (param.dst_type.enumv() == DTypeEnum::QuantizedS8 || | |||||
param.dst_type.enumv() == DTypeEnum::QuantizedS32) { | |||||
conv_direct_chanwise_mkldnn_usable = | |||||
chanwise_avx2_stride1_qint8_usable_preferred(param) || | |||||
chanwise_avx2_stride2_qint8_usable_preferred(param) || | |||||
direct_avx2_stride1_int8_usable_preferred(param) || | |||||
direct_avx2_stride2_int8_usable_preferred(param); | |||||
} | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
conv_direct_chanwise_mkldnn_usable = | |||||
conv_direct_chanwise_mkldnn_usable || | |||||
mkldnn_qint8_usable_preferred(param) || | |||||
mkldnn_matmul_qint8_usable_preferred(param); | |||||
#endif | |||||
return !conv_direct_chanwise_mkldnn_usable; | |||||
} | |||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -53,6 +53,9 @@ public: | |||||
size_t& IW2, size_t& OH2, size_t& OW2); | size_t& IW2, size_t& OH2, size_t& OW2); | ||||
const char* get_algorithm_set_name() const override; | const char* get_algorithm_set_name() const override; | ||||
bool is_matmul_quantized_prefer( | |||||
const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; | |||||
}; | }; | ||||
} // namespace x86 | } // namespace x86 | ||||