@@ -0,0 +1,230 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/algos.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
 */

#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/common.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
#include "src/fallback/conv_bias/opr_impl.h"

#include "megdnn/opr_param_defs.h"
#include "src/naive/convolution/helper.h"

#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif

#include "midout.h"

MIDOUT_DECL(megdnn_fallback_conv1x1)

using namespace megdnn;
using namespace fallback;
#if MEGDNN_X86
using namespace x86;
#endif
using namespace conv1x1;

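//! Heuristic for the OC tile size: with a large output feature map
//! (>= 56x56) or many output channels (OC >= 64) the configured block size
//! is kept; otherwise OC is split evenly across the threads so that each
//! thread gets one tile, e.g. OC = 24 on 4 threads gives
//! div_ceil(24, 4) = 6 channels per tile.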
size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic(
        const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    if (OH * OW >= 56 * 56 || OC >= 64)
        return m_oc_block_size;
    return div_ceil(OC, param.nr_threads);
}

size_t ConvBiasImpl::AlgoConv1x1::get_workspace(
        ConvBiasImpl*, const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    auto matmul_param =
            get_matmul_kern_param(param, OH * OW, compt_oc_block_size);

    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 0) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT> dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 1) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>
                    dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 2) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK> dispatcher;
            return dispatcher
                    .get_bundle(param, matmul_param, m_matmul_algo,
                                compt_oc_block_size)
                    .total_size_in_bytes();
        }
        MIDOUT_END();
    }
    return 0;
}

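//! dispatch_kerns emits up to three kernels, depending on the pack mode of
//! the wrapped matmul:
//!   DEFAULT:    packA over {GROUP, oc_blocks}, one packB, then compute over
//!               {BATCH, GROUP, oc_blocks}
//!   ONLY_PACKA: packA and compute, no packB
//!   NO_PACK:    compute only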
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
        ConvBiasImpl* opr, const NCBKernSizeParam& param) const {
    SmallVector<ConvBiasImpl::NCBKern> ret_kern;
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    size_t GROUP = param.filter_meta.group;
    size_t BATCH = param.n;
    size_t oc_blocks_per_group = div_ceil(OC, compt_oc_block_size);

    auto matmul_param =
            get_matmul_kern_param(param, OH * OW, compt_oc_block_size);

    WorkspaceBundle whole_bundle = {nullptr, {}};
    WorkspaceBundle thread_bundle = {nullptr, {}};
    WorkspaceBundle matmul_bundle = {nullptr, {}};

    auto pack_mode = m_matmul_algo->packmode();
    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 0) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::DEFAULT> dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
        }
        MIDOUT_END();
    } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 1) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA>
                    dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 2) {
            Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK> dispatcher;
            whole_bundle = dispatcher.get_bundle(
                    param, matmul_param, m_matmul_algo, compt_oc_block_size);
            matmul_bundle = {
                    nullptr,
                    {0, 0, m_matmul_algo->get_workspace(matmul_param)}};
        }
        MIDOUT_END();
    }

    //! get thread bundle
    thread_bundle = get_thread_bundle(param, matmul_bundle.get_size(2),
                                      compt_oc_block_size);

    Conv1x1StrategyBase* conv1x1_strategy =
            Conv1x1Factory::make_conv1x1_strategy(param, pack_mode,
                                                  opr->param().format);

    auto kern_packA = [this, whole_bundle, matmul_bundle, param,
                       compt_oc_block_size, conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->packA(whole_bundle, matmul_bundle,
                                compt_oc_block_size, this->m_matmul_algo, param,
                                ncb_param, std::move(ncb_index));
    };
    auto kern_packB = [this, whole_bundle, matmul_bundle, param,
                       conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->packB(whole_bundle, matmul_bundle,
                                this->m_matmul_algo, param, ncb_param,
                                std::move(ncb_index));
    };
    auto kern_compt = [this, whole_bundle, matmul_bundle, thread_bundle, param,
                       compt_oc_block_size, conv1x1_strategy](
                              const NCBKernParam& ncb_param,
                              const NCBKernIndex& ncb_index) mutable {
        conv1x1_strategy->exec(whole_bundle, matmul_bundle, thread_bundle,
                               compt_oc_block_size, this->m_matmul_algo, param,
                               ncb_param, std::move(ncb_index));
    };

    if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT ||
        pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) {
        ret_kern.push_back({kern_packA, {GROUP, oc_blocks_per_group}});
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
            ret_kern.push_back({kern_packB, {1}});
        }
    }
    ret_kern.push_back({kern_compt, {BATCH, GROUP, oc_blocks_per_group}});
    return ret_kern;
}

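//! Eligibility check: NCHW only, a true 1x1 convolution (1x1 filter,
//! stride 1, no padding, no dilation), default compute mode, and the derived
//! GEMM must itself be usable by the wrapped matmul algorithm.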
bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr,
                                       const NCBKernSizeParam& param,
                                       AlgoSelectionStrategy) const {
    MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) {
        //! only support nchw format
        if (opr->param().format != param::ConvBias::Format::NCHW)
            return false;

        size_t FH = param.filter_meta.spatial[0],
               FW = param.filter_meta.spatial[1];
        size_t PH = param.filter_meta.padding[0],
               PW = param.filter_meta.padding[1];
        size_t SH = param.filter_meta.stride[0],
               SW = param.filter_meta.stride[1];
        if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1)
            return false;
        //! 8x8x16 and 8x8x32 only support BiasMode::NO_BIAS and
        //! NonlineMode::IDENTITY; return false otherwise, since PostProcess
        //! is not supported for these dtype combinations
        if (param.src_type.enumv() == param.filter_type.enumv() &&
            (param.src_type.enumv() == DTypeEnum::Int8 &&
             (param.dst_type.enumv() == DTypeEnum::Int16 ||
              param.dst_type.enumv() == DTypeEnum::Int32)) &&
            param.bias_mode != megdnn::BiasMode::NO_BIAS &&
            param.nonlineMode != megdnn::NonlineMode::IDENTITY)
            return false;

        if (param.src_type.enumv() == param.filter_type.enumv() &&
            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
              param.src_type.enumv() == DTypeEnum::Quantized8Asymm) &&
             param.dst_type.enumv() == DTypeEnum::QuantizedS32) &&
            param.bias_mode != megdnn::BiasMode::NO_BIAS &&
            param.nonlineMode != megdnn::NonlineMode::IDENTITY)
            return false;

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        MatrixMulImpl::KernSizeParam matmul_param = get_matmul_kern_param(
                param, OH * OW, get_oc_tile_size_heuristic(param));
        bool matmulusable = m_matmul_algo->usable(matmul_param);

        return matmulusable &&
               (param.filter_meta.dilation[0] ==
                        param.filter_meta.dilation[1] &&
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT;
    }
    MIDOUT_END();
    return false;
}
@@ -0,0 +1,56 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/algos.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
 */

#pragma once

#include "megdnn/thin/small_vector.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h"

namespace megdnn {
namespace fallback {

class ConvBiasImpl::AlgoConv1x1 final : public AlgoBase {
public:
    AlgoConv1x1(MatrixMulImpl::AlgoBase* matmul_algo, size_t oc_block_size)
            : m_matmul_algo(matmul_algo), m_oc_block_size(oc_block_size) {}

    bool is_reproducible() const override { return true; }

    const char* name() const override {
        if (m_name.empty()) {
            m_name = ssprintf("CONV1x1:%s:%zu", m_matmul_algo->name(),
                              m_oc_block_size);
        }
        return m_name.c_str();
    }

    bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(ConvBiasImpl*,
                         const NCBKernSizeParam& param) const override;

    SmallVector<NCBKern> dispatch_kerns(
            ConvBiasImpl* opr, const NCBKernSizeParam& param) const override;

protected:
    size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const;

private:
    MatrixMulImpl::AlgoBase* m_matmul_algo;
    mutable std::string m_name;
    mutable size_t m_oc_block_size = 0;
};

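//! The reported name has the form "CONV1x1:<matmul algo name>:<oc block>",
//! e.g. "CONV1x1:X86_F32_BLAS:48"; tests select this algorithm by that
//! string.
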
}  // namespace fallback
}  // namespace megdnn

// vim: syntax=cpp.doxygen
@@ -0,0 +1,99 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
 */

#pragma once

#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"

namespace megdnn {
namespace fallback {
namespace conv1x1 {
namespace {

//! get_thread_bundle
WorkspaceBundle get_thread_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                                  size_t matmul_c_size, size_t oc_tile_size) {
    //! in some cases the matmul result needs temporary space before postprocess
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    bool is_dst_8bit = (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                        param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                       (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                        param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
    size_t matmul_dst_bytes_per_thread =
            is_dst_8bit ? oc_tile_size * OH * OW * sizeof(param.bias_type) : 0;
    return WorkspaceBundle{nullptr,
                           {matmul_c_size, matmul_dst_bytes_per_thread}};
}

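//! Per-thread bundle layout: slot 0 is the matmul workspace; slot 1 is a
//! temporary dst that is only needed when src and dst are both 8-bit, since
//! the matmul then accumulates into the wider bias_type before postprocess.
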
}  // anonymous namespace

template <MatrixMulImpl::AlgoBase::PackMode pack_mode>
class Conv1x1Kerns {
public:
    //! get_bundle
    WorkspaceBundle get_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                               const MatrixMulImpl::KernSizeParam& matmul_param,
                               const MatrixMulImpl::AlgoBase* matmul_algo,
                               size_t oc_tile_size) {
        size_t GROUP = param.filter_meta.group;
        size_t OC = param.filter_meta.ocpg;
        size_t BATCH = param.n;
        //! bundle per thread
        //! matmul_param describes a matmul with M = oc_tile_size, K = IC,
        //! N = OH * OW; the packB size does not depend on the oc tile
        auto matmul_bundle = matmul_algo->get_bundle(matmul_param);
        auto thread_bundle = get_thread_bundle(param, matmul_bundle.get_size(2),
                                               oc_tile_size);

        //! size per thread
        size_t all_threads_bytes =
                thread_bundle.total_size_in_bytes() * param.nr_threads;

        //! packa size = GROUP * packa_size_each_group
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t all_packa_bytes =
                packa_bytes_per_oc_tile * oc_tiles_per_group * GROUP;

        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA)
            return WorkspaceBundle{nullptr,
                                   {all_packa_bytes, 0, all_threads_bytes}};

        //! packb size = BATCH * GROUP * packb_size_per_group
        size_t packb_bytes_per_group = matmul_bundle.get_size(1);
        size_t all_packb_bytes = packb_bytes_per_group * GROUP * BATCH;

        return WorkspaceBundle{
                nullptr, {all_packa_bytes, all_packb_bytes, all_threads_bytes}};
    }
};

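//! Resulting whole-bundle layout, in bytes:
//!   [0] packA: GROUP * oc_tiles_per_group * packa_bytes_per_oc_tile
//!   [1] packB: BATCH * GROUP * packb_bytes_per_group (0 for ONLY_PACKA)
//!   [2] thread space: nr_threads * thread_bundle.total_size_in_bytes()
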
template <>
class Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::NO_PACK> {
public:
    //! get_bundle
    WorkspaceBundle get_bundle(const ConvBiasImpl::NCBKernSizeParam& param,
                               const MatrixMulImpl::KernSizeParam& matmul_param,
                               const MatrixMulImpl::AlgoBase* matmul_algo,
                               size_t oc_tile_size) {
        size_t matmul_size = matmul_algo->get_workspace(matmul_param);
        auto thread_bundle = get_thread_bundle(param, matmul_size,
                                               oc_tile_size);
        //! size per thread
        size_t all_threads_bytes =
                thread_bundle.total_size_in_bytes() * param.nr_threads;
        return WorkspaceBundle{nullptr, {0, 0, all_threads_bytes}};
    }
};

}  // namespace conv1x1
}  // namespace fallback
}  // namespace megdnn
@@ -0,0 +1,214 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
 */

#include <unordered_map>

#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"

#include "midout.h"

MIDOUT_DECL(megdnn_fallback_conv1x1_factory_strategy)

namespace megdnn {
namespace fallback {
namespace conv1x1 {
namespace {

struct StrategyHashParam {
    ConvBiasImpl::NCBKernSizeParam param;
    param::ConvBias::Format format;
    MatrixMulImpl::AlgoBase::PackMode packmode;
};

struct StrategyHashParamHash {
    std::size_t operator()(const StrategyHashParam& sparam) const {
        constexpr size_t base = 1;  //! keep the hash key from being zero
        std::size_t result =
                static_cast<std::size_t>(sparam.param.src_type.enumv()) + base;
        result = result ^
                 ((static_cast<std::size_t>(sparam.param.dst_type.enumv()) +
                   base)
                  << 3);
        result = result ^
                 ((static_cast<std::size_t>(sparam.param.filter_type.enumv()) +
                   base)
                  << 6);
        result = result ^
                 ((static_cast<std::size_t>(sparam.param.bias_type.enumv()) +
                   base)
                  << 9);
        result = result ^
                 ((static_cast<std::size_t>(sparam.format) + base) << 12);
        result = result ^
                 ((static_cast<std::size_t>(sparam.packmode) + base) << 15);
        return result;
    };
};

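//! The hash only needs to separate the few dtype/format/packmode
//! combinations that coexist in one process; shifting each field before the
//! XOR keeps them from cancelling out, and a rare collision merely costs an
//! extra run of the equality functor below.
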
struct StrategyHashParamEqual {
    bool operator()(const StrategyHashParam& param1,
                    const StrategyHashParam& param2) const {
        bool flags = true;
        flags = param1.param.src_type == param2.param.src_type && flags;
        flags = param1.param.filter_type == param2.param.filter_type && flags;
        flags = param1.param.bias_type == param2.param.bias_type && flags;
        flags = param1.param.dst_type == param2.param.dst_type && flags;
        flags = param1.format == param2.format && flags;
        flags = param1.packmode == param2.packmode && flags;
        return flags;
    };
};

std::unique_ptr<Conv1x1StrategyBase> create_conv1x1_strategy(
        const ConvBiasImpl::NCBKernSizeParam& param,
        MatrixMulImpl::AlgoBase::PackMode pack_mode,
        param::ConvBias::Format format) {
    MEGDNN_MARK_USED_VAR(format);
#define cb1(_packmode, _dt, _post_ctype, _postprocess_mode, _midout_tag)     \
    MIDOUT_BEGIN(megdnn_fallback_conv1x1_factory_strategy,                   \
                 midout_iv(_midout_tag)) {                                   \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {           \
            return std::make_unique<                                         \
                    Conv1x1Strategy<_dt, _dt, _dt, _post_ctype, _post_ctype, \
                                    _postprocess_mode, _packmode>>();        \
        }                                                                    \
    }                                                                        \
    MIDOUT_END()

#define cb2(_packmode, _i_src_type, _i_bias_type, _i_dst_type, _src_ctype, \
            _bias_ctype, _dst_ctype, _postprocess_mode, _midout_tag)       \
    MIDOUT_BEGIN(megdnn_fallback_conv1x1_factory_strategy,                 \
                 midout_iv(_midout_tag)) {                                 \
        if (param.filter_type.enumv() == param.src_type.enumv() &&         \
            param.src_type.enumv() == DTypeTrait<_i_src_type>::enumv &&    \
            param.dst_type.enumv() == DTypeTrait<_i_dst_type>::enumv) {    \
            return std::make_unique<                                       \
                    Conv1x1Strategy<_src_ctype, _bias_ctype, _dst_ctype,   \
                                    DTypeTrait<_i_bias_type>::ctype,       \
                                    DTypeTrait<_i_dst_type>::ctype,        \
                                    _postprocess_mode, _packmode>>();      \
        }                                                                  \
    }                                                                      \
    MIDOUT_END()

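    //! cb1 instantiates a strategy whose src/bias/dst share a single ctype;
    //! cb2 covers the mixed-dtype cases (e.g. int8 x int8 -> int32). Each
    //! branch sits in a MIDOUT_BEGIN/END region so that unused
    //! instantiations can be traced and stripped from the final binary.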
    switch (pack_mode) {
        case MatrixMulImpl::AlgoBase::PackMode::DEFAULT:
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "Default::FLOAT"_hash);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float16, __fp16,
                PostprocessMode::FLOAT, "Default::FLOAT16_FP16"_hash);
#else
#if !MEGDNN_DISABLE_FLOAT16
            cb1(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_float16,
                dt_float16, PostprocessMode::NO_PROCESS,
                "Default::FLOAT16_FLOAT16"_hash);
#endif
#endif
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_int8, dt_int32,
                dt_int32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS, "Default::INT8x8x32_INT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dt_int8, dt_int16,
                dt_int16, dt_int8, dt_int16, dt_int16,
                PostprocessMode::NO_PROCESS, "Default::INT8x8x16_INT16"_hash);
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::Quantized8Asymm, dtype::QuantizedS32,
                dtype::QuantizedS32, dt_uint8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS,
                "Default::QUINT8x8x32_QINT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT,
                dtype::Quantized8Asymm, dtype::QuantizedS32,
                dtype::Quantized8Asymm, dt_uint8, dt_int32, dt_uint8,
                PostprocessMode::QUANTIZED, "Default::QUINT8x8x32_QUINT8"_hash);
#endif
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dtype::QuantizedS8,
                dtype::QuantizedS32, dtype::QuantizedS32, dt_int8, dt_int32,
                dt_int32, PostprocessMode::NO_PROCESS,
                "Default::QINT8x8x32_QINT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::DEFAULT, dtype::QuantizedS8,
                dtype::QuantizedS32, dtype::QuantizedS8, dt_int8, dt_int32,
                dt_int8, PostprocessMode::QUANTIZED,
                "Default::QINT8x8x32_QINT8"_hash);
            break;

        case MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA:
            cb1(MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "OnlyPackA::FLOAT"_hash);
            break;

        case MatrixMulImpl::AlgoBase::PackMode::NO_PACK:
            cb1(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_float32,
                dt_float32, PostprocessMode::FLOAT, "NoPack::FLOAT"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_int8, dt_int16,
                dt_int16, dt_int8, dt_int16, dt_int16,
                PostprocessMode::NO_PROCESS, "NoPack::INT8x8x16_INT16"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dt_int8, dt_int32,
                dt_int32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS, "NoPack::INT8x8x32_INT32"_hash);
            cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK,
                dtype::QuantizedS8, dtype::QuantizedS32,
                dtype::QuantizedS32, dt_int8, dt_int32, dt_int32,
                PostprocessMode::NO_PROCESS,
                "NoPack::QINT8x8x32_QINT32"_hash);
            break;

        default:
            megdnn_throw("Invalid Pack Mode");
            break;
    }
#undef cb1
#undef cb2
    megdnn_throw("Invalid Data Type");
    return nullptr;
}

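//! A strategy depends only on dtypes, format and pack mode (not on tensor
//! shapes), so one instance per key is cached and shared; the lookup is
//! mutex-guarded because dispatch may run from several threads.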
class StrategyDelegationStorage {
public:
    Conv1x1StrategyBase* get(const ConvBiasImpl::NCBKernSizeParam& param,
                             MatrixMulImpl::AlgoBase::PackMode pack_mode,
                             param::ConvBias::Format format) {
        MEGDNN_LOCK_GUARD(m_mtx);
        StrategyHashParam sparam;
        sparam.param = param;
        sparam.format = format;
        sparam.packmode = pack_mode;
        if (m_map_strategies.find(sparam) == m_map_strategies.end()) {
            auto strategy = create_conv1x1_strategy(param, pack_mode, format);
            m_map_strategies[sparam] = std::move(strategy);
        }
        return m_map_strategies[sparam].get();
    }

private:
    std::mutex m_mtx;
    std::unordered_map<StrategyHashParam, std::unique_ptr<Conv1x1StrategyBase>,
                       StrategyHashParamHash, StrategyHashParamEqual>
            m_map_strategies;
};

}  // anonymous namespace

Conv1x1StrategyBase* Conv1x1Factory::make_conv1x1_strategy(
        const ConvBiasImpl::NCBKernSizeParam& param,
        MatrixMulImpl::AlgoBase::PackMode pack_mode,
        param::ConvBias::Format format) {
    static StrategyDelegationStorage storage;
    return storage.get(param, pack_mode, format);
}

}  // namespace conv1x1
}  // namespace fallback
}  // namespace megdnn
@@ -0,0 +1,310 @@
/**
 * \file dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
 */

#pragma once

#include "megdnn/opr_param_defs.h"
#include "src/fallback/conv_bias/opr_impl.h"

#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#endif

namespace megdnn {
namespace fallback {
namespace conv1x1 {

#if MEGDNN_X86
using namespace x86;
#endif

namespace {

//! get_matmul_kern_param
MatrixMulImpl::KernSizeParam get_matmul_kern_param(
        const ConvBiasImpl::NCBKernSizeParam& param, size_t n, size_t m) {
    size_t M = m;
    size_t N = n;
    size_t K = param.filter_meta.icpg;  //! K = IC
    size_t LDA = K, LDB = N, LDC = N;
    bool is_dst_8bit = (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                        param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                       (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                        param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
    return {param.filter_type,
            param.src_type,
            is_dst_8bit ? param.bias_type : param.dst_type,
            M,
            N,
            K,
            LDA,
            LDB,
            LDC,
            false,
            false,
            param::MatrixMul::ComputeMode::DEFAULT,
            param::MatrixMul::Format::DEFAULT};
}

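//! A 1x1 convolution on NCHW is a plain GEMM per (batch, group):
//!   dst(M = oc_tile, N = OH * OW) = filter(M x K = IC) * src(K x N),
//! e.g. IC = 64, oc_tile_size = 24, OH * OW = 196 gives M = 24, K = 64,
//! N = 196. For 8-bit dst the GEMM first writes the wider bias_type
//! (e.g. int32) and requantization happens later in PostProcess.
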
}  // namespace

class Conv1x1StrategyBase {
public:
    virtual void packA(WorkspaceBundle& whole_bundle,
                       WorkspaceBundle& matmul_bundle,
                       size_t oc_tile_size,
                       const MatrixMulImpl::AlgoBase* matmul_algo,
                       const ConvBiasImpl::NCBKernSizeParam& param,
                       const ConvBiasImpl::NCBKernParam& ncb_param,
                       const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    virtual void packB(WorkspaceBundle& whole_bundle,
                       WorkspaceBundle& matmul_bundle,
                       const MatrixMulImpl::AlgoBase* matmul_algo,
                       const ConvBiasImpl::NCBKernSizeParam& param,
                       const ConvBiasImpl::NCBKernParam& ncb_param,
                       const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    virtual void exec(WorkspaceBundle& whole_bundle,
                      WorkspaceBundle& matmul_bundle,
                      WorkspaceBundle& thread_bundle,
                      size_t oc_tile_size,
                      const MatrixMulImpl::AlgoBase* matmul_algo,
                      const ConvBiasImpl::NCBKernSizeParam& param,
                      const ConvBiasImpl::NCBKernParam& ncb_param,
                      const ConvBiasImpl::NCBKernIndex& ncb_index) = 0;

    virtual ~Conv1x1StrategyBase() = default;
};

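//! src_ctype/bias_ctype/dst_ctype are the storage types; op_ctype/op_dtype
//! parameterize PostProcess; pack_mode decides which kernels do real work:
//! NO_PACK skips both packA and packB, ONLY_PACKA skips packB.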
template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode,
          MatrixMulImpl::AlgoBase::PackMode pack_mode>
class Conv1x1Strategy : public Conv1x1StrategyBase {
public:
    void packA(WorkspaceBundle& whole_bundle,
               WorkspaceBundle& matmul_bundle,
               size_t oc_tile_size,
               const MatrixMulImpl::AlgoBase* matmul_algo,
               const ConvBiasImpl::NCBKernSizeParam& param,
               const ConvBiasImpl::NCBKernParam& ncb_param,
               const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
            megdnn_log_error("NoPack mode has no packA kernel");
            return;
        }
        whole_bundle.set(ncb_param.workspace_ptr);

        //! packa size per group
        size_t OC = param.filter_meta.ocpg;
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t packa_bytes_per_group =
                oc_tiles_per_group * packa_bytes_per_oc_tile;

        size_t group_id = ncb_index.ndrange_id[0];
        size_t oc_tile_id_in_group = ncb_index.ndrange_id[1];
        size_t oc_start = oc_tile_id_in_group * oc_tile_size;
        size_t oc_end = oc_start + oc_tile_size;
        oc_end = (oc_end <= OC ? oc_end : OC);

        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
        size_t IC = param.filter_meta.icpg;

        MatrixMulImpl::KernParam matmul_kern_param;
        static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                get_matmul_kern_param(param, OH * OW, oc_end - oc_start);

        size_t bytes_offset_of_a_panel =
                group_id * packa_bytes_per_group +
                oc_tile_id_in_group * packa_bytes_per_oc_tile;
        size_t numbers_offset_of_filter =
                oc_tile_size * IC * oc_tile_id_in_group;

        src_ctype* a_panel = reinterpret_cast<src_ctype*>(
                reinterpret_cast<int8_t*>(whole_bundle.get(0)) +
                bytes_offset_of_a_panel);
        matmul_kern_param.A_ptr = const_cast<src_ctype*>(
                ncb_param.filter<src_ctype>(group_id) +
                numbers_offset_of_filter);
        matmul_algo->pack_A(matmul_kern_param, a_panel, 0, oc_end - oc_start);
    }

    void packB(WorkspaceBundle& whole_bundle,
               WorkspaceBundle& matmul_bundle,
               const MatrixMulImpl::AlgoBase* matmul_algo,
               const ConvBiasImpl::NCBKernSizeParam& param,
               const ConvBiasImpl::NCBKernParam& ncb_param,
               const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) {
            whole_bundle.set(ncb_param.workspace_ptr);

            //! packb size per group
            size_t packb_bytes_per_group = matmul_bundle.get_size(1);

            size_t GROUP = param.filter_meta.group;
            size_t BATCH = param.n;
            size_t SH = param.filter_meta.stride[0];
            size_t SW = param.filter_meta.stride[1];
            size_t OH = param.osz[0];
            size_t OW = param.osz[1];
            size_t OC = param.filter_meta.ocpg;

            MatrixMulImpl::KernParam matmul_kern_param;
            static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                    get_matmul_kern_param(param, OH * OW, OC);

            rep(batch, BATCH) {
                rep(g, GROUP) {
                    if (SH == 2 && SW == 2)
                        megdnn_throw("no support for stride = 2");
                    size_t bytes_offset_of_b_panel =
                            batch * packb_bytes_per_group * GROUP +
                            g * packb_bytes_per_group;
                    src_ctype* b_panel = reinterpret_cast<src_ctype*>(
                            reinterpret_cast<int8_t*>(whole_bundle.get(1)) +
                            bytes_offset_of_b_panel);
                    matmul_kern_param.B_ptr = const_cast<src_ctype*>(
                            ncb_param.src<src_ctype>(batch, g));
                    matmul_algo->pack_B(matmul_kern_param, b_panel, 0, OH * OW);
                }
            }
        } else {
megdnn_log_error("OnlyPackA mode and NoPack mode has no packB kernel"); | |||
        }
    }

    void exec(WorkspaceBundle& whole_bundle,
              WorkspaceBundle& matmul_bundle,
              WorkspaceBundle& thread_bundle,
              size_t oc_tile_size,
              const MatrixMulImpl::AlgoBase* matmul_algo,
              const ConvBiasImpl::NCBKernSizeParam& param,
              const ConvBiasImpl::NCBKernParam& ncb_param,
              const ConvBiasImpl::NCBKernIndex& ncb_index) override {
        whole_bundle.set(ncb_param.workspace_ptr);
        size_t OC = param.filter_meta.ocpg;
        size_t IC = param.filter_meta.icpg;

        //! packa bytes per group
        size_t oc_tiles_per_group = div_ceil(OC, oc_tile_size);
        size_t packa_bytes_per_oc_tile = matmul_bundle.get_size(0);
        size_t packa_bytes_per_group =
                packa_bytes_per_oc_tile * oc_tiles_per_group;

        //! packb bytes per group
        size_t packb_bytes_per_group = matmul_bundle.get_size(1);

        //! matmul bytes per thread
        size_t matmul_bytes_per_thread = thread_bundle.get_size(0);

        size_t batch_id = ncb_index.ndrange_id[0];
        size_t group_id = ncb_index.ndrange_id[1];
        size_t oc_tile_id_in_group = ncb_index.ndrange_id[2];
        size_t thread_id = ncb_index.thread_id;

        size_t GROUP = param.filter_meta.group;
        size_t OH = param.osz[0];
        size_t OW = param.osz[1];

        size_t oc_start = oc_tile_size * oc_tile_id_in_group;
        size_t oc_end = oc_start + oc_tile_size;
        oc_end = (oc_end <= OC ? oc_end : OC);

        MatrixMulImpl::KernParam matmul_kern_param;
        static_cast<MatrixMulImpl::KernSizeParam&>(matmul_kern_param) =
                get_matmul_kern_param(param, OH * OW, oc_end - oc_start);

        size_t bytes_offset_of_a_panel =
                group_id * packa_bytes_per_group +
                oc_tile_id_in_group * packa_bytes_per_oc_tile;
        int8_t* a_panel = reinterpret_cast<int8_t*>(whole_bundle.get(0)) +
                          bytes_offset_of_a_panel;

        size_t bytes_offset_of_b_panel =
                batch_id * packb_bytes_per_group * GROUP +
                group_id * packb_bytes_per_group;
        int8_t* b_panel = reinterpret_cast<int8_t*>(whole_bundle.get(1)) +
                          bytes_offset_of_b_panel;

        size_t thread_offset = thread_bundle.total_size_in_bytes() * thread_id;
        size_t bytes_offset_of_matmul_dst_this_thread =
                thread_offset + thread_bundle.get_size(0);
        int8_t* matmul_temp_dst =
                reinterpret_cast<int8_t*>(whole_bundle.get(2)) +
                bytes_offset_of_matmul_dst_this_thread;

        size_t numbers_of_ncb_dst_offset =
                oc_tile_size * OH * OW * oc_tile_id_in_group;
        void* conv_bias_dst = static_cast<void*>(
                ncb_param.dst<dst_ctype>(batch_id, group_id) +
                numbers_of_ncb_dst_offset);

        size_t numbers_of_ncb_filter_offset =
                oc_tile_size * IC * oc_tile_id_in_group;
        matmul_kern_param.A_ptr = const_cast<src_ctype*>(
                ncb_param.filter<src_ctype>(group_id) +
                numbers_of_ncb_filter_offset);
        matmul_kern_param.B_ptr = const_cast<src_ctype*>(
                ncb_param.src<src_ctype>(batch_id, group_id));
        matmul_kern_param.workspace_ptr =
                reinterpret_cast<int8_t*>(whole_bundle.get(2)) + thread_offset;
        matmul_kern_param.workspace_size = matmul_bytes_per_thread;

        bool is_dst_8bit =
                (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
                 param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
                (param.src_type.enumv() == DTypeEnum::Quantized8Asymm &&
                 param.dst_type.enumv() == DTypeEnum::Quantized8Asymm);
        void* matmul_dst = is_dst_8bit ? matmul_temp_dst : conv_bias_dst;
        matmul_kern_param.C_ptr = matmul_dst;

        if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK) {
            auto matmul_kern = matmul_algo->get_kern(matmul_kern_param);
            matmul_kern(matmul_kern_param);
        } else {
            auto matmul_kern_naked =
                    matmul_algo->get_kern_naked(matmul_kern_param);
            matmul_kern_naked(matmul_kern_param, a_panel, b_panel);
        }

        //! do postprocess
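        //! BiasMode::BIAS carries a full per-element tensor, so it is offset
        //! like the dst; a broadcast-channel bias only needs the oc_start
        //! offset.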
        void* bias_ptr = nullptr;
        if (param.bias_mode == megdnn::BiasMode::BIAS)
            bias_ptr = static_cast<void*>(const_cast<bias_ctype*>(
                    ncb_param.bias<bias_ctype>(batch_id, group_id) +
                    numbers_of_ncb_dst_offset));
        else
            bias_ptr = static_cast<void*>(const_cast<bias_ctype*>(
                    ncb_param.bias<bias_ctype>(batch_id, group_id) + oc_start));

        PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
                matmul_dst, bias_ptr, conv_bias_dst, param.bias_mode,
                param.nonlineMode, param.bias_type, param.dst_type, 1_z,
                oc_end - oc_start, OH, OW);
    }
};

class Conv1x1Factory {
public:
    static Conv1x1StrategyBase* make_conv1x1_strategy(
            const ConvBiasImpl::NCBKernSizeParam& param,
            MatrixMulImpl::AlgoBase::PackMode pack_mode,
            param::ConvBias::Format format);
};

}  // namespace conv1x1
}  // namespace fallback
}  // namespace megdnn
@@ -15,6 +15,7 @@
#include "src/common/opr_delegate.h"
#include "src/common/utils.h"
#include "src/fallback/conv_bias/algos.h"
#include "src/fallback/conv_bias/conv1x1/algos.h"
#include "src/fallback/conv_bias/im2col/algos.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/naive/convolution/algorithms.h"

@@ -54,7 +55,13 @@ public:
                        ohw_tile_size));
                all_algos.emplace_back(refhold.back().get());
            }
#if 1
            for (size_t oc_tile_size : {24, 48}) {
                refhold.emplace_back(new AlgoConv1x1(
                        static_cast<MatrixMulImpl::AlgoBase*>(algo),
                        oc_tile_size));
                all_algos.emplace_back(refhold.back().get());
            }
#if 0
            //! As these algos may be very slow, they would make fastrun
            //! search slow, so we disable them; they are kept only for the
            //! strategyhelper tests.
            //! FIXME: find a better way to handle this.
@@ -248,6 +248,7 @@ protected:
private:
    class AlgoNaive;
    class AlgoIm2col;
    class AlgoConv1x1;
    class AlgoWinogradF32;
    class AlgoWinogradF32_4x4;
    class AlgoWinogradQS8;

@@ -438,7 +438,6 @@ size_t MatrixMulImpl::AlgoInt8x8x32SSEM4N8K2::get_workspace(
                   m, n, k, trans_a, trans_b, strategy, cacheline)
            .get_workspace_size();
}

MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL(
        AlgoInt8x8x32SSEM4N8K2, megdnn_x86_matmul_kern, 9,
        x86::matmul::gemm_sse_s8s8s32_4x8x2, dt_int8, dt_int32, dt_int16);

@@ -875,6 +875,82 @@ std::vector<conv_bias::TestArg> get_conv_bias_args(
    return args;
}

std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
        bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                    size_t stride, NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        param.mode = convmode;
        param.nonlineMode = nlmode;

        args.emplace_back(param, TensorShape{n, ic, h, w},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, ic, h, w},
                              TensorShape{oc, ic, 1, 1},
                              TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, ic, h, w},
                                  TensorShape{oc, ic, 1, 1},
                                  TensorShape{n, oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                          TensorShape{2, oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                              TensorShape{2, oc, ic, 1, 1},
                              TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                                  TensorShape{2, oc, ic, 1, 1},
                                  TensorShape{n, 2 * oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
    };

    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }

    std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
                                    param::ConvBias::Mode::CROSS_CORRELATION};

    for (size_t n : {1, 2})
        for (size_t oc : {1, 9, 33})
            for (size_t ic : {1, 16, 64})
                for (size_t size : {7, 14, 28})
                    for (auto nlmode : nonlinemode)
                        for (auto convmode : convmodes) {
                            pack(n, oc, ic, size, size, 1, nlmode, convmode);
                        }
    return args;
}

void check_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
                     const char* algo_name) {
    using namespace conv_bias;

@@ -76,6 +76,10 @@ std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_args(
        bool no_nonlinemode, bool quantized_nlmod = false,
        bool only_broadcast_bias = false);

std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod = false,
        bool only_broadcast_bias = false);

void check_conv_bias(std::vector<megdnn::test::conv_bias::TestArg> args,
                     megdnn::Handle* handle, const char* algo_name);

@@ -919,6 +919,79 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) {
#undef cb
}

/**************************** Conv1x1 PackA *************************/
namespace {
void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
                       RNG* rng, float epsilon, DType type0, DType type1,
                       DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;

    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
}  // namespace

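//! checker_conv_bias pins the src/filter/bias/dst dtypes and an epsilon
//! explicitly, which the int8x8x32 cases below need; the float cases go
//! through the generic check_conv_bias.
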
#if MEGDNN_X86_WITH_MKL
TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_FP32_PACKA) {
    using namespace conv_bias;
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(false, false);
    check_conv_bias(args, handle(), "CONV1x1:X86_F32_MKL_PACKA:24");
}

TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_FP32_BLAS) {
    using namespace conv_bias;
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(false, false);
    check_conv_bias(args, handle(), "CONV1x1:X86_F32_BLAS:48");
}
#endif

TEST_F(X86_MULTI_THREADS, CONV_BIAS_CONV1X1_S1_INT8X8X32) {
    using namespace conv_bias;
    UniformIntRNG rng{-50, 50};
    float epsilon = 0.001;
    std::vector<conv_bias::TestArg> args = get_conv_bias_1x1_args(true, true);
#if MEGDNN_X86_WITH_MKL_DNN
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_MKLDNN:24");
    }
#endif
#if MEGDNN_X86_WITH_VNNI
    if (x86::is_supported(x86::SIMDType::VNNI)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_VNNI:24");
    }
#endif
    if (x86::is_supported(x86::SIMDType::AVX2)) {
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_AVX2_4X16X2:24");
        checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                          dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                          "CONV1x1:X86_INT8X8X32_AVX2_2X4X16:24");
    }
    checker_conv_bias(args, handle(), &rng, epsilon, dtype::Int8{},
                      dtype::Int8{}, dtype::Int32{}, dtype::Int32{},
                      "CONV1x1:X86_INT8X8X32_SSE_4X8X2:48");
}
/************************* End Conv1x1 PackA ************************/
#endif

TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) {