GitOrigin-RevId: d946e22243
tags/v0.5.0
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include "src/fallback/conv_bias/conv1x1/algos.h" | #include "src/fallback/conv_bias/conv1x1/algos.h" | ||||
@@ -67,7 +68,8 @@ size_t ConvBiasImpl::AlgoConv1x1::get_workspace( | |||||
MIDOUT_END(); | MIDOUT_END(); | ||||
} else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) { | } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) { | ||||
MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 1) { | MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 0, 1) { | ||||
Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA> dispatcher; | |||||
Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA> | |||||
dispatcher; | |||||
return dispatcher | return dispatcher | ||||
.get_bundle(param, matmul_param, m_matmul_algo, | .get_bundle(param, matmul_param, m_matmul_algo, | ||||
compt_oc_block_size) | compt_oc_block_size) | ||||
@@ -116,7 +118,8 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | |||||
MIDOUT_END(); | MIDOUT_END(); | ||||
} else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) { | } else if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) { | ||||
MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 1) { | MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 1, 1) { | ||||
Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA> dispatcher; | |||||
Conv1x1Kerns<MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA> | |||||
dispatcher; | |||||
whole_bundle = dispatcher.get_bundle( | whole_bundle = dispatcher.get_bundle( | ||||
param, matmul_param, m_matmul_algo, compt_oc_block_size); | param, matmul_param, m_matmul_algo, compt_oc_block_size); | ||||
matmul_bundle = m_matmul_algo->get_bundle(matmul_param); | matmul_bundle = m_matmul_algo->get_bundle(matmul_param); | ||||
@@ -140,7 +143,7 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | |||||
Conv1x1StrategyBase* conv1x1_strategy = | Conv1x1StrategyBase* conv1x1_strategy = | ||||
Conv1x1Factory::make_conv1x1_strategy(param, pack_mode, | Conv1x1Factory::make_conv1x1_strategy(param, pack_mode, | ||||
opr->param().format); | |||||
opr->param().format); | |||||
auto kern_packA = [this, whole_bundle, matmul_bundle, param, | auto kern_packA = [this, whole_bundle, matmul_bundle, param, | ||||
compt_oc_block_size, conv1x1_strategy]( | compt_oc_block_size, conv1x1_strategy]( | ||||
@@ -171,8 +174,8 @@ SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns( | |||||
pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) { | pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) { | ||||
ret_kern.push_back({kern_packA, {GROUP, oc_blocks_per_group}}); | ret_kern.push_back({kern_packA, {GROUP, oc_blocks_per_group}}); | ||||
if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) { | if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) { | ||||
ret_kern.push_back({kern_packB, {1}}); | |||||
} | |||||
ret_kern.push_back({kern_packB, {1}}); | |||||
} | |||||
} | } | ||||
ret_kern.push_back({kern_compt, {BATCH, GROUP, oc_blocks_per_group}}); | ret_kern.push_back({kern_compt, {BATCH, GROUP, oc_blocks_per_group}}); | ||||
@@ -230,7 +233,11 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, | |||||
param, OH * OW, get_oc_tile_size_heuristic(param)); | param, OH * OW, get_oc_tile_size_heuristic(param)); | ||||
bool matmul_usable = m_matmul_algo->usable(matmul_param); | bool matmul_usable = m_matmul_algo->usable(matmul_param); | ||||
return matmul_usable && | |||||
auto pack_mode = m_matmul_algo->packmode(); | |||||
bool strategy_usable = Conv1x1Factory::can_make_conv1x1_strategy( | |||||
param, pack_mode, opr->param().format); | |||||
return matmul_usable && strategy_usable && | |||||
(param.filter_meta.dilation[0] == | (param.filter_meta.dilation[0] == | ||||
param.filter_meta.dilation[1] && | param.filter_meta.dilation[1] && | ||||
param.filter_meta.dilation[0] == 1) && | param.filter_meta.dilation[0] == 1) && | ||||
@@ -6,11 +6,12 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include <unordered_map> | |||||
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h" | #include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h" | ||||
#include <unordered_map> | |||||
#include "midout.h" | #include "midout.h" | ||||
@@ -157,10 +158,9 @@ std::unique_ptr<Conv1x1StrategyBase> create_conv1x1_strategy( | |||||
dt_int32, dt_int8, dt_int32, dt_int32, | dt_int32, dt_int8, dt_int32, dt_int32, | ||||
PostprocessMode::NO_PROCESS, "NoPack::INT8x8x32_INT32"_hash); | PostprocessMode::NO_PROCESS, "NoPack::INT8x8x32_INT32"_hash); | ||||
cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, | |||||
dtype::QuantizedS8, dtype::QuantizedS32, | |||||
dtype::QuantizedS32, dt_int8, dt_int32, dt_int32, | |||||
PostprocessMode::NO_PROCESS, | |||||
cb2(MatrixMulImpl::AlgoBase::PackMode::NO_PACK, dtype::QuantizedS8, | |||||
dtype::QuantizedS32, dtype::QuantizedS32, dt_int8, dt_int32, | |||||
dt_int32, PostprocessMode::NO_PROCESS, | |||||
"NoPack::QINT8x8x32_QINT32"_hash); | "NoPack::QINT8x8x32_QINT32"_hash); | ||||
break; | break; | ||||
@@ -208,6 +208,19 @@ Conv1x1StrategyBase* Conv1x1Factory::make_conv1x1_strategy( | |||||
return storage.get(param, pack_mode, format); | return storage.get(param, pack_mode, format); | ||||
} | } | ||||
bool Conv1x1Factory::can_make_conv1x1_strategy( | |||||
const ConvBiasImpl::NCBKernSizeParam& param, | |||||
MatrixMulImpl::AlgoBase::PackMode pack_mode, param::ConvBias::Format) { | |||||
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC || !MEGDNN_DISABLE_FLOAT16 | |||||
if ((pack_mode == MatrixMulImpl::AlgoBase::PackMode::NO_PACK || | |||||
pack_mode == MatrixMulImpl::AlgoBase::PackMode::ONLY_PACKA) && | |||||
param.src_type.enumv() == DTypeTrait<dt_float16>::enumv) { | |||||
return false; | |||||
} | |||||
#endif | |||||
return true; | |||||
} | |||||
} // namespace conv1x1 | } // namespace conv1x1 | ||||
} // namespace fallback | } // namespace fallback | ||||
} // namespace megdnn | } // namespace megdnn |
@@ -320,6 +320,11 @@ public: | |||||
const ConvBiasImpl::NCBKernSizeParam& param, | const ConvBiasImpl::NCBKernSizeParam& param, | ||||
MatrixMulImpl::AlgoBase::PackMode pack_mode, | MatrixMulImpl::AlgoBase::PackMode pack_mode, | ||||
param::ConvBias::Format format); | param::ConvBias::Format format); | ||||
static bool can_make_conv1x1_strategy( | |||||
const ConvBiasImpl::NCBKernSizeParam& param, | |||||
MatrixMulImpl::AlgoBase::PackMode pack_mode, | |||||
param::ConvBias::Format format); | |||||
}; | }; | ||||
} // namespace conv1x1 | } // namespace conv1x1 | ||||
@@ -27,7 +27,7 @@ using namespace megdnn; | |||||
using namespace fallback; | using namespace fallback; | ||||
size_t megdnn::fallback::get_format_pack_size(param::ConvBias::Format format) { | size_t megdnn::fallback::get_format_pack_size(param::ConvBias::Format format) { | ||||
switch(format){ | |||||
switch (format) { | |||||
case param::ConvBias::Format::NCHW44: | case param::ConvBias::Format::NCHW44: | ||||
case param::ConvBias::Format::NCHW4: | case param::ConvBias::Format::NCHW4: | ||||
return 4_z; | return 4_z; | ||||
@@ -57,10 +57,18 @@ public: | |||||
auto&& matmul_algos = | auto&& matmul_algos = | ||||
static_cast<fallback::MatrixMulImpl*>(matmul_opr)->algo_pack(); | static_cast<fallback::MatrixMulImpl*>(matmul_opr)->algo_pack(); | ||||
for (auto&& algo : matmul_algos) { | for (auto&& algo : matmul_algos) { | ||||
#if MEGDNN_X86 | |||||
//! There is no direct conv for int8x8x16 yet, so disabling gemv here could | |||||
//! fall back to the naive implementation, which performs very poorly; | |||||
//! therefore im2col is kept enabled for gemv on the x86 backend. | |||||
//! FIXME: remove this once direct conv support for int8x8x16 is added. | |||||
#else | |||||
if (algo->algoset() == | if (algo->algoset() == | ||||
MatrixMulImpl::AlgoBase::AlgoSet::ALGO_TYPE_GEMV) { | MatrixMulImpl::AlgoBase::AlgoSet::ALGO_TYPE_GEMV) { | ||||
continue; | continue; | ||||
} | } | ||||
#endif | |||||
for (size_t ohw_tile_size : {192, 384, 96, 48, 24}) { | for (size_t ohw_tile_size : {192, 384, 96, 48, 24}) { | ||||
refhold.emplace_back(new AlgoIm2col( | refhold.emplace_back(new AlgoIm2col( | ||||
static_cast<MatrixMulImpl::AlgoBase*>(algo), | static_cast<MatrixMulImpl::AlgoBase*>(algo), | ||||
@@ -6,7 +6,8 @@ | |||||
* | * | ||||
* Unless required by applicable law or agreed to in writing, | * Unless required by applicable law or agreed to in writing, | ||||
* software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | */ | ||||
#include "test/fallback/fixture.h" | #include "test/fallback/fixture.h" | ||||
@@ -73,24 +74,115 @@ TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) { | |||||
profile(3, 3, 112, 112, 3, 1); | profile(3, 3, 112, 112, 3, 1); | ||||
} | } | ||||
//! Benchmark the fallback Convolution with Int8 src/filter and Int32 dst.
//! For every profiled case the configuration and the measured throughput are
//! printed; nothing is asserted.
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8832) {
    using Param = Convolution::Param;
    //! Runs one case RUN times and prints the derived throughput.
    //! \p shapes holds {src, filter, dst}; callers below leave dst empty.
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker(handle());
        size_t RUN = 50;
        auto used = benchmarker.set_display(false)
                            .set_dtype(0, dtype::Int8{})
                            .set_dtype(1, dtype::Int8{})
                            .set_dtype(2, dtype::Int32{})
                            .set_times(RUN)
                            .set_param(param)
                            .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        //! Only the element count of dst_layout is consumed below.
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        //! NOTE(review): the factor 2 presumably counts one multiply and one
        //! add per filter tap -- confirm.
        printf("int8x8x32: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (used / RUN * 1000));
    };
    //! Builds the param for one configuration, prints it and benchmarks it.
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        //! The benchmark runs without padding.
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
//! Benchmark the fallback Convolution with Int8 src/filter and Int16 dst.
//! For every profiled case the configuration and the measured throughput are
//! printed; nothing is asserted.
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8816) {
    using Param = Convolution::Param;
    //! Runs one case RUN times and prints the derived throughput.
    //! \p shapes holds {src, filter, dst}; callers below leave dst empty.
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker(handle());
        size_t RUN = 50;
        auto used = benchmarker.set_display(false)
                            .set_dtype(0, dtype::Int8{})
                            .set_dtype(1, dtype::Int8{})
                            .set_dtype(2, dtype::Int16{})
                            .set_times(RUN)
                            .set_param(param)
                            .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        //! Only the element count of dst_layout is consumed below.
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        //! NOTE(review): the factor 2 presumably counts one multiply and one
        //! add per filter tap -- confirm.
        printf("int8x8x16: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (used / RUN * 1000));
    };
    //! Builds the param for one configuration, prints it and benchmarks it.
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        //! The benchmark runs without padding.
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(48, 128, 56, 88, 1, 2);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) { | TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) { | ||||
using Param = ConvolutionBackwardData::Param; | using Param = ConvolutionBackwardData::Param; | ||||
auto run = [&](const TensorLayoutArray& tensors, Param param) { | auto run = [&](const TensorLayoutArray& tensors, Param param) { | ||||
Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle()); | Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle()); | ||||
size_t RUN = 500; | size_t RUN = 500; | ||||
benchmarker_fallback.set_display(false) | benchmarker_fallback.set_display(false) | ||||
.set_dtype(0, dtype::Float32{}) | |||||
.set_dtype(1, dtype::Float32{}) | |||||
.set_times(RUN) | |||||
.set_param(param); | |||||
auto tmatmul = benchmarker_fallback.set_before_exec_callback( | |||||
AlgoChecker<ConvolutionBackwardData>( | |||||
"DeconvMatmul")) | |||||
.exec(tensors); | |||||
auto tdirect = benchmarker_fallback.set_before_exec_callback( | |||||
AlgoChecker<ConvolutionBackwardData>( | |||||
"DeconvDirect")) | |||||
.exec(tensors); | |||||
.set_dtype(0, dtype::Float32{}) | |||||
.set_dtype(1, dtype::Float32{}) | |||||
.set_times(RUN) | |||||
.set_param(param); | |||||
auto tmatmul = benchmarker_fallback | |||||
.set_before_exec_callback( | |||||
AlgoChecker<ConvolutionBackwardData>( | |||||
"DeconvMatmul")) | |||||
.exec(tensors); | |||||
auto tdirect = benchmarker_fallback | |||||
.set_before_exec_callback( | |||||
AlgoChecker<ConvolutionBackwardData>( | |||||
"DeconvDirect")) | |||||
.exec(tensors); | |||||
size_t IC = tensors[0][1]; | size_t IC = tensors[0][1]; | ||||
size_t FH = tensors[0][2]; | size_t FH = tensors[0][2]; | ||||
size_t FW = tensors[0][3]; | size_t FW = tensors[0][3]; | ||||
@@ -98,8 +190,8 @@ TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) { | |||||
printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect, | printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect, | ||||
total_flops / (tdirect / RUN * 1000)); | total_flops / (tdirect / RUN * 1000)); | ||||
printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul, | printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul, | ||||
total_flops / (tmatmul/ RUN * 1000)); | |||||
printf("speedup: %.3f\n", tdirect/tmatmul); | |||||
total_flops / (tmatmul / RUN * 1000)); | |||||
printf("speedup: %.3f\n", tdirect / tmatmul); | |||||
}; | }; | ||||
auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, | auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, | ||||
@@ -154,6 +246,51 @@ TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) { | |||||
run(1, 3, 3, 112, 112, 3, 1); | run(1, 3, 3, 112, 112, 3, 1); | ||||
run(1, 1, 1, 1, 1, 3, 3); | run(1, 1, 1, 1, 1, 3, 3); | ||||
} | } | ||||
#if MEGDNN_X86 | |||||
//! Check Convolution with Int8 src/filter and Int16 dst, restricted to
//! algorithms whose name matches ".+FB_GEMV.+". Each configuration is run as
//! a dense convolution, and additionally as a group convolution with
//! group == ic whenever ic == oc.
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_8816) {
    using Param = Convolution::Param;
    using Sparse = param::Convolution::Sparse;

    Checker<Convolution> checker(handle());
    checker.set_before_exec_callback(AlgoChecker<Convolution>(".+FB_GEMV.+"));

    //! Execute a single case; group > 1 selects the grouped filter layout.
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw, size_t pad, size_t stride,
                   size_t group) {
        const bool grouped = group > 1;
        Param param;
        param.sparse = grouped ? Sparse::GROUP : Sparse::DENSE;
        param.pad_h = param.pad_w = pad;
        param.stride_h = param.stride_w = stride;
        checker.set_param(param);

        if (!grouped) {
            checker.execl({{{n, ic, ih, iw}, dtype::Int8()},
                           {{oc, ic, fh, fw}, dtype::Int8()},
                           {{}, dtype::Int16()}});
            return;
        }
        checker.execl(
                {{{n, ic, ih, iw}, dtype::Int8()},
                 {{group, oc / group, ic / group, fh, fw}, dtype::Int8()},
                 {{}, dtype::Int16()}});
    };

    for (auto batch : {1, 2}) {
        for (auto in_ch : {3, 4, 8, 12, 16}) {
            for (auto out_ch : {4, 8, 16, 32}) {
                for (auto height : {7, 14, 15, 22}) {
                    for (auto width : {7, 13, 11, 32}) {
                        for (auto ksize : {1, 2, 3, 5, 7}) {
                            for (auto step : {1, 2}) {
                                for (auto padding : {0, ksize / 2}) {
                                    //! dense case
                                    run(batch, in_ch, height, width, out_ch,
                                        ksize, ksize, padding, step, 1);
                                    //! group case, one channel per group
                                    if (in_ch == out_ch) {
                                        run(batch, in_ch, height, width,
                                            out_ch, ksize, ksize, padding,
                                            step, in_ch);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
#endif | |||||
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) { | TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) { | ||||
Checker<Convolution> checker(handle()); | Checker<Convolution> checker(handle()); | ||||
using Param = Convolution::Param; | using Param = Convolution::Param; | ||||
@@ -222,7 +359,7 @@ TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) { | |||||
TensorShape src{n, ic, ih, iw}, | TensorShape src{n, ic, ih, iw}, | ||||
filter{group, oc / group, ic / group, fh, fw}; | filter{group, oc / group, ic / group, fh, fw}; | ||||
checker.set_param(param).set_dtype(2, {}); | checker.set_param(param).set_dtype(2, {}); | ||||
//!float32 | |||||
//! float32 | |||||
checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32()); | checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32()); | ||||
checker.execs({src, filter, {}}); | checker.execs({src, filter, {}}); | ||||
//! float16 | //! float16 | ||||
@@ -257,10 +394,10 @@ TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) { | |||||
param.pad_h = param.pad_w = 1; | param.pad_h = param.pad_w = 1; | ||||
param.stride_h = param.stride_w = 1; | param.stride_h = param.stride_w = 1; | ||||
checker.set_param(param) | checker.set_param(param) | ||||
.set_dtype(0, dtype::QuantizedS8(0.2f)) | |||||
.set_dtype(1, dtype::QuantizedS8(0.2f)) | |||||
.set_dtype(0, dtype::QuantizedS8(0.2f)) | |||||
.set_dtype(1, dtype::QuantizedS8(0.2f)) | |||||
// Use inferred output dtype. | // Use inferred output dtype. | ||||
.set_dtype(2, {}); | |||||
.set_dtype(2, {}); | |||||
checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}}); | checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}}); | ||||
}; | }; | ||||