testcase, which was caused by a timeout
GitOrigin-RevId: ffed9d7820
tags/v0.5.0
@@ -57,31 +57,44 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) { | |||
} | |||
} | |||
TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8) { | |||
using namespace conv_bias; | |||
std::vector<TestArg> args = get_quantized_args(); | |||
Checker<ConvBiasForward> checker(handle()); | |||
checker.set_before_exec_callback( | |||
conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL")); | |||
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ | |||
using namespace conv_bias; \ | |||
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ | |||
Checker<ConvBiasForward> checker(handle()); \ | |||
checker.set_before_exec_callback( \ | |||
conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL")); \ | |||
UniformIntRNG rng{0, 127}; \ | |||
for (auto&& arg : args) { \ | |||
if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1) \ | |||
continue; \ | |||
checker.set_dtype(0, dtype::Quantized8Asymm( \ | |||
2.5f, static_cast<uint8_t>(127))) \ | |||
.set_dtype(1, dtype::Quantized8Asymm( \ | |||
2.7f, static_cast<uint8_t>(126))) \ | |||
.set_dtype(2, dtype::QuantizedS32(6.75f)) \ | |||
.set_dtype(4, dtype::Quantized8Asymm( \ | |||
60.25f, static_cast<uint8_t>(125))) \ | |||
.set_rng(0, &rng) \ | |||
.set_rng(1, &rng) \ | |||
.set_rng(2, &rng) \ | |||
.set_param(arg.param) \ | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); \ | |||
} | |||
UniformIntRNG rng{0, 127}; | |||
for (auto&& arg : args) { | |||
if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1) | |||
continue; | |||
checker.set_dtype(0, dtype::Quantized8Asymm(2.5f, | |||
static_cast<uint8_t>(127))) | |||
.set_dtype(1, dtype::Quantized8Asymm(2.7f, | |||
static_cast<uint8_t>(126))) | |||
.set_dtype(2, dtype::QuantizedS32(6.75f)) | |||
.set_dtype(4, dtype::Quantized8Asymm(60.25f, | |||
static_cast<uint8_t>(125))) | |||
.set_rng(0, &rng) | |||
.set_rng(1, &rng) | |||
.set_rng(2, &rng) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
#define MODE_STR(mode) param::ConvBias::NonlineMode::mode | |||
#define CB_TEST(MODE) \ | |||
TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8_##MODE) { \ | |||
CONV_BIAS_MATMUL_QU8_MODE(MODE_STR(MODE)); \ | |||
} | |||
} | |||
CB_TEST(IDENTITY); | |||
CB_TEST(RELU); | |||
CB_TEST(H_SWISH); | |||
#undef MODE_STR | |||
#undef CB_TEST | |||
#undef CONV_BIAS_MATMUL_QU8_MODE | |||
#if MEGDNN_WITH_BENCHMARK | |||
@@ -450,7 +450,8 @@ std::vector<TestArg> convolution::get_dilated_args() { | |||
return args; | |||
} | |||
void convolution::test_conv_config_combinations(Handle* handle, bool test_int8, | |||
void convolution::test_conv_config_combinations(int k_size, | |||
Handle* handle, bool test_int8, | |||
bool test_backward, | |||
bool is_cuda, | |||
ConvEPSGetter eps_getter, | |||
@@ -484,7 +485,7 @@ void convolution::test_conv_config_combinations(Handle* handle, bool test_int8, | |||
CONF_BOOL(format) | |||
// dtype: 0: f32; 1: f16; 2: i8x8x16 3: i8x8x32 | |||
for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++ dtype) | |||
for (int ksize: {1, 2, 3, 5}) { | |||
for (int ksize: {1, k_size}) { | |||
// When is_cuda is on, test cases where format is NHWC and | |||
// data type is not INT8x8x32 are disabled. | |||
if (is_cuda) { | |||
@@ -55,7 +55,8 @@ using ConvEPSGetter = | |||
//! check for various conv configurations (dilation, group, stride, padding) | |||
//! and run all usable algorithms | |||
void test_conv_config_combinations( | |||
Handle* handle, bool test_int8, bool test_backward, bool is_cuda, | |||
int k_size, Handle* handle, bool test_int8, bool test_backward, | |||
bool is_cuda, | |||
ConvEPSGetter conv_eps_getter = [](bool f16, int, const char*) | |||
-> float { return f16 ? 1e-1 : 1e-3; }, | |||
bool use_io16xc32 = false); | |||
@@ -39,35 +39,44 @@ namespace megdnn { | |||
namespace test { | |||
namespace relayout { | |||
#define DEF_TEST(name) \ | |||
template<> \ | |||
void run_test<name>(Handle *handle) | |||
DEF_TEST(cv) { | |||
void run_test_cv(Handle* handle, size_t CH) { | |||
std::vector<TestArg> args; | |||
for (size_t M = 124; M <= 130; ++M) { | |||
for (size_t N = 124; N <= 130; ++N) { | |||
for (size_t CH : {1, 3, 5}) { | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(1, M, N, CH, dtype::Uint8())); | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(1, M, N, CH, dtype::Int32())); | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(1, M, N, CH, dtype::Float32())); | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(3, M, N, CH, dtype::Float32())); | |||
} | |||
} | |||
} | |||
Checker<Relayout> checker(handle); | |||
for (auto &&arg : args) { | |||
for (auto&& arg : args) { | |||
checker.execl({arg.src, arg.dst}); | |||
} | |||
} | |||
#define DEF_TEST(name) \ | |||
template<> \ | |||
void run_test<name>(Handle *handle) | |||
DEF_TEST(cv) { | |||
run_test_cv(handle, 1); | |||
} | |||
DEF_TEST(cv_ch3) { | |||
run_test_cv(handle, 3); | |||
} | |||
DEF_TEST(cv_ch5) { | |||
run_test_cv(handle, 5); | |||
} | |||
DEF_TEST(broadcast) { | |||
std::vector<TestArg> args; | |||
@@ -23,6 +23,8 @@ namespace relayout { | |||
#define FIRST_RELAYOUT_CASE cv | |||
#define FOREACH_RELAYOUT_NONFIRST_CASE(cb) \ | |||
cb(cv_ch3) \ | |||
cb(cv_ch5) \ | |||
cb(broadcast) \ | |||
cb(negative) \ | |||
cb(transpose) \ | |||
@@ -26,19 +26,43 @@ Convolution::Param gconv_param(Convolution::Param p) { | |||
} // anonymous namespace | |||
TEST_F(CPU, CONVOLUTION) | |||
{ | |||
#define CONVOLUTION_ARG_DIV_SIZE 230 | |||
TEST_F(CPU, CONVOLUTION_0) { | |||
using namespace convolution; | |||
std::vector<TestArg> args = get_args(); | |||
auto loop_size = args.size(); | |||
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE); | |||
Checker<Convolution> checker(handle()); | |||
for (auto &&arg: args) { | |||
checker.set_param(arg.param).execs({arg.src, arg.filter, {}}); | |||
for (unsigned int i = 0; i < CONVOLUTION_ARG_DIV_SIZE; i++) { | |||
checker.set_param(args[i].param) | |||
.execs({args[i].src, args[i].filter, {}}); | |||
} | |||
} | |||
TEST_F(CPU, CONV_CONFIG_COMBINATIONS) { | |||
convolution::test_conv_config_combinations(handle(), true, false, false); | |||
TEST_F(CPU, CONVOLUTION_1) { | |||
using namespace convolution; | |||
std::vector<TestArg> args = get_args(); | |||
auto loop_size = args.size(); | |||
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE); | |||
Checker<Convolution> checker(handle()); | |||
for (unsigned int i = CONVOLUTION_ARG_DIV_SIZE; i < loop_size; i++) { | |||
checker.set_param(args[i].param) | |||
.execs({args[i].src, args[i].filter, {}}); | |||
} | |||
} | |||
#undef CONVOLUTION_ARG_DIV_SIZE | |||
#define CB_CONV_CONFIG_COMBINATIONS(KSIZE) \ | |||
TEST_F(CPU, CONV_CONFIG_COMBINATIONS_KSIZE_1_KSIZE_##KSIZE) { \ | |||
convolution::test_conv_config_combinations(KSIZE, handle(), true, \ | |||
false, false); \ | |||
} | |||
// FIXME: testing only ksize=1 crashes on iOS, so as a temporary workaround each test pairs ksize=1 with one other ksize | |||
CB_CONV_CONFIG_COMBINATIONS(2); | |||
CB_CONV_CONFIG_COMBINATIONS(3); | |||
CB_CONV_CONFIG_COMBINATIONS(5); | |||
#undef CB_CONV_CONFIG_COMBINATIONS | |||
#if MEGDNN_WITH_BENCHMARK | |||
TEST_F(CPU, BENCHMARK_CONVOLUTION) | |||
@@ -340,8 +340,12 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | |||
return 0.3; | |||
return 1e-3; | |||
}; | |||
convolution::test_conv_config_combinations(handle_cuda(), false, true, true, | |||
eps_getter, true); | |||
convolution::test_conv_config_combinations(2, handle_cuda(), false, true, | |||
true, eps_getter, true); | |||
convolution::test_conv_config_combinations(3, handle_cuda(), false, true, | |||
true, eps_getter, true); | |||
convolution::test_conv_config_combinations(5, handle_cuda(), false, true, | |||
true, eps_getter, true); | |||
} | |||
TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) { | |||
@@ -404,71 +404,179 @@ void run_bgemm_trans_inp_test_case(bool trans_a, bool trans_b) { | |||
} // anonymous namespace | |||
TEST(TestOprBlas, MatrixMul) { | |||
TEST(TestOprBlas, MatrixMul_NN) { | |||
run_sgemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, MatrixMul_NT) { | |||
run_sgemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, MatrixMul_TN) { | |||
run_sgemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, MatrixMul_TT) { | |||
run_sgemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32) { | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_NN) { | |||
run_batched_sgemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_NT) { | |||
run_batched_sgemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_TN) { | |||
run_batched_sgemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_TT) { | |||
run_batched_sgemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16) { | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_NN) { | |||
run_batched_hgemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_NT) { | |||
run_batched_hgemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_TN) { | |||
run_batched_hgemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_TT) { | |||
run_batched_hgemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8) { | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_NN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_NT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_TN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_TT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_NN) { | |||
run_bgemm_trans_inp_test_case<float, float>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_NT) { | |||
run_bgemm_trans_inp_test_case<float, float>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_TN) { | |||
run_bgemm_trans_inp_test_case<float, float>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_TT) { | |||
run_bgemm_trans_inp_test_case<float, float>(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_NN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_NT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_TN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_TT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_NN) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_NT) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_TN) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_TT) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_NN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_NT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_TN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_TT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, true); | |||
} | |||