testcase, which was caused by a timeout
GitOrigin-RevId: ffed9d7820
tags/v0.5.0
@@ -57,31 +57,44 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) { | |||||
} | } | ||||
} | } | ||||
TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8) { | |||||
using namespace conv_bias; | |||||
std::vector<TestArg> args = get_quantized_args(); | |||||
Checker<ConvBiasForward> checker(handle()); | |||||
checker.set_before_exec_callback( | |||||
conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL")); | |||||
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ | |||||
using namespace conv_bias; \ | |||||
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ | |||||
Checker<ConvBiasForward> checker(handle()); \ | |||||
checker.set_before_exec_callback( \ | |||||
conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL")); \ | |||||
UniformIntRNG rng{0, 127}; \ | |||||
for (auto&& arg : args) { \ | |||||
if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1) \ | |||||
continue; \ | |||||
checker.set_dtype(0, dtype::Quantized8Asymm( \ | |||||
2.5f, static_cast<uint8_t>(127))) \ | |||||
.set_dtype(1, dtype::Quantized8Asymm( \ | |||||
2.7f, static_cast<uint8_t>(126))) \ | |||||
.set_dtype(2, dtype::QuantizedS32(6.75f)) \ | |||||
.set_dtype(4, dtype::Quantized8Asymm( \ | |||||
60.25f, static_cast<uint8_t>(125))) \ | |||||
.set_rng(0, &rng) \ | |||||
.set_rng(1, &rng) \ | |||||
.set_rng(2, &rng) \ | |||||
.set_param(arg.param) \ | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); \ | |||||
} | |||||
UniformIntRNG rng{0, 127}; | |||||
for (auto&& arg : args) { | |||||
if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1) | |||||
continue; | |||||
checker.set_dtype(0, dtype::Quantized8Asymm(2.5f, | |||||
static_cast<uint8_t>(127))) | |||||
.set_dtype(1, dtype::Quantized8Asymm(2.7f, | |||||
static_cast<uint8_t>(126))) | |||||
.set_dtype(2, dtype::QuantizedS32(6.75f)) | |||||
.set_dtype(4, dtype::Quantized8Asymm(60.25f, | |||||
static_cast<uint8_t>(125))) | |||||
.set_rng(0, &rng) | |||||
.set_rng(1, &rng) | |||||
.set_rng(2, &rng) | |||||
.set_param(arg.param) | |||||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||||
#define MODE_STR(mode) param::ConvBias::NonlineMode::mode | |||||
#define CB_TEST(MODE) \ | |||||
TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8_##MODE) { \ | |||||
CONV_BIAS_MATMUL_QU8_MODE(MODE_STR(MODE)); \ | |||||
} | } | ||||
} | |||||
CB_TEST(IDENTITY); | |||||
CB_TEST(RELU); | |||||
CB_TEST(H_SWISH); | |||||
#undef MODE_STR | |||||
#undef CB_TEST | |||||
#undef CONV_BIAS_MATMUL_QU8_MODE | |||||
#if MEGDNN_WITH_BENCHMARK | #if MEGDNN_WITH_BENCHMARK | ||||
@@ -450,7 +450,8 @@ std::vector<TestArg> convolution::get_dilated_args() { | |||||
return args; | return args; | ||||
} | } | ||||
void convolution::test_conv_config_combinations(Handle* handle, bool test_int8, | |||||
void convolution::test_conv_config_combinations(int k_size, | |||||
Handle* handle, bool test_int8, | |||||
bool test_backward, | bool test_backward, | ||||
bool is_cuda, | bool is_cuda, | ||||
ConvEPSGetter eps_getter, | ConvEPSGetter eps_getter, | ||||
@@ -484,7 +485,7 @@ void convolution::test_conv_config_combinations(Handle* handle, bool test_int8, | |||||
CONF_BOOL(format) | CONF_BOOL(format) | ||||
// dtype: 0: f32; 1: f16; 2: i8x8x16 3: i8x8x32 | // dtype: 0: f32; 1: f16; 2: i8x8x16 3: i8x8x32 | ||||
for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++ dtype) | for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++ dtype) | ||||
for (int ksize: {1, 2, 3, 5}) { | |||||
for (int ksize: {1, k_size}) { | |||||
// When is_cuda is on, test cases where format is NHWC and | // When is_cuda is on, test cases where format is NHWC and | ||||
// data type is not INT8x8x32 are disabled. | // data type is not INT8x8x32 are disabled. | ||||
if (is_cuda) { | if (is_cuda) { | ||||
@@ -55,7 +55,8 @@ using ConvEPSGetter = | |||||
//! check for various conv configurations (dilation, group, stride, padding) | //! check for various conv configurations (dilation, group, stride, padding) | ||||
//! and run all usable algorithms | //! and run all usable algorithms | ||||
void test_conv_config_combinations( | void test_conv_config_combinations( | ||||
Handle* handle, bool test_int8, bool test_backward, bool is_cuda, | |||||
int k_size, Handle* handle, bool test_int8, bool test_backward, | |||||
bool is_cuda, | |||||
ConvEPSGetter conv_eps_getter = [](bool f16, int, const char*) | ConvEPSGetter conv_eps_getter = [](bool f16, int, const char*) | ||||
-> float { return f16 ? 1e-1 : 1e-3; }, | -> float { return f16 ? 1e-1 : 1e-3; }, | ||||
bool use_io16xc32 = false); | bool use_io16xc32 = false); | ||||
@@ -39,35 +39,44 @@ namespace megdnn { | |||||
namespace test { | namespace test { | ||||
namespace relayout { | namespace relayout { | ||||
#define DEF_TEST(name) \ | |||||
template<> \ | |||||
void run_test<name>(Handle *handle) | |||||
DEF_TEST(cv) { | |||||
void run_test_cv(Handle* handle, size_t CH) { | |||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
for (size_t M = 124; M <= 130; ++M) { | for (size_t M = 124; M <= 130; ++M) { | ||||
for (size_t N = 124; N <= 130; ++N) { | for (size_t N = 124; N <= 130; ++N) { | ||||
for (size_t CH : {1, 3, 5}) { | |||||
args.push_back( | |||||
args.push_back( | |||||
generate_transpose_args(1, M, N, CH, dtype::Uint8())); | generate_transpose_args(1, M, N, CH, dtype::Uint8())); | ||||
args.push_back( | |||||
args.push_back( | |||||
generate_transpose_args(1, M, N, CH, dtype::Int32())); | generate_transpose_args(1, M, N, CH, dtype::Int32())); | ||||
args.push_back( | |||||
args.push_back( | |||||
generate_transpose_args(1, M, N, CH, dtype::Float32())); | generate_transpose_args(1, M, N, CH, dtype::Float32())); | ||||
args.push_back( | |||||
args.push_back( | |||||
generate_transpose_args(3, M, N, CH, dtype::Float32())); | generate_transpose_args(3, M, N, CH, dtype::Float32())); | ||||
} | |||||
} | } | ||||
} | } | ||||
Checker<Relayout> checker(handle); | Checker<Relayout> checker(handle); | ||||
for (auto &&arg : args) { | |||||
for (auto&& arg : args) { | |||||
checker.execl({arg.src, arg.dst}); | checker.execl({arg.src, arg.dst}); | ||||
} | } | ||||
} | } | ||||
#define DEF_TEST(name) \ | |||||
template<> \ | |||||
void run_test<name>(Handle *handle) | |||||
DEF_TEST(cv) { | |||||
run_test_cv(handle, 1); | |||||
} | |||||
DEF_TEST(cv_ch3) { | |||||
run_test_cv(handle, 3); | |||||
} | |||||
DEF_TEST(cv_ch5) { | |||||
run_test_cv(handle, 5); | |||||
} | |||||
DEF_TEST(broadcast) { | DEF_TEST(broadcast) { | ||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
@@ -23,6 +23,8 @@ namespace relayout { | |||||
#define FIRST_RELAYOUT_CASE cv | #define FIRST_RELAYOUT_CASE cv | ||||
#define FOREACH_RELAYOUT_NONFIRST_CASE(cb) \ | #define FOREACH_RELAYOUT_NONFIRST_CASE(cb) \ | ||||
cb(cv_ch3) \ | |||||
cb(cv_ch5) \ | |||||
cb(broadcast) \ | cb(broadcast) \ | ||||
cb(negative) \ | cb(negative) \ | ||||
cb(transpose) \ | cb(transpose) \ | ||||
@@ -26,19 +26,43 @@ Convolution::Param gconv_param(Convolution::Param p) { | |||||
} // anonymous namespace | } // anonymous namespace | ||||
TEST_F(CPU, CONVOLUTION) | |||||
{ | |||||
#define CONVOLUTION_ARG_DIV_SIZE 230 | |||||
TEST_F(CPU, CONVOLUTION_0) { | |||||
using namespace convolution; | using namespace convolution; | ||||
std::vector<TestArg> args = get_args(); | std::vector<TestArg> args = get_args(); | ||||
auto loop_size = args.size(); | |||||
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE); | |||||
Checker<Convolution> checker(handle()); | Checker<Convolution> checker(handle()); | ||||
for (auto &&arg: args) { | |||||
checker.set_param(arg.param).execs({arg.src, arg.filter, {}}); | |||||
for (unsigned int i = 0; i < CONVOLUTION_ARG_DIV_SIZE; i++) { | |||||
checker.set_param(args[i].param) | |||||
.execs({args[i].src, args[i].filter, {}}); | |||||
} | } | ||||
} | } | ||||
TEST_F(CPU, CONV_CONFIG_COMBINATIONS) { | |||||
convolution::test_conv_config_combinations(handle(), true, false, false); | |||||
TEST_F(CPU, CONVOLUTION_1) { | |||||
using namespace convolution; | |||||
std::vector<TestArg> args = get_args(); | |||||
auto loop_size = args.size(); | |||||
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE); | |||||
Checker<Convolution> checker(handle()); | |||||
for (unsigned int i = CONVOLUTION_ARG_DIV_SIZE; i < loop_size; i++) { | |||||
checker.set_param(args[i].param) | |||||
.execs({args[i].src, args[i].filter, {}}); | |||||
} | |||||
} | } | ||||
#undef CONVOLUTION_ARG_DIV_SIZE | |||||
#define CB_CONV_CONFIG_COMBINATIONS(KSIZE) \ | |||||
TEST_F(CPU, CONV_CONFIG_COMBINATIONS_KSIZE_1_KSIZE_##KSIZE) { \ | |||||
convolution::test_conv_config_combinations(KSIZE, handle(), true, \ | |||||
false, false); \ | |||||
} | |||||
// FIXME: testing only ksize=1 crashes on iOS, so as a temporary workaround each test covers ksize=1 together with one other ksize (ksize_1##other_ksize) | |||||
CB_CONV_CONFIG_COMBINATIONS(2); | |||||
CB_CONV_CONFIG_COMBINATIONS(3); | |||||
CB_CONV_CONFIG_COMBINATIONS(5); | |||||
#undef CB_CONV_CONFIG_COMBINATIONS | |||||
#if MEGDNN_WITH_BENCHMARK | #if MEGDNN_WITH_BENCHMARK | ||||
TEST_F(CPU, BENCHMARK_CONVOLUTION) | TEST_F(CPU, BENCHMARK_CONVOLUTION) | ||||
@@ -340,8 +340,12 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | |||||
return 0.3; | return 0.3; | ||||
return 1e-3; | return 1e-3; | ||||
}; | }; | ||||
convolution::test_conv_config_combinations(handle_cuda(), false, true, true, | |||||
eps_getter, true); | |||||
convolution::test_conv_config_combinations(2, handle_cuda(), false, true, | |||||
true, eps_getter, true); | |||||
convolution::test_conv_config_combinations(3, handle_cuda(), false, true, | |||||
true, eps_getter, true); | |||||
convolution::test_conv_config_combinations(5, handle_cuda(), false, true, | |||||
true, eps_getter, true); | |||||
} | } | ||||
TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) { | TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) { | ||||
@@ -404,71 +404,179 @@ void run_bgemm_trans_inp_test_case(bool trans_a, bool trans_b) { | |||||
} // anonymous namespace | } // anonymous namespace | ||||
TEST(TestOprBlas, MatrixMul) { | |||||
TEST(TestOprBlas, MatrixMul_NN) { | |||||
run_sgemm_test(false, false); | run_sgemm_test(false, false); | ||||
} | |||||
TEST(TestOprBlas, MatrixMul_NT) { | |||||
run_sgemm_test(false, true); | run_sgemm_test(false, true); | ||||
} | |||||
TEST(TestOprBlas, MatrixMul_TN) { | |||||
run_sgemm_test(true, false); | run_sgemm_test(true, false); | ||||
} | |||||
TEST(TestOprBlas, MatrixMul_TT) { | |||||
run_sgemm_test(true, true); | run_sgemm_test(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, BatchedMatrixMulFp32) { | |||||
TEST(TestOprBlas, BatchedMatrixMulFp32_NN) { | |||||
run_batched_sgemm_test(false, false); | run_batched_sgemm_test(false, false); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulFp32_NT) { | |||||
run_batched_sgemm_test(false, true); | run_batched_sgemm_test(false, true); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulFp32_TN) { | |||||
run_batched_sgemm_test(true, false); | run_batched_sgemm_test(true, false); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulFp32_TT) { | |||||
run_batched_sgemm_test(true, true); | run_batched_sgemm_test(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, BatchedMatrixMulFp16) { | |||||
TEST(TestOprBlas, BatchedMatrixMulFp16_NN) { | |||||
run_batched_hgemm_test(false, false); | run_batched_hgemm_test(false, false); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulFp16_NT) { | |||||
run_batched_hgemm_test(false, true); | run_batched_hgemm_test(false, true); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulFp16_TN) { | |||||
run_batched_hgemm_test(true, false); | run_batched_hgemm_test(true, false); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulFp16_TT) { | |||||
run_batched_hgemm_test(true, true); | run_batched_hgemm_test(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, BatchedMatrixMulInt8) { | |||||
TEST(TestOprBlas, BatchedMatrixMulInt8_NN) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | ||||
!check_compute_capability(6, 1)) { | !check_compute_capability(6, 1)) { | ||||
return; | return; | ||||
} | } | ||||
run_batched_igemm_test(false, false); | run_batched_igemm_test(false, false); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulInt8_NT) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_batched_igemm_test(false, true); | run_batched_igemm_test(false, true); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulInt8_TN) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_batched_igemm_test(true, false); | run_batched_igemm_test(true, false); | ||||
} | |||||
TEST(TestOprBlas, BatchedMatrixMulInt8_TT) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_batched_igemm_test(true, true); | run_batched_igemm_test(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, TransBatchedMatrixMulFp32) { | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_NN) { | |||||
run_bgemm_trans_inp_test_case<float, float>(false, false); | run_bgemm_trans_inp_test_case<float, float>(false, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_NT) { | |||||
run_bgemm_trans_inp_test_case<float, float>(false, true); | run_bgemm_trans_inp_test_case<float, float>(false, true); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_TN) { | |||||
run_bgemm_trans_inp_test_case<float, float>(true, false); | run_bgemm_trans_inp_test_case<float, float>(true, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_TT) { | |||||
run_bgemm_trans_inp_test_case<float, float>(true, true); | run_bgemm_trans_inp_test_case<float, float>(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, TransBatchedMatrixMulInt8) { | |||||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_NN) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | ||||
!check_compute_capability(6, 1)) { | !check_compute_capability(6, 1)) { | ||||
return; | return; | ||||
} | } | ||||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, false); | run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_NT) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, true); | run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, true); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_TN) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, false); | run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_TT) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, true); | run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, TransBatchedMatrixMulFp16) { | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_NN) { | |||||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, false); | run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_NT) { | |||||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, true); | run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, true); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_TN) { | |||||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, false); | run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_TT) { | |||||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, true); | run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, true); | ||||
} | } | ||||
TEST(TestOprBlas, TransBatchedMatrixMulQS8) { | |||||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_NN) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | ||||
!check_compute_capability(6, 1)) { | !check_compute_capability(6, 1)) { | ||||
return; | return; | ||||
} | } | ||||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, false); | run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_NT) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, true); | run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, true); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_TN) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, false); | run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, false); | ||||
} | |||||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_TT) { | |||||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||||
!check_compute_capability(6, 1)) { | |||||
return; | |||||
} | |||||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, true); | run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, true); | ||||
} | } | ||||