From f3547242205d4c76640f047beeb169d4ff30343f Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Tue, 12 May 2020 16:39:51 +0800
Subject: [PATCH] fix(ci/megdnn_test/megbrain_test): split some test cases
 that were causing timeouts

GitOrigin-RevId: ffed9d782067982a52e009806f295a4ca7e3ff63
---
 dnn/test/arm_common/conv_bias.cpp |  59 +++++++++++-------
 dnn/test/common/convolution.cpp   |   5 +-
 dnn/test/common/convolution.h     |   3 +-
 dnn/test/common/relayout.cpp      |  33 ++++++----
 dnn/test/common/relayout.h        |   2 +
 dnn/test/cpu/convolution.cpp      |  36 +++++++++--
 dnn/test/cuda/convolution.cpp     |   8 ++-
 src/opr/test/blas.cpp             | 124 +++++++++++++++++++++++++++++++++++---
 8 files changed, 216 insertions(+), 54 deletions(-)
diff --git a/dnn/test/arm_common/conv_bias.cpp b/dnn/test/arm_common/conv_bias.cpp
index 65134ce7..11badbbc 100644
--- a/dnn/test/arm_common/conv_bias.cpp
+++ b/dnn/test/arm_common/conv_bias.cpp
@@ -57,31 +57,44 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) {
     }
 }
 
-TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8) {
-    using namespace conv_bias;
-    std::vector<TestArg> args = get_quantized_args();
-    Checker<ConvBiasForward> checker(handle());
-    checker.set_before_exec_callback(
-            conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL"));
+#define CONV_BIAS_MATMUL_QU8_MODE(MODE)                                   \
+    using namespace conv_bias;                                            \
+    std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE);     \
+    Checker<ConvBiasForward> checker(handle());                           \
+    checker.set_before_exec_callback(                                     \
+            conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL"));       \
+    UniformIntRNG rng{0, 127};                                            \
+    for (auto&& arg : args) {                                             \
+        if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1)   \
+            continue;                                                     \
+        checker.set_dtype(0, dtype::Quantized8Asymm(                      \
+                                     2.5f, static_cast<uint8_t>(127)))    \
+                .set_dtype(1, dtype::Quantized8Asymm(                     \
+                                      2.7f, static_cast<uint8_t>(126)))   \
+                .set_dtype(2, dtype::QuantizedS32(6.75f))                 \
+                .set_dtype(4, dtype::Quantized8Asymm(                     \
+                                      60.25f, static_cast<uint8_t>(125))) \
+                .set_rng(0, &rng)                                         \
+                .set_rng(1, &rng)                                         \
+                .set_rng(2, &rng)                                         \
+                .set_param(arg.param)                                     \
+                .execs({arg.src, arg.filter, arg.bias, {}, {}});          \
+    }
 
-    UniformIntRNG rng{0, 127};
-    for (auto&& arg : args) {
-        if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1)
-            continue;
-        checker.set_dtype(0, dtype::Quantized8Asymm(2.5f,
-                                                    static_cast<uint8_t>(127)))
-                .set_dtype(1, dtype::Quantized8Asymm(2.7f,
-                                                     static_cast<uint8_t>(126)))
-                .set_dtype(2, dtype::QuantizedS32(6.75f))
-                .set_dtype(4, dtype::Quantized8Asymm(60.25f,
-                                                     static_cast<uint8_t>(125)))
-                .set_rng(0, &rng)
-                .set_rng(1, &rng)
-                .set_rng(2, &rng)
-                .set_param(arg.param)
-                .execs({arg.src, arg.filter, arg.bias, {}, {}});
+#define MODE_STR(mode) param::ConvBias::NonlineMode::mode
+
+#define CB_TEST(MODE)                                 \
+    TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8_##MODE) { \
+        CONV_BIAS_MATMUL_QU8_MODE(MODE_STR(MODE));    \
     }
-}
+
+CB_TEST(IDENTITY);
+CB_TEST(RELU);
+CB_TEST(H_SWISH);
+
+#undef MODE_STR
+#undef CB_TEST
+#undef CONV_BIAS_MATMUL_QU8_MODE
 
 #if MEGDNN_WITH_BENCHMARK
 
diff --git a/dnn/test/common/convolution.cpp b/dnn/test/common/convolution.cpp
index d39f1c45..67a0db3f 100644
--- a/dnn/test/common/convolution.cpp
+++ b/dnn/test/common/convolution.cpp
@@ -450,7 +450,8 @@ std::vector<TestArg> convolution::get_dilated_args() {
     return args;
 }
 
-void convolution::test_conv_config_combinations(Handle* handle, bool test_int8,
+void convolution::test_conv_config_combinations(int k_size,
+                                                Handle* handle, bool test_int8,
                                                 bool test_backward,
                                                 bool is_cuda,
                                                 ConvEPSGetter eps_getter,
@@ -484,7 +485,7 @@ void convolution::test_conv_config_combinations(Handle* handle, bool test_int8,
     CONF_BOOL(format)
     // dtype: 0: f32; 1: f16; 2: i8x8x16 3: i8x8x32
     for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++ dtype)
-    for (int ksize: {1, 2, 3, 5}) {
+    for (int ksize: {1, k_size}) {
         // When is_cuda is on, test cases where format is NHWC and
         // data type is not INT8x8x32 are disabled.
         if (is_cuda) {
diff --git a/dnn/test/common/convolution.h b/dnn/test/common/convolution.h
index b42e897f..a971ab1d 100644
--- a/dnn/test/common/convolution.h
+++ b/dnn/test/common/convolution.h
@@ -55,7 +55,8 @@ using ConvEPSGetter =
 //! check for various conv configurations (dilation, group, stride, padding)
 //! and run all usable algorithms
 void test_conv_config_combinations(
-        Handle* handle, bool test_int8, bool test_backward, bool is_cuda,
+        int k_size, Handle* handle, bool test_int8, bool test_backward,
+        bool is_cuda,
         ConvEPSGetter conv_eps_getter = [](bool f16, int, const char*)
                 -> float { return f16 ? 1e-1 : 1e-3; },
         bool use_io16xc32 = false);
diff --git a/dnn/test/common/relayout.cpp b/dnn/test/common/relayout.cpp
index 2a76723f..bccb40e2 100644
--- a/dnn/test/common/relayout.cpp
+++ b/dnn/test/common/relayout.cpp
@@ -39,35 +39,44 @@ namespace megdnn {
 namespace test {
 namespace relayout {
 
-#define DEF_TEST(name) \
-template<> \
-void run_test<name>(Handle *handle)
-
-DEF_TEST(cv) {
+void run_test_cv(Handle* handle, size_t CH) {
     std::vector<TestArg> args;
 
     for (size_t M = 124; M <= 130; ++M) {
         for (size_t N = 124; N <= 130; ++N) {
-            for (size_t CH : {1, 3, 5}) {
-                args.push_back(
+            args.push_back(
                     generate_transpose_args(1, M, N, CH, dtype::Uint8()));
-                args.push_back(
+            args.push_back(
                     generate_transpose_args(1, M, N, CH, dtype::Int32()));
-                args.push_back(
+            args.push_back(
                     generate_transpose_args(1, M, N, CH, dtype::Float32()));
-                args.push_back(
+            args.push_back(
                     generate_transpose_args(3, M, N, CH, dtype::Float32()));
-            }
         }
     }
 
     Checker<Relayout> checker(handle);
 
-    for (auto &&arg : args) {
+    for (auto&& arg : args) {
         checker.execl({arg.src, arg.dst});
     }
 }
 
+#define DEF_TEST(name) \
+template<> \
+void run_test<name>(Handle *handle)
+
+DEF_TEST(cv) {
+    run_test_cv(handle, 1);
+}
+
+DEF_TEST(cv_ch3) {
+    run_test_cv(handle, 3);
+}
+
+DEF_TEST(cv_ch5) {
+    run_test_cv(handle, 5);
+}
 
 DEF_TEST(broadcast) {
     std::vector<TestArg> args;
diff --git a/dnn/test/common/relayout.h b/dnn/test/common/relayout.h
index 33726d5b..5b4892cc 100644
--- a/dnn/test/common/relayout.h
+++ b/dnn/test/common/relayout.h
@@ -23,6 +23,8 @@ namespace relayout {
 #define FIRST_RELAYOUT_CASE cv
 
 #define FOREACH_RELAYOUT_NONFIRST_CASE(cb) \
+    cb(cv_ch3) \
+    cb(cv_ch5) \
     cb(broadcast) \
     cb(negative) \
     cb(transpose) \
diff --git a/dnn/test/cpu/convolution.cpp b/dnn/test/cpu/convolution.cpp
index 502a61d2..38db86ce 100644
--- a/dnn/test/cpu/convolution.cpp
+++ b/dnn/test/cpu/convolution.cpp
@@ -26,19 +26,43 @@ Convolution::Param gconv_param(Convolution::Param p) {
 
 } // anonymous namespace
 
-TEST_F(CPU, CONVOLUTION)
-{
+#define CONVOLUTION_ARG_DIV_SIZE 230
+TEST_F(CPU, CONVOLUTION_0) {
     using namespace convolution;
     std::vector<TestArg> args = get_args();
+    auto loop_size = args.size();
+    ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE);
     Checker<Convolution> checker(handle());
-    for (auto &&arg: args) {
-        checker.set_param(arg.param).execs({arg.src, arg.filter, {}});
+    for (unsigned int i = 0; i < CONVOLUTION_ARG_DIV_SIZE; i++) {
+        checker.set_param(args[i].param)
+                .execs({args[i].src, args[i].filter, {}});
     }
 }
 
-TEST_F(CPU, CONV_CONFIG_COMBINATIONS) {
-    convolution::test_conv_config_combinations(handle(), true, false, false);
+TEST_F(CPU, CONVOLUTION_1) {
+    using namespace convolution;
+    std::vector<TestArg> args = get_args();
+    auto loop_size = args.size();
+    ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE);
+    Checker<Convolution> checker(handle());
+    for (unsigned int i = CONVOLUTION_ARG_DIV_SIZE; i < loop_size; i++) {
+        checker.set_param(args[i].param)
+                .execs({args[i].src, args[i].filter, {}});
+    }
 }
+#undef CONVOLUTION_ARG_DIV_SIZE
+
+#define CB_CONV_CONFIG_COMBINATIONS(KSIZE)                                \
+    TEST_F(CPU, CONV_CONFIG_COMBINATIONS_KSIZE_1_KSIZE_##KSIZE) {         \
+        convolution::test_conv_config_combinations(KSIZE, handle(), true, \
+                                                   false, false);         \
+    }
+
+// FIXME: testing only ksize=1 crashes on iOS, so for now pair ksize=1 with one other ksize
+CB_CONV_CONFIG_COMBINATIONS(2);
+CB_CONV_CONFIG_COMBINATIONS(3);
+CB_CONV_CONFIG_COMBINATIONS(5);
+#undef CB_CONV_CONFIG_COMBINATIONS
 
 #if MEGDNN_WITH_BENCHMARK
 TEST_F(CPU, BENCHMARK_CONVOLUTION)
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index ff3e42e0..c8f47857 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -340,8 +340,12 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
             return 0.3;
         return 1e-3;
     };
-    convolution::test_conv_config_combinations(handle_cuda(), false, true, true,
-            eps_getter, true);
+    convolution::test_conv_config_combinations(2, handle_cuda(), false, true,
+                                               true, eps_getter, true);
+    convolution::test_conv_config_combinations(3, handle_cuda(), false, true,
+                                               true, eps_getter, true);
+    convolution::test_conv_config_combinations(5, handle_cuda(), false, true,
+                                               true, eps_getter, true);
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) {
diff --git a/src/opr/test/blas.cpp b/src/opr/test/blas.cpp
index dd6ef296..dab00573 100644
--- a/src/opr/test/blas.cpp
+++ b/src/opr/test/blas.cpp
@@ -404,71 +404,179 @@ void run_bgemm_trans_inp_test_case(bool trans_a, bool trans_b) {
 
 }  // anonymous namespace
 
-TEST(TestOprBlas, MatrixMul) {
+TEST(TestOprBlas, MatrixMul_NN) {
     run_sgemm_test(false, false);
+}
+
+TEST(TestOprBlas, MatrixMul_NT) {
     run_sgemm_test(false, true);
+}
+
+TEST(TestOprBlas, MatrixMul_TN) {
     run_sgemm_test(true, false);
+}
+
+TEST(TestOprBlas, MatrixMul_TT) {
     run_sgemm_test(true, true);
 }
 
-TEST(TestOprBlas, BatchedMatrixMulFp32) {
+TEST(TestOprBlas, BatchedMatrixMulFp32_NN) {
     run_batched_sgemm_test(false, false);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulFp32_NT) {
     run_batched_sgemm_test(false, true);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulFp32_TN) {
     run_batched_sgemm_test(true, false);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulFp32_TT) {
     run_batched_sgemm_test(true, true);
 }
 
-TEST(TestOprBlas, BatchedMatrixMulFp16) {
+TEST(TestOprBlas, BatchedMatrixMulFp16_NN) {
     run_batched_hgemm_test(false, false);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulFp16_NT) {
     run_batched_hgemm_test(false, true);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulFp16_TN) {
     run_batched_hgemm_test(true, false);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulFp16_TT) {
     run_batched_hgemm_test(true, true);
 }
 
-TEST(TestOprBlas, BatchedMatrixMulInt8) {
+TEST(TestOprBlas, BatchedMatrixMulInt8_NN) {
     if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
         !check_compute_capability(6, 1)) {
         return;
     }
     run_batched_igemm_test(false, false);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulInt8_NT) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_batched_igemm_test(false, true);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulInt8_TN) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_batched_igemm_test(true, false);
+}
+
+TEST(TestOprBlas, BatchedMatrixMulInt8_TT) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_batched_igemm_test(true, true);
 }
 
-TEST(TestOprBlas, TransBatchedMatrixMulFp32) {
+TEST(TestOprBlas, TransBatchedMatrixMulFp32_NN) {
     run_bgemm_trans_inp_test_case<float, float>(false, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulFp32_NT) {
     run_bgemm_trans_inp_test_case<float, float>(false, true);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulFp32_TN) {
     run_bgemm_trans_inp_test_case<float, float>(true, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulFp32_TT) {
     run_bgemm_trans_inp_test_case<float, float>(true, true);
 }
 
-TEST(TestOprBlas, TransBatchedMatrixMulInt8) {
+TEST(TestOprBlas, TransBatchedMatrixMulInt8_NN) {
     if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
         !check_compute_capability(6, 1)) {
         return;
     }
     run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulInt8_NT) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, true);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulInt8_TN) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulInt8_TT) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, true);
 }
 
-TEST(TestOprBlas, TransBatchedMatrixMulFp16) {
+TEST(TestOprBlas, TransBatchedMatrixMulFp16_NN) {
     run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulFp16_NT) {
     run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, true);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulFp16_TN) {
     run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulFp16_TT) {
     run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, true);
 }
 
-TEST(TestOprBlas, TransBatchedMatrixMulQS8) {
+TEST(TestOprBlas, TransBatchedMatrixMulQS8_NN) {
     if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
         !check_compute_capability(6, 1)) {
         return;
     }
     run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulQS8_NT) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, true);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulQS8_TN) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, false);
+}
+
+TEST(TestOprBlas, TransBatchedMatrixMulQS8_TT) {
+    if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA &&
+        !check_compute_capability(6, 1)) {
+        return;
+    }
     run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, true);
 }