testcase, which was caused by a timeout
GitOrigin-RevId: ffed9d7820
tags/v0.5.0
@@ -57,31 +57,44 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) { | |||
} | |||
} | |||
TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8) { | |||
using namespace conv_bias; | |||
std::vector<TestArg> args = get_quantized_args(); | |||
Checker<ConvBiasForward> checker(handle()); | |||
checker.set_before_exec_callback( | |||
conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL")); | |||
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ | |||
using namespace conv_bias; \ | |||
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ | |||
Checker<ConvBiasForward> checker(handle()); \ | |||
checker.set_before_exec_callback( \ | |||
conv_bias::ConvBiasAlgoChecker<ConvBias>("QU8MATMUL")); \ | |||
UniformIntRNG rng{0, 127}; \ | |||
for (auto&& arg : args) { \ | |||
if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1) \ | |||
continue; \ | |||
checker.set_dtype(0, dtype::Quantized8Asymm( \ | |||
2.5f, static_cast<uint8_t>(127))) \ | |||
.set_dtype(1, dtype::Quantized8Asymm( \ | |||
2.7f, static_cast<uint8_t>(126))) \ | |||
.set_dtype(2, dtype::QuantizedS32(6.75f)) \ | |||
.set_dtype(4, dtype::Quantized8Asymm( \ | |||
60.25f, static_cast<uint8_t>(125))) \ | |||
.set_rng(0, &rng) \ | |||
.set_rng(1, &rng) \ | |||
.set_rng(2, &rng) \ | |||
.set_param(arg.param) \ | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); \ | |||
} | |||
UniformIntRNG rng{0, 127}; | |||
for (auto&& arg : args) { | |||
if (arg.bias.ndim == 4 && arg.bias[2] != 1 && arg.bias[3] != 1) | |||
continue; | |||
checker.set_dtype(0, dtype::Quantized8Asymm(2.5f, | |||
static_cast<uint8_t>(127))) | |||
.set_dtype(1, dtype::Quantized8Asymm(2.7f, | |||
static_cast<uint8_t>(126))) | |||
.set_dtype(2, dtype::QuantizedS32(6.75f)) | |||
.set_dtype(4, dtype::Quantized8Asymm(60.25f, | |||
static_cast<uint8_t>(125))) | |||
.set_rng(0, &rng) | |||
.set_rng(1, &rng) | |||
.set_rng(2, &rng) | |||
.set_param(arg.param) | |||
.execs({arg.src, arg.filter, arg.bias, {}, {}}); | |||
#define MODE_STR(mode) param::ConvBias::NonlineMode::mode | |||
#define CB_TEST(MODE) \ | |||
TEST_F(ARM_COMMON, CONV_BIAS_MATMUL_QU8_##MODE) { \ | |||
CONV_BIAS_MATMUL_QU8_MODE(MODE_STR(MODE)); \ | |||
} | |||
} | |||
CB_TEST(IDENTITY); | |||
CB_TEST(RELU); | |||
CB_TEST(H_SWISH); | |||
#undef MODE_STR | |||
#undef CB_TEST | |||
#undef CONV_BIAS_MATMUL_QU8_MODE | |||
#if MEGDNN_WITH_BENCHMARK | |||
@@ -450,7 +450,8 @@ std::vector<TestArg> convolution::get_dilated_args() { | |||
return args; | |||
} | |||
void convolution::test_conv_config_combinations(Handle* handle, bool test_int8, | |||
void convolution::test_conv_config_combinations(int k_size, | |||
Handle* handle, bool test_int8, | |||
bool test_backward, | |||
bool is_cuda, | |||
ConvEPSGetter eps_getter, | |||
@@ -484,7 +485,7 @@ void convolution::test_conv_config_combinations(Handle* handle, bool test_int8, | |||
CONF_BOOL(format) | |||
// dtype: 0: f32; 1: f16; 2: i8x8x16 3: i8x8x32 | |||
for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++ dtype) | |||
for (int ksize: {1, 2, 3, 5}) { | |||
for (int ksize: {1, k_size}) { | |||
// When is_cuda is on, test cases where format is NHWC and | |||
// data type is not INT8x8x32 are disabled. | |||
if (is_cuda) { | |||
@@ -55,7 +55,8 @@ using ConvEPSGetter = | |||
//! check for various conv configurations (dilation, group, stride, padding) | |||
//! and run all usable algorithms | |||
void test_conv_config_combinations( | |||
Handle* handle, bool test_int8, bool test_backward, bool is_cuda, | |||
int k_size, Handle* handle, bool test_int8, bool test_backward, | |||
bool is_cuda, | |||
ConvEPSGetter conv_eps_getter = [](bool f16, int, const char*) | |||
-> float { return f16 ? 1e-1 : 1e-3; }, | |||
bool use_io16xc32 = false); | |||
@@ -39,35 +39,44 @@ namespace megdnn { | |||
namespace test { | |||
namespace relayout { | |||
#define DEF_TEST(name) \ | |||
template<> \ | |||
void run_test<name>(Handle *handle) | |||
DEF_TEST(cv) { | |||
void run_test_cv(Handle* handle, size_t CH) { | |||
std::vector<TestArg> args; | |||
for (size_t M = 124; M <= 130; ++M) { | |||
for (size_t N = 124; N <= 130; ++N) { | |||
for (size_t CH : {1, 3, 5}) { | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(1, M, N, CH, dtype::Uint8())); | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(1, M, N, CH, dtype::Int32())); | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(1, M, N, CH, dtype::Float32())); | |||
args.push_back( | |||
args.push_back( | |||
generate_transpose_args(3, M, N, CH, dtype::Float32())); | |||
} | |||
} | |||
} | |||
Checker<Relayout> checker(handle); | |||
for (auto &&arg : args) { | |||
for (auto&& arg : args) { | |||
checker.execl({arg.src, arg.dst}); | |||
} | |||
} | |||
#define DEF_TEST(name) \ | |||
template<> \ | |||
void run_test<name>(Handle *handle) | |||
DEF_TEST(cv) { | |||
run_test_cv(handle, 1); | |||
} | |||
DEF_TEST(cv_ch3) { | |||
run_test_cv(handle, 3); | |||
} | |||
DEF_TEST(cv_ch5) { | |||
run_test_cv(handle, 5); | |||
} | |||
DEF_TEST(broadcast) { | |||
std::vector<TestArg> args; | |||
@@ -23,6 +23,8 @@ namespace relayout { | |||
#define FIRST_RELAYOUT_CASE cv | |||
#define FOREACH_RELAYOUT_NONFIRST_CASE(cb) \ | |||
cb(cv_ch3) \ | |||
cb(cv_ch5) \ | |||
cb(broadcast) \ | |||
cb(negative) \ | |||
cb(transpose) \ | |||
@@ -26,19 +26,43 @@ Convolution::Param gconv_param(Convolution::Param p) { | |||
} // anonymous namespace | |||
TEST_F(CPU, CONVOLUTION) | |||
{ | |||
#define CONVOLUTION_ARG_DIV_SIZE 230 | |||
TEST_F(CPU, CONVOLUTION_0) { | |||
using namespace convolution; | |||
std::vector<TestArg> args = get_args(); | |||
auto loop_size = args.size(); | |||
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE); | |||
Checker<Convolution> checker(handle()); | |||
for (auto &&arg: args) { | |||
checker.set_param(arg.param).execs({arg.src, arg.filter, {}}); | |||
for (unsigned int i = 0; i < CONVOLUTION_ARG_DIV_SIZE; i++) { | |||
checker.set_param(args[i].param) | |||
.execs({args[i].src, args[i].filter, {}}); | |||
} | |||
} | |||
TEST_F(CPU, CONV_CONFIG_COMBINATIONS) { | |||
convolution::test_conv_config_combinations(handle(), true, false, false); | |||
TEST_F(CPU, CONVOLUTION_1) { | |||
using namespace convolution; | |||
std::vector<TestArg> args = get_args(); | |||
auto loop_size = args.size(); | |||
ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE); | |||
Checker<Convolution> checker(handle()); | |||
for (unsigned int i = CONVOLUTION_ARG_DIV_SIZE; i < loop_size; i++) { | |||
checker.set_param(args[i].param) | |||
.execs({args[i].src, args[i].filter, {}}); | |||
} | |||
} | |||
#undef CONVOLUTION_ARG_DIV_SIZE | |||
#define CB_CONV_CONFIG_COMBINATIONS(KSIZE) \ | |||
TEST_F(CPU, CONV_CONFIG_COMBINATIONS_KSIZE_1_KSIZE_##KSIZE) { \ | |||
convolution::test_conv_config_combinations(KSIZE, handle(), true, \ | |||
false, false); \ | |||
} | |||
// FIXME: testing only ksize=1 crashes on iOS, so as a temporary workaround each test pairs ksize=1 with one other ksize | |||
CB_CONV_CONFIG_COMBINATIONS(2); | |||
CB_CONV_CONFIG_COMBINATIONS(3); | |||
CB_CONV_CONFIG_COMBINATIONS(5); | |||
#undef CB_CONV_CONFIG_COMBINATIONS | |||
#if MEGDNN_WITH_BENCHMARK | |||
TEST_F(CPU, BENCHMARK_CONVOLUTION) | |||
@@ -340,8 +340,12 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | |||
return 0.3; | |||
return 1e-3; | |||
}; | |||
convolution::test_conv_config_combinations(handle_cuda(), false, true, true, | |||
eps_getter, true); | |||
convolution::test_conv_config_combinations(2, handle_cuda(), false, true, | |||
true, eps_getter, true); | |||
convolution::test_conv_config_combinations(3, handle_cuda(), false, true, | |||
true, eps_getter, true); | |||
convolution::test_conv_config_combinations(5, handle_cuda(), false, true, | |||
true, eps_getter, true); | |||
} | |||
TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) { | |||
@@ -404,71 +404,179 @@ void run_bgemm_trans_inp_test_case(bool trans_a, bool trans_b) { | |||
} // anonymous namespace | |||
TEST(TestOprBlas, MatrixMul) { | |||
TEST(TestOprBlas, MatrixMul_NN) { | |||
run_sgemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, MatrixMul_NT) { | |||
run_sgemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, MatrixMul_TN) { | |||
run_sgemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, MatrixMul_TT) { | |||
run_sgemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32) { | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_NN) { | |||
run_batched_sgemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_NT) { | |||
run_batched_sgemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_TN) { | |||
run_batched_sgemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp32_TT) { | |||
run_batched_sgemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16) { | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_NN) { | |||
run_batched_hgemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_NT) { | |||
run_batched_hgemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_TN) { | |||
run_batched_hgemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulFp16_TT) { | |||
run_batched_hgemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8) { | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_NN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(false, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_NT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(false, true); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_TN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(true, false); | |||
} | |||
TEST(TestOprBlas, BatchedMatrixMulInt8_TT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_batched_igemm_test(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_NN) { | |||
run_bgemm_trans_inp_test_case<float, float>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_NT) { | |||
run_bgemm_trans_inp_test_case<float, float>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_TN) { | |||
run_bgemm_trans_inp_test_case<float, float>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp32_TT) { | |||
run_bgemm_trans_inp_test_case<float, float>(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_NN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_NT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_TN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulInt8_TT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<int8_t, int32_t>(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_NN) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_NT) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_TN) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulFp16_TT) { | |||
run_bgemm_trans_inp_test_case<dt_float16, dt_float16>(true, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8) { | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_NN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_NT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(false, true); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_TN) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, false); | |||
} | |||
TEST(TestOprBlas, TransBatchedMatrixMulQS8_TT) { | |||
if (CompNode::load("xpux").device_type() == CompNode::DeviceType::CUDA && | |||
!check_compute_capability(6, 1)) { | |||
return; | |||
} | |||
run_bgemm_trans_inp_test_case<dt_qint8, dt_qint32>(true, true); | |||
} | |||