diff --git a/dnn/test/cuda/accuracy_shake.cpp b/dnn/test/cuda/accuracy_shake.cpp
index 41b116b2..2e37b3ce 100644
--- a/dnn/test/cuda/accuracy_shake.cpp
+++ b/dnn/test/cuda/accuracy_shake.cpp
@@ -96,8 +96,8 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
 }
 
 TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
-    using Format = ConvBias::Param::Format;
     require_compute_capability(7, 5);
+    using Format = ConvBias::Param::Format;
     AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
     UniformIntRNG int_rng{-5, 5};
     UniformFloatRNG float_rng{-50, 50};
@@ -135,6 +135,7 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
 }
 
 TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<MatrixMulForward> checker(handle_cuda());
 
     checker.set_dtype(0, dtype::Float32())
@@ -167,6 +168,7 @@ TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
 }
 
 TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<BatchedMatrixMulForward> checker(handle_cuda());
     UniformIntRNG int_rng{-127, 127};
@@ -189,6 +191,7 @@ TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
     NormalRNG default_rng;
     float scale = 1.0f / sqrt(5);
@@ -207,6 +210,7 @@ TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
 }
 
 TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<LocalShareForward> checker(handle_cuda());
     using Param = LocalShare::Param;
     Param param;
@@ -216,6 +220,7 @@ TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<ConvolutionBackwardData> checker(handle_cuda());
     NormalRNG default_rng;
     checker.set_dtype(0, dtype::Float32())
@@ -229,6 +234,7 @@ TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_FILTER) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<ConvolutionBackwardFilter> checker(handle_cuda());
     NormalRNG default_rng;
     checker.set_dtype(0, dtype::Float32())
diff --git a/dnn/test/cuda/benchmark.cpp b/dnn/test/cuda/benchmark.cpp
index 10c22c6c..29b0cace 100644
--- a/dnn/test/cuda/benchmark.cpp
+++ b/dnn/test/cuda/benchmark.cpp
@@ -11,11 +11,11 @@
 #include "test/cuda/fixture.h"
 
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/benchmarker.h"
 #include "test/common/tensor.h"
 #include "test/common/timer.h"
 #include "test/common/workspace_wrapper.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
@@ -23,11 +23,7 @@ namespace test {
 #if MEGDNN_WITH_BENCHMARK
 
 TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
-               "doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using Param = param::Convolution;
     auto run_1x1 = [&](size_t N, size_t OC, size_t IC, size_t H, size_t W) {
         Benchmarker<ConvolutionForward> benchmarker(handle_cuda());
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index 78cdb9e1..4429e4b0 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -13,7 +13,6 @@
 #include "megdnn/dtype.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/accuracy_shake_checker.h"
 #include "test/common/checker.h"
 #include "test/common/rng.h"
@@ -21,6 +20,7 @@
 #include "test/common/workspace_wrapper.h"
 #include "test/cuda/benchmark.h"
 #include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
 
 #define V1(x) #x
 #define V(x) V1(x)
@@ -31,11 +31,7 @@ namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, CONVOLUTION_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
-               "doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args;
@@ -116,8 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD) {
 }
 
 TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) {
-    if (!cuda::is_compute_capability_required(6, 1))
-        return;
+    require_compute_capability(6, 1);
     using namespace convolution;
     Checker<ConvolutionForward> checker(handle_cuda());
     UniformIntRNG int_rng{-127, 127};
@@ -205,7 +200,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA) {
                 .set_epsilon(1e-3)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{filter, dst, src});
-        if (!cuda::is_compute_capability_required(6, 0)) {
+        if (!check_compute_capability(6, 0)) {
             src.dtype = dst.dtype = filter.dtype = dtype::Float16();
             checker.set_rng(0, &rng)
                     .set_rng(1, &rng)
@@ -315,8 +310,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_NHWC) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_CUDNN) {
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     Checker<ConvolutionBackwardData> checker(handle_cuda());
     checker.set_before_exec_callback(
@@ -372,11 +366,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_MATMUL) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nchw4_conv_bwd_data();
@@ -430,12 +420,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A test as "
-               "current device doesn't support\n");
-        return;
-    }
-
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nchw_conv_bwd_data();
     Checker<ConvolutionBackwardData> checker(handle_cuda());
@@ -463,11 +448,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
 #if CUDA_VERSION >= 10020
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA) {
-    if (!cuda::is_compute_capability_required(7, 5)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(7, 5);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nhwc_conv_bwd_data();
@@ -527,8 +508,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA) {
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_FAILED_CUDNN7_5) {
     // BRAIN-481 failed on architectures 7.0, remove the following if statement,
     // when cudnn fixed the problem.
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     std::vector<TestArg> args = get_args_cudnn_7_5_failures();
     Checker<ConvolutionBackwardData> checker(handle_cuda());
@@ -662,8 +642,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) {
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     Checker<ConvolutionBackwardFilter> checker(handle_cuda());
     checker.set_before_exec_callback(
@@ -697,8 +676,7 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) {
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     Checker<ConvolutionBackwardData> checker(handle_cuda());
     checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
diff --git a/dnn/test/cuda/convolution3d.cpp b/dnn/test/cuda/convolution3d.cpp
index 0de2da4e..db16f7e1 100644
--- a/dnn/test/cuda/convolution3d.cpp
+++ b/dnn/test/cuda/convolution3d.cpp
@@ -11,20 +11,20 @@
 #include "test/common/convolution3d.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/benchmarker.h"
 #include "test/common/checker.h"
 #include "test/common/rng.h"
 #include "test/common/tensor.h"
 #include "test/common/workspace_wrapper.h"
 #include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 #if 0
 TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
+    if (!check_compute_capability(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
diff --git a/dnn/test/cuda/group_conv.cpp b/dnn/test/cuda/group_conv.cpp
index 74f68445..7770711f 100644
--- a/dnn/test/cuda/group_conv.cpp
+++ b/dnn/test/cuda/group_conv.cpp
@@ -15,13 +15,13 @@
 #include "test/common/convolution.h"
 #include "test/cuda/fixture.h"
 
-#include "src/cuda/utils.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONV_FORWARD) {
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
                    size_t OC, size_t /* OH */, size_t /* OW */, size_t PH, size_t PW,
                    size_t SH, size_t SW, size_t DH, size_t DW, size_t group) {
diff --git a/dnn/test/cuda/group_conv3d.cpp b/dnn/test/cuda/group_conv3d.cpp
index b79a0b05..1db645bc 100644
--- a/dnn/test/cuda/group_conv3d.cpp
+++ b/dnn/test/cuda/group_conv3d.cpp
@@ -15,13 +15,13 @@
 #include "test/common/convolution3d.h"
 #include "test/cuda/fixture.h"
 
-#include "src/cuda/utils.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD) {
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     static_cast<void>(is_int_available);
     auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, size_t FD,
                    size_t FH, size_t FW, size_t OC, size_t PD, size_t PH, size_t PW,
diff --git a/dnn/test/cuda/matrix_mul.cpp b/dnn/test/cuda/matrix_mul.cpp
index a5df4faf..1d48a35a 100644
--- a/dnn/test/cuda/matrix_mul.cpp
+++ b/dnn/test/cuda/matrix_mul.cpp
@@ -15,7 +15,6 @@
 #include "test/common/checker.h"
 #include "test/common/matrix_mul.h"
 
-#include "src/cuda/utils.h"
 #if defined(cuda_check)
 #undef cuda_check
 #endif
@@ -130,10 +129,7 @@ TEST_F(CUDA, PEAK_BENCHMARK_MATRIX_MUL_QUANTIZED4x4x32) {
 #endif
 
 TEST_F(CUDA, MATRIX_MUL_INT8x8x32_WITH_SPETIAL_STRIDES) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     Param param;
@@ -152,10 +148,7 @@ TEST_F(CUDA, MATRIX_MUL_INT8x8x32_WITH_SPETIAL_STRIDES) {
 }
 
 TEST_F(CUDA, MATRIX_MUL_INT8x8x32_NAIVE) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
 
     using Param = MatrixMul::Param;
     UniformIntRNG rng{-128, 127};
@@ -224,16 +217,12 @@ TEST_F(CUDA, MATRIX_MUL_FLOAT_NAIVE) {
 }
 
 TEST_F(CUDA, MATRIX_MUL) {
-    if (cuda::current_device_prop().major < 6) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
 
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     size_t m = 12, n = 16, k = 20;
 
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     std::vector<DType> dtype_array;
     dtype_array.push_back(dtype::Float32());
     dtype_array.push_back(dtype::Float16());
diff --git a/dnn/test/cuda/reduce.cpp b/dnn/test/cuda/reduce.cpp
index bf6b091d..ba46a613 100644
--- a/dnn/test/cuda/reduce.cpp
+++ b/dnn/test/cuda/reduce.cpp
@@ -41,7 +41,7 @@ TEST_F(CUDA, REDUCE) {
     checker.execs({{2, 16 * 16 + 1, 31}, {}});
     checker.execs({{2, 16 * 16 * 16 + 1, 31}, {}});
     checker.execs({{2, 16 * 16 * 16 * 16 + 1, 31}, {}});
-#if MEGDNN_TEGRA_X1
+#if MEGDNN_TEGRA_X1 || MEGDNN_TEGRA_X2
     checker.execs({{2, 8 * 16 * 16 * 16 * 16 + 1, 31}, {}});
 #else
     checker.execs({{2, 16 * 16 * 16 * 16 * 16 + 1, 31}, {}});
diff --git a/dnn/test/cuda/utils.cpp b/dnn/test/cuda/utils.cpp
index b1f529bd..778e18eb 100644
--- a/dnn/test/cuda/utils.cpp
+++ b/dnn/test/cuda/utils.cpp
@@ -18,6 +18,13 @@ bool check_compute_capability(int major, int minor) {
     cuda_check(cudaGetDevice(&dev));
     cudaDeviceProp prop;
     cuda_check(cudaGetDeviceProperties(&prop, dev));
+
+    //! we skip sm_62 (jetson tx2) here,
+    //! unless sm_62 is required explicitly
+    if (prop.major == 6 && prop.minor == 2) {
+        return prop.major == major && prop.minor == minor;
+    }
+
     return prop.major > major || (prop.major == major && prop.minor >= minor);
 }
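
Note: `require_compute_capability` and `check_compute_capability` come from `test/cuda/utils.h`, whose declarations are not part of this diff. The following is only a sketch of the contract the tests above rely on, inferred from how the patch uses the helpers; the function and macro names are taken from the diff, but the macro body and the skip message are assumptions, not the actual header contents:

    // Hypothetical sketch of the helpers in test/cuda/utils.h (inferred from
    // usage in this patch; the real header may differ).
    #include <cstdio>

    namespace megdnn {
    namespace test {
    // true iff the current device's compute capability is at least major.minor
    // (with the sm_62 / Jetson TX2 special case added to utils.cpp above)
    bool check_compute_capability(int major, int minor);
    }  // namespace test
    }  // namespace megdnn

    // Skip the enclosing TEST_F body unless the device satisfies major.minor;
    // written as a macro so that `return` exits the calling test body.
    #define require_compute_capability(x, y)                               \
        do {                                                               \
            if (!::megdnn::test::check_compute_capability((x), (y))) {     \
                printf("skip test: compute capability %d.%d required\n",   \
                       (x), (y));                                          \
                return;                                                    \
            }                                                              \
        } while (0)

With the utils.cpp hunk above, `check_compute_capability(6, 1)` returns false on a Jetson TX2 (sm_62) because only an exact 6.2 request succeeds there, so every test gated by `require_compute_capability(6, 1)` is now skipped on that device as well.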