@@ -96,8 +96,8 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
 }
 
 TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
-    using Format = ConvBias::Param::Format;
     require_compute_capability(7, 5);
+    using Format = ConvBias::Param::Format;
     AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
     UniformIntRNG int_rng{-5, 5};
     UniformFloatRNG float_rng{-50, 50};
@@ -135,6 +135,7 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
 }
 
 TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
     checker.set_dtype(0, dtype::Float32())
@@ -167,6 +168,7 @@ TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
 }
 
 TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
     UniformIntRNG int_rng{-127, 127};
@@ -189,6 +191,7 @@ TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
     NormalRNG default_rng;
     float scale = 1.0f / sqrt(5);
@@ -207,6 +210,7 @@ TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
 }
 
 TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<LocalShare> checker(handle_cuda());
     using Param = LocalShare::Param;
     Param param;
@@ -216,6 +220,7 @@ TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<ConvolutionBackwardData> checker(handle_cuda());
     NormalRNG default_rng;
     checker.set_dtype(0, dtype::Float32())
@@ -229,6 +234,7 @@ TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_FILTER) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<ConvolutionBackwardFilter> checker(handle_cuda());
     NormalRNG default_rng;
     checker.set_dtype(0, dtype::Float32())
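
The hunks above, and the ones that follow, collapse the hand-written skip blocks (if (!cuda::is_compute_capability_required(...)) { printf(...); return; }) into a single require_compute_capability(...); statement from test/cuda/utils.h, whose definition is not part of this diff. Below is a minimal sketch, assuming the helper is a macro layered on check_compute_capability; the macro body, message text, and error handling are illustrative guesses, not the MegDNN source.

// Hypothetical sketch of the skip helper assumed by the tests above.
#include <cstdio>
#include <cuda_runtime_api.h>

// Device-capability predicate; the real one lives in test/cuda/utils.cpp
// (see the last hunk of this diff), restated here so the sketch is self-contained.
static bool check_compute_capability(int major, int minor) {
    int dev = 0;
    cudaDeviceProp prop{};
    if (cudaGetDevice(&dev) != cudaSuccess ||
        cudaGetDeviceProperties(&prop, dev) != cudaSuccess)
        return false;
    return prop.major > major || (prop.major == major && prop.minor >= minor);
}

// Expands to a statement that returns from the enclosing void TEST_F body,
// which is why a single require_compute_capability(6, 1); can replace the old
// four-line if/printf/return block without changing behaviour.
#define require_compute_capability(x, y)                                   \
    do {                                                                   \
        if (!check_compute_capability((x), (y))) {                         \
            printf("skip: compute capability %d.%d required\n", (x), (y)); \
            return;                                                        \
        }                                                                  \
    } while (0)
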
@@ -11,11 +11,11 @@
 #include "test/cuda/fixture.h"
 
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/benchmarker.h"
 #include "test/common/tensor.h"
 #include "test/common/timer.h"
 #include "test/common/workspace_wrapper.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
@@ -23,11 +23,7 @@ namespace test {
 
 #if MEGDNN_WITH_BENCHMARK
 TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
-               "doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using Param = param::Convolution;
     auto run_1x1 = [&](size_t N, size_t OC, size_t IC, size_t H, size_t W) {
         Benchmarker<Convolution> benchmarker(handle_cuda());
@@ -13,7 +13,6 @@
 #include "megdnn/dtype.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/accuracy_shake_checker.h"
 #include "test/common/checker.h"
 #include "test/common/rng.h"
@@ -21,6 +20,7 @@
 #include "test/common/workspace_wrapper.h"
 #include "test/cuda/benchmark.h"
 #include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
 
 #define V1(x) #x
 #define V(x) V1(x)
@@ -31,11 +31,7 @@ namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, CONVOLUTION_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
-               "doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args;
@@ -116,8 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD) {
 }
 
 TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) {
-    if (!cuda::is_compute_capability_required(6, 1))
-        return;
+    require_compute_capability(6, 1);
     using namespace convolution;
     Checker<Convolution> checker(handle_cuda());
     UniformIntRNG int_rng{-127, 127};
@@ -205,7 +200,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA) {
                 .set_epsilon(1e-3)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{filter, dst, src});
-        if (!cuda::is_compute_capability_required(6, 0)) {
+        if (!check_compute_capability(6, 0)) {
             src.dtype = dst.dtype = filter.dtype = dtype::Float16();
             checker.set_rng(0, &rng)
                     .set_rng(1, &rng)
@@ -315,8 +310,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_NHWC) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_CUDNN) {
-    if (cuda::is_compute_capability_required(7, 0))
+    if (check_compute_capability(7, 0))
         return;
     using namespace convolution;
     Checker<ConvolutionBackwardData> checker(handle_cuda());
     checker.set_before_exec_callback(
@@ -372,11 +366,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_MATMUL) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nchw4_conv_bwd_data();
@@ -430,12 +420,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nchw_conv_bwd_data();
     Checker<ConvolutionBackwardData> checker(handle_cuda());
@@ -463,11 +448,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
 #if CUDA_VERSION >= 10020
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA) {
-    if (!cuda::is_compute_capability_required(7, 5)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(7, 5);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nhwc_conv_bwd_data();
@@ -527,8 +508,7 @@
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_FAILED_CUDNN7_5) {
     // BRAIN-481 failed on architectures 7.0, remove the following if statement,
     // when cudnn fixed the problem.
-    if (cuda::is_compute_capability_required(7, 0))
+    if (check_compute_capability(7, 0))
         return;
     using namespace convolution;
     std::vector<TestArg> args = get_args_cudnn_7_5_failures();
     Checker<ConvolutionBackwardData> checker(handle_cuda());
@@ -662,8 +642,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) {
-    if (cuda::is_compute_capability_required(7, 0))
+    if (check_compute_capability(7, 0))
         return;
     using namespace convolution;
     Checker<ConvolutionBackwardFilter> checker(handle_cuda());
     checker.set_before_exec_callback(
@@ -697,8 +676,7 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) {
-    if (cuda::is_compute_capability_required(7, 0))
+    if (check_compute_capability(7, 0))
         return;
     using namespace convolution;
     Checker<ConvolutionBackwardData> checker(handle_cuda());
     checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
@@ -11,20 +11,20 @@
 #include "test/common/convolution3d.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/benchmarker.h"
 #include "test/common/checker.h"
 #include "test/common/rng.h"
 #include "test/common/tensor.h"
 #include "test/common/workspace_wrapper.h"
 #include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 #if 0
 TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
+    if (!check_compute_capability(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
@@ -15,13 +15,13 @@
 #include "test/common/convolution.h"
 #include "test/cuda/fixture.h"
-#include "src/cuda/utils.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONV_FORWARD) {
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
                    size_t OC, size_t /* OH */, size_t /* OW */, size_t PH, size_t PW,
                    size_t SH, size_t SW, size_t DH, size_t DW, size_t group) {
@@ -15,13 +15,13 @@
 #include "test/common/convolution3d.h"
 #include "test/cuda/fixture.h"
-#include "src/cuda/utils.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD) {
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     static_cast<void>(is_int_available);
     auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, size_t FD,
                    size_t FH, size_t FW, size_t OC, size_t PD, size_t PH, size_t PW,
@@ -15,7 +15,6 @@
 #include "test/common/checker.h"
 #include "test/common/matrix_mul.h"
-#include "src/cuda/utils.h"
 
 #if defined(cuda_check)
 #undef cuda_check
 #endif
@@ -130,10 +129,7 @@ TEST_F(CUDA, PEAK_BENCHMARK_MATRIX_MUL_QUANTIZED4x4x32) {
 #endif
 
 TEST_F(CUDA, MATRIX_MUL_INT8x8x32_WITH_SPETIAL_STRIDES) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     Param param;
@@ -152,10 +148,7 @@ TEST_F(CUDA, MATRIX_MUL_INT8x8x32_WITH_SPETIAL_STRIDES) {
 }
 
 TEST_F(CUDA, MATRIX_MUL_INT8x8x32_NAIVE) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using Param = MatrixMul::Param;
     UniformIntRNG rng{-128, 127};
@@ -224,16 +217,12 @@ TEST_F(CUDA, MATRIX_MUL_FLOAT_NAIVE) {
 }
 
 TEST_F(CUDA, MATRIX_MUL) {
-    if (cuda::current_device_prop().major < 6) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     size_t m = 12, n = 16, k = 20;
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     std::vector<DType> dtype_array;
     dtype_array.push_back(dtype::Float32());
     dtype_array.push_back(dtype::Float16());
@@ -41,7 +41,7 @@ TEST_F(CUDA, REDUCE) {
     checker.execs({{2, 16 * 16 + 1, 31}, {}});
     checker.execs({{2, 16 * 16 * 16 + 1, 31}, {}});
     checker.execs({{2, 16 * 16 * 16 * 16 + 1, 31}, {}});
-#if MEGDNN_TEGRA_X1
+#if MEGDNN_TEGRA_X1 || MEGDNN_TEGRA_X2
     checker.execs({{2, 8 * 16 * 16 * 16 * 16 + 1, 31}, {}});
 #else
     checker.execs({{2, 16 * 16 * 16 * 16 * 16 + 1, 31}, {}});
@@ -18,6 +18,13 @@ bool check_compute_capability(int major, int minor) {
     cuda_check(cudaGetDevice(&dev));
     cudaDeviceProp prop;
     cuda_check(cudaGetDeviceProperties(&prop, dev));
+    //! skip sm_62 (i.e. Jetson TX2) here, unless a test
+    //! requires compute capability 6.2 explicitly
+    if (prop.major == 6 && prop.minor == 2) {
+        return prop.major == major && prop.minor == minor;
+    }
     return prop.major > major || (prop.major == major && prop.minor >= minor);
 }
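
The added early return changes what check_compute_capability reports on a Jetson TX2 (sm_62): only an exact 6.2 requirement is satisfied, while the generic 6.0/6.1 requirements used by most tests in this diff now skip; every other device keeps the usual at-least-major.minor rule. Below is a standalone restatement of the new predicate, with the device capability passed in explicitly; the function name and test values are made up for illustration.

#include <cassert>

// Mirrors the patched check: a 6.2 (Jetson TX2) device only satisfies an
// explicit 6.2 requirement; all other devices use the usual ">=" comparison.
static bool capability_ok(int dev_major, int dev_minor, int major, int minor) {
    if (dev_major == 6 && dev_minor == 2)  // Jetson TX2 special case
        return dev_major == major && dev_minor == minor;
    return dev_major > major || (dev_major == major && dev_minor >= minor);
}

int main() {
    assert(capability_ok(6, 2, 6, 2));   // TX2 runs tests that ask for sm_62 exactly
    assert(!capability_ok(6, 2, 6, 1));  // TX2 now skips the common "require 6.1" tests
    assert(!capability_ok(6, 2, 6, 0));  // ... and the "require 6.0" ones
    assert(capability_ok(7, 5, 6, 1));   // Turing still satisfies ">= 6.1"
    assert(!capability_ok(5, 3, 6, 1));  // Maxwell still does not
    return 0;
}

This keeps TX2 out of test runs written for desktop Pascal and newer parts without touching every require_compute_capability(6, 1) call site.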