
fix(dnn/cuda): fix cublas matmul on sm60

GitOrigin-RevId: 3fc0c30a23
tags/v0.4.0
Megvii Engine Team (Xinran Xu), 5 years ago
parent commit f5833a5294
9 changed files with 20 additions and 13 deletions
  1. dnn/src/cuda/conv_bias/matmul_8x8x32.cpp (+1, -1)
  2. dnn/src/cuda/matrix_mul/cublas.cpp (+1, -1)
  3. dnn/test/cuda/benchmark.cpp (+1, -1)
  4. dnn/test/cuda/conv_bias.cpp (+3, -3)
  5. dnn/test/cuda/convolution.cpp (+2, -2)
  6. dnn/test/cuda/convolution3d.cpp (+1, -1)
  7. dnn/test/cuda/group_conv.cpp (+1, -1)
  8. dnn/test/cuda/group_conv3d.cpp (+1, -1)
  9. dnn/test/cuda/matrix_mul.cpp (+9, -2)

dnn/src/cuda/conv_bias/matmul_8x8x32.cpp (+1, -1)

@@ -21,7 +21,7 @@ bool ConvBiasForwardImpl::AlgoMatmul8x8x32::is_available(
         const SizeArgs& args) const {
     if (args.z_layout->ndim > 0)
         return false;
-    if (cuda::current_device_prop().major < 6)
+    if (!is_compute_capability_required(6, 1))
         return false;
 
     auto dst_layout = *args.dst_layout;


dnn/src/cuda/matrix_mul/cublas.cpp (+1, -1)

@@ -42,7 +42,7 @@ bool MatrixMulForwardImpl::AlgoCuBlas::is_available(
          */
         return args.layout_a.stride[0] % 4 == 0 &&
                args.layout_b.stride[0] % 4 == 0 &&
-               current_device_prop().major > 5;
+               is_compute_capability_required(6, 1);
     }
     return false;
 }
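This hunk is the core of the fix: the int8 cuBLAS matmul path (and the 8x8x32 conv-bias algorithms layered on it) is now gated on compute capability 6.1 rather than on any sm_6x device, matching the commit title's report that the path was broken on sm60, presumably because the int8 dot-product (dp4a) support only starts at sm_61. Below is a minimal sketch, under the assumption that is_compute_capability_required(major, minor) simply compares the current device's compute capability against a required (major, minor) pair via the CUDA runtime; the actual MegDNN helper is defined elsewhere and may cache the device properties. Helper and variable names here are hypothetical.

// Sketch (assumption): a capability gate equivalent in spirit to
// is_compute_capability_required(6, 1); not the real MegDNN implementation.
#include <cuda_runtime.h>

static bool compute_capability_at_least(int major, int minor) {
    int dev = 0;
    cudaDeviceProp prop{};
    if (cudaGetDevice(&dev) != cudaSuccess ||
        cudaGetDeviceProperties(&prop, dev) != cudaSuccess)
        return false;
    // sm_60 reports (6, 0) and is rejected; sm_61 and newer pass.
    return prop.major > major || (prop.major == major && prop.minor >= minor);
}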


dnn/test/cuda/benchmark.cpp (+1, -1)

@@ -24,7 +24,7 @@ namespace test {

 TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32)
 {
-    if (cuda::current_device_prop().major < 6) {
+    if (!cuda::is_compute_capability_required(6, 1)) {
         printf("Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;


dnn/test/cuda/conv_bias.cpp (+3, -3)

@@ -325,7 +325,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_SMALL) {
 }
 
 TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_8x8x32) {
-    require_compute_capability(6, 0);
+    require_compute_capability(6, 1);
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
             ConvBiasForward::algo_name<ConvBias::DirectParam>(
@@ -472,7 +472,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL) {
 }
 
 TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) {
-    require_compute_capability(6, 0);
+    require_compute_capability(6, 1);
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
             ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
@@ -517,7 +517,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) {
 }
 
 TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_NCHW4) {
-    require_compute_capability(6, 0);
+    require_compute_capability(6, 1);
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
             ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
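The test-side changes follow the same rule: every test that exercises the 8x8x32 / int8 kernels now requires compute capability 6.1 instead of 6.0. As a rough, self-contained sketch of what a require_compute_capability(6, 1)-style guard amounts to (hypothetical names; the actual utility lives in the megdnn test support code and may report why the test was skipped):

// Sketch (assumption): early-out guard for tests on pre-sm_61 devices.
#include <cstdio>
#include <cuda_runtime.h>

static bool device_at_least_sm61() {
    int dev = 0;
    cudaDeviceProp prop{};
    if (cudaGetDevice(&dev) != cudaSuccess ||
        cudaGetDeviceProperties(&prop, dev) != cudaSuccess)
        return false;
    return prop.major > 6 || (prop.major == 6 && prop.minor >= 1);
}

// Usage inside a test body:
//     if (!device_at_least_sm61()) {
//         printf("skip: int8 path needs sm_61 or newer\n");
//         return;
//     }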


dnn/test/cuda/convolution.cpp (+2, -2)

@@ -30,7 +30,7 @@ namespace test {

 TEST_F(CUDA, CONVOLUTION_8X8X32)
 {
-    if (cuda::current_device_prop().major < 6) {
+    if (!cuda::is_compute_capability_required(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
@@ -112,7 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
 }
 
 TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) {
-    if (cuda::current_device_prop().major < 6)
+    if (!cuda::is_compute_capability_required(6, 1))
         return;
     using namespace convolution;
     Checker<Convolution> checker(handle_cuda());


dnn/test/cuda/convolution3d.cpp (+1, -1)

@@ -24,7 +24,7 @@ namespace test {

 #if 0
 TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
-    if (cuda::current_device_prop().major < 6) {
+    if (!cuda::is_compute_capability_required(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;


dnn/test/cuda/group_conv.cpp (+1, -1)

@@ -23,7 +23,7 @@ namespace test {

 TEST_F(CUDA, GROUP_CONV_FORWARD)
 {
-    bool is_int_available = (cuda::current_device_prop().major >= 6);
+    bool is_int_available = cuda::is_compute_capability_required(6, 1);
     auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
                    size_t FH, size_t FW,
                    size_t OC, size_t /* OH */, size_t /* OW */,


dnn/test/cuda/group_conv3d.cpp (+1, -1)

@@ -21,7 +21,7 @@ namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD) {
-    bool is_int_available = (cuda::current_device_prop().major >= 6);
+    bool is_int_available = cuda::is_compute_capability_required(6, 1);
     static_cast<void>(is_int_available);
     auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                    size_t FD, size_t FH, size_t FW, size_t OC, size_t PD,


dnn/test/cuda/matrix_mul.cpp (+9, -2)

@@ -193,8 +193,15 @@ TEST_F(CUDA, MATRIX_MUL)
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     size_t m = 12, n = 16, k = 20;
-    for (DType dtype: std::array<DType, 3>{
-            {dtype::Float32(), dtype::Float16(), dtype::Int32()}}) {
+
+    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    std::vector<DType> dtype_array;
+    dtype_array.push_back(dtype::Float32());
+    dtype_array.push_back(dtype::Float16());
+    if (is_int_available)
+        dtype_array.push_back(dtype::Int32());
+
+    for (DType dtype : dtype_array) {
         for (unsigned mask = 0; mask < 4; ++mask) {
             Param param;
             param.transposeA = mask & 1;

