
dilated_convolution.cpp 6.8 kB

  1. #include "test/cuda/fixture.h"
  2. #include "src/cuda/cudnn_with_check.h"
  3. #include "test/common/checker.h"
  4. #include "test/common/convolution.h"
  5. #include "test/common/tensor.h"
  6. #include "test/cuda/utils.h"
  7. using namespace megdnn;
  8. using namespace test;
  9. using namespace convolution;
  10. #define V1(x) #x
  11. #define V(x) V1(x)
  12. #define CUDNN_VERSION_STRING \
  13. "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)
TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD) {
    auto args = get_dilated_args();
    Checker<ConvolutionForward> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>(
                              "CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_"
                              "IMPLICIT_PRECOMP_GEMM" CUDNN_VERSION_STRING,
                              {})
                              .c_str(),
                      {}}}}));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
                              "MATMUL", {})
                              .c_str(),
                      {{"CUBLAS", {}}}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
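
// Backward-data: every arg is checked in float32; float16 is additionally
// checked with a looser epsilon, except on compute capability >= 7.0 (see
// the comment inside the loop).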
TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_DATA) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            "CUDNN_CONVOLUTION_BWD_DATA_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            ExecutionPolicyAlgoName{"MATMUL", {{"CUBLAS", {}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        // With cuDNN 7.5.0 or later, CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 produces
        // incorrect results on compute capability 7.0 or later, so skip the
        // following float16 test there. Remove the if statement once cuDNN
        // fixes the precision issue.
        if (!check_compute_capability(7, 0)) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{filter, dst, src});
        }
    }
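    // Standalone large-shape case: a 600x512x7x7 gradient with a 512x512x3x3
    // filter, pad = 2, dilation = 2, stride = 1. The effective kernel extent is
    // dilate * (k - 1) + 1 = 5, so (7 + 2*2 - 5) / 1 + 1 = 7 and a 7x7 input
    // maps back to a 7x7 output, which is why srcl and dstl share the same
    // spatial shape. This exercises workspace allocation and a raw exec()
    // outside the Checker.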
    {
        auto handle = handle_cuda();
        auto opr = handle->create_operator<ConvolutionBackwardData>();
        param::Convolution param;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 2;
        param.dilate_h = param.dilate_w = 2;
        opr->param() = param;
        TensorLayout srcl({600, 512, 7, 7}, dtype::Float32()),
                filterl({512, 512, 3, 3}, dtype::Float32()),
                dstl({600, 512, 7, 7}, dtype::Float32());
        auto wsize = opr->get_workspace_in_bytes(filterl, dstl, srcl);
        Tensor<> src(handle, srcl), filter(handle, filterl), dst(handle, dstl);
        WorkspaceWrapper w(handle, wsize);
        opr->exec(filter.tensornd(), dst.tensornd(), src.tensornd(), w.workspace());
        megcore_check(megcoreSynchronize(handle->megcore_computing_handle()));
    }
}
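
// Backward-filter: float32 is checked for every arg; float16 is skipped for
// the first (big) arg, where its limited precision would make the check
// unreliable.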
TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_FILTER) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            ExecutionPolicyAlgoName{"MATMUL", {{"CUBLAS", {}}}}));
#endif
    NormalRNG default_rng;
    bool first_run = true;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (!first_run) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{src, dst, filter});
        } else {
            // the first arg is big, and float16 suffers from precision problems
            first_run = false;
        }
    }
}
#undef CUDNN_VERSION_STRING
#undef V
#undef V1

// vim: syntax=cpp.doxygen