
dilated_convolution.cpp

/**
 * \file dnn/test/cuda/dilated_convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/cuda/fixture.h"

#include "src/cuda/cudnn_with_check.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/tensor.h"
#include "test/cuda/utils.h"

using namespace megdnn;
using namespace test;
using namespace convolution;

#define V1(x) #x
#define V(x)  V1(x)
#define CUDNN_VERSION_STRING \
    "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)
TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD) {
    auto args = get_dilated_args();
    Checker<ConvolutionForward> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>(
                              "CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_"
                              "IMPLICIT_"
                              "PRECOMP_"
                              "GEMM" CUDNN_VERSION_STRING,
                              {})
                              .c_str(),
                      {}}}}));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
                              "MATMUL", {})
                              .c_str(),
                      {{"CUBLAS", {}}}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_DATA) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            "CUDNN_CONVOLUTION_BWD_DATA_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            ExecutionPolicyAlgoName{"MATMUL", {{"CUBLAS", {}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        // On cudnn 7.5.0 and later, CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 produces
        // incorrect results on compute capability 7.0 or later, so the float16
        // case is disabled there. Remove this if statement once cudnn fixes the
        // precision issue.
        if (!check_compute_capability(7, 0)) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{filter, dst, src});
        }
    }
    {
        auto handle = handle_cuda();
        auto opr = handle->create_operator<ConvolutionBackwardData>();
        param::Convolution param;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 2;
        param.dilate_h = param.dilate_w = 2;
        opr->param() = param;
        TensorLayout srcl({600, 512, 7, 7}, dtype::Float32()),
                filterl({512, 512, 3, 3}, dtype::Float32()),
                dstl({600, 512, 7, 7}, dtype::Float32());
        auto wsize = opr->get_workspace_in_bytes(filterl, dstl, srcl);
        Tensor<> src(handle, srcl), filter(handle, filterl), dst(handle, dstl);
        WorkspaceWrapper w(handle, wsize);
        opr->exec(filter.tensornd(), dst.tensornd(), src.tensornd(), w.workspace());
        megcore_check(megcoreSynchronize(handle->megcore_computing_handle()));
    }
}
TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_FILTER) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            ExecutionPolicyAlgoName{"MATMUL", {{"CUBLAS", {}}}}));
#endif
    NormalRNG default_rng;
    bool first_run = true;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (!first_run) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{src, dst, filter});
        } else {
            // first arg is big, and float16 suffers from precision problems
            first_run = false;
        }
    }
}

#undef CUDNN_VERSION_STRING
#undef V
#undef V1

// vim: syntax=cpp.doxygen
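
For readers unfamiliar with the V/V1 macro pair used above: it is the standard two-level stringization idiom. The outer macro forces its argument to be macro-expanded before the inner macro stringizes it, so CUDNN_VERSION_STRING becomes a literal such as "v7.6.5" instead of the unexpanded macro names. Below is a minimal standalone sketch of the same idiom, using hypothetical DEMO_* macros in place of the real cuDNN version macros from cudnn.h:

// demo_version_string.cpp: two-level stringization sketch (not part of the test file)
#include <cstdio>

#define V1(x) #x      // stringizes its argument as-is
#define V(x)  V1(x)   // expands the argument first, then stringizes it

// Stand-ins for CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL from cudnn.h.
#define DEMO_MAJOR 7
#define DEMO_MINOR 6
#define DEMO_PATCH 5

#define DEMO_VERSION_STRING \
    "v" V(DEMO_MAJOR) "." V(DEMO_MINOR) "." V(DEMO_PATCH)

int main() {
    std::printf("%s\n", DEMO_VERSION_STRING);  // prints: v7.6.5
    return 0;
}

Using V1 directly would produce "DEMO_MAJOR" rather than "7"; the extra V layer is what lets the numeric value reach the stringizing operator, which is why the test file appends the suffix to each algorithm name with CUDNN_VERSION_STRING.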