
dilated_convolution.cpp

/**
 * \file dnn/test/cuda/dilated_convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "test/cuda/fixture.h"
#include "test/common/convolution.h"
#include "test/common/checker.h"
#include "test/common/tensor.h"
#include "src/cuda/cudnn_with_check.h"
#include "test/cuda/utils.h"

using namespace megdnn;
using namespace test;
using namespace convolution;

#define V1(x) #x
#define V(x) V1(x)
#define CUDNN_VERSION_STRING \
    "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)

TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD)
{
    auto args = get_dilated_args();
    Checker<ConvolutionForward> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>(
            ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>(
                    "CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_"
                    "PRECOMP_"
                    "GEMM" CUDNN_VERSION_STRING,
                    {})
                    .c_str()));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>(
            ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>("MATMUL",
                                                                     {})
                    .c_str()));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale =
                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_DATA)
{
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            "CUDNN_CONVOLUTION_BWD_DATA_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("MATMUL"));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale =
                1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        // With cuDNN 7.5.0 or later, CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
        // produces incorrect results on compute capability 7.0 or later,
        // so skip the float16 test below. Remove this if statement once
        // cuDNN fixes the precision issue.
        if (!check_compute_capability(7, 0)) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{filter, dst, src});
        }
    }
    {
        auto handle = handle_cuda();
        auto opr = handle->create_operator<ConvolutionBackwardData>();
        param::Convolution param;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 2;
        param.dilate_h = param.dilate_w = 2;
        opr->param() = param;
        TensorLayout srcl({600, 512, 7, 7}, dtype::Float32()),
                filterl({512, 512, 3, 3}, dtype::Float32()),
                dstl({600, 512, 7, 7}, dtype::Float32());
        auto wsize = opr->get_workspace_in_bytes(filterl, dstl, srcl);
        Tensor<> src(handle, srcl), filter(handle, filterl), dst(handle, dstl);
        WorkspaceWrapper w(handle, wsize);
        opr->exec(filter.tensornd(), dst.tensornd(), src.tensornd(),
                  w.workspace());
        megcore_check(megcoreSynchronize(handle->megcore_computing_handle()));
    }
}
TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_FILTER)
{
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardFilter>("MATMUL"));
#endif
    NormalRNG default_rng;
    bool first_run = true;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (!first_run) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{src, dst, filter});
        } else {
            // the first arg is big, and float16 suffers from precision problems
            first_run = false;
        }
    }
}

#undef CUDNN_VERSION_STRING
#undef V
#undef V1

// vim: syntax=cpp.doxygen

The MegEngine installation package already bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU and GPU build. If you want to run GPU programs, make sure the machine has a GPU and that the driver is installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
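Before running GPU code paths such as the dilated-convolution tests above, it can help to confirm that MegEngine actually sees a usable CUDA device. Below is a minimal Python sketch, assuming the `megengine` package from the installation note above is available; `is_cuda_available` is taken from MegEngine's Python API, so treat the exact call as an assumption if your version differs.

    import megengine as mge

    # Query whether the bundled CUDA environment can reach a usable GPU.
    if mge.is_cuda_available():
        print("CUDA device detected; GPU code paths can run.")
    else:
        print("No usable GPU found; check the hardware and driver installation.")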