
dilated_convolution.cpp 7.1 kB

/**
 * \file dnn/test/cuda/dilated_convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/cuda/fixture.h"

#include "test/common/convolution.h"
#include "test/common/checker.h"
#include "test/common/tensor.h"
#include "src/cuda/cudnn_with_check.h"
#include "test/cuda/utils.h"

using namespace megdnn;
using namespace test;
using namespace convolution;

#define V1(x) #x
#define V(x) V1(x)
#define CUDNN_VERSION_STRING \
    "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)

TEST_F(CUDA, DILATED_CONVOLUTION_FORWARD) {
    auto args = get_dilated_args();
    Checker<ConvolutionForward> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::DefaultParam>(
                              "CUDNN:Convolution:CUDNN_CONVOLUTION_FWD_ALGO_"
                              "IMPLICIT_PRECOMP_GEMM" CUDNN_VERSION_STRING,
                              {})
                              .c_str(),
                      {}}}}));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
                    "DEFAULT",
                    {{ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
                              "MATMUL", {})
                              .c_str(),
                      {{"CUBLAS", {}}}}}}));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_DATA) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardData> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
            "CUDNN_CONVOLUTION_BWD_DATA_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("MATMUL"));
#endif
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        // With cuDNN 7.5.0 or later, CUDNN_CONVOLUTION_BACKWARD_DATA_ALGO_1
        // produces incorrect results on compute capability 7.0 or later, so
        // the float16 test below is disabled on those GPUs. Remove this if
        // statement once cuDNN fixes the precision issue.
        if (!check_compute_capability(7, 0)) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{filter, dst, src});
        }
    }
    {
        auto handle = handle_cuda();
        auto opr = handle->create_operator<ConvolutionBackwardData>();
        param::Convolution param;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 2;
        param.dilate_h = param.dilate_w = 2;
        opr->param() = param;
        TensorLayout srcl({600, 512, 7, 7}, dtype::Float32()),
                filterl({512, 512, 3, 3}, dtype::Float32()),
                dstl({600, 512, 7, 7}, dtype::Float32());
        auto wsize = opr->get_workspace_in_bytes(filterl, dstl, srcl);
        Tensor<> src(handle, srcl), filter(handle, filterl), dst(handle, dstl);
        WorkspaceWrapper w(handle, wsize);
        opr->exec(filter.tensornd(), dst.tensornd(), src.tensornd(),
                  w.workspace());
        megcore_check(megcoreSynchronize(handle->megcore_computing_handle()));
    }
}

TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_FILTER) {
    std::vector<TestArg> args = get_dilated_args();
    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
#if CUDNN_VERSION >= 7500
    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
            "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1" CUDNN_VERSION_STRING));
    printf("cudnn version >= 7.5, use cudnn impl for dilated convolution\n");
#else
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardFilter>("MATMUL"));
#endif
    NormalRNG default_rng;
    bool first_run = true;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-2)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (!first_run) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_epsilon(1e-1)
                    .set_param(arg.param)
                    .exec(TensorLayoutArray{src, dst, filter});
        } else {
            // first arg is big, and float16 suffers from precision problems
            first_run = false;
        }
    }
}

#undef CUDNN_VERSION_STRING
#undef V
#undef V1

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU and GPU build. To run GPU programs, make sure the machine actually has a GPU and that the driver is installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
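
For sanity-checking such a setup, here is a minimal Python sketch (assuming the standard megengine package and its device helpers is_cuda_available, get_device_count, and set_default_device) that verifies MegEngine can see a GPU before GPU code is run:

    # Minimal sketch: confirm MegEngine can use a GPU before running GPU code.
    # Assumes megengine and megengine.device expose the helpers named below.
    import megengine as mge
    import megengine.device as device

    if mge.is_cuda_available():
        # The bundled CUDA environment found a usable GPU and driver.
        print("CUDA available, GPU count:", device.get_device_count("gpu"))
        device.set_default_device("gpu0")  # place subsequent tensors/ops on the first GPU
    else:
        print("No usable GPU or driver found; falling back to CPU")
        device.set_default_device("cpu0")

If is_cuda_available() returns False on a machine with a GPU, the usual cause is a missing or outdated driver rather than the MegEngine package itself.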