You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

group_conv.cpp 6.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. /**
  2. * \file dnn/test/cuda/group_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/nn.h"
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/convolution.h"
  16. #include "src/cuda/utils.h"
  17. namespace megdnn {
  18. namespace test {
  19. TEST_F(CUDA, GROUP_CONV_FORWARD)
  20. {
  21. bool is_int_available = (cuda::current_device_prop().major >= 6);
  22. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  23. size_t FH, size_t FW,
  24. size_t OC, size_t /* OH */, size_t /* OW */,
  25. size_t PH, size_t PW,
  26. size_t SH, size_t SW,
  27. size_t DH, size_t DW,
  28. size_t group)
  29. {
  30. {
  31. // float case
  32. Checker<Convolution> checker(handle_cuda());
  33. Convolution::Param param;
  34. param.sparse = Convolution::Param::Sparse::GROUP;
  35. param.pad_h = PH;
  36. param.pad_w = PW;
  37. param.stride_h = SH;
  38. param.stride_w = SW;
  39. param.dilate_h = DH;
  40. param.dilate_w = DW;
  41. auto ICg = IC / group;
  42. auto OCg = OC / group;
  43. checker.set_param(param).exec({{N, IC, IH, IW},
  44. {group, OCg, ICg, FH, FW}, {}});
  45. }
  46. if (is_int_available) {
  47. // int 8x8x32 case
  48. Checker<Convolution> checker(handle_cuda());
  49. Convolution::Param param;
  50. param.sparse = Convolution::Param::Sparse::GROUP;
  51. param.format = Convolution::Param::Format::NHWC;
  52. param.pad_h = PH;
  53. param.pad_w = PW;
  54. param.stride_h = SH;
  55. param.stride_w = SW;
  56. param.dilate_h = DH;
  57. param.dilate_w = DW;
  58. auto ICg = IC / group;
  59. auto OCg = OC / group;
  60. UniformIntRNG rng(-4, 4);
  61. checker.set_param(param).
  62. set_dtype(0, dtype::Int8()).
  63. set_dtype(1, dtype::Int8()).
  64. set_dtype(2, dtype::Int32()).
  65. set_rng(0, &rng).
  66. set_rng(1, &rng).
  67. exec({{N, IH, IW, IC}, {group, OCg, FH, FW, ICg}, {}});
  68. }
  69. };
  70. // normal case
  71. run(2, 64, 7, 7,
  72. 3, 3,
  73. 32, 5, 5,
  74. 0, 0,
  75. 1, 1,
  76. 1, 1,
  77. 2);
  78. // padded case
  79. run(2, 32, 7, 7,
  80. 3, 3,
  81. 64, 7, 7,
  82. 1, 1,
  83. 1, 1,
  84. 1, 1,
  85. 4);
  86. // strided case
  87. run(2, 32, 7, 7,
  88. 3, 3,
  89. 64, 3, 3,
  90. 0, 0,
  91. 2, 2,
  92. 1, 1,
  93. 8);
  94. // dilated case
  95. run(2, 32, 7, 7,
  96. 3, 3,
  97. 64, 3, 3,
  98. 0, 0,
  99. 1, 1,
  100. 2, 2,
  101. 8);
  102. }
  103. TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) {
  104. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  105. size_t FH, size_t FW,
  106. size_t OC, size_t group) {
  107. Checker<Convolution> checker(handle_cuda());
  108. #if CUDNN_MAJOR <= 6
  109. std::string conv1x1_name =
  110. ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
  111. "MATMUL1X1", {});
  112. checker.set_before_exec_callback(AlgoChecker<Convolution>(
  113. ConvBiasForward::algo_name<ConvBiasForward::DirectParam>(
  114. ssprintf("%s:%s", "CUDA:GROUP_CONV",
  115. conv1x1_name.c_str()),
  116. {})
  117. .c_str()));
  118. #endif
  119. Convolution::Param param;
  120. param.sparse = Convolution::Param::Sparse::GROUP;
  121. auto ICg = IC / group;
  122. auto OCg = OC / group;
  123. checker.set_param(param).exec({{N, IC, IH, IW},
  124. {group, OCg, ICg, FH, FW}, {}});
  125. };
  126. size_t ic = 192;
  127. for (size_t g = 2; g <= 3; g += 1) {
  128. for (size_t ih = 8; ih <= 128; ih *= 4) {
  129. size_t iw = ih;
  130. run(2, ic, ih, iw, 1, 1, ic / g, g);
  131. run(2, ic, ih+1, iw+1, 1, 1, ic / g, g);
  132. }
  133. }
  134. }
  135. TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA)
  136. {
  137. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  138. size_t FH, size_t FW,
  139. size_t OC, size_t OH, size_t OW,
  140. size_t PH, size_t PW,
  141. size_t SH, size_t SW,
  142. size_t group)
  143. {
  144. Checker<ConvolutionBackwardData> checker(handle_cuda());
  145. ConvolutionBackwardData::Param param;
  146. param.sparse = Convolution::Param::Sparse::GROUP;
  147. param.pad_h = PH;
  148. param.pad_w = PW;
  149. param.stride_h = SH;
  150. param.stride_w = SW;
  151. auto ICg = IC / group;
  152. auto OCg = OC / group;
  153. checker.set_param(param).exec({{group, OCg, ICg, FH, FW},
  154. {N, OC, OH, OW}, {N, IC, IH, IW}});
  155. };
  156. // normal case
  157. run(2, 64, 7, 7,
  158. 3, 3,
  159. 32, 5, 5,
  160. 0, 0,
  161. 1, 1,
  162. 2);
  163. // padded case
  164. run(2, 32, 7, 7,
  165. 3, 3,
  166. 64, 7, 7,
  167. 1, 1,
  168. 1, 1,
  169. 4);
  170. // strided case
  171. run(2, 32, 7, 7,
  172. 3, 3,
  173. 64, 3, 3,
  174. 0, 0,
  175. 2, 2,
  176. 8);
  177. }
  178. TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER)
  179. {
  180. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  181. size_t FH, size_t FW,
  182. size_t OC, size_t OH, size_t OW,
  183. size_t PH, size_t PW,
  184. size_t SH, size_t SW,
  185. size_t group)
  186. {
  187. Checker<ConvolutionBackwardFilter> checker(handle_cuda());
  188. ConvolutionBackwardFilter::Param param;
  189. param.sparse = Convolution::Param::Sparse::GROUP;
  190. param.pad_h = PH;
  191. param.pad_w = PW;
  192. param.stride_h = SH;
  193. param.stride_w = SW;
  194. auto ICg = IC / group;
  195. auto OCg = OC / group;
  196. checker.set_param(param).exec({{N, IC, IH, IW},
  197. {N, OC, OH, OW}, {group, OCg, ICg, FH, FW}});
  198. };
  199. // normal case
  200. run(2, 64, 7, 7,
  201. 3, 3,
  202. 32, 5, 5,
  203. 0, 0,
  204. 1, 1,
  205. 2);
  206. // padded case
  207. run(2, 32, 7, 7,
  208. 3, 3,
  209. 64, 7, 7,
  210. 1, 1,
  211. 1, 1,
  212. 4);
  213. // strided case
  214. run(2, 32, 7, 7,
  215. 3, 3,
  216. 64, 3, 3,
  217. 0, 0,
  218. 2, 2,
  219. 8);
  220. }
  221. } // namespace test
  222. } // namespace megdnn
  223. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台