You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

group_conv.cpp 9.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. /**
  2. * \file dnn/test/cuda/group_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/nn.h"
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/convolution.h"
  16. #include "src/cuda/utils.h"
  17. namespace megdnn {
  18. namespace test {
  19. TEST_F(CUDA, GROUP_CONV_FORWARD)
  20. {
  21. bool is_int_available = cuda::is_compute_capability_required(6, 1);
  22. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  23. size_t FH, size_t FW,
  24. size_t OC, size_t /* OH */, size_t /* OW */,
  25. size_t PH, size_t PW,
  26. size_t SH, size_t SW,
  27. size_t DH, size_t DW,
  28. size_t group)
  29. {
  30. {
  31. // float case
  32. Checker<Convolution> checker(handle_cuda());
  33. Convolution::Param param;
  34. param.sparse = Convolution::Param::Sparse::GROUP;
  35. param.pad_h = PH;
  36. param.pad_w = PW;
  37. param.stride_h = SH;
  38. param.stride_w = SW;
  39. param.dilate_h = DH;
  40. param.dilate_w = DW;
  41. auto ICg = IC / group;
  42. auto OCg = OC / group;
  43. checker.set_param(param).exec({{N, IC, IH, IW},
  44. {group, OCg, ICg, FH, FW}, {}});
  45. }
  46. if (is_int_available) {
  47. // int 8x8x32 case
  48. Checker<Convolution> checker(handle_cuda());
  49. Convolution::Param param;
  50. param.sparse = Convolution::Param::Sparse::GROUP;
  51. param.format = Convolution::Param::Format::NHWC;
  52. param.pad_h = PH;
  53. param.pad_w = PW;
  54. param.stride_h = SH;
  55. param.stride_w = SW;
  56. param.dilate_h = DH;
  57. param.dilate_w = DW;
  58. auto ICg = IC / group;
  59. auto OCg = OC / group;
  60. UniformIntRNG rng(-4, 4);
  61. checker.set_param(param).
  62. set_dtype(0, dtype::Int8()).
  63. set_dtype(1, dtype::Int8()).
  64. set_dtype(2, dtype::Int32()).
  65. set_rng(0, &rng).
  66. set_rng(1, &rng).
  67. exec({{N, IH, IW, IC}, {group, OCg, FH, FW, ICg}, {}});
  68. }
  69. };
  70. // normal case
  71. run(2, 64, 7, 7,
  72. 3, 3,
  73. 32, 5, 5,
  74. 0, 0,
  75. 1, 1,
  76. 1, 1,
  77. 2);
  78. // padded case
  79. run(2, 32, 7, 7,
  80. 3, 3,
  81. 64, 7, 7,
  82. 1, 1,
  83. 1, 1,
  84. 1, 1,
  85. 4);
  86. // strided case
  87. run(2, 32, 7, 7,
  88. 3, 3,
  89. 64, 3, 3,
  90. 0, 0,
  91. 2, 2,
  92. 1, 1,
  93. 8);
  94. // dilated case
  95. run(2, 32, 7, 7,
  96. 3, 3,
  97. 64, 3, 3,
  98. 0, 0,
  99. 1, 1,
  100. 2, 2,
  101. 8);
  102. }
  103. TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) {
  104. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH,
  105. size_t FW, size_t OC, size_t group) {
  106. Checker<Convolution> checker(handle_cuda());
  107. std::string conv1x1_name =
  108. ConvBiasForward::algo_name<ConvBias::MatmulParam>(
  109. "INPLACE_MATMUL", {});
  110. checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>(
  111. ExecutionPolicyAlgoName{"DEFAULT",
  112. {{ConvBiasForward::algo_name<
  113. ConvBiasForward::DirectParam>(
  114. "CUDA:GROUP_CONV", {})
  115. .c_str(),
  116. {{conv1x1_name.c_str(), {}}}}}}));
  117. Convolution::Param param;
  118. param.sparse = Convolution::Param::Sparse::GROUP;
  119. auto ICg = IC / group;
  120. auto OCg = OC / group;
  121. checker.set_param(param).exec(
  122. {{N, IC, IH, IW}, {group, OCg, ICg, FH, FW}, {}});
  123. };
  124. size_t ic = 192;
  125. for (size_t g = 2; g <= 3; g += 1) {
  126. for (size_t ih = 8; ih <= 128; ih *= 4) {
  127. size_t iw = ih;
  128. run(2, ic, ih, iw, 1, 1, ic / g, g);
  129. run(2, ic, ih + 1, iw + 1, 1, 1, ic / g, g);
  130. }
  131. }
  132. }
  133. TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA)
  134. {
  135. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  136. size_t FH, size_t FW,
  137. size_t OC, size_t OH, size_t OW,
  138. size_t PH, size_t PW,
  139. size_t SH, size_t SW,
  140. size_t group)
  141. {
  142. Checker<ConvolutionBackwardData> checker(handle_cuda());
  143. ConvolutionBackwardData::Param param;
  144. param.sparse = Convolution::Param::Sparse::GROUP;
  145. param.pad_h = PH;
  146. param.pad_w = PW;
  147. param.stride_h = SH;
  148. param.stride_w = SW;
  149. auto ICg = IC / group;
  150. auto OCg = OC / group;
  151. checker.set_param(param).exec({{group, OCg, ICg, FH, FW},
  152. {N, OC, OH, OW}, {N, IC, IH, IW}});
  153. };
  154. // normal case
  155. run(2, 64, 7, 7,
  156. 3, 3,
  157. 32, 5, 5,
  158. 0, 0,
  159. 1, 1,
  160. 2);
  161. // padded case
  162. run(2, 32, 7, 7,
  163. 3, 3,
  164. 64, 7, 7,
  165. 1, 1,
  166. 1, 1,
  167. 4);
  168. // strided case
  169. run(2, 32, 7, 7,
  170. 3, 3,
  171. 64, 3, 3,
  172. 0, 0,
  173. 2, 2,
  174. 8);
  175. }
  176. TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA_CUDNN)
  177. {
  178. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  179. size_t FH, size_t FW,
  180. size_t OC, size_t OH, size_t OW,
  181. size_t PH, size_t PW,
  182. size_t SH, size_t SW,
  183. size_t group)
  184. {
  185. Checker<ConvolutionBackwardData> checker(handle_cuda());
  186. checker.set_before_exec_callback(
  187. AlgoChecker<ConvolutionBackwardData>(ExecutionPolicyAlgoName{
  188. "CUDA:GROUP_CONV_BACKWARD_DATA", {{"CUDNN", {}}}}));
  189. ConvolutionBackwardData::Param param;
  190. param.sparse = Convolution::Param::Sparse::GROUP;
  191. param.pad_h = PH;
  192. param.pad_w = PW;
  193. param.stride_h = SH;
  194. param.stride_w = SW;
  195. auto ICg = IC / group;
  196. auto OCg = OC / group;
  197. checker.set_param(param).exec({{group, OCg, ICg, FH, FW},
  198. {N, OC, OH, OW}, {N, IC, IH, IW}});
  199. };
  200. // normal case
  201. run(2, 64, 7, 7,
  202. 3, 3,
  203. 32, 5, 5,
  204. 0, 0,
  205. 1, 1,
  206. 2);
  207. // padded case
  208. run(2, 32, 7, 7,
  209. 3, 3,
  210. 64, 7, 7,
  211. 1, 1,
  212. 1, 1,
  213. 4);
  214. // strided case
  215. run(2, 32, 7, 7,
  216. 3, 3,
  217. 64, 3, 3,
  218. 0, 0,
  219. 2, 2,
  220. 8);
  221. }
  222. TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER)
  223. {
  224. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  225. size_t FH, size_t FW,
  226. size_t OC, size_t OH, size_t OW,
  227. size_t PH, size_t PW,
  228. size_t SH, size_t SW,
  229. size_t group)
  230. {
  231. Checker<ConvolutionBackwardFilter> checker(handle_cuda());
  232. ConvolutionBackwardFilter::Param param;
  233. param.sparse = Convolution::Param::Sparse::GROUP;
  234. param.pad_h = PH;
  235. param.pad_w = PW;
  236. param.stride_h = SH;
  237. param.stride_w = SW;
  238. auto ICg = IC / group;
  239. auto OCg = OC / group;
  240. checker.set_param(param).exec({{N, IC, IH, IW},
  241. {N, OC, OH, OW}, {group, OCg, ICg, FH, FW}});
  242. };
  243. // normal case
  244. run(2, 64, 7, 7,
  245. 3, 3,
  246. 32, 5, 5,
  247. 0, 0,
  248. 1, 1,
  249. 2);
  250. // padded case
  251. run(2, 32, 7, 7,
  252. 3, 3,
  253. 64, 7, 7,
  254. 1, 1,
  255. 1, 1,
  256. 4);
  257. // strided case
  258. run(2, 32, 7, 7,
  259. 3, 3,
  260. 64, 3, 3,
  261. 0, 0,
  262. 2, 2,
  263. 8);
  264. }
  265. TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER_CUDNN)
  266. {
  267. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
  268. size_t FH, size_t FW,
  269. size_t OC, size_t OH, size_t OW,
  270. size_t PH, size_t PW,
  271. size_t SH, size_t SW,
  272. size_t group)
  273. {
  274. Checker<ConvolutionBackwardFilter> checker(handle_cuda());
  275. checker.set_before_exec_callback(
  276. AlgoChecker<ConvolutionBackwardFilter>(ExecutionPolicyAlgoName{
  277. "CUDA:GROUP_CONV_BACKWARD_FILTER", {{"CUDNN", {}}}}));
  278. ConvolutionBackwardFilter::Param param;
  279. param.sparse = Convolution::Param::Sparse::GROUP;
  280. param.pad_h = PH;
  281. param.pad_w = PW;
  282. param.stride_h = SH;
  283. param.stride_w = SW;
  284. auto ICg = IC / group;
  285. auto OCg = OC / group;
  286. checker.set_param(param).exec({{N, IC, IH, IW},
  287. {N, OC, OH, OW}, {group, OCg, ICg, FH, FW}});
  288. };
  289. // normal case
  290. run(2, 64, 7, 7,
  291. 3, 3,
  292. 32, 5, 5,
  293. 0, 0,
  294. 1, 1,
  295. 2);
  296. // padded case
  297. run(2, 32, 7, 7,
  298. 3, 3,
  299. 64, 7, 7,
  300. 1, 1,
  301. 1, 1,
  302. 4);
  303. // strided case
  304. run(2, 32, 7, 7,
  305. 3, 3,
  306. 64, 3, 3,
  307. 0, 0,
  308. 2, 2,
  309. 8);
  310. }
  311. } // namespace test
  312. } // namespace megdnn
  313. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台