You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deformable_conv.cpp 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. /**
  2. * \file dnn/test/cuda/deformable_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/nn.h"
  12. #include "src/cuda/utils.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/random_state.h"
  15. #include "test/cuda/benchmark.h"
  16. #include "test/cuda/fixture.h"
  17. using namespace megdnn;
  18. using namespace test;
  namespace {

  /**
   * \brief Deduce the output height/width of a (dilated, strided, padded)
   *      convolution.
   *
   * For each spatial axis the result is
   *      (in + 2 * pad - (1 + (filter - 1) * dilate)) / stride + 1
   * using integer division.
   *
   * \param ih,iw input height / width
   * \param fh,fw filter height / width
   * \param ph,pw padding applied on each side
   * \param sh,sw stride
   * \param dh,dw dilation
   * \param[out] oh,ow deduced output height / width
   *
   * \note The expression is evaluated in size_t, so callers have to make sure
   *      that the padded input is at least as large as the dilated filter and
   *      that the strides are non-zero. The value is narrowed to int before
   *      being written back.
   */
  void calc_output_shape(const size_t& ih, const size_t& iw, const size_t& fh,
                         const size_t& fw, const size_t& ph, const size_t& pw,
                         const size_t& sh, const size_t& sw, const size_t& dh,
                         const size_t& dw, size_t& oh, size_t& ow) {
      // extent covered by the filter once dilation is taken into account
      const size_t dilated_fh = (fh - 1) * dh + 1;
      const size_t dilated_fw = (fw - 1) * dw + 1;

      const int out_h = static_cast<int>(
              (static_cast<int>(ih) + 2 * ph - dilated_fh) / sh + 1);
      const int out_w = static_cast<int>(
              (static_cast<int>(iw) + 2 * pw - dilated_fw) / sw + 1);

      oh = out_h;
      ow = out_w;
  }

  }  // namespace
  31. TEST_F(CUDA, DEFORMABLE_CONV_FWD) {
  32. Checker<DeformableConv> checker(handle_cuda());
  33. Convolution::Param param;
  34. UniformFloatRNG im_rng{-10, 10};
  35. UniformFloatRNG filter_rng{-1, 1};
  36. UniformFloatRNG offset_rng{-2, 2};
  37. UniformFloatRNG mask_rng{-1, 1};
  38. checker.set_epsilon(0.01)
  39. .set_rng(0, &im_rng)
  40. .set_rng(1, &filter_rng)
  41. .set_rng(2, &offset_rng)
  42. .set_rng(3, &mask_rng);
  43. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  44. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  45. size_t ic, size_t oc, size_t batch, size_t group,
  46. size_t deformable_group) {
  47. size_t oh, ow;
  48. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  49. param.pad_h = ph;
  50. param.pad_w = pw;
  51. param.stride_h = sh;
  52. param.stride_w = sw;
  53. param.dilate_h = dh;
  54. param.dilate_w = dw;
  55. param.format = DeformableConv::Param::Format::NCHW;
  56. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  57. if (group > 1) {
  58. param.sparse = DeformableConv::Param::Sparse::GROUP;
  59. checker.set_param(param).execs(
  60. {{batch, ic, ih, iw},
  61. {group, oc / group, ic / group, fh, fw},
  62. {batch, 2 * deformable_group * fh * fw, oh, ow},
  63. {batch, deformable_group * fh * fw, oh, ow},
  64. {batch, oc, oh, ow}});
  65. } else {
  66. param.sparse = DeformableConv::Param::Sparse::DENSE;
  67. checker.set_param(param).execs(
  68. {{batch, ic, ih, iw},
  69. {oc, ic, fh, fw},
  70. {batch, 2 * deformable_group * fh * fw, oh, ow},
  71. {batch, deformable_group * fh * fw, oh, ow},
  72. {batch, oc, oh, ow}});
  73. }
  74. };
  75. for (auto batch : std::vector<int>{1, 3})
  76. for (auto hw : std::vector<int>{16, 20})
  77. for (auto fhw : std::vector<int>{3, 5, 7})
  78. for (auto phw : std::vector<int>{2, 5})
  79. for (auto shw : std::vector<int>{1, 3})
  80. for (auto g : std::vector<int>{1, 2})
  81. for (auto icpg : std::vector<int>{1, 3})
  82. for (auto ocpg : std::vector<int>{1, 3}) {
  83. auto dhw = shw;
  84. run_test(hw, hw, fhw, fhw, phw, phw, shw,
  85. shw, dhw, dhw, g * icpg, g * ocpg,
  86. batch, g, g);
  87. }
  88. }
  89. TEST_F(CUDA, DEFORMABLE_CONV_BWD_FILTER) {
  90. Checker<DeformableConvBackwardFilter> checker(handle_cuda());
  91. Convolution::Param param;
  92. UniformFloatRNG im_rng{-10, 10};
  93. UniformFloatRNG offset_rng{-2, 2};
  94. UniformFloatRNG mask_rng{-1, 1};
  95. UniformFloatRNG out_grad_rng{-1, 1};
  96. checker.set_epsilon(0.01)
  97. .set_rng(0, &im_rng)
  98. .set_rng(1, &offset_rng)
  99. .set_rng(2, &mask_rng)
  100. .set_rng(3, &out_grad_rng);
  101. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  102. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  103. size_t ic, size_t oc, size_t batch, size_t group,
  104. size_t deformable_group) {
  105. size_t oh, ow;
  106. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  107. param.pad_h = ph;
  108. param.pad_w = pw;
  109. param.stride_h = sh;
  110. param.stride_w = sw;
  111. param.dilate_h = dh;
  112. param.dilate_w = dw;
  113. param.format = DeformableConv::Param::Format::NCHW;
  114. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  115. if (group > 1) {
  116. param.sparse = DeformableConv::Param::Sparse::GROUP;
  117. checker.set_param(param).execs(
  118. {{batch, ic, ih, iw},
  119. {batch, 2 * deformable_group * fh * fw, oh, ow},
  120. {batch, deformable_group * fh * fw, oh, ow},
  121. {batch, oc, oh, ow},
  122. {group, oc / group, ic / group, fh, fw}});
  123. } else {
  124. param.sparse = DeformableConv::Param::Sparse::DENSE;
  125. checker.set_param(param).execs(
  126. {{batch, ic, ih, iw},
  127. {batch, 2 * deformable_group * fh * fw, oh, ow},
  128. {batch, deformable_group * fh * fw, oh, ow},
  129. {batch, oc, oh, ow},
  130. {oc, ic, fh, fw}});
  131. }
  132. };
  133. for (auto batch : std::vector<int>{1, 2})
  134. for (auto hw : std::vector<int>{16, 20})
  135. for (auto fhw : std::vector<int>{3, 5, 7})
  136. for (auto phw : std::vector<int>{2, 5})
  137. for (auto shw : std::vector<int>{1, 3})
  138. for (auto g : std::vector<int>{1, 2})
  139. for (auto icpg : std::vector<int>{1, 5})
  140. for (auto ocpg : std::vector<int>{1, 5}) {
  141. auto dhw = shw;
  142. run_test(hw, hw, fhw, fhw, phw, phw, shw,
  143. shw, dhw, dhw, g * icpg, g * ocpg,
  144. batch, g, g);
  145. }
  146. }
  147. TEST_F(CUDA, DEFORMABLE_CONV_BWD_DATA) {
  148. Checker<DeformableConvBackwardData> checker(handle_cuda());
  149. Convolution::Param param;
  150. UniformFloatRNG im_rng{0, 255};
  151. UniformFloatRNG filter_rng{-1, 1};
  152. UniformFloatRNG offset_rng{-2, 2};
  153. UniformFloatRNG mask_rng{0, 1};
  154. UniformFloatRNG out_grad_rng{0, 2};
  155. checker.set_epsilon(0.1f)
  156. .set_rng(0, &im_rng)
  157. .set_rng(1, &filter_rng)
  158. .set_rng(2, &offset_rng)
  159. .set_rng(3, &mask_rng)
  160. .set_rng(4, &out_grad_rng);
  161. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  162. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  163. size_t ic, size_t oc, size_t batch, size_t group,
  164. size_t deformable_group) {
  165. size_t oh, ow;
  166. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  167. param.pad_h = ph;
  168. param.pad_w = pw;
  169. param.stride_h = sh;
  170. param.stride_w = sw;
  171. param.dilate_h = dh;
  172. param.dilate_w = dw;
  173. param.format = DeformableConv::Param::Format::NCHW;
  174. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  175. if (group > 1) {
  176. param.sparse = DeformableConv::Param::Sparse::GROUP;
  177. checker.set_param(param).execs(
  178. {{batch, ic, ih, iw},
  179. {group, oc / group, ic / group, fh, fw},
  180. {batch, 2 * deformable_group * fh * fw, oh, ow},
  181. {batch, deformable_group * fh * fw, oh, ow},
  182. {batch, oc, oh, ow},
  183. {batch, ic, ih, iw},
  184. {batch, 2 * deformable_group * fh * fw, oh, ow},
  185. {batch, deformable_group * fh * fw, oh, ow}});
  186. } else {
  187. param.sparse = DeformableConv::Param::Sparse::DENSE;
  188. checker.set_param(param).execs(
  189. {{batch, ic, ih, iw},
  190. {oc, ic, fh, fw},
  191. {batch, 2 * deformable_group * fh * fw, oh, ow},
  192. {batch, deformable_group * fh * fw, oh, ow},
  193. {batch, oc, oh, ow},
  194. {batch, ic, ih, iw},
  195. {batch, 2 * deformable_group * fh * fw, oh, ow},
  196. {batch, deformable_group * fh * fw, oh, ow}});
  197. }
  198. };
  199. for (auto batch : std::vector<int>{1, 3})
  200. for (auto hw : std::vector<int>{16, 20})
  201. for (auto fhw : std::vector<int>{3, 5, 7})
  202. for (auto phw : std::vector<int>{2, 5})
  203. for (auto shw : std::vector<int>{1, 3})
  204. for (auto g : std::vector<int>{1, 2})
  205. for (auto icpg : std::vector<int>{1, 3})
  206. for (auto ocpg : std::vector<int>{1, 3}) {
  207. auto dhw = shw;
  208. run_test(hw, hw, fhw, fhw, phw, phw, shw,
  209. shw, dhw, dhw, g * icpg, g * ocpg,
  210. batch, g, g);
  211. }
  212. }
  213. #if MEGDNN_WITH_BENCHMARK
  214. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_FORWARD) {
  215. CUBenchmarker<DeformableConvForward> bencher(handle_cuda());
  216. bencher.set_display(true);
  217. Convolution::Param param;
  218. UniformFloatRNG im_rng{-10, 10};
  219. UniformFloatRNG filter_rng{-10, 10};
  220. UniformFloatRNG offset_rng{-10, 10};
  221. UniformFloatRNG mask_rng{-10, 10};
  222. UniformFloatRNG out_grad_rng{-10, 10};
  223. auto run_bench = [&](size_t batch, size_t ic, size_t oc, size_t ih,
  224. size_t iw, size_t fh, size_t fw, size_t ph, size_t pw,
  225. size_t sh, size_t sw, size_t dh, size_t dw,
  226. size_t group, size_t deformable_group,
  227. size_t nr_times) {
  228. size_t oh, ow;
  229. param.pad_h = ph;
  230. param.pad_w = pw;
  231. param.stride_h = sh;
  232. param.stride_w = sw;
  233. param.dilate_h = dh;
  234. param.dilate_w = dw;
  235. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  236. param.format = DeformableConv::Param::Format::NCHW;
  237. param.sparse = DeformableConv::Param::Sparse::DENSE;
  238. bencher.set_param(param)
  239. .set_rng(0, &im_rng)
  240. .set_rng(1, &im_rng)
  241. .set_rng(2, &offset_rng)
  242. .set_rng(3, &mask_rng);
  243. bencher.set_times(nr_times);
  244. TensorShape im{batch, ic, ih, iw}, filter{oc, ic, fh, fw},
  245. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  246. mask{batch, deformable_group * fh * fw, oh, ow};
  247. auto time_in_ms =
  248. bencher.execs({im, filter, offset, mask, {}}) / nr_times;
  249. auto ops = 2.0 * group * (oc / group) * (oh * ow * batch) *
  250. (ic / group) * fh * fw / (time_in_ms * 1e-3) * 1e-12;
  251. printf("deformable conv forward performance: %fTops\n", ops);
  252. };
  253. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  254. }
  255. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_BWD_FILTER) {
  256. CUBenchmarker<DeformableConvBackwardFilter> bencher(handle_cuda());
  257. bencher.set_display(true);
  258. Convolution::Param param;
  259. UniformFloatRNG im_rng{-10, 10};
  260. UniformFloatRNG filter_rng{-10, 10};
  261. UniformFloatRNG offset_rng{-10, 10};
  262. UniformFloatRNG mask_rng{-10, 10};
  263. UniformFloatRNG out_grad_rng{-10, 10};
  264. auto run_bench = [&](size_t batch, size_t icpg, size_t ocpg, size_t ih,
  265. size_t iw, size_t fh, size_t fw, size_t ph, size_t pw,
  266. size_t sh, size_t sw, size_t dh, size_t dw,
  267. size_t group, size_t deformable_group,
  268. size_t nr_times) {
  269. size_t oh, ow;
  270. size_t ic = icpg * group, oc = ocpg * group;
  271. param.pad_h = ph;
  272. param.pad_w = pw;
  273. param.stride_h = sh;
  274. param.stride_w = sw;
  275. param.dilate_h = dh;
  276. param.dilate_w = dw;
  277. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  278. param.format = DeformableConv::Param::Format::NCHW;
  279. param.sparse = DeformableConv::Param::Sparse::DENSE;
  280. bencher.set_param(param)
  281. .set_rng(0, &im_rng)
  282. .set_rng(1, &im_rng)
  283. .set_rng(2, &offset_rng)
  284. .set_rng(3, &mask_rng);
  285. bencher.set_times(nr_times);
  286. TensorShape im{batch, ic, ih, iw}, filter{ic, ic, fh, fw},
  287. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  288. mask{batch, deformable_group * fh * fw, oh, ow},
  289. out_grad{batch, oc, oh, ow}, filter_grad{oc, ic, fh, fw};
  290. auto time_in_ms =
  291. bencher.execs({im, offset, mask, out_grad, filter_grad}) /
  292. nr_times;
  293. auto ops = 2.0 * group * (oc / group) * (oh * ow * batch) *
  294. (ic / group) * fh * fw / (time_in_ms * 1e-3) * 1e-12;
  295. printf("deformable conv bwd filter performance: %fTops\n", ops);
  296. };
  297. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  298. // run_bench(16, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  299. }
  300. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_BWD_DATA) {
  301. CUBenchmarker<DeformableConvBackwardData> bencher(handle_cuda());
  302. bencher.set_display(true);
  303. Convolution::Param param;
  304. UniformFloatRNG im_rng{-10, 10};
  305. UniformFloatRNG filter_rng{-10, 10};
  306. UniformFloatRNG offset_rng{-10, 10};
  307. UniformFloatRNG mask_rng{-10, 10};
  308. UniformFloatRNG out_grad_rng{-10, 10};
  309. auto run_bench = [&](size_t batch, size_t ic, size_t oc, size_t ih,
  310. size_t iw, size_t fh, size_t fw, size_t ph, size_t pw,
  311. size_t sh, size_t sw, size_t dh, size_t dw,
  312. size_t group, size_t deformable_group,
  313. size_t nr_times) {
  314. size_t oh, ow;
  315. param.pad_h = ph;
  316. param.pad_w = pw;
  317. param.stride_h = sh;
  318. param.stride_w = sw;
  319. param.dilate_h = dh;
  320. param.dilate_w = dw;
  321. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  322. param.format = DeformableConv::Param::Format::NCHW;
  323. param.sparse = DeformableConv::Param::Sparse::DENSE;
  324. bencher.set_param(param)
  325. .set_rng(0, &im_rng)
  326. .set_rng(1, &im_rng)
  327. .set_rng(2, &offset_rng)
  328. .set_rng(3, &mask_rng);
  329. bencher.set_times(nr_times);
  330. TensorShape im{batch, ic, ih, iw}, filter{oc, ic, fh, fw},
  331. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  332. mask{batch, deformable_group * fh * fw, oh, ow},
  333. out_grad{batch, oc, oh, ow}, im_grad{batch, ic, ih, iw},
  334. offset_grad{batch, 2 * deformable_group * fh * fw, oh, ow},
  335. mask_grad{batch, deformable_group * fh * fw, oh, ow};
  336. auto time_in_ms = bencher.execs({im, filter, offset, mask, out_grad,
  337. im_grad, offset_grad, mask_grad}) /
  338. nr_times;
  339. auto ops = 2.0 * group * (oc / group) * oh * ow * batch * (ic / group) *
  340. fh * fw / (time_in_ms * 1e-3) * 1e-12;
  341. printf("deformable conv bwd data performance: %fTops\n", ops);
  342. };
  343. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  344. }
  345. #endif
  346. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台