You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deformable_conv.cpp 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /**
  2. * \file dnn/test/cuda/deformable_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/nn.h"
  12. #include "src/cuda/utils.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/random_state.h"
  15. #include "test/cuda/benchmark.h"
  16. #include "test/cuda/fixture.h"
  17. using namespace megdnn;
  18. using namespace test;
  namespace {

  /*!
   * \brief Deduce the spatial output size of a padded, strided, dilated
   *      convolution.
   *
   * Per axis the result is (in + 2 * pad - kern) / stride + 1, where
   * kern = 1 + (filter - 1) * dilate is the extent of the dilated kernel.
   *
   * \param ih,iw input height / width
   * \param fh,fw filter height / width
   * \param ph,pw padding applied on each side
   * \param sh,sw stride; must be non-zero because it is used as a divisor
   * \param dh,dw dilation
   * \param[out] oh,ow deduced output height / width; 0 when the dilated
   *      kernel does not fit into the padded input along that axis
   */
  void calc_output_shape(
          const size_t& ih, const size_t& iw, const size_t& fh, const size_t& fw,
          const size_t& ph, const size_t& pw, const size_t& sh, const size_t& sw,
          const size_t& dh, const size_t& dw, size_t& oh, size_t& ow) {
      // Extent covered by the dilated kernel along each axis.
      const size_t kh = 1 + (fh - 1) * dh;
      const size_t kw = 1 + (fw - 1) * dw;

      // All operands are unsigned, so the subtraction must be guarded: without
      // the check a kernel larger than the padded input would wrap around to a
      // huge bogus extent instead of yielding an empty output.
      const auto deduce = [](size_t in, size_t pad, size_t kern,
                             size_t stride) -> size_t {
          const size_t padded = in + pad * 2;
          if (padded < kern) {
              return 0;
          }
          return (padded - kern) / stride + 1;
      };

      oh = deduce(ih, ph, kh, sh);
      ow = deduce(iw, pw, kw, sw);
  }

  }  // namespace
  31. TEST_F(CUDA, DEFORMABLE_CONV_FWD) {
  32. Checker<DeformableConv> checker(handle_cuda());
  33. Convolution::Param param;
  34. UniformFloatRNG im_rng{-10, 10};
  35. UniformFloatRNG filter_rng{-1, 1};
  36. UniformFloatRNG offset_rng{-2, 2};
  37. UniformFloatRNG mask_rng{-1, 1};
  38. checker.set_epsilon(0.01)
  39. .set_rng(0, &im_rng)
  40. .set_rng(1, &filter_rng)
  41. .set_rng(2, &offset_rng)
  42. .set_rng(3, &mask_rng);
  43. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  44. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  45. size_t ic, size_t oc, size_t batch, size_t group,
  46. size_t deformable_group) {
  47. size_t oh, ow;
  48. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  49. param.pad_h = ph;
  50. param.pad_w = pw;
  51. param.stride_h = sh;
  52. param.stride_w = sw;
  53. param.dilate_h = dh;
  54. param.dilate_w = dw;
  55. param.format = DeformableConv::Param::Format::NCHW;
  56. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  57. if (group > 1) {
  58. param.sparse = DeformableConv::Param::Sparse::GROUP;
  59. checker.set_param(param).execs(
  60. {{batch, ic, ih, iw},
  61. {group, oc / group, ic / group, fh, fw},
  62. {batch, 2 * deformable_group * fh * fw, oh, ow},
  63. {batch, deformable_group * fh * fw, oh, ow},
  64. {batch, oc, oh, ow}});
  65. } else {
  66. param.sparse = DeformableConv::Param::Sparse::DENSE;
  67. checker.set_param(param).execs(
  68. {{batch, ic, ih, iw},
  69. {oc, ic, fh, fw},
  70. {batch, 2 * deformable_group * fh * fw, oh, ow},
  71. {batch, deformable_group * fh * fw, oh, ow},
  72. {batch, oc, oh, ow}});
  73. }
  74. };
  75. for (auto batch : std::vector<int>{1, 3})
  76. for (auto hw : std::vector<int>{16, 20})
  77. for (auto fhw : std::vector<int>{3, 5, 7})
  78. for (auto phw : std::vector<int>{2, 5})
  79. for (auto shw : std::vector<int>{1, 3})
  80. for (auto g : std::vector<int>{1, 2})
  81. for (auto icpg : std::vector<int>{1, 3})
  82. for (auto ocpg : std::vector<int>{1, 3}) {
  83. auto dhw = shw;
  84. run_test(
  85. hw, hw, fhw, fhw, phw, phw, shw, shw, dhw,
  86. dhw, g * icpg, g * ocpg, batch, g, g);
  87. }
  88. }
  89. TEST_F(CUDA, DEFORMABLE_CONV_BWD_FILTER) {
  90. Checker<DeformableConvBackwardFilter> checker(handle_cuda());
  91. Convolution::Param param;
  92. UniformFloatRNG im_rng{-10, 10};
  93. UniformFloatRNG offset_rng{-2, 2};
  94. UniformFloatRNG mask_rng{-1, 1};
  95. UniformFloatRNG out_grad_rng{-1, 1};
  96. checker.set_epsilon(0.01)
  97. .set_rng(0, &im_rng)
  98. .set_rng(1, &offset_rng)
  99. .set_rng(2, &mask_rng)
  100. .set_rng(3, &out_grad_rng);
  101. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  102. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  103. size_t ic, size_t oc, size_t batch, size_t group,
  104. size_t deformable_group) {
  105. size_t oh, ow;
  106. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  107. param.pad_h = ph;
  108. param.pad_w = pw;
  109. param.stride_h = sh;
  110. param.stride_w = sw;
  111. param.dilate_h = dh;
  112. param.dilate_w = dw;
  113. param.format = DeformableConv::Param::Format::NCHW;
  114. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  115. if (group > 1) {
  116. param.sparse = DeformableConv::Param::Sparse::GROUP;
  117. checker.set_param(param).execs(
  118. {{batch, ic, ih, iw},
  119. {batch, 2 * deformable_group * fh * fw, oh, ow},
  120. {batch, deformable_group * fh * fw, oh, ow},
  121. {batch, oc, oh, ow},
  122. {group, oc / group, ic / group, fh, fw}});
  123. } else {
  124. param.sparse = DeformableConv::Param::Sparse::DENSE;
  125. checker.set_param(param).execs(
  126. {{batch, ic, ih, iw},
  127. {batch, 2 * deformable_group * fh * fw, oh, ow},
  128. {batch, deformable_group * fh * fw, oh, ow},
  129. {batch, oc, oh, ow},
  130. {oc, ic, fh, fw}});
  131. }
  132. };
  133. for (auto batch : std::vector<int>{1, 2})
  134. for (auto hw : std::vector<int>{16, 20})
  135. for (auto fhw : std::vector<int>{3, 5, 7})
  136. for (auto phw : std::vector<int>{2, 5})
  137. for (auto shw : std::vector<int>{1, 3})
  138. for (auto g : std::vector<int>{1, 2})
  139. for (auto icpg : std::vector<int>{1, 5})
  140. for (auto ocpg : std::vector<int>{1, 5}) {
  141. auto dhw = shw;
  142. run_test(
  143. hw, hw, fhw, fhw, phw, phw, shw, shw, dhw,
  144. dhw, g * icpg, g * ocpg, batch, g, g);
  145. }
  146. }
  147. TEST_F(CUDA, DEFORMABLE_CONV_BWD_DATA) {
  148. Checker<DeformableConvBackwardData> checker(handle_cuda());
  149. Convolution::Param param;
  150. UniformFloatRNG im_rng{0, 255};
  151. UniformFloatRNG filter_rng{-1, 1};
  152. UniformFloatRNG offset_rng{-2, 2};
  153. UniformFloatRNG mask_rng{0, 1};
  154. UniformFloatRNG out_grad_rng{0, 2};
  155. checker.set_epsilon(0.1f)
  156. .set_rng(0, &im_rng)
  157. .set_rng(1, &filter_rng)
  158. .set_rng(2, &offset_rng)
  159. .set_rng(3, &mask_rng)
  160. .set_rng(4, &out_grad_rng);
  161. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  162. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  163. size_t ic, size_t oc, size_t batch, size_t group,
  164. size_t deformable_group) {
  165. size_t oh, ow;
  166. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  167. param.pad_h = ph;
  168. param.pad_w = pw;
  169. param.stride_h = sh;
  170. param.stride_w = sw;
  171. param.dilate_h = dh;
  172. param.dilate_w = dw;
  173. param.format = DeformableConv::Param::Format::NCHW;
  174. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  175. if (group > 1) {
  176. param.sparse = DeformableConv::Param::Sparse::GROUP;
  177. checker.set_param(param).execs(
  178. {{batch, ic, ih, iw},
  179. {group, oc / group, ic / group, fh, fw},
  180. {batch, 2 * deformable_group * fh * fw, oh, ow},
  181. {batch, deformable_group * fh * fw, oh, ow},
  182. {batch, oc, oh, ow},
  183. {batch, ic, ih, iw},
  184. {batch, 2 * deformable_group * fh * fw, oh, ow},
  185. {batch, deformable_group * fh * fw, oh, ow}});
  186. } else {
  187. param.sparse = DeformableConv::Param::Sparse::DENSE;
  188. checker.set_param(param).execs(
  189. {{batch, ic, ih, iw},
  190. {oc, ic, fh, fw},
  191. {batch, 2 * deformable_group * fh * fw, oh, ow},
  192. {batch, deformable_group * fh * fw, oh, ow},
  193. {batch, oc, oh, ow},
  194. {batch, ic, ih, iw},
  195. {batch, 2 * deformable_group * fh * fw, oh, ow},
  196. {batch, deformable_group * fh * fw, oh, ow}});
  197. }
  198. };
  199. for (auto batch : std::vector<int>{1, 3})
  200. for (auto hw : std::vector<int>{16, 20})
  201. for (auto fhw : std::vector<int>{3, 5, 7})
  202. for (auto phw : std::vector<int>{2, 5})
  203. for (auto shw : std::vector<int>{1, 3})
  204. for (auto g : std::vector<int>{1, 2})
  205. for (auto icpg : std::vector<int>{1, 3})
  206. for (auto ocpg : std::vector<int>{1, 3}) {
  207. auto dhw = shw;
  208. run_test(
  209. hw, hw, fhw, fhw, phw, phw, shw, shw, dhw,
  210. dhw, g * icpg, g * ocpg, batch, g, g);
  211. }
  212. }
  213. #if MEGDNN_WITH_BENCHMARK
  214. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_FORWARD) {
  215. CUBenchmarker<DeformableConvForward> bencher(handle_cuda());
  216. bencher.set_display(true);
  217. Convolution::Param param;
  218. UniformFloatRNG im_rng{-10, 10};
  219. UniformFloatRNG filter_rng{-10, 10};
  220. UniformFloatRNG offset_rng{-10, 10};
  221. UniformFloatRNG mask_rng{-10, 10};
  222. UniformFloatRNG out_grad_rng{-10, 10};
  223. auto run_bench = [&](size_t batch, size_t ic, size_t oc, size_t ih, size_t iw,
  224. size_t fh, size_t fw, size_t ph, size_t pw, size_t sh,
  225. size_t sw, size_t dh, size_t dw, size_t group,
  226. size_t deformable_group, size_t nr_times) {
  227. size_t oh, ow;
  228. param.pad_h = ph;
  229. param.pad_w = pw;
  230. param.stride_h = sh;
  231. param.stride_w = sw;
  232. param.dilate_h = dh;
  233. param.dilate_w = dw;
  234. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  235. param.format = DeformableConv::Param::Format::NCHW;
  236. param.sparse = DeformableConv::Param::Sparse::DENSE;
  237. bencher.set_param(param)
  238. .set_rng(0, &im_rng)
  239. .set_rng(1, &im_rng)
  240. .set_rng(2, &offset_rng)
  241. .set_rng(3, &mask_rng);
  242. bencher.set_times(nr_times);
  243. TensorShape im{batch, ic, ih, iw}, filter{oc, ic, fh, fw},
  244. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  245. mask{batch, deformable_group * fh * fw, oh, ow};
  246. auto time_in_ms = bencher.execs({im, filter, offset, mask, {}}) / nr_times;
  247. auto ops = 2.0 * group * (oc / group) * (oh * ow * batch) * (ic / group) * fh *
  248. fw / (time_in_ms * 1e-3) * 1e-12;
  249. printf("deformable conv forward performance: %fTops\n", ops);
  250. };
  251. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  252. }
  253. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_BWD_FILTER) {
  254. CUBenchmarker<DeformableConvBackwardFilter> bencher(handle_cuda());
  255. bencher.set_display(true);
  256. Convolution::Param param;
  257. UniformFloatRNG im_rng{-10, 10};
  258. UniformFloatRNG filter_rng{-10, 10};
  259. UniformFloatRNG offset_rng{-10, 10};
  260. UniformFloatRNG mask_rng{-10, 10};
  261. UniformFloatRNG out_grad_rng{-10, 10};
  262. auto run_bench = [&](size_t batch, size_t icpg, size_t ocpg, size_t ih, size_t iw,
  263. size_t fh, size_t fw, size_t ph, size_t pw, size_t sh,
  264. size_t sw, size_t dh, size_t dw, size_t group,
  265. size_t deformable_group, size_t nr_times) {
  266. size_t oh, ow;
  267. size_t ic = icpg * group, oc = ocpg * group;
  268. param.pad_h = ph;
  269. param.pad_w = pw;
  270. param.stride_h = sh;
  271. param.stride_w = sw;
  272. param.dilate_h = dh;
  273. param.dilate_w = dw;
  274. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  275. param.format = DeformableConv::Param::Format::NCHW;
  276. param.sparse = DeformableConv::Param::Sparse::DENSE;
  277. bencher.set_param(param)
  278. .set_rng(0, &im_rng)
  279. .set_rng(1, &im_rng)
  280. .set_rng(2, &offset_rng)
  281. .set_rng(3, &mask_rng);
  282. bencher.set_times(nr_times);
  283. TensorShape im{batch, ic, ih, iw}, filter{ic, ic, fh, fw},
  284. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  285. mask{batch, deformable_group * fh * fw, oh, ow},
  286. out_grad{batch, oc, oh, ow}, filter_grad{oc, ic, fh, fw};
  287. auto time_in_ms =
  288. bencher.execs({im, offset, mask, out_grad, filter_grad}) / nr_times;
  289. auto ops = 2.0 * group * (oc / group) * (oh * ow * batch) * (ic / group) * fh *
  290. fw / (time_in_ms * 1e-3) * 1e-12;
  291. printf("deformable conv bwd filter performance: %fTops\n", ops);
  292. };
  293. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  294. // run_bench(16, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  295. }
  296. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_BWD_DATA) {
  297. CUBenchmarker<DeformableConvBackwardData> bencher(handle_cuda());
  298. bencher.set_display(true);
  299. Convolution::Param param;
  300. UniformFloatRNG im_rng{-10, 10};
  301. UniformFloatRNG filter_rng{-10, 10};
  302. UniformFloatRNG offset_rng{-10, 10};
  303. UniformFloatRNG mask_rng{-10, 10};
  304. UniformFloatRNG out_grad_rng{-10, 10};
  305. auto run_bench = [&](size_t batch, size_t ic, size_t oc, size_t ih, size_t iw,
  306. size_t fh, size_t fw, size_t ph, size_t pw, size_t sh,
  307. size_t sw, size_t dh, size_t dw, size_t group,
  308. size_t deformable_group, size_t nr_times) {
  309. size_t oh, ow;
  310. param.pad_h = ph;
  311. param.pad_w = pw;
  312. param.stride_h = sh;
  313. param.stride_w = sw;
  314. param.dilate_h = dh;
  315. param.dilate_w = dw;
  316. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  317. param.format = DeformableConv::Param::Format::NCHW;
  318. param.sparse = DeformableConv::Param::Sparse::DENSE;
  319. bencher.set_param(param)
  320. .set_rng(0, &im_rng)
  321. .set_rng(1, &im_rng)
  322. .set_rng(2, &offset_rng)
  323. .set_rng(3, &mask_rng);
  324. bencher.set_times(nr_times);
  325. TensorShape im{batch, ic, ih, iw}, filter{oc, ic, fh, fw},
  326. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  327. mask{batch, deformable_group * fh * fw, oh, ow},
  328. out_grad{batch, oc, oh, ow}, im_grad{batch, ic, ih, iw},
  329. offset_grad{batch, 2 * deformable_group * fh * fw, oh, ow},
  330. mask_grad{batch, deformable_group * fh * fw, oh, ow};
  331. auto time_in_ms = bencher.execs(
  332. {im, filter, offset, mask, out_grad, im_grad,
  333. offset_grad, mask_grad}) /
  334. nr_times;
  335. auto ops = 2.0 * group * (oc / group) * oh * ow * batch * (ic / group) * fh *
  336. fw / (time_in_ms * 1e-3) * 1e-12;
  337. printf("deformable conv bwd data performance: %fTops\n", ops);
  338. };
  339. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  340. }
  341. #endif
  342. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台