You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deformable_conv.cpp 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. #include "megdnn/oprs/nn.h"
  2. #include "src/cuda/utils.h"
  3. #include "test/common/checker.h"
  4. #include "test/common/random_state.h"
  5. #include "test/cuda/benchmark.h"
  6. #include "test/cuda/fixture.h"
  7. using namespace megdnn;
  8. using namespace test;
  namespace {

  /*!
   * \brief Deduce the spatial output extent of a padded, strided, dilated
   *        convolution.
   *
   * For each axis the dilated kernel extent is 1 + (filter - 1) * dilate and
   * the output extent is (in + 2 * pad - kernel_extent) / stride + 1.
   *
   * \param ih,iw input height / width
   * \param fh,fw filter height / width
   * \param ph,pw padding applied on both sides of each axis
   * \param sh,sw stride along each axis (must be non-zero)
   * \param dh,dw dilation along each axis
   * \param[out] oh,ow deduced output height / width; 0 when the dilated kernel
   *             does not fit into the padded input along that axis
   */
  void calc_output_shape(
          const size_t& ih, const size_t& iw, const size_t& fh, const size_t& fw,
          const size_t& ph, const size_t& pw, const size_t& sh, const size_t& sw,
          const size_t& dh, const size_t& dw, size_t& oh, size_t& ow) {
      // Everything stays in size_t. Casting one operand to int does not make the
      // expression signed (the other operands promote it back to unsigned), so
      // the "kernel larger than padded input" case is tested explicitly rather
      // than letting the subtraction wrap around and get narrowed to int.
      const auto deduce = [](size_t in, size_t filter, size_t pad, size_t stride,
                             size_t dilate) -> size_t {
          const size_t kernel_extent = 1 + (filter - 1) * dilate;
          const size_t padded_in = in + 2 * pad;
          if (padded_in < kernel_extent) {
              return 0;
          }
          return (padded_in - kernel_extent) / stride + 1;
      };

      // Compute both results before writing, so the outputs are only touched
      // once every input has been read.
      const size_t deduced_oh = deduce(ih, fh, ph, sh, dh);
      const size_t deduced_ow = deduce(iw, fw, pw, sw, dw);
      oh = deduced_oh;
      ow = deduced_ow;
  }

  }  // namespace
  21. TEST_F(CUDA, DEFORMABLE_CONV_FWD) {
  22. Checker<DeformableConv> checker(handle_cuda());
  23. Convolution::Param param;
  24. UniformFloatRNG im_rng{-10, 10};
  25. UniformFloatRNG filter_rng{-1, 1};
  26. UniformFloatRNG offset_rng{-2, 2};
  27. UniformFloatRNG mask_rng{-1, 1};
  28. checker.set_epsilon(0.01)
  29. .set_rng(0, &im_rng)
  30. .set_rng(1, &filter_rng)
  31. .set_rng(2, &offset_rng)
  32. .set_rng(3, &mask_rng);
  33. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  34. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  35. size_t ic, size_t oc, size_t batch, size_t group,
  36. size_t deformable_group) {
  37. size_t oh, ow;
  38. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  39. param.pad_h = ph;
  40. param.pad_w = pw;
  41. param.stride_h = sh;
  42. param.stride_w = sw;
  43. param.dilate_h = dh;
  44. param.dilate_w = dw;
  45. param.format = DeformableConv::Param::Format::NCHW;
  46. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  47. if (group > 1) {
  48. param.sparse = DeformableConv::Param::Sparse::GROUP;
  49. checker.set_param(param).execs(
  50. {{batch, ic, ih, iw},
  51. {group, oc / group, ic / group, fh, fw},
  52. {batch, 2 * deformable_group * fh * fw, oh, ow},
  53. {batch, deformable_group * fh * fw, oh, ow},
  54. {batch, oc, oh, ow}});
  55. } else {
  56. param.sparse = DeformableConv::Param::Sparse::DENSE;
  57. checker.set_param(param).execs(
  58. {{batch, ic, ih, iw},
  59. {oc, ic, fh, fw},
  60. {batch, 2 * deformable_group * fh * fw, oh, ow},
  61. {batch, deformable_group * fh * fw, oh, ow},
  62. {batch, oc, oh, ow}});
  63. }
  64. };
  65. for (auto batch : std::vector<int>{1, 3})
  66. for (auto hw : std::vector<int>{16, 20})
  67. for (auto fhw : std::vector<int>{3, 5, 7})
  68. for (auto phw : std::vector<int>{2, 5})
  69. for (auto shw : std::vector<int>{1, 3})
  70. for (auto g : std::vector<int>{1, 2})
  71. for (auto icpg : std::vector<int>{1, 3})
  72. for (auto ocpg : std::vector<int>{1, 3}) {
  73. auto dhw = shw;
  74. run_test(
  75. hw, hw, fhw, fhw, phw, phw, shw, shw, dhw,
  76. dhw, g * icpg, g * ocpg, batch, g, g);
  77. }
  78. }
  79. TEST_F(CUDA, DEFORMABLE_CONV_BWD_FILTER) {
  80. Checker<DeformableConvBackwardFilter> checker(handle_cuda());
  81. Convolution::Param param;
  82. UniformFloatRNG im_rng{-10, 10};
  83. UniformFloatRNG offset_rng{-2, 2};
  84. UniformFloatRNG mask_rng{-1, 1};
  85. UniformFloatRNG out_grad_rng{-1, 1};
  86. checker.set_epsilon(0.01)
  87. .set_rng(0, &im_rng)
  88. .set_rng(1, &offset_rng)
  89. .set_rng(2, &mask_rng)
  90. .set_rng(3, &out_grad_rng);
  91. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  92. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  93. size_t ic, size_t oc, size_t batch, size_t group,
  94. size_t deformable_group) {
  95. size_t oh, ow;
  96. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  97. param.pad_h = ph;
  98. param.pad_w = pw;
  99. param.stride_h = sh;
  100. param.stride_w = sw;
  101. param.dilate_h = dh;
  102. param.dilate_w = dw;
  103. param.format = DeformableConv::Param::Format::NCHW;
  104. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  105. if (group > 1) {
  106. param.sparse = DeformableConv::Param::Sparse::GROUP;
  107. checker.set_param(param).execs(
  108. {{batch, ic, ih, iw},
  109. {batch, 2 * deformable_group * fh * fw, oh, ow},
  110. {batch, deformable_group * fh * fw, oh, ow},
  111. {batch, oc, oh, ow},
  112. {group, oc / group, ic / group, fh, fw}});
  113. } else {
  114. param.sparse = DeformableConv::Param::Sparse::DENSE;
  115. checker.set_param(param).execs(
  116. {{batch, ic, ih, iw},
  117. {batch, 2 * deformable_group * fh * fw, oh, ow},
  118. {batch, deformable_group * fh * fw, oh, ow},
  119. {batch, oc, oh, ow},
  120. {oc, ic, fh, fw}});
  121. }
  122. };
  123. for (auto batch : std::vector<int>{1, 2})
  124. for (auto hw : std::vector<int>{16, 20})
  125. for (auto fhw : std::vector<int>{3, 5, 7})
  126. for (auto phw : std::vector<int>{2, 5})
  127. for (auto shw : std::vector<int>{1, 3})
  128. for (auto g : std::vector<int>{1, 2})
  129. for (auto icpg : std::vector<int>{1, 5})
  130. for (auto ocpg : std::vector<int>{1, 5}) {
  131. auto dhw = shw;
  132. run_test(
  133. hw, hw, fhw, fhw, phw, phw, shw, shw, dhw,
  134. dhw, g * icpg, g * ocpg, batch, g, g);
  135. }
  136. }
  137. TEST_F(CUDA, DEFORMABLE_CONV_BWD_DATA) {
  138. Checker<DeformableConvBackwardData> checker(handle_cuda());
  139. Convolution::Param param;
  140. UniformFloatRNG im_rng{0, 255};
  141. UniformFloatRNG filter_rng{-1, 1};
  142. UniformFloatRNG offset_rng{-2, 2};
  143. UniformFloatRNG mask_rng{0, 1};
  144. UniformFloatRNG out_grad_rng{0, 2};
  145. checker.set_epsilon(0.1f)
  146. .set_rng(0, &im_rng)
  147. .set_rng(1, &filter_rng)
  148. .set_rng(2, &offset_rng)
  149. .set_rng(3, &mask_rng)
  150. .set_rng(4, &out_grad_rng);
  151. auto run_test = [&](size_t ih, size_t iw, size_t fh, size_t fw, size_t ph,
  152. size_t pw, size_t sh, size_t sw, size_t dh, size_t dw,
  153. size_t ic, size_t oc, size_t batch, size_t group,
  154. size_t deformable_group) {
  155. size_t oh, ow;
  156. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  157. param.pad_h = ph;
  158. param.pad_w = pw;
  159. param.stride_h = sh;
  160. param.stride_w = sw;
  161. param.dilate_h = dh;
  162. param.dilate_w = dw;
  163. param.format = DeformableConv::Param::Format::NCHW;
  164. param.mode = DeformableConv::Param::Mode::CROSS_CORRELATION;
  165. if (group > 1) {
  166. param.sparse = DeformableConv::Param::Sparse::GROUP;
  167. checker.set_param(param).execs(
  168. {{batch, ic, ih, iw},
  169. {group, oc / group, ic / group, fh, fw},
  170. {batch, 2 * deformable_group * fh * fw, oh, ow},
  171. {batch, deformable_group * fh * fw, oh, ow},
  172. {batch, oc, oh, ow},
  173. {batch, ic, ih, iw},
  174. {batch, 2 * deformable_group * fh * fw, oh, ow},
  175. {batch, deformable_group * fh * fw, oh, ow}});
  176. } else {
  177. param.sparse = DeformableConv::Param::Sparse::DENSE;
  178. checker.set_param(param).execs(
  179. {{batch, ic, ih, iw},
  180. {oc, ic, fh, fw},
  181. {batch, 2 * deformable_group * fh * fw, oh, ow},
  182. {batch, deformable_group * fh * fw, oh, ow},
  183. {batch, oc, oh, ow},
  184. {batch, ic, ih, iw},
  185. {batch, 2 * deformable_group * fh * fw, oh, ow},
  186. {batch, deformable_group * fh * fw, oh, ow}});
  187. }
  188. };
  189. for (auto batch : std::vector<int>{1, 3})
  190. for (auto hw : std::vector<int>{16, 20})
  191. for (auto fhw : std::vector<int>{3, 5, 7})
  192. for (auto phw : std::vector<int>{2, 5})
  193. for (auto shw : std::vector<int>{1, 3})
  194. for (auto g : std::vector<int>{1, 2})
  195. for (auto icpg : std::vector<int>{1, 3})
  196. for (auto ocpg : std::vector<int>{1, 3}) {
  197. auto dhw = shw;
  198. run_test(
  199. hw, hw, fhw, fhw, phw, phw, shw, shw, dhw,
  200. dhw, g * icpg, g * ocpg, batch, g, g);
  201. }
  202. }
  203. #if MEGDNN_WITH_BENCHMARK
  204. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_FORWARD) {
  205. CUBenchmarker<DeformableConvForward> bencher(handle_cuda());
  206. bencher.set_display(true);
  207. Convolution::Param param;
  208. UniformFloatRNG im_rng{-10, 10};
  209. UniformFloatRNG filter_rng{-10, 10};
  210. UniformFloatRNG offset_rng{-10, 10};
  211. UniformFloatRNG mask_rng{-10, 10};
  212. UniformFloatRNG out_grad_rng{-10, 10};
  213. auto run_bench = [&](size_t batch, size_t ic, size_t oc, size_t ih, size_t iw,
  214. size_t fh, size_t fw, size_t ph, size_t pw, size_t sh,
  215. size_t sw, size_t dh, size_t dw, size_t group,
  216. size_t deformable_group, size_t nr_times) {
  217. size_t oh, ow;
  218. param.pad_h = ph;
  219. param.pad_w = pw;
  220. param.stride_h = sh;
  221. param.stride_w = sw;
  222. param.dilate_h = dh;
  223. param.dilate_w = dw;
  224. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  225. param.format = DeformableConv::Param::Format::NCHW;
  226. param.sparse = DeformableConv::Param::Sparse::DENSE;
  227. bencher.set_param(param)
  228. .set_rng(0, &im_rng)
  229. .set_rng(1, &im_rng)
  230. .set_rng(2, &offset_rng)
  231. .set_rng(3, &mask_rng);
  232. bencher.set_times(nr_times);
  233. TensorShape im{batch, ic, ih, iw}, filter{oc, ic, fh, fw},
  234. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  235. mask{batch, deformable_group * fh * fw, oh, ow};
  236. auto time_in_ms = bencher.execs({im, filter, offset, mask, {}}) / nr_times;
  237. auto ops = 2.0 * group * (oc / group) * (oh * ow * batch) * (ic / group) * fh *
  238. fw / (time_in_ms * 1e-3) * 1e-12;
  239. printf("deformable conv forward performance: %fTops\n", ops);
  240. };
  241. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  242. }
  243. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_BWD_FILTER) {
  244. CUBenchmarker<DeformableConvBackwardFilter> bencher(handle_cuda());
  245. bencher.set_display(true);
  246. Convolution::Param param;
  247. UniformFloatRNG im_rng{-10, 10};
  248. UniformFloatRNG filter_rng{-10, 10};
  249. UniformFloatRNG offset_rng{-10, 10};
  250. UniformFloatRNG mask_rng{-10, 10};
  251. UniformFloatRNG out_grad_rng{-10, 10};
  252. auto run_bench = [&](size_t batch, size_t icpg, size_t ocpg, size_t ih, size_t iw,
  253. size_t fh, size_t fw, size_t ph, size_t pw, size_t sh,
  254. size_t sw, size_t dh, size_t dw, size_t group,
  255. size_t deformable_group, size_t nr_times) {
  256. size_t oh, ow;
  257. size_t ic = icpg * group, oc = ocpg * group;
  258. param.pad_h = ph;
  259. param.pad_w = pw;
  260. param.stride_h = sh;
  261. param.stride_w = sw;
  262. param.dilate_h = dh;
  263. param.dilate_w = dw;
  264. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  265. param.format = DeformableConv::Param::Format::NCHW;
  266. param.sparse = DeformableConv::Param::Sparse::DENSE;
  267. bencher.set_param(param)
  268. .set_rng(0, &im_rng)
  269. .set_rng(1, &im_rng)
  270. .set_rng(2, &offset_rng)
  271. .set_rng(3, &mask_rng);
  272. bencher.set_times(nr_times);
  273. TensorShape im{batch, ic, ih, iw}, filter{ic, ic, fh, fw},
  274. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  275. mask{batch, deformable_group * fh * fw, oh, ow},
  276. out_grad{batch, oc, oh, ow}, filter_grad{oc, ic, fh, fw};
  277. auto time_in_ms =
  278. bencher.execs({im, offset, mask, out_grad, filter_grad}) / nr_times;
  279. auto ops = 2.0 * group * (oc / group) * (oh * ow * batch) * (ic / group) * fh *
  280. fw / (time_in_ms * 1e-3) * 1e-12;
  281. printf("deformable conv bwd filter performance: %fTops\n", ops);
  282. };
  283. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  284. // run_bench(16, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  285. }
  286. TEST_F(CUDA, BENCHMARK_DEFORMABLE_CONV_BWD_DATA) {
  287. CUBenchmarker<DeformableConvBackwardData> bencher(handle_cuda());
  288. bencher.set_display(true);
  289. Convolution::Param param;
  290. UniformFloatRNG im_rng{-10, 10};
  291. UniformFloatRNG filter_rng{-10, 10};
  292. UniformFloatRNG offset_rng{-10, 10};
  293. UniformFloatRNG mask_rng{-10, 10};
  294. UniformFloatRNG out_grad_rng{-10, 10};
  295. auto run_bench = [&](size_t batch, size_t ic, size_t oc, size_t ih, size_t iw,
  296. size_t fh, size_t fw, size_t ph, size_t pw, size_t sh,
  297. size_t sw, size_t dh, size_t dw, size_t group,
  298. size_t deformable_group, size_t nr_times) {
  299. size_t oh, ow;
  300. param.pad_h = ph;
  301. param.pad_w = pw;
  302. param.stride_h = sh;
  303. param.stride_w = sw;
  304. param.dilate_h = dh;
  305. param.dilate_w = dw;
  306. calc_output_shape(ih, iw, fh, fw, ph, pw, sh, sw, dh, dw, oh, ow);
  307. param.format = DeformableConv::Param::Format::NCHW;
  308. param.sparse = DeformableConv::Param::Sparse::DENSE;
  309. bencher.set_param(param)
  310. .set_rng(0, &im_rng)
  311. .set_rng(1, &im_rng)
  312. .set_rng(2, &offset_rng)
  313. .set_rng(3, &mask_rng);
  314. bencher.set_times(nr_times);
  315. TensorShape im{batch, ic, ih, iw}, filter{oc, ic, fh, fw},
  316. offset{batch, 2 * deformable_group * fh * fw, oh, ow},
  317. mask{batch, deformable_group * fh * fw, oh, ow},
  318. out_grad{batch, oc, oh, ow}, im_grad{batch, ic, ih, iw},
  319. offset_grad{batch, 2 * deformable_group * fh * fw, oh, ow},
  320. mask_grad{batch, deformable_group * fh * fw, oh, ow};
  321. auto time_in_ms = bencher.execs(
  322. {im, filter, offset, mask, out_grad, im_grad,
  323. offset_grad, mask_grad}) /
  324. nr_times;
  325. auto ops = 2.0 * group * (oc / group) * oh * ow * batch * (ic / group) * fh *
  326. fw / (time_in_ms * 1e-3) * 1e-12;
  327. printf("deformable conv bwd data performance: %fTops\n", ops);
  328. };
  329. run_bench(64, 64, 256, 56, 56, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 100);
  330. }
  331. #endif
  332. // vim: syntax=cpp.doxygen