
convolution.cpp
#include "megdnn/dtype.h"
#include "test/fallback/fixture.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"
#include "test/common/task_record_check.h"

using namespace megdnn;
using namespace test;

namespace megdnn {
namespace test {

// Record/replay check: run the convolution once through the task-record path
// and verify the recorded execution matches a direct run.
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_RECORD) {
    using Param = Convolution::Param;
    TaskRecordChecker<Convolution> checker(1);
    NormalRNG default_rng;
    UniformIntRNG int_rng{-50, 50};
    Param param;
    param.stride_h = 2;
    param.stride_w = 2;
    param.pad_h = 0;
    param.pad_w = 0;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng)
            .set_param(param)
            .execs({{1, 3, 20, 40}, {24, 3, 3, 3}, {}});
}

} // namespace test
} // namespace megdnn
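
// Everything from here to the matching #endif is benchmark-only code and is
// compiled only when MEGDNN_WITH_BENCHMARK is enabled.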
#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        // 2 * IC * FH * FW flops per output element; tfloat / RUN is the time
        // of one run in ms, so flops / (ms * 1000) is flops/us, i.e. Mflop/s.
        printf("fp32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 1, 1);
    profile(24, 3, 256, 320, 3, 2);
    profile(16, 3, 224, 352, 5, 2);
    profile(16, 3, 256, 320, 7, 2);
    profile(8, 8, 56, 88, 3, 1);
    profile(8, 8, 7, 11, 3, 1);
    profile(4, 4, 64, 80, 3, 1);
    profile(108, 108, 7, 7, 3, 1);
    profile(54, 54, 7, 7, 3, 1);
    profile(3, 3, 128, 128, 3, 1);
    profile(3, 3, 112, 112, 3, 1);
}
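
// Int8 x int8 -> int32 ("8832") variant of the benchmark above. Note that
// deduce_layout is still fed Float32 layouts: only the deduced shape feeds
// the flop count, so the dtype there does not matter.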
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8832) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("int8x8x32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
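
// Int8 x int8 -> int16 ("8816") variant; shapes and reporting follow the
// 8832 benchmark above.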
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8816) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int16{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("int8x8x16 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(48, 128, 56, 88, 1, 2);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
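
// Benchmarks backward data (deconvolution): times the DeconvMatmul and
// DeconvDirect algorithms on identical layouts and prints the
// direct-over-matmul speedup.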
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle());
        size_t RUN = 500;
        benchmarker_fallback.set_display(false)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .set_times(RUN)
                .set_param(param);
        auto tmatmul = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvMatmul"))
                               .exec(tensors);
        auto tdirect = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvDirect"))
                               .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        size_t total_flops = IC * tensors[1].total_nr_elems() * FH * FW * 2;
        printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect,
               total_flops / (tdirect / RUN * 1000));
        printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul,
               total_flops / (tmatmul / RUN * 1000));
        printf("speedup: %.3f\n", tdirect / tmatmul);
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                       size_t fh, size_t fw, size_t stride = 1,
                       size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 3, 3, 1, 2, 2);
    profile(1, 2, 3, 3, 2, 2, 2);
    profile(1, 4, 3, 3, 4, 2, 2);
    profile(1, 4, 3, 3, 8, 2, 2);
    profile(1, 8, 3, 3, 4, 2, 2);
    profile(1, 8, 3, 3, 8, 2, 2);
}
#endif
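
// The remaining tests are correctness checks: each runs the fallback
// implementation against the reference result over a set of shapes, dtypes
// and algorithms.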
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param);
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
#if MEGDNN_X86
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_8816) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<Convolution>(".+FB_GEMV.+"));
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw, size_t pad, size_t stride,
                   size_t group) {
        Param param;
        param.sparse = group > 1 ? param::Convolution::Sparse::GROUP
                                 : param::Convolution::Sparse::DENSE;
        param.pad_h = param.pad_w = pad;
        param.stride_h = param.stride_w = stride;
        checker.set_param(param);
        if (group > 1) {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{group, oc / group, ic / group, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        } else {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{oc, ic, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        }
    };
    for (auto n : {1, 2})
        for (auto ic : {3, 4, 8, 12, 16})
            for (auto oc : {4, 8, 16, 32})
                for (auto ih : {7, 14, 15, 22})
                    for (auto iw : {7, 13, 11, 32})
                        for (auto filter : {1, 2, 3, 5, 7})
                            for (auto stride : {1, 2})
                                for (auto pad : {0, filter / 2}) {
                                    run(n, ic, ih, iw, oc, filter, filter, pad,
                                        stride, 1);
                                    if (ic == oc) {
                                        // Grouped (depthwise-style) case:
                                        // group == ic requires ic == oc here.
                                        run(n, ic, ih, iw, oc, filter, filter,
                                            pad, stride, ic);
                                    }
                                }
}
#endif
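
// Checks NAIVE_ALGO with fp16 tensors under both compute modes: DEFAULT and
// FLOAT32 (intermediate computation carried out in fp32).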
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        for (auto cmode : std::vector<Param::ComputeMode>{
                     Param::ComputeMode::DEFAULT, Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::Float16())
                    .set_dtype(1, dtype::Float16())
                    // Use inferred output dtype.
                    .set_dtype(2, {});
            checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 8, 8, 7, 11, 3, 1);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_FALLBACK) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("FALLBACK_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, {});
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param).set_dtype(2, {});
        //! float32
        checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32());
        checker.execs({src, filter, {}});
        //! float16
        checker.set_dtype(0, dtype::Float16()).set_dtype(1, dtype::Float16());
        checker.execs({src, filter, {}});
        //! Qint8
        checker.set_dtype(0, dtype::QuantizedS8(3.34f))
                .set_dtype(1, dtype::QuantizedS8(0.32f));
        checker.execs({src, filter, {}});
        //! Quint8
        checker.set_dtype(0, dtype::Quantized8Asymm(3.34f, static_cast<uint8_t>(21)))
                .set_dtype(1, dtype::Quantized8Asymm(0.32f, static_cast<uint8_t>(15)));
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
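
// Quantized int8 forward convolution through the matrix-mul path; dtype 2 is
// left empty so the checker infers the output dtype from the input scales.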
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                // Use inferred output dtype.
                .set_dtype(2, {});
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
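
// For backward data the checker cannot infer the input-gradient shape, so
// each case below first deduces the `grad` layout from `filter` and `diff`
// with a temporary operator, then hands fully specified layouts to the
// checker. The same pattern repeats for each dtype variant.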
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_RECORD) {
    TaskRecordChecker<ConvolutionBackwardData> checker(1);
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_INT8_INT8_INT32) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Int8()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Int8()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Int8()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_SINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::QuantizedS8(0.2f)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                .set_dtype(2, {});
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{
                {n, oc * group, oh, ow}, dtype::Quantized8Asymm(1.3f, (uint8_t)129)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {
                    {group, oc, ic, fh, fw},
                    dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        NormalRNG rng(128.f);
        checker.set_param(param)
                .set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
                .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
                .set_dtype(2, {});
        checker.set_rng(0, &rng).set_rng(1, &rng);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    Checker<ConvolutionBackwardData> checker(handle());
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
  657. // vim: syntax=cpp.doxygen