
convolution.cpp

/**
 * \file dnn/test/fallback/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "megdnn/dtype.h"
#include "test/fallback/fixture.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"
#include "test/common/task_record_check.h"

using namespace megdnn;
using namespace test;

namespace megdnn {
namespace test {
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_RECORD) {
    using Param = Convolution::Param;
    TaskRecordChecker<Convolution> checker(1);
    NormalRNG default_rng;
    UniformIntRNG int_rng{-50, 50};
    Param param;
    param.stride_h = 2;
    param.stride_w = 2;
    // the kernel/2 padding is immediately overridden to 0 below
    param.pad_h = 3 / 2;
    param.pad_w = 3 / 2;
    param.pad_h = 0;
    param.pad_w = 0;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng)
            .set_param(param)
            .execs({{1, 3, 20, 40}, {24, 3, 3, 3}, {}});
}

} // namespace test
} // namespace megdnn
#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        // each output element costs IC * FH * FW multiply-adds, i.e. 2 FLOPs each
        printf("fp32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // the kernel/2 padding is immediately overridden to 0 below
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 1, 1);
    profile(24, 3, 256, 320, 3, 2);
    profile(16, 3, 224, 352, 5, 2);
    profile(16, 3, 256, 320, 7, 2);
    profile(8, 8, 56, 88, 3, 1);
    profile(8, 8, 7, 11, 3, 1);
    profile(4, 4, 64, 80, 3, 1);
    profile(108, 108, 7, 7, 3, 1);
    profile(54, 54, 7, 7, 3, 1);
    profile(3, 3, 128, 128, 3, 1);
    profile(3, 3, 112, 112, 3, 1);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8832) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        // dtype only matters for shape deduction here, not for the benchmark itself
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("int8x8x32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // the kernel/2 padding is immediately overridden to 0 below
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8816) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int16{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("int8x8x16 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // the kernel/2 padding is immediately overridden to 0 below
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(48, 128, 56, 88, 1, 2);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle());
        size_t RUN = 500;
        benchmarker_fallback.set_display(false)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .set_times(RUN)
                .set_param(param);
        auto tmatmul =
                benchmarker_fallback
                        .set_before_exec_callback(
                                AlgoChecker<ConvolutionBackwardData>("DeconvMatmul"))
                        .exec(tensors);
        auto tdirect =
                benchmarker_fallback
                        .set_before_exec_callback(
                                AlgoChecker<ConvolutionBackwardData>("DeconvDirect"))
                        .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        size_t total_flops = IC * tensors[1].total_nr_elems() * FH * FW * 2;
        printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect,
               total_flops / (tdirect / RUN * 1000));
        printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul,
               total_flops / (tmatmul / RUN * 1000));
        printf("speedup: %.3f\n", tdirect / tmatmul);
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                       size_t fw, size_t stride = 1, size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 3, 3, 1, 2, 2);
    profile(1, 2, 3, 3, 2, 2, 2);
    profile(1, 4, 3, 3, 4, 2, 2);
    profile(1, 4, 3, 3, 8, 2, 2);
    profile(1, 8, 3, 3, 4, 2, 2);
    profile(1, 8, 3, 3, 8, 2, 2);
}
#endif
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param);
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
#if MEGDNN_X86
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_8816) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<Convolution>(".+FB_GEMV.+"));
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw, size_t pad, size_t stride, size_t group) {
        Param param;
        param.sparse = group > 1 ? param::Convolution::Sparse::GROUP
                                 : param::Convolution::Sparse::DENSE;
        param.pad_h = param.pad_w = pad;
        param.stride_h = param.stride_w = stride;
        checker.set_param(param);
        if (group > 1) {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{group, oc / group, ic / group, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        } else {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{oc, ic, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        }
    };
    for (auto n : {1, 2})
        for (auto ic : {3, 4, 8, 12, 16})
            for (auto oc : {4, 8, 16, 32})
                for (auto ih : {7, 14, 15, 22})
                    for (auto iw : {7, 13, 11, 32})
                        for (auto filter : {1, 2, 3, 5, 7})
                            for (auto stride : {1, 2})
                                for (auto pad : {0, filter / 2}) {
                                    run(n, ic, ih, iw, oc, filter, filter, pad, stride,
                                        1);
                                    if (ic == oc) {
                                        run(n, ic, ih, iw, oc, filter, filter, pad,
                                            stride, ic);
                                    }
                                }
}
#endif
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        for (auto cmode : std::vector<Param::ComputeMode>{
                     Param::ComputeMode::DEFAULT, Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::Float16())
                    .set_dtype(1, dtype::Float16())
                    // Use inferred output dtype.
                    .set_dtype(2, {});
            checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 8, 8, 7, 11, 3, 1);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_FALLBACK) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("FALLBACK_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw}, filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, {});
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw}, filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param).set_dtype(2, {});
        //! float32
        checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32());
        checker.execs({src, filter, {}});
        //! float16
        checker.set_dtype(0, dtype::Float16()).set_dtype(1, dtype::Float16());
        checker.execs({src, filter, {}});
        //! Qint8
        checker.set_dtype(0, dtype::QuantizedS8(3.34f))
                .set_dtype(1, dtype::QuantizedS8(0.32f));
        checker.execs({src, filter, {}});
        //! Quint8
        checker.set_dtype(0, dtype::Quantized8Asymm(3.34f, static_cast<uint8_t>(21)))
                .set_dtype(1, dtype::Quantized8Asymm(0.32f, static_cast<uint8_t>(15)));
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                // Use inferred output dtype.
                .set_dtype(2, {});
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_RECORD) {
    TaskRecordChecker<ConvolutionBackwardData> checker(1);
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_INT8_INT8_INT32) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Int8()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Int8()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Int8()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_SINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::QuantizedS8(0.2f)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                .set_dtype(2, {});
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{
                {n, oc * group, oh, ow}, dtype::Quantized8Asymm(1.3f, (uint8_t)129)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {
                    {group, oc, ic, fh, fw},
                    dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        NormalRNG rng(128.f);
        checker.set_param(param)
                .set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
                .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
                .set_dtype(2, {});
        checker.set_rng(0, &rng).set_rng(1, &rng);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    Checker<ConvolutionBackwardData> checker(handle());
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine actually has a GPU and that its driver is installed. If you would like to try deep-learning development on a cloud GPU compute platform, you are welcome to visit MegStudio.
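As a quick sanity check before launching GPU work, here is a minimal Python sketch (assuming a standard `pip install megengine` environment; `is_cuda_available` and `set_default_device` are part of MegEngine's public Python API, but verify the names against your installed version):

    # Minimal sketch: probe for a usable GPU in a stock MegEngine install.
    import megengine

    if megengine.is_cuda_available():
        # Device names follow the "gpu0", "gpu1", ... convention; "cpu0" always exists.
        megengine.set_default_device("gpu0")
        print("CUDA device found; running on gpu0")
    else:
        print("no usable GPU found; staying on cpu0")

If the check reports no usable GPU on a machine that has one, the driver installation is the first thing to verify.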