
convolution.cpp

/**
 * \file dnn/test/fallback/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "megdnn/dtype.h"
#include "test/fallback/fixture.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
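// The benchmarks below time Convolution::exec over RUN iterations and report
// an estimated throughput: flops = 2 * IC * FH * FW * (number of output
// elements), divided by the average per-run time (tfloat / RUN, in ms), which
// yields mflops.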
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("fp32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // padding is deliberately forced to zero, so all profiled cases run
        // without padding
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 1, 1);
    profile(24, 3, 256, 320, 3, 2);
    profile(16, 3, 224, 352, 5, 2);
    profile(16, 3, 256, 320, 7, 2);
    profile(8, 8, 56, 88, 3, 1);
    profile(8, 8, 7, 11, 3, 1);
    profile(4, 4, 64, 80, 3, 1);
    profile(108, 108, 7, 7, 3, 1);
    profile(54, 54, 7, 7, 3, 1);
    profile(3, 3, 128, 128, 3, 1);
    profile(3, 3, 112, 112, 3, 1);
}
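// Same matmul-convolution benchmark with int8 inputs accumulating into int32.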
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8832) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        // dtype here only affects shape deduction, not the benchmarked dtypes
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("int8x8x32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // padding is deliberately forced to zero, as above
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
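// int8 inputs accumulating into int16.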
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8816) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int16{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        // dtype here only affects shape deduction, not the benchmarked dtypes
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("int8x8x16 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // padding is deliberately forced to zero, as above
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(48, 128, 56, 88, 1, 2);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
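// Benchmarks ConvolutionBackwardData, pinning the algorithm to DeconvMatmul
// and then DeconvDirect via AlgoChecker, and prints the direct/matmul time
// ratio as "speedup".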
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle());
        size_t RUN = 500;
        benchmarker_fallback.set_display(false)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .set_times(RUN)
                .set_param(param);
        auto tmatmul = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvMatmul"))
                               .exec(tensors);
        auto tdirect = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvDirect"))
                               .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        size_t total_flops = IC * tensors[1].total_nr_elems() * FH * FW * 2;
        printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect,
               total_flops / (tdirect / RUN * 1000));
        printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul,
               total_flops / (tmatmul / RUN * 1000));
        printf("speedup: %.3f\n", tdirect / tmatmul);
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                       size_t fh, size_t fw, size_t stride = 1,
                       size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 3, 3, 1, 2, 2);
    profile(1, 2, 3, 3, 2, 2, 2);
    profile(1, 4, 3, 3, 4, 2, 2);
    profile(1, 4, 3, 3, 8, 2, 2);
    profile(1, 8, 3, 3, 4, 2, 2);
    profile(1, 8, 3, 3, 8, 2, 2);
}
#endif
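// Correctness tests: Checker runs the operator under test and verifies the
// result against the naive reference implementation.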
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param);
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}

#if MEGDNN_X86
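// Sweeps dense and channel-wise (group == ic == oc) int8x8 -> int16
// convolutions, restricted to algorithms whose name matches ".+FB_GEMV.+".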
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_8816) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<Convolution>(".+FB_GEMV.+"));
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw, size_t pad, size_t stride,
                   size_t group) {
        Param param;
        param.sparse = group > 1 ? param::Convolution::Sparse::GROUP
                                 : param::Convolution::Sparse::DENSE;
        param.pad_h = param.pad_w = pad;
        param.stride_h = param.stride_w = stride;
        checker.set_param(param);
        if (group > 1) {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{group, oc / group, ic / group, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        } else {
            checker.execl({{{n, ic, ih, iw}, dtype::Int8()},
                           {{oc, ic, fh, fw}, dtype::Int8()},
                           {{}, dtype::Int16()}});
        }
    };
    for (auto n : {1, 2})
        for (auto ic : {3, 4, 8, 12, 16})
            for (auto oc : {4, 8, 16, 32})
                for (auto ih : {7, 14, 15, 22})
                    for (auto iw : {7, 13, 11, 32})
                        for (auto filter : {1, 2, 3, 5, 7})
                            for (auto stride : {1, 2})
                                for (auto pad : {0, filter / 2}) {
                                    run(n, ic, ih, iw, oc, filter, filter, pad,
                                        stride, 1);
                                    if (ic == oc) {
                                        run(n, ic, ih, iw, oc, filter, filter,
                                            pad, stride, ic);
                                    }
                                }
}
#endif
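// Checks NAIVE_ALGO on float16 inputs under both ComputeMode::DEFAULT and
// ComputeMode::FLOAT32 (accumulation in fp32).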
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        for (auto cmode :
             std::vector<Param::ComputeMode>{Param::ComputeMode::DEFAULT,
                                             Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::Float16())
                    .set_dtype(1, dtype::Float16())
                    // Use inferred output dtype.
                    .set_dtype(2, {});
            checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 8, 8, 7, 11, 3, 1);
}
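// Grouped fp32 convolutions handled by FALLBACK_ALGO.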
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_FALLBACK) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("FALLBACK_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, {});
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
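// NAIVE_ALGO over grouped convolutions in float32, float16, QuantizedS8 and
// Quantized8Asymm; the output dtype is always left to be inferred.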
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param).set_dtype(2, {});
        //! float32
        checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32());
        checker.execs({src, filter, {}});
        //! float16
        checker.set_dtype(0, dtype::Float16()).set_dtype(1, dtype::Float16());
        checker.execs({src, filter, {}});
        //! Qint8
        checker.set_dtype(0, dtype::QuantizedS8(3.34f))
                .set_dtype(1, dtype::QuantizedS8(0.32f));
        checker.execs({src, filter, {}});
        //! Quint8
        checker.set_dtype(0, dtype::Quantized8Asymm(3.34f,
                                                    static_cast<uint8_t>(21)))
                .set_dtype(1, dtype::Quantized8Asymm(0.32f,
                                                     static_cast<uint8_t>(15)));
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                // Use inferred output dtype.
                .set_dtype(2, {});
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
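// Backward-data (deconvolution) correctness tests. Each case deduces the
// grad layout from filter and diff, and covers dense/group sparsity, stride,
// padding, dilation, and both CONVOLUTION and CROSS_CORRELATION modes.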
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
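// Same coverage with int8 filter/diff and int32 grad.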
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_INT8_INT8_INT32) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Int8()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Int8()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Int8()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_SINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::QuantizedS8(0.2f)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                .set_dtype(2, {});
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
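// Quantized8Asymm variant; inputs are drawn from NormalRNG centered at 128.f
// so values sit near the asymmetric zero points.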
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow},
                             dtype::Quantized8Asymm(1.3f, (uint8_t)129)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw},
                      dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw},
                      dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        NormalRNG rng(128.f);
        checker.set_param(param)
                .set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
                .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
                .set_dtype(2, {});
        checker.set_rng(0, &rng).set_rng(1, &rng);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    Checker<ConvolutionBackwardData> checker(handle());
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU and GPU build to choose between. To run GPU programs, make sure the machine has a GPU device and its driver installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.