
convolution.cpp

/**
 * \file dnn/test/fallback/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "megdnn/dtype.h"
#include "test/fallback/fixture.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
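// The benchmarks below time RUN executions of each convolution and report
// throughput. A direct convolution performs IC * FH * FW multiply-accumulates
// (i.e. 2 * IC * FH * FW ops) per output element, hence the op count
// IC * dst_layout.total_nr_elems() * FH * FW * 2; dividing that by the
// per-run time in microseconds (tfloat / RUN * 1000, with tfloat in ms)
// yields MFLOPS.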
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("fp32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // Note: the kernel/2 padding is immediately overridden below; these
        // profiles run with zero padding.
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 1, 1);
    profile(24, 3, 256, 320, 3, 2);
    profile(16, 3, 224, 352, 5, 2);
    profile(16, 3, 256, 320, 7, 2);
    profile(8, 8, 56, 88, 3, 1);
    profile(8, 8, 7, 11, 3, 1);
    profile(4, 4, 64, 80, 3, 1);
    profile(108, 108, 7, 7, 3, 1);
    profile(54, 54, 7, 7, 3, 1);
    profile(3, 3, 128, 128, 3, 1);
    profile(3, 3, 112, 112, 3, 1);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8832) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        // Float32 layouts suffice here: only the deduced shape is used below.
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("int8x8x32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // As above, padding is deliberately overridden to zero.
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8816) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Int8{})
                              .set_dtype(1, dtype::Int8{})
                              .set_dtype(2, dtype::Int16{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        // Float32 layouts suffice here: only the deduced shape is used below.
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);
        printf("int8x8x16 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        // As above, padding is deliberately overridden to zero.
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(48, 128, 56, 88, 1, 2);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle());
        size_t RUN = 500;
        benchmarker_fallback.set_display(false)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .set_times(RUN)
                .set_param(param);
        auto tmatmul =
                benchmarker_fallback
                        .set_before_exec_callback(
                                AlgoChecker<ConvolutionBackwardData>("DeconvMatmul"))
                        .exec(tensors);
        auto tdirect =
                benchmarker_fallback
                        .set_before_exec_callback(
                                AlgoChecker<ConvolutionBackwardData>("DeconvDirect"))
                        .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        size_t total_flops = IC * tensors[1].total_nr_elems() * FH * FW * 2;
        printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect,
               total_flops / (tdirect / RUN * 1000));
        printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul,
               total_flops / (tmatmul / RUN * 1000));
        printf("speedup: %.3f\n", tdirect / tmatmul);
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                       size_t fw, size_t stride = 1, size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n", oc, ic,
               ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 3, 3, 1, 2, 2);
    profile(1, 2, 3, 3, 2, 2, 2);
    profile(1, 4, 3, 3, 4, 2, 2);
    profile(1, 4, 3, 3, 8, 2, 2);
    profile(1, 8, 3, 3, 4, 2, 2);
    profile(1, 8, 3, 3, 8, 2, 2);
}
#endif
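// Correctness tests: Checker runs each configuration on the fallback handle
// and verifies the output against the naive reference implementation; where
// a name is given, AlgoChecker pins the algorithm under test.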
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param);
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
#if MEGDNN_X86
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_8816) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<Convolution>(".+FB_GEMV.+"));
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw, size_t pad, size_t stride, size_t group) {
        Param param;
        param.sparse = group > 1 ? param::Convolution::Sparse::GROUP
                                 : param::Convolution::Sparse::DENSE;
        param.pad_h = param.pad_w = pad;
        param.stride_h = param.stride_w = stride;
        checker.set_param(param);
        if (group > 1) {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{group, oc / group, ic / group, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        } else {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{oc, ic, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        }
    };
    for (auto n : {1, 2})
        for (auto ic : {3, 4, 8, 12, 16})
            for (auto oc : {4, 8, 16, 32})
                for (auto ih : {7, 14, 15, 22})
                    for (auto iw : {7, 13, 11, 32})
                        for (auto filter : {1, 2, 3, 5, 7})
                            for (auto stride : {1, 2})
                                for (auto pad : {0, filter / 2}) {
                                    run(n, ic, ih, iw, oc, filter, filter, pad, stride,
                                        1);
                                    if (ic == oc) {
                                        run(n, ic, ih, iw, oc, filter, filter, pad,
                                            stride, ic);
                                    }
                                }
}
#endif
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        for (auto cmode : std::vector<Param::ComputeMode>{
                     Param::ComputeMode::DEFAULT, Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::Float16())
                    .set_dtype(1, dtype::Float16())
                    // Use inferred output dtype.
                    .set_dtype(2, {});
            checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 8, 8, 7, 11, 3, 1);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_FALLBACK) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("FALLBACK_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw}, filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, {});
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw}, filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param).set_dtype(2, {});
        //! float32
        checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32());
        checker.execs({src, filter, {}});
        //! float16
        checker.set_dtype(0, dtype::Float16()).set_dtype(1, dtype::Float16());
        checker.execs({src, filter, {}});
        //! Qint8
        checker.set_dtype(0, dtype::QuantizedS8(3.34f))
                .set_dtype(1, dtype::QuantizedS8(0.32f));
        checker.execs({src, filter, {}});
        //! Quint8
        checker.set_dtype(0, dtype::Quantized8Asymm(3.34f, static_cast<uint8_t>(21)))
                .set_dtype(1, dtype::Quantized8Asymm(0.32f, static_cast<uint8_t>(15)));
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc, size_t fh,
                   size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                // Use inferred output dtype.
                .set_dtype(2, {});
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
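// The remaining backward-data tests repeat the same geometry set, varying
// only the tensor dtypes (int8 -> int32, qint8, quint8) and, in the last
// test, the algorithm (DeconvNaive).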
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_INT8_INT8_INT32) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Int8()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Int8()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Int8()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_SINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::QuantizedS8(0.2f)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                .set_dtype(2, {});
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{
                {n, oc * group, oh, ow}, dtype::Quantized8Asymm(1.3f, (uint8_t)129)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {
                    {group, oc, ic, fh, fw},
                    dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        NormalRNG rng(128.f);
        checker.set_param(param)
                .set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
                .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
                .set_dtype(2, {});
        checker.set_rng(0, &rng).set_rng(1, &rng);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    Checker<ConvolutionBackwardData> checker(handle());
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc, size_t fh,
                   size_t fw, size_t stride, size_t padding, size_t dilate = 1,
                   size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff = TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}

// vim: syntax=cpp.doxygen
