/**
 * \file dnn/test/fallback/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "test/fallback/fixture.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
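// Benchmarks fp32 convolution on the fallback handle; the FLOP count is
// estimated as 2 * IC * FH * FW per output element.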
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("fp32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 1, 1);
    profile(24, 3, 256, 320, 3, 2);
    profile(16, 3, 224, 352, 5, 2);
    profile(16, 3, 256, 320, 7, 2);
    profile(8, 8, 56, 88, 3, 1);
    profile(8, 8, 7, 11, 3, 1);
    profile(4, 4, 64, 80, 3, 1);
    profile(108, 108, 7, 7, 3, 1);
    profile(54, 54, 7, 7, 3, 1);
    profile(3, 3, 128, 128, 3, 1);
    profile(3, 3, 112, 112, 3, 1);
}
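// The same benchmark with int8 inputs accumulating into int32.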
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8832) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_int8(handle());
        size_t RUN = 50;
        auto tint8 = benchmarker_int8.set_display(false)
                             .set_dtype(0, dtype::Int8{})
                             .set_dtype(1, dtype::Int8{})
                             .set_dtype(2, dtype::Int32{})
                             .set_times(RUN)
                             .set_param(param)
                             .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        // The fp32 dtypes below are only used to deduce the output shape.
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("int8x8x32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tint8 / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
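// The same benchmark with int8 inputs accumulating into int16.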
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL_8816) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_int8(handle());
        size_t RUN = 50;
        auto tint8 = benchmarker_int8.set_display(false)
                             .set_dtype(0, dtype::Int8{})
                             .set_dtype(1, dtype::Int8{})
                             .set_dtype(2, dtype::Int16{})
                             .set_times(RUN)
                             .set_param(param)
                             .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        // The fp32 dtypes below are only used to deduce the output shape.
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("int8x8x16 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tint8 / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(48, 128, 56, 88, 1, 2);
    profile(56, 128, 64, 80, 3, 1);
    profile(24, 3, 256, 320, 3, 2);
}
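// Benchmarks the two fallback deconvolution algorithms (DeconvDirect vs.
// DeconvMatmul) on identical layouts and prints the speedup of matmul over
// direct.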
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle());
        size_t RUN = 500;
        benchmarker_fallback.set_display(false)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .set_times(RUN)
                .set_param(param);
        auto tmatmul = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvMatmul"))
                               .exec(tensors);
        auto tdirect = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvDirect"))
                               .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        size_t total_flops = IC * tensors[1].total_nr_elems() * FH * FW * 2;
        printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect,
               total_flops / (tdirect / RUN * 1000));
        printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul,
               total_flops / (tmatmul / RUN * 1000));
        printf("speedup: %.3f\n", tdirect / tmatmul);
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                       size_t fh, size_t fw, size_t stride = 1,
                       size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 3, 3, 1, 2, 2);
    profile(1, 2, 3, 3, 2, 2, 2);
    profile(1, 4, 3, 3, 4, 2, 2);
    profile(1, 4, 3, 3, 8, 2, 2);
    profile(1, 8, 3, 3, 4, 2, 2);
    profile(1, 8, 3, 3, 8, 2, 2);
}
#endif
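// The remaining tests check correctness: Checker runs the fallback operator
// and compares its output against a reference implementation.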
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param);
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
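// x86 only: exercises the int8x8x16 FB_GEMV algorithm over a grid of dense
// shapes and, when ic == oc, the corresponding grouped shapes.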
#if MEGDNN_X86
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_8816) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(AlgoChecker<Convolution>(".+FB_GEMV.+"));
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw, size_t pad, size_t stride,
                   size_t group) {
        Param param;
        param.sparse = group > 1 ? param::Convolution::Sparse::GROUP
                                 : param::Convolution::Sparse::DENSE;
        param.pad_h = param.pad_w = pad;
        param.stride_h = param.stride_w = stride;
        checker.set_param(param);
        if (group > 1) {
            checker.execl(
                    {{{n, ic, ih, iw}, dtype::Int8()},
                     {{group, oc / group, ic / group, fh, fw}, dtype::Int8()},
                     {{}, dtype::Int16()}});
        } else {
            checker.execl({{{n, ic, ih, iw}, dtype::Int8()},
                           {{oc, ic, fh, fw}, dtype::Int8()},
                           {{}, dtype::Int16()}});
        }
    };
    for (auto n : {1, 2})
        for (auto ic : {3, 4, 8, 12, 16})
            for (auto oc : {4, 8, 16, 32})
                for (auto ih : {7, 14, 15, 22})
                    for (auto iw : {7, 13, 11, 32})
                        for (auto filter : {1, 2, 3, 5, 7})
                            for (auto stride : {1, 2})
                                for (auto pad : {0, filter / 2}) {
                                    run(n, ic, ih, iw, oc, filter, filter, pad,
                                        stride, 1);
                                    if (ic == oc) {
                                        run(n, ic, ih, iw, oc, filter, filter,
                                            pad, stride, ic);
                                    }
                                }
}
#endif
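// Runs NAIVE_ALGO on fp16 inputs under both the DEFAULT and FLOAT32 compute
// modes; the output dtype is left to be inferred.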
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        for (auto cmode :
             std::vector<Param::ComputeMode>{Param::ComputeMode::DEFAULT,
                                             Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::Float16())
                    .set_dtype(1, dtype::Float16())
                    // Use inferred output dtype.
                    .set_dtype(2, {});
            checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 8, 8, 7, 11, 3, 1);
}
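// Runs FALLBACK_ALGO on grouped fp32 convolutions of various shapes.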
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_FALLBACK) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("FALLBACK_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, {});
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
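// Runs NAIVE_ALGO on grouped convolutions across float32, float16, quantized
// int8 and asymmetric quantized uint8 inputs.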
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param).set_dtype(2, {});
        //! float32
        checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32());
        checker.execs({src, filter, {}});
        //! float16
        checker.set_dtype(0, dtype::Float16()).set_dtype(1, dtype::Float16());
        checker.execs({src, filter, {}});
        //! Qint8
        checker.set_dtype(0, dtype::QuantizedS8(3.34f))
                .set_dtype(1, dtype::QuantizedS8(0.32f));
        checker.execs({src, filter, {}});
        //! Quint8
        checker.set_dtype(0,
                          dtype::Quantized8Asymm(3.34f,
                                                 static_cast<uint8_t>(21)))
                .set_dtype(1, dtype::Quantized8Asymm(
                                      0.32f, static_cast<uint8_t>(15)));
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
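// Checks the matrix-mul path with quantized int8 inputs; the output dtype is
// inferred from the inputs.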
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                // Use inferred output dtype.
                .set_dtype(2, {});
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
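// Backward-data correctness on fp32: dense and grouped filters, various
// strides, paddings and dilations, in both CONVOLUTION and CROSS_CORRELATION
// modes.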
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
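// The same backward-data cases with int8 filter/diff and an int32 gradient.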
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_INT8_INT8_INT32) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Int8()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Int8()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Int8()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
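// The same backward-data cases with quantized int8 tensors; the gradient
// dtype is inferred.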
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_SINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::QuantizedS8(0.2f)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                .set_dtype(2, {});
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
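// The same backward-data cases with asymmetric quantized uint8 tensors; a
// NormalRNG keeps random inputs near the quantization zero points.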
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow},
                             dtype::Quantized8Asymm(1.3f, (uint8_t)129)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw},
                      dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw},
                      dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        NormalRNG rng(128.f);
        checker.set_param(param)
                .set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
                .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
                .set_dtype(2, {});
        checker.set_rng(0, &rng).set_rng(1, &rng);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}

// vim: syntax=cpp.doxygen
