
dnn/test/fallback/convolution.cpp (19 kB)

/**
 * \file dnn/test/fallback/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/fallback/fixture.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
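// The benchmarks below are compiled only when MEGDNN_WITH_BENCHMARK is
// enabled; they report wall-clock timings and derived MFLOPS figures rather
// than checking correctness.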
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_MATRIX_MUL) {
    using Param = Convolution::Param;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 50;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
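        // Each output element needs IC * FH * FW multiply-accumulates, i.e.
        // 2 * IC * FH * FW FLOPs. tfloat is the total time in ms over RUN
        // runs, so tfloat / RUN * 1000 is us per run, and FLOPs per
        // microsecond equals MFLOPS.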
        printf("fp32 flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        // The SAME-style padding above is deliberately overridden: this
        // benchmark measures the unpadded case.
        param.pad_h = 0;
        param.pad_w = 0;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    profile(48, 128, 56, 88, 1, 1);
    profile(56, 128, 64, 80, 1, 1);
    profile(24, 3, 256, 320, 3, 2);
    profile(16, 3, 224, 352, 5, 2);
    profile(16, 3, 256, 320, 7, 2);
    profile(8, 8, 56, 88, 3, 1);
    profile(8, 8, 7, 11, 3, 1);
    profile(4, 4, 64, 80, 3, 1);
    profile(108, 108, 7, 7, 3, 1);
    profile(54, 54, 7, 7, 3, 1);
    profile(3, 3, 128, 128, 3, 1);
    profile(3, 3, 112, 112, 3, 1);
}
TEST_F(FALLBACK, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_fallback(handle());
        size_t RUN = 500;
        benchmarker_fallback.set_display(false)
                .set_dtype(0, dtype::Float32{})
                .set_dtype(1, dtype::Float32{})
                .set_times(RUN)
                .set_param(param);
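        // The AlgoChecker installed via set_before_exec_callback restricts
        // each run to the named algorithm, so the two timings below compare
        // the matmul-based and the direct deconvolution kernels on identical
        // layouts.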
        auto tmatmul = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvMatmul"))
                               .exec(tensors);
        auto tdirect = benchmarker_fallback
                               .set_before_exec_callback(
                                       AlgoChecker<ConvolutionBackwardData>(
                                               "DeconvDirect"))
                               .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        size_t total_flops = IC * tensors[1].total_nr_elems() * FH * FW * 2;
        printf("Direct_time: %.3f ms Direct_flops: %.3f mflops\n", tdirect,
               total_flops / (tdirect / RUN * 1000));
        printf("Matmul_time: %.3f ms Matmul_flops: %.3f mflops\n", tmatmul,
               total_flops / (tmatmul / RUN * 1000));
        // A ratio above 1 means the matmul algorithm is faster.
        printf("speedup: %.3f\n", tdirect / tmatmul);
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                       size_t fh, size_t fw, size_t stride = 1,
                       size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 3, 3, 1, 2, 2);
    profile(1, 2, 3, 3, 2, 2, 2);
    profile(1, 4, 3, 3, 4, 2, 2);
    profile(1, 4, 3, 3, 8, 2, 2);
    profile(1, 8, 3, 3, 4, 2, 2);
    profile(1, 8, 3, 3, 8, 2, 2);
}
#endif
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param);
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_NAIVE_ALGO_FP16) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
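        // ComputeMode::DEFAULT accumulates in the input type (fp16 here),
        // while ComputeMode::FLOAT32 requests fp32 intermediate accumulation
        // for better precision; both are expected to pass the check.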
        for (auto cmode :
             std::vector<Param::ComputeMode>{Param::ComputeMode::DEFAULT,
                                             Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::Float16())
                    .set_dtype(1, dtype::Float16())
                    // Use inferred output dtype.
                    .set_dtype(2, {});
            checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 8, 8, 7, 11, 3, 1);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_FALLBACK) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("FALLBACK_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
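        // With Sparse::GROUP the filter takes a leading group dimension:
        // {group, oc_per_group, ic_per_group, fh, fw}.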
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, {});
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK_MULTI_THREADS, CONVOLUTION_NAIVE_ALGO) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionForward>("NAIVE_ALGO"));
    Param param;
    auto run = [&](size_t n, size_t group, size_t ic, size_t ih, size_t iw,
                   size_t oc, size_t fh, size_t fw) {
        param.sparse = param::Convolution::Sparse::GROUP;
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        TensorShape src{n, ic, ih, iw},
                filter{group, oc / group, ic / group, fh, fw};
        checker.set_param(param).set_dtype(2, {});
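        // An empty dtype for operand 2 lets the checker use the deduced
        // output dtype for every case below (for the quantized inputs this
        // is presumably a quantized 32-bit type with the product of the
        // input scales).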
        //! float32
        checker.set_dtype(0, dtype::Float32()).set_dtype(1, dtype::Float32());
        checker.execs({src, filter, {}});
        //! float16
        checker.set_dtype(0, dtype::Float16()).set_dtype(1, dtype::Float16());
        checker.execs({src, filter, {}});
        //! Qint8
        checker.set_dtype(0, dtype::QuantizedS8(3.34f))
                .set_dtype(1, dtype::QuantizedS8(0.32f));
        checker.execs({src, filter, {}});
        //! Quint8
        checker.set_dtype(0, dtype::Quantized8Asymm(3.34f,
                                                    static_cast<uint8_t>(21)))
                .set_dtype(1, dtype::Quantized8Asymm(0.32f,
                                                     static_cast<uint8_t>(15)));
        checker.execs({src, filter, {}});
    };
    run(4, 1, 3, 21, 15, 5, 3, 3);
    run(1, 8, 56, 24, 31, 56, 1, 1);
    run(4, 8, 8, 8, 7, 8, 3, 1);
    run(8, 1, 54, 54, 7, 7, 3, 1);
    run(100, 1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_MATRIX_MUL_SINT8) {
    Checker<Convolution> checker(handle());
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                // Use inferred output dtype.
                .set_dtype(2, {});
        checker.execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
    };
    run(1, 3, 128, 128, 5, 3, 3);
    run(1, 56, 128, 64, 80, 1, 1);
    run(1, 8, 8, 7, 11, 3, 1);
    run(1, 54, 54, 7, 7, 3, 1);
    run(1, 3, 3, 128, 128, 3, 1);
    run(1, 3, 3, 112, 112, 3, 1);
    run(1, 1, 1, 1, 1, 3, 3);
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
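        // ConvolutionBackwardData takes (filter, diff, grad): diff is the
        // output gradient, and grad, the input gradient, is deduced from the
        // other two layouts below.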
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_INT8_INT8_INT32) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
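        // Plain Int8 inputs with an explicit Int32 output: this exercises
        // integer accumulation without any quantization parameters.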
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Int8()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Int8()};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Int8()};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_SINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::QuantizedS8(0.2f)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::QuantizedS8(0.2f)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param)
                .set_dtype(0, dtype::QuantizedS8(0.2f))
                .set_dtype(1, dtype::QuantizedS8(0.2f))
                .set_dtype(2, {});
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
    Checker<ConvolutionBackwardData> checker(handle());
    using Param = ConvolutionBackwardData::Param;
    Param param;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow},
                             dtype::Quantized8Asymm(1.3f, (uint8_t)129)};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw},
                      dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        } else {
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw},
                      dtype::Quantized8Asymm(1.2f, (uint8_t)127)};
        }
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
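        // A normal RNG centered near 128 (presumably the mean parameter)
        // keeps the generated uint8 values around the asymmetric zero points
        // (127 / 129) used above, so the quantized inputs stay in range.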
        NormalRNG rng(128.f);
        checker.set_param(param)
                .set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
                .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
                .set_dtype(2, {});
        checker.set_rng(0, &rng).set_rng(1, &rng);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}

// vim: syntax=cpp.doxygen

