
conv_bias.cpp 40 kB

/**
 * \file dnn/test/common/conv_bias.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/common/conv_bias.h"
#include "src/common/utils.h"
#include "test/common/benchmarker.h"

namespace megdnn {
namespace test {
namespace conv_bias {
namespace {
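//! Rewrite an NCHW4 test case in-place into the equivalent CHWN4 case by
//! permuting the src/filter/bias shapes and updating the param format.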
void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
    arg.param.format = param::ConvBias::Format::CHWN4;
    arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
    arg.filter = TensorShape{arg.filter[1], arg.filter[2], arg.filter[3],
                             arg.filter[0], 4};
    arg.bias =
            TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
}
}  // namespace
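//! Basic dense cases: every nonlinearity mode crossed with both convolution
//! modes and two spatial sizes.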
std::vector<TestArg> get_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        // fallback case
        args.emplace_back(cur_param, TensorShape{10, 1, i, i},
                          TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
        cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{1, 4, i, i},
                          TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
    } }
    // clang-format on
    return args;
}
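//! Channel-wise (GROUP sparse) cases sweeping stride, padding, filter size
//! and output channels per group, including padding larger than the kernel.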
std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(cur_param, TensorShape{2, 3, 16, 16},
                                          TensorShape{3, ocpg, 1, f, f},
                                          TensorShape{1, 3 * ocpg, 1, 1});
                    }
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
        // padding larger than kern
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
    }
    return args;
}
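//! 1x1-kernel dense cases for both convolution modes.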
std::vector<TestArg> get_args_1x1() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        for (size_t i : {16, 19}) {
            cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
            args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
                              TensorShape{30, 20, 1, 1},
                              TensorShape{1, 30, 1, 1});
            cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
            args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
                              TensorShape{30, 20, 1, 1},
                              TensorShape{1, 30, 1, 1});
        }
    }
    return args;
}
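//! Winograd cases for a given square kernel size: dense and group
//! convolution, each with no-bias / full-bias / per-channel-bias variants.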
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, kernel_size, kernel_size},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
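//! Winograd cases for MK-packed layouts: channels are multiples of pack_size
//! and the kernel is fixed at 3x3.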
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
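//! Same cases as get_winograd_mk_packed_args, restricted to the IDENTITY and
//! RELU nonlinearities used by the quantized winograd tests.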
std::vector<TestArg> get_quantized_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
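//! Quantized cases for a single nonlinearity mode, covering dense, group and
//! 1x1 convolutions.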
std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}
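//! All quantized cases: the IDENTITY, RELU and H_SWISH variants concatenated.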
std::vector<TestArg> get_quantized_args() {
    using NLMode = param::ConvBias::NonlineMode;
    auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
         arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
         arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
    std::vector<TestArg> args;
    args.insert(args.end(), arg_p1.begin(), arg_p1.end());
    args.insert(args.end(), arg_p2.begin(), arg_p2.end());
    args.insert(args.end(), arg_p3.begin(), arg_p3.end());
    return args;
}
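//! int8 NCHW4 cases for a given kernel size; batch is capped at 32 when
//! kernel_size == 7.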
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {64, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
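//! int8 NCHW4 cases with odd batch and small channel counts, intended for
//! the bounds-checking tests.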
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
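//! int8 NCHW4 cases with small batch sizes and a wide sweep of widths.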
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
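//! int8 NCHW4 cases with very small input channel counts (4 and 12).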
std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
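//! Small-channel variant with odd batch sizes for the bounds-checking tests.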
std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
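//! The CHWN4 variants below reuse the NCHW4 generators and convert each case
//! in-place with convert_arg_from_nchw4_to_chwn4.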
std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
    auto args = get_int8_nchw4_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
    auto args = get_int8_nchw4_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
        size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
    auto args = get_int8_nchw4_args_small_batch(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
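//! int8 NCHW4 cases sized for tensor-core kernels: fixed batch 64 and 128
//! output channels.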
std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
        for (size_t ic : {32, 64}) {
        for (size_t h : {8}) {
        for (size_t w : {11}) {
        for (int p : {static_cast<int>(kernel_size / 2), 0}) {
        for (size_t s : {1, 2}) {
            size_t f = kernel_size;
            cur_param.mode = mode;
            cur_param.nonlineMode = nlmode;
            cur_param.format = param::ConvBias::Format::NCHW4;
            cur_param.sparse = param::ConvBias::Sparse::DENSE;
            cur_param.pad_h = cur_param.pad_w = p;
            cur_param.stride_h = cur_param.stride_w = s;
            //! bias channel
            args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                              TensorShape{oc, ic / 4, f, f, 4},
                              TensorShape{1, oc / 4, 1, 1, 4});
        } } } } }
    } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
    auto args = get_int8_nchw4_tensorcore_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
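//! Check conv-bias correctness for explicit dtypes over args (or, when args
//! is empty, over the default int8 cases for the given format). If algo is
//! non-null, execution is restricted to that algorithm; RNG ranges and error
//! tolerances are chosen per source dtype. A minimal call might look like
//! this (the scales are illustrative only):
//!   check_conv_bias(dtype::QuantizedS8(1.2f), dtype::QuantizedS8(1.3f),
//!                   dtype::QuantizedS32(1.2f * 1.3f),
//!                   dtype::QuantizedS8(1.1f), handle, nullptr,
//!                   param::ConvBias::Format::NCHW4, {});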
void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
                     DType dst_dtype, Handle* handle, const char* algo,
                     param::ConvBias::Format format,
                     const std::vector<TestArg>& args) {
    megdnn_assert(src_dtype.enumv() == filter_dtype.enumv());
    Checker<ConvBiasForward> checker(handle);
    if (algo) {
        checker.set_before_exec_callback(
                ConvBiasAlgoChecker<ConvBiasForward>(algo));
    }
    std::unique_ptr<RNG> rng;
    std::unique_ptr<RNG> bias_rng;
    std::unique_ptr<RNG> const_rng;
    // TODO: check range of rng
    if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3)
                .set_max_avg_error(1e-1)
                .set_max_avg_biased_error(1e-1);
    } else if (src_dtype.enumv() == DTypeEnum::Float16) {
        rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
        bias_rng = std::make_unique<NormalRNG>(2.f);
        checker.set_epsilon(1e-2);
    } else if (src_dtype.enumv() == DTypeEnum::Float32) {
        rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
        bias_rng = std::make_unique<NormalRNG>(2.f);
    }
    megdnn_assert(rng != nullptr && bias_rng != nullptr);
    checker.set_rng(0, rng.get())
            .set_rng(1, rng.get())
            .set_rng(2, rng.get())
            .set_rng(3, rng.get());
    if (args.empty()) {
        std::vector<TestArg> default_args;
        using Param = param::ConvBias;
        using Format = Param::Format;
        if (format == Format::NCHW4) {
            default_args = get_int8_nchw4_args(3);
        } else if (format == Format::CHWN4) {
            default_args = get_int8_chwn4_args(3);
        }
        for (auto&& arg : default_args) {
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, {}, {}});
        }
    } else {
        for (auto&& arg : args) {
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, {}, {}});
        }
    }
}
#if MEGDNN_WITH_BENCHMARK
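//! Benchmark shapes: a sweep of square feature maps plus convolutions taken
//! from VGG16 and ResNet-18; shapes incompatible with pack_size are skipped.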
std::vector<conv_bias::TestArg> get_winograd_benchmark_args(size_t kernel,
                                                            size_t pack_size) {
    std::vector<conv_bias::TestArg> args;
    auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                    size_t p) {
        if (ic % pack_size != 0 || oc % pack_size != 0)
            return;
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;
        args.push_back(conv_bias::TestArg{param,
                                          TensorShape{1, ic, h, w},
                                          TensorShape{oc, ic, kernel, kernel},
                                          {1, oc, 1, 1}});
    };
    for (size_t ic : {8, 16, 32, 64}) {
        for (size_t oc : {8, 16, 32, 64}) {
            pack(oc, ic, 56, 56, kernel, kernel / 2);
            pack(oc, ic, 128, 128, kernel, kernel / 2);
            pack(oc, ic, 256, 256, kernel, kernel / 2);
        }
    }
    //! conv in vgg16
    pack(512, 512, 15, 15, kernel, kernel / 2);
    pack(512, 256, 15, 15, kernel, kernel / 2);
    pack(256, 256, 29, 29, kernel, kernel / 2);
    pack(256, 128, 29, 29, kernel, kernel / 2);
    pack(128, 128, 57, 57, kernel, kernel / 2);
    pack(128, 64, 57, 57, kernel, kernel / 2);
    pack(64, 64, 123, 123, kernel, kernel / 2);
    pack(64, 24, 123, 123, kernel, kernel / 2);
    pack(24, 24, 224, 224, kernel, kernel / 2);
    //! conv in resnet18
    pack(64, 64, 56, 56, kernel, kernel / 2);
    pack(128, 128, 28, 28, kernel, kernel / 2);
    pack(256, 256, 14, 14, kernel, kernel / 2);
    pack(512, 512, 7, 7, kernel, kernel / 2);
    return args;
}
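//! Time the plain Convolution operator against the named winograd ConvBias
//! algorithm on the shapes above, printing throughput and speedup per case.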
void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
                        size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;
    Benchmarker<Convolution> benchmark(handle);
    benchmark.set_display(false);
    benchmark.set_times(RUN);
    Benchmarker<ConvBias> benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);
    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        opr->deduce_layout({arg.src, dtype::Float32()},
                           {arg.filter, dtype::Float32()},
                           {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;
        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        auto used = benchmark.set_param(conv_param)
                            .exec({arg.src, arg.filter, {}}) /
                    RUN;
        benchmark_winograd.set_param(arg.param);
        auto used_winograd =
                algo_benchmark<ConvBias>(benchmark_winograd,
                                         {arg.src, arg.filter, {}, {}, {}},
                                         algo_name) /
                RUN;
        printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
               "speedup: "
               "%f\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
               used, computations / used, used_winograd,
               computations / used_winograd, used / used_winograd);
    }
}
#endif  // MEGDNN_WITH_BENCHMARK
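//! General conv-bias cases parameterized by kernel sizes and stride, with
//! flags controlling padding, bias variants and nonlinearity modes.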
std::vector<conv_bias::TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
        bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                    size_t kernel, size_t stride, NLMode nlmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        if (!no_pad) {
            param.pad_h = kernel / 2;
            param.pad_w = kernel / 2;
        } else {
            param.pad_h = 0;
            param.pad_w = 0;
        }
        param.nonlineMode = nlmode;
        args.emplace_back(param, TensorShape{n, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, ic, h, w},
                              TensorShape{oc, ic, kernel, kernel},
                              TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w},
                        TensorShape{oc, ic, kernel, kernel},
                        TensorShape{
                                n, oc,
                                (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_w - kernel) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{});
        if (!no_bias) {
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, kernel, kernel},
                        TensorShape{
                                n, 2 * oc,
                                (h + param.pad_h * 2 - kernel) / stride + 1,
                                (w + param.pad_w * 2 - kernel) / stride + 1});
            }
            args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                              TensorShape{2, oc, ic, kernel, kernel},
                              TensorShape{1, 2 * oc, 1, 1});
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    for (size_t n : {1, 2}) {
        for (auto nlmode : nonlinemode) {
            for (size_t ic : {1, 3, 7}) {
                for (size_t oc : {1, 3, 7}) {
                    for (size_t size : {8, 16, 20}) {
                        for (size_t kern : kernel) {
                            pack(n, oc, ic, size, size, kern, stride, nlmode);
                        }
                    }
                }
            }
        }
    }
    return args;
}
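//! 1x1-kernel cases (stride 1, no padding) for both convolution modes,
//! covering dense and group convolution.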
std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
        bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                    size_t stride, NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        param.mode = convmode;
        param.nonlineMode = nlmode;
        args.emplace_back(param, TensorShape{n, ic, h, w},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, ic, h, w},
                              TensorShape{oc, ic, 1, 1},
                              TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, ic, h, w},
                                  TensorShape{oc, ic, 1, 1},
                                  TensorShape{n, oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                          TensorShape{2, oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                              TensorShape{2, oc, ic, 1, 1},
                              TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                                  TensorShape{2, oc, ic, 1, 1},
                                  TensorShape{n, 2 * oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
                                    param::ConvBias::Mode::CROSS_CORRELATION};
    for (size_t n : {1, 2})
        for (size_t oc : {1, 9, 33})
            for (size_t ic : {1, 16, 64})
                for (size_t size : {7, 14, 28})
                    for (auto nlmode : nonlinemode)
                        for (auto convmode : convmodes) {
                            pack(n, oc, ic, size, size, 1, nlmode, convmode);
                        }
    return args;
}
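//! Run every case through the checker, restricted to the named algorithm.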
void check_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
                     const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
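//! int8 x int8 -> int16 checker: src/filter are Int8, bias and dst Int16.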
void checker_conv_bias_int8x8x16(std::vector<conv_bias::TestArg> args,
                                 Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_dtype(4, dtype::Int16());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
}
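//! Extra implementation used when checking NCHW_WINOGRAD algorithms:
//! transform the filter with WinogradFilterPreprocess, then run ConvBias on
//! the transformed filter, using a malloc-backed WorkspaceBundle.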
void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m,
                              param::ConvBias param, Handle* handle,
                              param::MatrixMul::Format format) {
    megdnn_assert(param.format == param::ConvBias::Format::NCHW);
    auto winograd_preprocess_opr =
            handle->create_operator<WinogradFilterPreprocess>();
    winograd_preprocess_opr->param().output_block_size = m;
    winograd_preprocess_opr->param().format = format;
    TensorLayout filter_transform_layout;
    winograd_preprocess_opr->deduce_layout(tensors[1].layout,
                                           filter_transform_layout);
    size_t winograd_preprocess_workspace_in_bytes =
            winograd_preprocess_opr->get_workspace_in_bytes(
                    tensors[1].layout, filter_transform_layout);
    auto conv_bias_opr = handle->create_operator<ConvBias>();
    conv_bias_opr->param() = param;
    conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD;
    conv_bias_opr->param().output_block_size = m;
    size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes(
            tensors[0].layout, filter_transform_layout, tensors[2].layout,
            tensors[3].layout, tensors[4].layout, nullptr);
    WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
                                 conv_bias_workspace_in_bytes,
                                 winograd_preprocess_workspace_in_bytes});
    wb.set(malloc(wb.total_size_in_bytes()));
    TensorND filter_transform_tensor(wb.get(0),
                                     std::move(filter_transform_layout));
    winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
                                  wb.get_workspace(2));
    conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
                        tensors[3], tensors[4], nullptr, wb.get_workspace(1));
    free(wb.ptr());
}
}  // namespace conv_bias
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU device and its driver installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.