
convolution.cpp

/**
 * \file dnn/test/common/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "src/common/algo_base.h"

#include <unordered_set>
#include <sstream>

using namespace megdnn;
using namespace test;
using namespace convolution;
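
// Each get_*_args() helper below builds a list of (conv param, src shape,
// filter shape) test cases; unless param.format says otherwise, sources are
// NCHW and dense filters are OIHW.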
std::vector<TestArg> convolution::get_1x1_args() {
    std::vector<TestArg> args;
    param::Convolution param;
    param.mode = param::Convolution::Mode::CROSS_CORRELATION;
    // clang-format off
    for (size_t batch_size: {1, 8})
    for (size_t ic: {1, 16})
    for (size_t oc: {1, 16})
    for (size_t ih : {8, 32}) {
        size_t iw = ih;
        args.emplace_back(param, TensorShape{batch_size, ic, ih, iw},
                          TensorShape{oc, ic, 1, 1});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_args_common() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_padding() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.pad_h = 1;
        param.pad_w = 2;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_large_channel() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
    }
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.pad_h = 1;
        param.pad_w = 2;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_1x1() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 1, 1});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 1, 1});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_large_filter() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 2, i, i + 1},
                          TensorShape{3, 2, 7, 8});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 2, i, i + 1},
                          TensorShape{3, 2, 7, 8});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_exhaustive_search() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t n: {1, 2})
    for (size_t ih: {11, 13})
    for (size_t iw: {ih+1})
    for (size_t ic: {3})
    for (size_t oc: {4})
    for (size_t fh: {3, 6})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {1, 2})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        args.emplace_back(param, TensorShape{n, ic, ih, iw},
                          TensorShape{oc, ic, fh, fw});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_args_4x4() {
    std::vector<TestArg> args;
    for (size_t oh = 1; oh < 20; ++oh) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{4, 3, oh + 3, oh + 4},
                          TensorShape{2, 3, 4, 4});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_large_channels() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t n: {2})
    for (size_t ih: {13})
    for (size_t iw: {ih+1})
    for (size_t ic: {32})
    for (size_t oc: {32})
    for (size_t fh: {3, 6})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {1, 2})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        args.emplace_back(param, TensorShape{n, ic, ih, iw},
                          TensorShape{oc, ic, fh, fw});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_args_x86_direct_case_2() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t stride: {1, 2})
    for (size_t ker_size : {3, 5, 7, 9}) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        param.stride_h = param.stride_w = stride;
        param.pad_h = param.pad_w = ker_size / 2;
        args.emplace_back(param, TensorShape{2, 2, 100, 99},
                          TensorShape{3, 2, ker_size, ker_size});
        args.emplace_back(param, TensorShape{2, 2, 100, 99},
                          TensorShape{1, 2, ker_size, ker_size});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_args_fallback_templated_impl() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t sh: {1, 2})
    for (size_t sw: {1, 2})
    for (size_t ph: {0, 1, 2})
    for (size_t pw: {0, 1, 2})
    for (size_t ker_size: {3, 4, 5, 7})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = sh;
        param.stride_w = sw;
        param.pad_h = ph;
        param.pad_w = pw;
        args.emplace_back(param, TensorShape{2, 2, 50, 55},
                          TensorShape{3, 2, ker_size, ker_size});
        args.emplace_back(param, TensorShape{2, 2, 50, 55},
                          TensorShape{1, 2, ker_size, ker_size});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_args_fallback_non_templated_impl() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t sh: {1, 2})
    for (size_t sw: {1, 2})
    for (size_t ph: {0, 1, 2})
    for (size_t pw: {0, 1, 2})
    for (size_t ker_size: {3, 4, 5, 7})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = sh;
        param.stride_w = sw;
        param.pad_h = ph;
        param.pad_w = pw;
        args.emplace_back(param, TensorShape{2, 2, 10, 55},
                          TensorShape{3, 2, ker_size, ker_size + 1});
        args.emplace_back(param, TensorShape{2, 2, 10, 55},
                          TensorShape{1, 2, ker_size, ker_size + 1});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_args_cudnn_5_1_failures() {
    std::vector<TestArg> args;
    args.emplace_back(
            param::Convolution{param::Convolution::Mode::CROSS_CORRELATION, 0,
                               4, 1, 2},
            TensorShape{5, 3, 25, 20}, TensorShape{10, 3, 7, 4});
    return args;
}
std::vector<TestArg> convolution::get_args_x86_winograd_algorithm() {
    std::vector<TestArg> args;
    for (size_t ic_size : {8, 16}) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 0;
        args.emplace_back(param, TensorShape{2, ic_size, 102, 102},
                          TensorShape{8, ic_size, 3, 3});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_BRAIN_481() {
    std::vector<TestArg> args;
    {
        param::Convolution param{param::Convolution::Mode::CROSS_CORRELATION, 0,
                                 1, 1, 2};
        args.emplace_back(param, TensorShape{4, 4, 14, 13},
                          TensorShape{3, 4, 8, 13});
        for (size_t margin = 0; margin < 5; ++margin) {
            param::Convolution param{
                    param::Convolution::Mode::CROSS_CORRELATION, 1, 1, 2, 2};
            args.emplace_back(param, TensorShape{4, 4, 14, 13},
                              TensorShape{3, 4, 16 - margin, 15 - margin});
        }
    }
    return args;
}
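
// Aggregate the per-scenario generators above: ADD_ARGS(NAME) calls
// get_args_NAME() and appends the result to all_args.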
std::vector<TestArg> convolution::get_args() {
    std::vector<TestArg> all_args, args;
#define ADD_ARGS(NAME)        \
    args = get_args_##NAME(); \
    all_args.insert(all_args.end(), args.begin(), args.end());
    ADD_ARGS(common)
    ADD_ARGS(padding)
    ADD_ARGS(large_channel)
    ADD_ARGS(1x1)
    ADD_ARGS(large_filter)
    ADD_ARGS(exhaustive_search)
    ADD_ARGS(4x4)
    ADD_ARGS(large_channels)
    ADD_ARGS(x86_direct_case_2)
    ADD_ARGS(fallback_templated_impl)
    ADD_ARGS(fallback_non_templated_impl)
    ADD_ARGS(cudnn_5_1_failures)
    ADD_ARGS(x86_winograd_algorithm)
    ADD_ARGS(BRAIN_481)
#undef ADD_ARGS
    return all_args;
}
std::vector<TestArg> convolution::get_args_cuda_conv_bwd_data() {
    std::vector<TestArg> all_args, args;
#define ADD_ARGS(NAME)        \
    args = get_args_##NAME(); \
    all_args.insert(all_args.end(), args.begin(), args.end());
    ADD_ARGS(common)
    ADD_ARGS(padding)
    ADD_ARGS(large_channel)
    ADD_ARGS(1x1)
    ADD_ARGS(large_filter)
    ADD_ARGS(exhaustive_search)
    ADD_ARGS(4x4)
    ADD_ARGS(large_channels)
    ADD_ARGS(x86_direct_case_2)
    ADD_ARGS(fallback_templated_impl)
    ADD_ARGS(fallback_non_templated_impl)
    ADD_ARGS(x86_winograd_algorithm)
#undef ADD_ARGS
    return all_args;
}
std::vector<TestArg> convolution::get_args_cudnn_7_5_failures() {
    std::vector<TestArg> all_args, args;
#define ADD_ARGS(NAME)        \
    args = get_args_##NAME(); \
    all_args.insert(all_args.end(), args.begin(), args.end());
    ADD_ARGS(cudnn_5_1_failures)
    ADD_ARGS(BRAIN_481)
#undef ADD_ARGS
    return all_args;
}
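
// Channel-wise cases: with Sparse::GROUP the filter becomes 5-D
// {group, ocpg, icpg, fh, fw}; here group == c and ocpg == icpg == 1.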
std::vector<TestArg> convolution::get_chanwise_args() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t n: {2})
    for (size_t ih: {13})
    for (size_t iw: {ih+1})
    for (size_t c: {4, 36, 128, 320})
    for (size_t fh: {3, 5})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {1, 2})
    for (size_t dh : {1, 2}) {
        param::Convolution param;
        param.sparse = param::Convolution::Sparse::GROUP;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        param.dilate_h = param.dilate_w = dh;
        args.emplace_back(param, TensorShape{n, c, ih, iw},
                          TensorShape{c, 1, 1, fh, fw});
    }
    // clang-format on
    return args;
}
std::vector<TestArg> convolution::get_dilated_args() {
    std::vector<TestArg> args;
    param::Convolution param;
    param.pad_h = param.pad_w = 2;
    param.dilate_h = param.dilate_w = 2;
    size_t n = 1, ic = 15, ih = 128, iw = 128, fh = 3, fw = 3, oc = 17;
    args.emplace_back(param, TensorShape{n, ic, ih, iw},
                      TensorShape{oc, ic, fh, fw});
    // exhaustive search
    // clang-format off
    for (size_t n: {2})
    for (size_t ih: {23})
    for (size_t iw: {ih+1})
    for (size_t ic: {3})
    for (size_t oc: {4})
    for (size_t fh: {3, 6})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {2})
    for (size_t dh : {3}) {
        param::Convolution param;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        param.dilate_h = dh;
        param.dilate_w = 3;
        args.emplace_back(param, TensorShape{n, ic, ih, iw},
                          TensorShape{oc, ic, fh, fw});
    }
    // clang-format on
    return args;
}
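
// NCHW4 packs each group of 4 input channels into the innermost dimension,
// so the NCHW shape {b, ic, h, w} appears as {b, ic / 4, h, w, 4} (and the
// filter as {oc, ic / 4, fh, fw, 4}). The int8 backward-data generators
// below cover the NCHW4, NCHW and NHWC formats.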
std::vector<TestArg> convolution::get_args_int8_nchw4_conv_bwd_data() {
    std::vector<TestArg> args;
    param::Convolution cur_param;
    // clang-format off
    for (auto mode : {param::Convolution::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (size_t kernel_size : {3, 4, 5, 7}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2}) {
        if (kernel_size >= 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.format = param::Convolution::Format::NCHW4;
        cur_param.sparse = param::Convolution::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4});
    } } } } } } } } }
    // clang-format on
    cur_param.pad_h = cur_param.pad_w = 1;
    cur_param.stride_h = cur_param.stride_w = 1;
    args.emplace_back(cur_param, TensorShape{16, 4, 8, 11, 4},
                      TensorShape{16, 4, 3, 3, 4});
    return args;
}
std::vector<TestArg> convolution::get_args_int8_nchw_conv_bwd_data() {
    std::vector<TestArg> args;
    param::Convolution cur_param;
    // clang-format off
    for (auto mode : {param::Convolution::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (size_t kernel_size : {3, 4, 5, 7}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2}) {
        if (kernel_size >= 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.format = param::Convolution::Format::NCHW;
        cur_param.sparse = param::Convolution::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic, h, w},
                          TensorShape{oc, ic, f, f});
    } } } } } } } } }
    // clang-format on
    // test stride = 1
    cur_param.pad_h = cur_param.pad_w = 1;
    cur_param.stride_h = cur_param.stride_w = 1;
    args.emplace_back(cur_param, TensorShape{16, 16, 8, 11},
                      TensorShape{16, 16, 3, 3});
    return args;
}
std::vector<TestArg> convolution::get_args_int8_nhwc_conv_bwd_data() {
    std::vector<TestArg> args;
    param::Convolution cur_param;
    // clang-format off
    for (auto mode : {param::Convolution::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (size_t kernel_size : {3, 4, 5, 7}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2}) {
        if (kernel_size >= 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.format = param::Convolution::Format::NHWC;
        cur_param.sparse = param::Convolution::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, h, w, ic},
                          TensorShape{oc, f, f, ic});
    } } } } } } } } }
    // clang-format on
    cur_param.pad_h = cur_param.pad_w = 1;
    cur_param.stride_h = cur_param.stride_w = 1;
    args.emplace_back(cur_param, TensorShape{16, 8, 11, 16},
                      TensorShape{16, 3, 3, 16});
    return args;
}
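
// Runs every combination of mode / padding / stride / group / non-square /
// dilation / format / dtype / ksize, checking each available algorithm for
// the forward pass and, if test_backward is set, for backward data and
// backward filter as well.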
void convolution::test_conv_config_combinations(
        int k_size, Handle* handle, bool test_int8, bool test_backward,
        bool is_cuda, ConvEPSGetter eps_getter, bool use_io16xc32) {
    Checker<Convolution> checker(handle);
    std::unique_ptr<Checker<ConvolutionBackwardData>> checker_bwd_data_ptr;
    std::unique_ptr<Checker<ConvolutionBackwardFilter>> checker_bwd_filter_ptr;
    if (test_backward) {
        checker_bwd_data_ptr.reset(new std::remove_reference<
                                   decltype(*checker_bwd_data_ptr)>::type(handle));
        checker_bwd_filter_ptr.reset(
                new std::remove_reference<decltype(*checker_bwd_filter_ptr)>::type(
                        handle));
    }
    // only dereferenced when test_backward is set
    auto&& checker_bwd_data = *checker_bwd_data_ptr;
    auto&& checker_bwd_filter = *checker_bwd_filter_ptr;
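
// CONF_BOOL(var) expands to a loop over {0, 1}; chaining them below
// enumerates every combination of the boolean test configurations.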
#define CONF_BOOL(var) for (int var : {0, 1})

    std::unordered_set<Convolution::AlgorithmDesc> used_algos;
    std::unordered_set<ConvolutionBackwardData::AlgorithmDesc>
            used_algos_bwd_data;
    std::unordered_set<ConvolutionBackwardFilter::AlgorithmDesc>
            used_algos_bwd_flt;

    using Param = Convolution::Param;

    CONF_BOOL(conv)
    CONF_BOOL(padding)
    CONF_BOOL(stride)
    CONF_BOOL(group)
    CONF_BOOL(non_square)
    CONF_BOOL(dilation)
    CONF_BOOL(format)
    // dtype: 0: f32; 1: f16; 2: i8x8x16; 3: i8x8x32
    for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++dtype)
    for (int ksize : {1, k_size}) {
        // When is_cuda is on, test cases where format is NHWC and
        // data type is not INT8x8x32 are disabled.
        if (is_cuda) {
            if (format && dtype != 3)
                continue;
        }
        auto config2str = [&]() -> std::string {
            std::ostringstream ostr;
            ostr << conv << padding << stride << group << non_square << dilation
                 << format << dtype << ksize;
            return ostr.str();
        };
        auto errmsg = [&](const char* name) {
            std::string ret;
            ret += "checker failed for algorithm ";
            ret += name;
            ret += " with conv,padding,stride,group,non_square,dilation,format,"
                   "dtype,ksize=";
            ret += config2str();
            return ret;
        };
        MEGDNN_MARK_USED_VAR(errmsg);
        Param param;
        param.mode = conv ? Param::Mode::CONVOLUTION
                          : Param::Mode::CROSS_CORRELATION;
        param.format = format ? Param::Format::NHWC : Param::Format::NCHW;
        if (dtype == 1 && use_io16xc32) {
            param.compute_mode = Param::ComputeMode::FLOAT32;
        }
        size_t IC = 6, OC = 9, G = 3, FH = ksize, FW = ksize;
        TensorShape ishp = TensorShape{2, 18, 18, IC}, fshp;
        if (format) {
            // NHWC
            ishp.shape[0] = 2;
            ishp.shape[1] = 18;
            ishp.shape[2] = 18;
            ishp.shape[3] = IC;
        } else {
            // NCHW
            ishp.shape[0] = 2;
            ishp.shape[1] = IC;
            ishp.shape[2] = 18;
            ishp.shape[3] = 18;
        }
        if (padding) {
            param.pad_h = 2 + non_square;
            param.pad_w = 2 - non_square;
        }
        if (non_square) {
            if (FH > 2)
                FH -= 2;
            FW += 1;
            ++ishp[format ? 2 : 3];
        }
        if (group) {
            fshp = format ? TensorShape{G, OC / G, FH, FW, IC / G}
                          : TensorShape{G, OC / G, IC / G, FH, FW};
            param.sparse = Param::Sparse::GROUP;
        } else {
            fshp = format ? TensorShape{OC, FH, FW, IC}
                          : TensorShape{OC, IC, FH, FW};
        }
        if (dilation) {
            param.dilate_h = 2 - non_square;
            param.dilate_w = 2 + non_square;
        }
        if (stride) {
            param.stride_h = 2 + non_square;
            param.stride_w = 2 - non_square;
        }
        DType inp_type, out_type;
        if (dtype == 2) {
            inp_type = dtype::Int8();
            out_type = dtype::Int16();
        } else if (dtype == 3) {
            inp_type = dtype::Int8();
            out_type = dtype::Int32();
        } else {
            if (!dtype)
                inp_type = dtype::Float32();
            else
                inp_type = dtype::Float16();
            out_type = inp_type;
        }
        checker.set_dtype(0, inp_type)
                .set_dtype(1, inp_type)
                .set_dtype(2, out_type)
                .set_param(param);
        auto opr = checker.opr();
        opr->param() = param;
        std::string param_str;
        Algorithm::serialize_write_pod(opr->param(), param_str);
        TensorLayout ily{ishp, inp_type}, fly{fshp, inp_type}, oly;
        oly.dtype = out_type;
        opr->deduce_layout(ily, fly, oly);
        int channel_start = 1;
        if (format)
            channel_start = 3;
        float scale = 1.0f / sqrt(fshp[channel_start] * FH * FW);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_rng(0, &rng).set_rng(1, &rng);
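        // Run the forward check once under every algorithm reported for this
        // layout combination, resetting the execution policy afterwards.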
        for (auto algo : opr->get_all_algorithms_info(ily, fly, oly)) {
            used_algos.insert(algo.desc);
            opr->execution_policy().algo = algo.desc;
            construct_sub_execution_policy_heuristic<ConvolutionForward>(
                    opr->execution_policy(), {ily, fly, oly}, param_str,
                    opr->handle());
            checker.set_epsilon(
                           eps_getter(dtype == 1, 0, algo.desc.name.c_str()))
                    .execs({ishp, fshp, {}});
            opr->execution_policy() = {};
            ASSERT_TRUE(checker.prev_succ()) << errmsg(algo.desc.name.c_str());
        }
        if (test_backward) {
            // backward data
            checker_bwd_data.set_dtype(0, inp_type)
                    .set_dtype(1, out_type)
                    .set_dtype(2, inp_type)
                    .set_param(param);
            auto opr = checker_bwd_data.opr();
            opr->param() = param;
            std::string param_str;
            Algorithm::serialize_write_pod(opr->param(), param_str);
            for (auto algo : opr->get_all_algorithms_info(fly, oly, ily)) {
                used_algos_bwd_data.insert(algo.desc);
                opr->execution_policy().algo = algo.desc;
                construct_sub_execution_policy_heuristic<
                        ConvolutionBackwardData>(opr->execution_policy(),
                                                 {fly, oly, ily}, param_str,
                                                 opr->handle());
                checker_bwd_data
                        .set_epsilon(eps_getter(dtype == 1, 1,
                                                algo.desc.name.c_str()))
                        .execl({fly, oly, ily});
                opr->execution_policy() = {};
                ASSERT_TRUE(checker_bwd_data.prev_succ())
                        << errmsg(algo.desc.name.c_str());
            }
        }
        if (test_backward) {
            // backward filter
            checker_bwd_filter.set_dtype(0, inp_type)
                    .set_dtype(1, out_type)
                    .set_dtype(2, inp_type)
                    .set_param(param);
            auto opr = checker_bwd_filter.opr();
            opr->param() = param;
            std::string param_str;
            Algorithm::serialize_write_pod(opr->param(), param_str);
            for (auto algo : opr->get_all_algorithms_info(ily, oly, fly)) {
                used_algos_bwd_flt.insert(algo.desc);
                opr->execution_policy().algo = algo.desc;
                construct_sub_execution_policy_heuristic<
                        ConvolutionBackwardFilter>(opr->execution_policy(),
                                                   {ily, oly, fly}, param_str,
                                                   opr->handle());
                checker_bwd_filter
                        .set_epsilon(eps_getter(dtype == 1, 2,
                                                algo.desc.name.c_str()))
                        .execl({ily, oly, fly});
                opr->execution_policy() = {};
                ASSERT_TRUE(checker_bwd_filter.prev_succ())
                        << errmsg(algo.desc.name.c_str());
            }
        }
    }
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has GPU hardware installed along with its driver. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.