
dnn/test/common/convolution.cpp

/**
 * \file dnn/test/common/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "src/common/algo_base.h"

#include <sstream>
#include <unordered_set>

using namespace megdnn;
using namespace test;
using namespace convolution;
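
// Each TestArg (see test/common/convolution.h) bundles a convolution Param
// with the source and filter TensorShapes of one test case; the generators
// below only build argument lists and do not run anything themselves.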
std::vector<TestArg> convolution::get_1x1_args() {
    std::vector<TestArg> args;
    param::Convolution param;
    param.mode = param::Convolution::Mode::CROSS_CORRELATION;
    // clang-format off
    for (size_t batch_size: {1, 8})
    for (size_t ic: {1, 16})
    for (size_t oc: {1, 16})
    for (size_t ih : {8, 32}) {
        size_t iw = ih;
        args.emplace_back(param, TensorShape{batch_size, ic, ih, iw},
                          TensorShape{oc, ic, 1, 1});
    }
    // clang-format on
    return args;
}
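
// With a 1x1 kernel, default stride and no padding, the output spatial size
// equals the input size and the convolution is effectively a per-pixel
// oc x ic matrix multiply, which many backends special-case.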
std::vector<TestArg> convolution::get_args_common() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
    }
    return args;
}

std::vector<TestArg> convolution::get_args_padding() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.pad_h = 1;
        param.pad_w = 2;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{5, 2, i, i + 1},
                          TensorShape{3, 2, 3, 4});
    }
    return args;
}

std::vector<TestArg> convolution::get_args_large_channel() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
    }
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.pad_h = 1;
        param.pad_w = 2;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 3, 4});
    }
    return args;
}

std::vector<TestArg> convolution::get_args_1x1() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 1, 1});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 20, i, i + 1},
                          TensorShape{30, 20, 1, 1});
    }
    return args;
}

std::vector<TestArg> convolution::get_args_large_filter() {
    std::vector<TestArg> args;
    for (size_t i = 16; i < 24; ++i) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CONVOLUTION;
        args.emplace_back(param, TensorShape{2, 2, i, i + 1},
                          TensorShape{3, 2, 7, 8});
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{2, 2, i, i + 1},
                          TensorShape{3, 2, 7, 8});
    }
    return args;
}
std::vector<TestArg> convolution::get_args_exhaustive_search() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t n: {1, 2})
    for (size_t ih: {11, 13})
    for (size_t iw: {ih+1})
    for (size_t ic: {3})
    for (size_t oc: {4})
    for (size_t fh: {3, 6})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {1, 2})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        args.emplace_back(param, TensorShape{n, ic, ih, iw},
                          TensorShape{oc, ic, fh, fw});
    }
    // clang-format on
    return args;
}

std::vector<TestArg> convolution::get_args_4x4() {
    std::vector<TestArg> args;
    for (size_t oh = 1; oh < 20; ++oh) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        args.emplace_back(param, TensorShape{4, 3, oh + 3, oh + 4},
                          TensorShape{2, 3, 4, 4});
    }
    return args;
}

std::vector<TestArg> convolution::get_args_large_channels() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t n: {2})
    for (size_t ih: {13})
    for (size_t iw: {ih+1})
    for (size_t ic: {32})
    for (size_t oc: {32})
    for (size_t fh: {3, 6})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {1, 2})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        args.emplace_back(param, TensorShape{n, ic, ih, iw},
                          TensorShape{oc, ic, fh, fw});
    }
    // clang-format on
    return args;
}
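
// In the x86 direct cases below, pad = ker_size / 2 with an odd kernel gives
// "same" output spatial size at stride 1 (and roughly half size at stride 2).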
std::vector<TestArg> convolution::get_args_x86_direct_case_2() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t stride: {1, 2})
    for (size_t ker_size : {3, 5, 7, 9}) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        param.stride_h = param.stride_w = stride;
        param.pad_h = param.pad_w = ker_size / 2;
        args.emplace_back(param, TensorShape{2, 2, 100, 99},
                          TensorShape{3, 2, ker_size, ker_size});
        args.emplace_back(param, TensorShape{2, 2, 100, 99},
                          TensorShape{1, 2, ker_size, ker_size});
    }
    // clang-format on
    return args;
}

std::vector<TestArg> convolution::get_args_fallback_templated_impl() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t sh: {1, 2})
    for (size_t sw: {1, 2})
    for (size_t ph: {0, 1, 2})
    for (size_t pw: {0, 1, 2})
    for (size_t ker_size: {3, 4, 5, 7})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = sh;
        param.stride_w = sw;
        param.pad_h = ph;
        param.pad_w = pw;
        args.emplace_back(param, TensorShape{2, 2, 50, 55},
                          TensorShape{3, 2, ker_size, ker_size});
        args.emplace_back(param, TensorShape{2, 2, 50, 55},
                          TensorShape{1, 2, ker_size, ker_size});
    }
    // clang-format on
    return args;
}

std::vector<TestArg> convolution::get_args_fallback_non_templated_impl() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t sh: {1, 2})
    for (size_t sw: {1, 2})
    for (size_t ph: {0, 1, 2})
    for (size_t pw: {0, 1, 2})
    for (size_t ker_size: {3, 4, 5, 7})
    for (bool xcorr : {false, true}) {
        param::Convolution param;
        param.mode = xcorr ? param::Convolution::Mode::CROSS_CORRELATION
                           : param::Convolution::Mode::CONVOLUTION;
        param.stride_h = sh;
        param.stride_w = sw;
        param.pad_h = ph;
        param.pad_w = pw;
        args.emplace_back(param, TensorShape{2, 2, 10, 55},
                          TensorShape{3, 2, ker_size, ker_size + 1});
        args.emplace_back(param, TensorShape{2, 2, 10, 55},
                          TensorShape{1, 2, ker_size, ker_size + 1});
    }
    // clang-format on
    return args;
}
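
// The brace initializers in the next two generators set param::Convolution
// fields positionally; per the declaration order they read as
// {mode, pad_h, pad_w, stride_h, stride_w} (noted here because positional
// braces are easy to misread).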
std::vector<TestArg> convolution::get_args_cudnn_5_1_failures() {
    std::vector<TestArg> args;
    args.emplace_back(
            param::Convolution{
                    param::Convolution::Mode::CROSS_CORRELATION, 0, 4, 1, 2},
            TensorShape{5, 3, 25, 20}, TensorShape{10, 3, 7, 4});
    return args;
}

std::vector<TestArg> convolution::get_args_x86_winograd_algorithm() {
    std::vector<TestArg> args;
    for (size_t ic_size : {8, 16}) {
        param::Convolution param;
        param.mode = param::Convolution::Mode::CROSS_CORRELATION;
        param.stride_h = param.stride_w = 1;
        param.pad_h = param.pad_w = 0;
        args.emplace_back(param, TensorShape{2, ic_size, 102, 102},
                          TensorShape{8, ic_size, 3, 3});
    }
    return args;
}

std::vector<TestArg> convolution::get_args_BRAIN_481() {
    std::vector<TestArg> args;
    {
        param::Convolution param{param::Convolution::Mode::CROSS_CORRELATION,
                                 0, 1, 1, 2};
        args.emplace_back(param, TensorShape{4, 4, 14, 13},
                          TensorShape{3, 4, 8, 13});
        for (size_t margin = 0; margin < 5; ++margin) {
            param::Convolution param{
                    param::Convolution::Mode::CROSS_CORRELATION, 1, 1, 2, 2};
            args.emplace_back(param, TensorShape{4, 4, 14, 13},
                              TensorShape{3, 4, 16 - margin, 15 - margin});
        }
    }
    return args;
}

std::vector<TestArg> convolution::get_args() {
    std::vector<TestArg> all_args, args;
#define ADD_ARGS(NAME)        \
    args = get_args_##NAME(); \
    all_args.insert(all_args.end(), args.begin(), args.end());
    ADD_ARGS(common)
    ADD_ARGS(padding)
    ADD_ARGS(large_channel)
    ADD_ARGS(1x1)
    ADD_ARGS(large_filter)
    ADD_ARGS(exhaustive_search)
    ADD_ARGS(4x4)
    ADD_ARGS(large_channels)
    ADD_ARGS(x86_direct_case_2)
    ADD_ARGS(fallback_templated_impl)
    ADD_ARGS(fallback_non_templated_impl)
    ADD_ARGS(cudnn_5_1_failures)
    ADD_ARGS(x86_winograd_algorithm)
    ADD_ARGS(BRAIN_481)
#undef ADD_ARGS
    return all_args;
}
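
// Note: get_args_cuda_conv_bwd_data() below reuses the same generators but
// omits the cudnn_5_1_failures and BRAIN_481 cases, while
// get_args_cudnn_7_5_failures() collects exactly those two.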
std::vector<TestArg> convolution::get_args_cuda_conv_bwd_data() {
    std::vector<TestArg> all_args, args;
#define ADD_ARGS(NAME)        \
    args = get_args_##NAME(); \
    all_args.insert(all_args.end(), args.begin(), args.end());
    ADD_ARGS(common)
    ADD_ARGS(padding)
    ADD_ARGS(large_channel)
    ADD_ARGS(1x1)
    ADD_ARGS(large_filter)
    ADD_ARGS(exhaustive_search)
    ADD_ARGS(4x4)
    ADD_ARGS(large_channels)
    ADD_ARGS(x86_direct_case_2)
    ADD_ARGS(fallback_templated_impl)
    ADD_ARGS(fallback_non_templated_impl)
    ADD_ARGS(x86_winograd_algorithm)
#undef ADD_ARGS
    return all_args;
}

std::vector<TestArg> convolution::get_args_cudnn_7_5_failures() {
    std::vector<TestArg> all_args, args;
#define ADD_ARGS(NAME)        \
    args = get_args_##NAME(); \
    all_args.insert(all_args.end(), args.begin(), args.end());
    ADD_ARGS(cudnn_5_1_failures)
    ADD_ARGS(BRAIN_481)
#undef ADD_ARGS
    return all_args;
}
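
// Channel-wise (depthwise) cases: with Sparse::GROUP and the group count
// equal to the channel count, the filter is 5-D
// {group, oc_per_group, ic_per_group, fh, fw} = {c, 1, 1, fh, fw}.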
std::vector<TestArg> convolution::get_chanwise_args() {
    std::vector<TestArg> args;
    // clang-format off
    for (size_t n: {2})
    for (size_t ih: {13})
    for (size_t iw: {ih+1})
    for (size_t c: {4, 36, 128, 320})
    for (size_t fh: {3, 5})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {1, 2})
    for (size_t dh : {1, 2}) {
        param::Convolution param;
        param.sparse = param::Convolution::Sparse::GROUP;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        param.dilate_h = param.dilate_w = dh;
        args.emplace_back(param, TensorShape{n, c, ih, iw},
                          TensorShape{c, 1, 1, fh, fw});
    }
    // clang-format on
    return args;
}
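
// Dilated cases: with dilation d the effective kernel extent is
// d * (f - 1) + 1, so the first case below (3x3 filter, dilate 2, pad 2,
// stride 1) covers a 5x5 window and keeps the 128x128 output resolution.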
std::vector<TestArg> convolution::get_dilated_args() {
    std::vector<TestArg> args;
    param::Convolution param;
    param.pad_h = param.pad_w = 2;
    param.dilate_h = param.dilate_w = 2;
    size_t n = 1, ic = 15, ih = 128, iw = 128, fh = 3, fw = 3, oc = 17;
    args.emplace_back(param, TensorShape{n, ic, ih, iw},
                      TensorShape{oc, ic, fh, fw});
    // exhaustive search
    // clang-format off
    for (size_t n: {2})
    for (size_t ih: {23})
    for (size_t iw: {ih+1})
    for (size_t ic: {3})
    for (size_t oc: {4})
    for (size_t fh: {3, 6})
    for (size_t fw: {fh+1})
    for (size_t ph: {0, 1})
    for (size_t sh: {2})
    for (size_t dh : {3}) {
        param::Convolution param;
        param.stride_h = param.stride_w = sh;
        param.pad_h = param.pad_w = ph;
        param.dilate_h = dh;
        param.dilate_w = 3;
        args.emplace_back(param, TensorShape{n, ic, ih, iw},
                          TensorShape{oc, ic, fh, fw});
    }
    // clang-format on
    return args;
}
void convolution::test_conv_config_combinations(
        int k_size, Handle* handle, bool test_int8, bool test_backward,
        bool is_cuda, ConvEPSGetter eps_getter, bool use_io16xc32) {
    Checker<Convolution> checker(handle);
    std::unique_ptr<Checker<ConvolutionBackwardData>> checker_bwd_data_ptr;
    std::unique_ptr<Checker<ConvolutionBackwardFilter>> checker_bwd_filter_ptr;
    if (test_backward) {
        checker_bwd_data_ptr.reset(
                new std::remove_reference<
                        decltype(*checker_bwd_data_ptr)>::type(handle));
        checker_bwd_filter_ptr.reset(
                new std::remove_reference<
                        decltype(*checker_bwd_filter_ptr)>::type(handle));
    }
    auto&& checker_bwd_data = *checker_bwd_data_ptr;
    auto&& checker_bwd_filter = *checker_bwd_filter_ptr;
#define CONF_BOOL(var) for (int var : {0, 1})
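    // CONF_BOOL(var) expands to a for-loop over {0, 1}, so the seven uses
    // below nest into 2^7 = 128 flag combinations; the dtype/ksize loops
    // multiply that further, and the loop body runs for every combination.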
    std::unordered_set<Convolution::AlgorithmDesc> used_algos;
    std::unordered_set<ConvolutionBackwardData::AlgorithmDesc>
            used_algos_bwd_data;
    std::unordered_set<ConvolutionBackwardFilter::AlgorithmDesc>
            used_algos_bwd_flt;
    using Param = Convolution::Param;
    CONF_BOOL(conv)
    CONF_BOOL(padding)
    CONF_BOOL(stride)
    CONF_BOOL(group)
    CONF_BOOL(non_square)
    CONF_BOOL(dilation)
    CONF_BOOL(format)
    // dtype: 0: f32; 1: f16; 2: i8x8x16; 3: i8x8x32
    for (int dtype = 0; dtype < (test_int8 ? 4 : 2); ++dtype)
    for (int ksize : {1, k_size}) {
        // when is_cuda is set, cases where the format is NHWC and the
        // data type is not INT8x8x32 are disabled
        if (is_cuda) {
            if (format && dtype != 3)
                continue;
        }
        auto config2str = [&]() -> std::string {
            std::ostringstream ostr;
            ostr << conv << padding << stride << group << non_square
                 << dilation << format << dtype << ksize;
            return ostr.str();
        };
        auto errmsg = [&](const char* name) {
            std::string ret;
            ret += "checker failed for algorithm ";
            ret += name;
            ret += " with conv,padding,stride,group,non_square,dilation,"
                   "format,dtype,ksize=";
            ret += config2str();
            return ret;
        };
        MEGDNN_MARK_USED_VAR(errmsg);
        Param param;
        param.mode = conv ? Param::Mode::CONVOLUTION
                          : Param::Mode::CROSS_CORRELATION;
        param.format = format ? Param::Format::NHWC : Param::Format::NCHW;
        if (dtype == 1 && use_io16xc32) {
            param.compute_mode = Param::ComputeMode::FLOAT32;
        }
        size_t IC = 6, OC = 9, G = 3, FH = ksize, FW = ksize;
        TensorShape ishp = TensorShape{2, 18, 18, IC}, fshp;
        if (format) {  // NHWC
            ishp.shape[0] = 2;
            ishp.shape[1] = 18;
            ishp.shape[2] = 18;
            ishp.shape[3] = IC;
        } else {  // NCHW
            ishp.shape[0] = 2;
            ishp.shape[1] = IC;
            ishp.shape[2] = 18;
            ishp.shape[3] = 18;
        }
        if (padding) {
            param.pad_h = 2 + non_square;
            param.pad_w = 2 - non_square;
        }
        if (non_square) {
            if (FH > 2)
                FH -= 2;
            FW += 1;
            ++ishp[format ? 2 : 3];
        }
        if (group) {
            fshp = format ? TensorShape{G, OC / G, FH, FW, IC / G}
                          : TensorShape{G, OC / G, IC / G, FH, FW};
            param.sparse = Param::Sparse::GROUP;
        } else {
            fshp = format ? TensorShape{OC, FH, FW, IC}
                          : TensorShape{OC, IC, FH, FW};
        }
        if (dilation) {
            param.dilate_h = 2 - non_square;
            param.dilate_w = 2 + non_square;
        }
        if (stride) {
            param.stride_h = 2 + non_square;
            param.stride_w = 2 - non_square;
        }
        DType inp_type, out_type;
        if (dtype == 2) {
            inp_type = dtype::Int8();
            out_type = dtype::Int16();
        } else if (dtype == 3) {
            inp_type = dtype::Int8();
            out_type = dtype::Int32();
        } else {
            if (!dtype)
                inp_type = dtype::Float32();
            else
                inp_type = dtype::Float16();
            out_type = inp_type;
        }
        checker.set_dtype(0, inp_type)
                .set_dtype(1, inp_type)
                .set_dtype(2, out_type)
                .set_param(param);
        auto opr = checker.opr();
        opr->param() = param;
        std::string param_str;
        Algorithm::serialize_write_pod(opr->param(), param_str);
        TensorLayout ily{ishp, inp_type}, fly{fshp, inp_type}, oly;
        oly.dtype = out_type;
        opr->deduce_layout(ily, fly, oly);
        int channel_start = 1;
        if (format)
            channel_start = 3;
        float scale = 1.0f / sqrt(fshp[channel_start] * FH * FW);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_rng(0, &rng).set_rng(1, &rng);
        // forward: run every available algorithm on this configuration
        for (auto algo : opr->get_all_algorithms_info(ily, fly, oly)) {
            used_algos.insert(algo.desc);
            opr->execution_policy().algo = algo.desc;
            construct_sub_execution_policy_heuristic<ConvolutionForward>(
                    opr->execution_policy(), {ily, fly, oly}, param_str,
                    opr->handle());
            checker.set_epsilon(eps_getter(dtype == 1, 0, algo.name.c_str()))
                    .execs({ishp, fshp, {}});
            opr->execution_policy() = {};
            ASSERT_TRUE(checker.prev_succ()) << errmsg(algo.name.c_str());
        }
        if (test_backward) {
            // backward data
            checker_bwd_data.set_dtype(0, inp_type)
                    .set_dtype(1, out_type)
                    .set_dtype(2, inp_type)
                    .set_param(param);
            auto opr = checker_bwd_data.opr();
            opr->param() = param;
            std::string param_str;
            Algorithm::serialize_write_pod(opr->param(), param_str);
            for (auto algo : opr->get_all_algorithms_info(fly, oly, ily)) {
                used_algos_bwd_data.insert(algo.desc);
                opr->execution_policy().algo = algo.desc;
                construct_sub_execution_policy_heuristic<
                        ConvolutionBackwardData>(
                        opr->execution_policy(), {fly, oly, ily}, param_str,
                        opr->handle());
                checker_bwd_data
                        .set_epsilon(
                                eps_getter(dtype == 1, 1, algo.name.c_str()))
                        .execl({fly, oly, ily});
                opr->execution_policy() = {};
                ASSERT_TRUE(checker_bwd_data.prev_succ())
                        << errmsg(algo.name.c_str());
            }
        }
        if (test_backward) {
            // backward filter
            checker_bwd_filter.set_dtype(0, inp_type)
                    .set_dtype(1, out_type)
                    .set_dtype(2, inp_type)
                    .set_param(param);
            auto opr = checker_bwd_filter.opr();
            opr->param() = param;
            std::string param_str;
            Algorithm::serialize_write_pod(opr->param(), param_str);
            for (auto algo : opr->get_all_algorithms_info(ily, oly, fly)) {
                used_algos_bwd_flt.insert(algo.desc);
                opr->execution_policy().algo = algo.desc;
                construct_sub_execution_policy_heuristic<
                        ConvolutionBackwardFilter>(
                        opr->execution_policy(), {ily, oly, fly}, param_str,
                        opr->handle());
                checker_bwd_filter
                        .set_epsilon(
                                eps_getter(dtype == 1, 2, algo.name.c_str()))
                        .execl({ily, oly, fly});
                opr->execution_policy() = {};
                ASSERT_TRUE(checker_bwd_filter.prev_succ())
                        << errmsg(algo.name.c_str());
            }
        }
    }
}

// vim: syntax=cpp.doxygen
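
For reference, backend test suites drive these helpers through the Checker shown above. A minimal sketch of such a consumer (the CUDA fixture, the handle_cuda() accessor, and the TestArg member names param/src/filter are assumptions taken from test/common/convolution.h and the surrounding test code, not from this file):

TEST_F(CUDA, CONVOLUTION_FORWARD) {
    using namespace megdnn;
    using namespace megdnn::test;
    Checker<Convolution> checker(handle_cuda());
    for (auto&& arg : convolution::get_args()) {
        // the output shape is deduced by the operator, hence the empty
        // third entry
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}});
    }
}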
