You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 21 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. /**
  2. * \file dnn/test/cuda/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/cuda/fixture.h"
  12. #include "test/common/checker.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/warp_perspective.h"
  15. #include "test/common/opr_proxy.h"
  16. namespace {
  17. using namespace megdnn;
  18. using namespace test;
  19. class NanMatRNG: public RNG {
  20. void gen(const TensorND &tensor_) override
  21. {
  22. auto &gen = RandomState::generator();
  23. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  24. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  25. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  26. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  27. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  28. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  29. std::uniform_int_distribution<int> dice(0, 5);
  30. float *ptr = tensor_.ptr<dt_float32>();
  31. auto N = tensor_.layout.shape[0];
  32. for (size_t n = 0; n < N; ++n) {
  33. for (size_t i = 0; i < 9; ++i) {
  34. switch (dice(gen)) {
  35. case 0:
  36. ptr[i] = pdist3(gen);
  37. break;
  38. case 1:
  39. ptr[i] = pdist(gen);
  40. break;
  41. case 2:
  42. ptr[i] = pdisth(gen);
  43. break;
  44. case 3:
  45. ptr[i] = ndist(gen);
  46. break;
  47. case 4:
  48. ptr[i] = ndist3(gen);
  49. break;
  50. case 5:
  51. ptr[i] = ndisth(gen);
  52. break;
  53. }
  54. }
  55. ptr[6] = 1;
  56. ptr[7] = -1;
  57. ptr[8] = 5;
  58. ptr += 9;
  59. }
  60. }
  61. };
  62. } // anonymous namespace
  63. namespace megdnn {
  64. namespace test {
  65. // FIXME test WARP_PERSPECTIVE_CV failed here
  66. #if 0
  67. TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
  68. //! format = NHWC
  69. Checker<WarpPerspective> checker(handle_cuda());
  70. param::WarpPerspective param;
  71. class ResizeMatRNG: public RNG {
  72. void gen(const TensorND &tensor_) override
  73. {
  74. auto &gen = RandomState::generator();
  75. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 3.1f);
  76. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  77. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  78. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  79. std::uniform_real_distribution<dt_float32> ndist3(-3.1f, -1.9f);
  80. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  81. std::uniform_int_distribution<int> dice(0, 5);
  82. float *ptr = tensor_.ptr<dt_float32>();
  83. auto N = tensor_.layout.shape[0];
  84. for (size_t n = 0; n < N; ++n) {
  85. for (size_t i = 0; i < 9; ++i) {
  86. switch (dice(gen)) {
  87. case 0:
  88. ptr[i] = pdist3(gen);
  89. break;
  90. case 1:
  91. ptr[i] = pdist(gen);
  92. break;
  93. case 2:
  94. ptr[i] = pdisth(gen);
  95. break;
  96. case 3:
  97. ptr[i] = ndist(gen);
  98. break;
  99. case 4:
  100. ptr[i] = ndist3(gen);
  101. break;
  102. case 5:
  103. ptr[i] = ndisth(gen);
  104. break;
  105. }
  106. }
  107. // is resize?
  108. if (n & 1) {
  109. ptr[1] = 0;
  110. ptr[3] = 0;
  111. ptr[6] = ptr[7] = 0;
  112. }
  113. ptr += 9;
  114. }
  115. }
  116. } rng;
  117. checker.set_rng(1, &rng);
  118. using BMode = param::WarpPerspective::BorderMode;
  119. param.format = param::WarpPerspective::Format::NHWC;
  120. // naive and cuda uses different algorithms and different border handling
  121. checker.set_epsilon(2.001).set_max_avg_error(4e-2);
  122. for (auto mode: {BMode::REFLECT_101, BMode::REPLICATE, BMode::REFLECT,
  123. BMode::WRAP, BMode::CONSTANT})
  124. {
  125. param.bmode = mode;
  126. param.border_val = 1.737;
  127. checker.set_param(param);
  128. checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  129. }
  130. auto args = warp_perspective::get_cv_args();
  131. for (auto &&arg : args) {
  132. checker.set_param(arg.param)
  133. .set_dtype(0, dtype::Float32())
  134. .set_dtype(1, dtype::Float32())
  135. .set_dtype(2, dtype::Float32())
  136. .execs({arg.src, arg.trans, arg.dst});
  137. }
  138. for (auto &&arg : args) {
  139. checker.set_param(arg.param)
  140. .set_epsilon(242.001)
  141. .set_max_avg_error(3.0)
  142. .set_dtype(0, dtype::Uint8())
  143. .set_dtype(1, dtype::Float32())
  144. .set_dtype(2, dtype::Uint8())
  145. .execs({arg.src, arg.trans, arg.dst});
  146. }
  147. // resize nan case
  148. UniformFloatRNG rng_zero(0, 0);
  149. checker.set_rng(1, &rng_zero);
  150. {
  151. param.bmode = BMode::CONSTANT;
  152. param.border_val = 1.737;
  153. checker.set_param(param)
  154. .set_dtype(0, dtype::Float32())
  155. .set_dtype(1, dtype::Float32())
  156. .set_dtype(2, dtype::Float32());
  157. // no invalid mem access is enough; no need to check value
  158. checker.set_expect_exec_fail([](){});
  159. checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  160. }
  161. }
  162. #endif
  163. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD)
  164. {
  165. using Param = WarpPerspective::Param;
  166. Checker<WarpPerspectiveForward> checker(handle_cuda());
  167. WarpPerspectiveMatRNG rng;
  168. checker.set_rng(1, &rng);
  169. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  170. WarpPerspective::BorderMode::REFLECT,
  171. WarpPerspective::BorderMode::REPLICATE,
  172. WarpPerspective::BorderMode::CONSTANT})
  173. {
  174. WarpPerspective::Param param;
  175. param.border_val = 0.3f;
  176. param.bmode = bmode;
  177. param.imode = Param::InterpolationMode::LINEAR;
  178. param.format = Param::Format::NHWC;
  179. checker.set_param(param);
  180. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  181. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  182. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  183. checker.set_epsilon(1e-3);
  184. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  185. param.format = Param::Format::NCHW;
  186. checker.set_param(param);
  187. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  188. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  189. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  190. }
  191. // nan case
  192. NanMatRNG rng_nan;
  193. UniformFloatRNG rng_zero(0, 0);
  194. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  195. {
  196. param::WarpPerspective param;
  197. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  198. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  199. checker.set_rng(1, rng);
  200. param.border_val = 1.737;
  201. checker.set_param(param);
  202. // no invalid mem access is enough; no need to check value
  203. checker.set_expect_exec_fail([](){});
  204. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  205. }
  206. }
  207. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16)
  208. {
  209. using Param = WarpPerspective::Param;
  210. Checker<WarpPerspectiveForward> checker(handle_cuda());
  211. WarpPerspectiveMatRNG rng;
  212. checker.set_rng(1, &rng);
  213. checker.set_dtype(0, dtype::Float16())
  214. .set_dtype(1, dtype::Float32())
  215. .set_dtype(2, dtype::Float16());
  216. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  217. WarpPerspective::BorderMode::REFLECT,
  218. WarpPerspective::BorderMode::REPLICATE,
  219. WarpPerspective::BorderMode::CONSTANT})
  220. {
  221. WarpPerspective::Param param;
  222. param.border_val = 0.3f;
  223. param.bmode = bmode;
  224. param.imode = Param::InterpolationMode::LINEAR;
  225. param.format = Param::Format::NHWC;
  226. checker.set_param(param);
  227. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  228. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  229. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  230. checker.set_epsilon(1e-3);
  231. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  232. param.format = Param::Format::NCHW;
  233. checker.set_param(param);
  234. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  235. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  236. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  237. }
  238. // nan case
  239. NanMatRNG rng_nan;
  240. UniformFloatRNG rng_zero(0, 0);
  241. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  242. {
  243. param::WarpPerspective param;
  244. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  245. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  246. checker.set_rng(1, rng);
  247. param.border_val = 1.737;
  248. checker.set_param(param);
  249. // no invalid mem access is enough; no need to check value
  250. checker.set_expect_exec_fail([](){});
  251. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  252. }
  253. }
  254. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4)
  255. {
  256. using Param = WarpPerspective::Param;
  257. WarpPerspective::Param param;
  258. Checker<WarpPerspectiveForward> checker(handle_cuda());
  259. WarpPerspectiveMatRNG rng;
  260. checker.set_rng(1, &rng);
  261. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  262. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  263. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  264. WarpPerspective::BorderMode::REFLECT,
  265. WarpPerspective::BorderMode::REPLICATE,
  266. WarpPerspective::BorderMode::CONSTANT}) {
  267. param.border_val = 0.3f;
  268. param.bmode = bmode;
  269. param.imode = Param::InterpolationMode::LINEAR;
  270. param.format = Param::Format::NCHW4;
  271. checker.set_param(param);
  272. checker.set_epsilon(1 + 1e-3);
  273. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  274. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  275. checker.execs(
  276. {{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  277. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  278. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  279. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  280. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  281. }
  282. {
  283. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  284. handle_cuda());
  285. constexpr int N_SRC = 5;
  286. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  287. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  288. checker.set_rng(1, &rng);
  289. checker.set_dtype(2, dtype::Int32());
  290. checker.set_rng(2, &mat_idx_rng);
  291. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  292. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  293. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  294. checker.set_param(param);
  295. checker.set_epsilon(1 + 1e-3);
  296. checker.execs(
  297. {{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  298. checker.execs({{N_SRC, 14, 17, 13, 4},
  299. {123, 3, 3},
  300. {123},
  301. {123, 14, 16, 15, 4}});
  302. }
  303. }
  304. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  305. warp_perspective::run_int8_test(handle_cuda());
  306. }
  307. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA)
  308. {
  309. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  310. WarpPerspectiveMatRNG rng;
  311. checker.set_rng(0, &rng);
  312. for (int i = 0; i < 1; ++i) {
  313. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  314. WarpPerspective::BorderMode::REFLECT,
  315. WarpPerspective::BorderMode::REPLICATE,
  316. WarpPerspective::BorderMode::CONSTANT})
  317. {
  318. WarpPerspective::Param param;
  319. param.border_val = 0.3f;
  320. param.bmode = bmode;
  321. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  322. checker.set_param(param);
  323. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  324. checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  325. }
  326. }
  327. // nan case
  328. NanMatRNG rng_nan;
  329. UniformFloatRNG rng_zero(0, 0);
  330. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  331. {
  332. param::WarpPerspective param;
  333. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  334. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  335. checker.set_rng(0, rng);
  336. param.border_val = 1.737;
  337. checker.set_param(param);
  338. // no invalid mem access is enough; no need to check value
  339. checker.set_expect_exec_fail([](){});
  340. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  341. }
  342. }
  343. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT)
  344. {
  345. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  346. WarpPerspectiveMatRNG rng;
  347. checker.set_rng(1, &rng);
  348. for (int i = 0; i < 1; ++i) {
  349. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  350. WarpPerspective::BorderMode::REFLECT,
  351. WarpPerspective::BorderMode::REPLICATE,
  352. WarpPerspective::BorderMode::CONSTANT})
  353. {
  354. WarpPerspective::Param param;
  355. param.border_val = 0.3f;
  356. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  357. param.bmode = bmode;
  358. checker.set_param(param);
  359. checker.set_epsilon(1e-2);
  360. checker.execs({
  361. {1000, 3, 11, 12}, {1000, 3, 3},
  362. {1000, 3, 10, 11}, {1000, 3, 3}
  363. });
  364. }
  365. }
  366. // nan case
  367. NanMatRNG rng_nan;
  368. UniformFloatRNG rng_zero(0, 0);
  369. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  370. {
  371. param::WarpPerspective param;
  372. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  373. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  374. checker.set_rng(1, rng);
  375. param.border_val = 1.737;
  376. checker.set_param(param);
  377. // no invalid mem access is enough; no need to check value
  378. checker.set_expect_exec_fail([](){});
  379. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3},
  380. {1000, 2, 12, 13}, {1000, 3, 3}});
  381. }
  382. }
  383. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16)
  384. {
  385. using Param = WarpPerspective::Param;
  386. Checker<WarpPerspectiveForward> checker(handle_cuda());
  387. WarpPerspectiveMatRNG rng;
  388. checker.set_rng(1, &rng);
  389. checker.set_dtype(0, dtype::BFloat16())
  390. .set_dtype(1, dtype::Float32())
  391. .set_dtype(2, dtype::BFloat16());
  392. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  393. WarpPerspective::BorderMode::REFLECT,
  394. WarpPerspective::BorderMode::REPLICATE,
  395. WarpPerspective::BorderMode::CONSTANT})
  396. {
  397. WarpPerspective::Param param;
  398. param.border_val = 0.3f;
  399. param.bmode = bmode;
  400. param.imode = Param::InterpolationMode::LINEAR;
  401. param.format = Param::Format::NHWC;
  402. checker.set_param(param);
  403. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  404. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  405. param.format = Param::Format::NCHW;
  406. checker.set_param(param);
  407. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  408. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  409. }
  410. }
  411. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16)
  412. {
  413. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  414. WarpPerspectiveMatRNG rng;
  415. checker.set_rng(0, &rng)
  416. .set_epsilon(1e-1)
  417. .set_dtype(0, dtype::Float32())
  418. .set_dtype(1, dtype::BFloat16())
  419. .set_dtype(2, dtype::BFloat16());
  420. for (int i = 0; i < 1; ++i) {
  421. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  422. WarpPerspective::BorderMode::REFLECT,
  423. WarpPerspective::BorderMode::REPLICATE,
  424. WarpPerspective::BorderMode::CONSTANT})
  425. {
  426. WarpPerspective::Param param;
  427. param.border_val = 0.3f;
  428. param.bmode = bmode;
  429. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  430. checker.set_param(param);
  431. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  432. }
  433. }
  434. }
  435. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16)
  436. {
  437. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  438. WarpPerspectiveMatRNG rng;
  439. checker.set_rng(1, &rng)
  440. .set_epsilon(1e-2)
  441. .set_dtype(0, dtype::BFloat16())
  442. .set_dtype(1, dtype::Float32())
  443. .set_dtype(2, dtype::BFloat16())
  444. .set_dtype(3, dtype::Float32());
  445. for (int i = 0; i < 1; ++i) {
  446. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  447. WarpPerspective::BorderMode::REFLECT,
  448. WarpPerspective::BorderMode::REPLICATE,
  449. WarpPerspective::BorderMode::CONSTANT})
  450. {
  451. WarpPerspective::Param param;
  452. param.border_val = 0.3f;
  453. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  454. param.bmode = bmode;
  455. checker.set_param(param);
  456. checker.execs({
  457. {1000, 3, 11, 12}, {1000, 3, 3},
  458. {1000, 3, 10, 11}, {1000, 3, 3}
  459. });
  460. }
  461. }
  462. }
  463. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  464. warp_perspective::run_mat_idx_test(handle_cuda());
  465. }
  466. #if MEGDNN_WITH_BENCHMARK
  467. TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
  468. Benchmarker<WarpPerspective> benchmarker(handle_cuda());
  469. using Param = param::WarpPerspective;
  470. WarpPerspectiveMatRNG rng;
  471. benchmarker.set_rng(1, &rng);
  472. Param param;
  473. auto run = [&benchmarker, &param](const megdnn::TensorShapeArray& shapes) {
  474. benchmarker.set_param(param);
  475. auto used = benchmarker.execs(shapes);
  476. printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n",
  477. param.format == Param::Format::NCHW ? "NCHW" : "NCHW4",
  478. shapes[0].to_string().c_str(), shapes[2].to_string().c_str(),
  479. used,
  480. shapes[2].total_nr_elems() *
  481. (4.f + 1.f + shapes[1].total_nr_elems()) /
  482. (1024 * 1024 * 1024) / used * 1e3,
  483. shapes[2].total_nr_elems() * (4.f + 3.f) / (1024 * 1024 * 1024) /
  484. used * 1e3);
  485. };
  486. param.format = Param::Format::NCHW;
  487. benchmarker.set_dtype(0, dtype::Int8());
  488. benchmarker.set_dtype(2, dtype::Int8());
  489. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
  490. run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
  491. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
  492. run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});
  493. param.format = Param::Format::NCHW4;
  494. benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
  495. benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
  496. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
  497. run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1,25, 256, 256, 4}});
  498. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
  499. run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  500. }
  501. #endif
  502. } // namespace test
  503. } // namespace megdnn
  504. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台