You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 21 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. /**
  2. * \file dnn/test/cuda/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/cuda/fixture.h"
  12. #include "test/common/checker.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/warp_perspective.h"
  15. #include "test/common/opr_proxy.h"
  16. #include "test/cuda/utils.h"
  17. namespace {
  18. using namespace megdnn;
  19. using namespace test;
  // RNG used by the "nan case" tests in this file. It fills the tensor as
  // consecutive groups of 9 float32 values, one group per entry of the first
  // dimension: each value is drawn from one of six fixed uniform ranges, then
  // entries 6..8 of every group are overwritten with constants.
  20. class NanMatRNG: public RNG {
  21. void gen(const TensorND &tensor_) override
  22. {
  23. auto &gen = RandomState::generator();
  // Six candidate ranges: around +2, +1, +0.5 and their negative counterparts.
  24. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  25. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  26. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  27. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  28. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  29. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  30. std::uniform_int_distribution<int> dice(0, 5);  // picks which of the six ranges to sample
  31. float *ptr = tensor_.ptr<dt_float32>();
  32. auto N = tensor_.layout.shape[0];  // number of 9-value groups to fill
  33. for (size_t n = 0; n < N; ++n) {
  34. for (size_t i = 0; i < 9; ++i) {
  35. switch (dice(gen)) {
  36. case 0:
  37. ptr[i] = pdist3(gen);
  38. break;
  39. case 1:
  40. ptr[i] = pdist(gen);
  41. break;
  42. case 2:
  43. ptr[i] = pdisth(gen);
  44. break;
  45. case 3:
  46. ptr[i] = ndist(gen);
  47. break;
  48. case 4:
  49. ptr[i] = ndist3(gen);
  50. break;
  51. case 5:
  52. ptr[i] = ndisth(gen);
  53. break;
  54. }
  55. }
  // The last three entries are fixed to (1, -1, 5), replacing the random draws.
  // NOTE(review): presumably this is the bottom row of a 3x3 perspective
  // matrix, chosen so the projective denominator can hit zero — confirm.
  56. ptr[6] = 1;
  57. ptr[7] = -1;
  58. ptr[8] = 5;
  59. ptr += 9;  // advance to the next 9-value group
  60. }
  61. }
  62. };
  63. } // anonymous namespace
  64. namespace megdnn {
  65. namespace test {
  66. // FIXME: the WARP_PERSPECTIVE_CV test below fails, so it is compiled out with #if 0
  67. #if 0
  // NHWC WarpPerspective test. This block sits inside the surrounding
  // #if 0 / #endif pair and is therefore not compiled.
  // It runs three groups of cases: a border-mode sweep with a local matrix
  // RNG, the argument list from warp_perspective::get_cv_args() in float32 and
  // uint8, and finally an all-zero-matrix case.
  68. TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
  69. //! format = NHWC
  70. Checker<WarpPerspective> checker(handle_cuda());
  71. param::WarpPerspective param;
  // Local RNG: same six-range sampling scheme as NanMatRNG but with wider
  // outer ranges (up to +/-3.1); every odd-indexed group additionally has
  // entries 1, 3, 6 and 7 zeroed.
  72. class ResizeMatRNG: public RNG {
  73. void gen(const TensorND &tensor_) override
  74. {
  75. auto &gen = RandomState::generator();
  76. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 3.1f);
  77. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  78. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  79. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  80. std::uniform_real_distribution<dt_float32> ndist3(-3.1f, -1.9f);
  81. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  82. std::uniform_int_distribution<int> dice(0, 5);  // picks which of the six ranges to sample
  83. float *ptr = tensor_.ptr<dt_float32>();
  84. auto N = tensor_.layout.shape[0];  // number of 9-value groups to fill
  85. for (size_t n = 0; n < N; ++n) {
  86. for (size_t i = 0; i < 9; ++i) {
  87. switch (dice(gen)) {
  88. case 0:
  89. ptr[i] = pdist3(gen);
  90. break;
  91. case 1:
  92. ptr[i] = pdist(gen);
  93. break;
  94. case 2:
  95. ptr[i] = pdisth(gen);
  96. break;
  97. case 3:
  98. ptr[i] = ndist(gen);
  99. break;
  100. case 4:
  101. ptr[i] = ndist3(gen);
  102. break;
  103. case 5:
  104. ptr[i] = ndisth(gen);
  105. break;
  106. }
  107. }
  108. // is resize?
  // Odd-indexed groups get entries 1, 3, 6 and 7 cleared.
  // NOTE(review): presumably this leaves a pure scale/translate (resize-like) matrix — confirm.
  109. if (n & 1) {
  110. ptr[1] = 0;
  111. ptr[3] = 0;
  112. ptr[6] = ptr[7] = 0;
  113. }
  114. ptr += 9;  // advance to the next 9-value group
  115. }
  116. }
  117. } rng;
  118. checker.set_rng(1, &rng);  // tensor 1 is the {N, 3, 3} operand in the exec calls below
  119. using BMode = param::WarpPerspective::BorderMode;
  120. param.format = param::WarpPerspective::Format::NHWC;
  121. // naive and cuda uses different algorithms and different border handling
  122. checker.set_epsilon(2.001).set_max_avg_error(4e-2);
  // Border-mode sweep with a fixed border value.
  123. for (auto mode: {BMode::REFLECT_101, BMode::REPLICATE, BMode::REFLECT,
  124. BMode::WRAP, BMode::CONSTANT})
  125. {
  126. param.bmode = mode;
  127. param.border_val = 1.737;
  128. checker.set_param(param);
  129. checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  130. }
  // Shared argument list, first with float32 data ...
  131. auto args = warp_perspective::get_cv_args();
  132. for (auto &&arg : args) {
  133. checker.set_param(arg.param)
  134. .set_dtype(0, dtype::Float32())
  135. .set_dtype(1, dtype::Float32())
  136. .set_dtype(2, dtype::Float32())
  137. .execs({arg.src, arg.trans, arg.dst});
  138. }
  // ... then with uint8 data and much looser tolerances.
  139. for (auto &&arg : args) {
  140. checker.set_param(arg.param)
  141. .set_epsilon(242.001)
  142. .set_max_avg_error(3.0)
  143. .set_dtype(0, dtype::Uint8())
  144. .set_dtype(1, dtype::Float32())
  145. .set_dtype(2, dtype::Uint8())
  146. .execs({arg.src, arg.trans, arg.dst});
  147. }
  148. // resize nan case
  149. UniformFloatRNG rng_zero(0, 0);  // both bounds are 0, so tensor 1 is presumably all zeros
  150. checker.set_rng(1, &rng_zero);
  151. {
  152. param.bmode = BMode::CONSTANT;
  153. param.border_val = 1.737;
  154. checker.set_param(param)
  155. .set_dtype(0, dtype::Float32())
  156. .set_dtype(1, dtype::Float32())
  157. .set_dtype(2, dtype::Float32());
  158. // no invalid mem access is enough; no need to check value
  159. checker.set_expect_exec_fail([](){});  // NOTE(review): empty callback; exact semantics live in checker.h — confirm
  160. checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  161. }
  162. }
  163. #endif
  // Forward test on the CUDA handle: sweeps four border modes with LINEAR
  // interpolation over NHWC and NCHW shapes, then runs the "nan" and
  // all-zero matrix cases.
  164. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD)
  165. {
  166. using Param = WarpPerspective::Param;
  167. Checker<WarpPerspectiveForward> checker(handle_cuda());
  168. WarpPerspectiveMatRNG rng;
  169. checker.set_rng(1, &rng);  // tensor 1 is the {N, 3, 3} operand in the execs below
  170. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  171. WarpPerspective::BorderMode::REFLECT,
  172. WarpPerspective::BorderMode::REPLICATE,
  173. WarpPerspective::BorderMode::CONSTANT})
  174. {
  175. WarpPerspective::Param param;
  176. param.border_val = 0.3f;
  177. param.bmode = bmode;
  178. param.imode = Param::InterpolationMode::LINEAR;
  179. param.format = Param::Format::NHWC;
  180. checker.set_param(param);
  // Looser tolerance for the 3-channel NHWC shapes; reset at the top of every iteration.
  181. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  182. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  183. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  // NOTE(review): epsilon is tightened here and not changed again in this
  // iteration, so it presumably also applies to the NCHW execs below — confirm
  // that Checker settings persist across execs.
  184. checker.set_epsilon(1e-3);
  185. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  186. param.format = Param::Format::NCHW;
  187. checker.set_param(param);
  188. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  189. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  190. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  191. }
  192. // nan case
  // Run once with NanMatRNG and once with an RNG whose bounds are both 0.
  193. NanMatRNG rng_nan;
  194. UniformFloatRNG rng_zero(0, 0);
  195. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  196. {
  197. param::WarpPerspective param;  // format is not assigned here, so it keeps the Param default
  198. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  199. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  200. checker.set_rng(1, rng);
  201. param.border_val = 1.737;
  202. checker.set_param(param);
  203. // no invalid mem access is enough; no need to check value
  204. checker.set_expect_exec_fail([](){});  // NOTE(review): empty callback; exact semantics live in checker.h — confirm
  205. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  206. }
  207. }
  // Forward NHWC test whose source tensor holds more than INT_MAX elements.
  // NOTE(review): presumably guards against 32-bit index overflow in the
  // kernel — confirm against the CUDA implementation.
  208. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX)
  209. {
  210. require_compute_capability(6, 0);  // NOTE(review): presumably skips the test on devices below CC 6.0 — confirm
  211. using Param = WarpPerspective::Param;
  212. Checker<WarpPerspectiveForward> checker(handle_cuda());
  213. WarpPerspectiveMatRNG rng;
  214. checker.set_rng(1, &rng);
  // Single-element list: only REPLICATE is exercised here.
  215. for (auto bmode: {WarpPerspective::BorderMode::REPLICATE})
  216. {
  217. WarpPerspective::Param param;
  218. param.border_val = 0.3f;
  219. param.bmode = bmode;
  220. param.imode = Param::InterpolationMode::LINEAR;
  221. param.format = Param::Format::NHWC;
  222. checker.set_param(param);
  223. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  // n is the largest batch count for which n * 512 * 512 * 3 still fits in
  // INT_MAX, so a batch of n + 1 pushes the source element count past it.
  224. size_t n = (INT_MAX) / (512 * 512 * 3);
  225. checker.execs(
  226. {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
  227. }
  228. }
  // Same case list as WARP_PERSPECTIVE_FORWARD, but tensors 0 and 2 are
  // Float16 while tensor 1 stays Float32, and the initial epsilon is 2.1.
  229. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16)
  230. {
  231. using Param = WarpPerspective::Param;
  232. Checker<WarpPerspectiveForward> checker(handle_cuda());
  233. WarpPerspectiveMatRNG rng;
  234. checker.set_rng(1, &rng);  // tensor 1 is the {N, 3, 3} operand in the execs below
  235. checker.set_dtype(0, dtype::Float16())
  236. .set_dtype(1, dtype::Float32())
  237. .set_dtype(2, dtype::Float16());
  238. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  239. WarpPerspective::BorderMode::REFLECT,
  240. WarpPerspective::BorderMode::REPLICATE,
  241. WarpPerspective::BorderMode::CONSTANT})
  242. {
  243. WarpPerspective::Param param;
  244. param.border_val = 0.3f;
  245. param.bmode = bmode;
  246. param.imode = Param::InterpolationMode::LINEAR;
  247. param.format = Param::Format::NHWC;
  248. checker.set_param(param);
  // Loose tolerance for the 3-channel NHWC shapes; reset at the top of every iteration.
  249. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  250. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  251. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  // NOTE(review): epsilon is tightened here and not changed again in this
  // iteration, so it presumably also applies to the NCHW execs below — confirm.
  252. checker.set_epsilon(1e-3);
  253. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  254. param.format = Param::Format::NCHW;
  255. checker.set_param(param);
  256. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  257. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  258. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  259. }
  260. // nan case
  // Run once with NanMatRNG and once with an RNG whose bounds are both 0.
  261. NanMatRNG rng_nan;
  262. UniformFloatRNG rng_zero(0, 0);
  263. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  264. {
  265. param::WarpPerspective param;  // format is not assigned here, so it keeps the Param default
  266. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  267. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  268. checker.set_rng(1, rng);
  269. param.border_val = 1.737;
  270. checker.set_param(param);
  271. // no invalid mem access is enough; no need to check value
  272. checker.set_expect_exec_fail([](){});  // NOTE(review): empty callback; exact semantics live in checker.h — confirm
  273. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  274. }
  275. }
  // NCHW4-format forward test with QuantizedS8(0.1f) data. The first part
  // sweeps four border modes over 5-D shapes with a trailing dimension of 4;
  // the second part uses a separate checker with a mat_idx operand.
  276. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4)
  277. {
  278. using Param = WarpPerspective::Param;
  279. WarpPerspective::Param param;  // function-scope: values set in the loop are still present in the block below
  280. Checker<WarpPerspectiveForward> checker(handle_cuda());
  281. WarpPerspectiveMatRNG rng;
  282. checker.set_rng(1, &rng);
  283. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  284. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  285. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  286. WarpPerspective::BorderMode::REFLECT,
  287. WarpPerspective::BorderMode::REPLICATE,
  288. WarpPerspective::BorderMode::CONSTANT}) {
  289. param.border_val = 0.3f;
  290. param.bmode = bmode;
  291. param.imode = Param::InterpolationMode::LINEAR;
  292. param.format = Param::Format::NCHW4;
  293. checker.set_param(param);
  294. checker.set_epsilon(1 + 1e-3);  // NOTE(review): presumably tolerates an off-by-one in the quantized value — confirm
  295. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  296. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  297. checker.execs(
  298. {{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  299. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  300. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  301. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  302. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  303. }
  // mat_idx variant: four tensors (source, matrices, Int32 indices, output).
  // This inner `checker` shadows the one declared above; `rng` and `param`
  // are the outer objects, so format is still NCHW4 from the loop.
  304. {
  305. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  306. handle_cuda());
  307. constexpr int N_SRC = 5;
  308. UniformIntRNG mat_idx_rng{0, N_SRC - 1};  // bounds match the source batch range 0..N_SRC-1
  309. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  310. checker.set_rng(1, &rng);
  311. checker.set_dtype(2, dtype::Int32());
  312. checker.set_rng(2, &mat_idx_rng);
  313. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  314. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  315. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  316. checker.set_param(param);
  317. checker.set_epsilon(1 + 1e-3);
  318. checker.execs(
  319. {{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  320. checker.execs({{N_SRC, 14, 17, 13, 4},
  321. {123, 3, 3},
  322. {123},
  323. {123, 14, 16, 15, 4}});
  324. }
  325. }
  // Delegates to warp_perspective::run_int8_test with the CUDA handle.
  // The helper is defined outside this file; its cases are not visible here.
  326. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  327. warp_perspective::run_int8_test(handle_cuda());
  328. }
  // Backward-data test on the CUDA handle: sweeps four border modes with
  // LINEAR interpolation, then runs the "nan" and all-zero matrix cases.
  // Here the {N, 3, 3} operand is tensor 0, so the matrix RNG is set on index 0.
  329. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA)
  330. {
  331. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  332. WarpPerspectiveMatRNG rng;
  333. checker.set_rng(0, &rng);
  334. for (int i = 0; i < 1; ++i) {  // runs exactly once; the bound can be raised to repeat the sweep
  335. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  336. WarpPerspective::BorderMode::REFLECT,
  337. WarpPerspective::BorderMode::REPLICATE,
  338. WarpPerspective::BorderMode::CONSTANT})
  339. {
  340. WarpPerspective::Param param;
  341. param.border_val = 0.3f;
  342. param.bmode = bmode;
  343. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  344. checker.set_param(param);
  345. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  346. checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  347. }
  348. }
  349. // nan case
  // Run once with NanMatRNG and once with an RNG whose bounds are both 0.
  350. NanMatRNG rng_nan;
  351. UniformFloatRNG rng_zero(0, 0);
  352. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  353. {
  354. param::WarpPerspective param;
  355. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  356. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  357. checker.set_rng(0, rng);
  358. param.border_val = 1.737;
  359. checker.set_param(param);
  360. // no invalid mem access is enough; no need to check value
  361. checker.set_expect_exec_fail([](){});  // NOTE(review): empty callback; exact semantics live in checker.h — confirm
  362. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  363. }
  364. }
  // Backward-mat test on the CUDA handle: four tensors per exec, with the
  // matrix RNG on tensor 1. Sweeps four border modes, then runs the "nan"
  // and all-zero matrix cases.
  365. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT)
  366. {
  367. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  368. WarpPerspectiveMatRNG rng;
  369. checker.set_rng(1, &rng);
  370. for (int i = 0; i < 1; ++i) {  // runs exactly once; the bound can be raised to repeat the sweep
  371. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  372. WarpPerspective::BorderMode::REFLECT,
  373. WarpPerspective::BorderMode::REPLICATE,
  374. WarpPerspective::BorderMode::CONSTANT})
  375. {
  376. WarpPerspective::Param param;
  377. param.border_val = 0.3f;
  378. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  379. param.bmode = bmode;
  380. checker.set_param(param);
  381. checker.set_epsilon(1e-2);
  382. checker.execs({
  383. {1000, 3, 11, 12}, {1000, 3, 3},
  384. {1000, 3, 10, 11}, {1000, 3, 3}
  385. });
  386. }
  387. }
  388. // nan case
  // Run once with NanMatRNG and once with an RNG whose bounds are both 0.
  389. NanMatRNG rng_nan;
  390. UniformFloatRNG rng_zero(0, 0);
  391. for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero})
  392. {
  393. param::WarpPerspective param;
  394. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  395. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  396. checker.set_rng(1, rng);
  397. param.border_val = 1.737;
  398. checker.set_param(param);
  399. // no invalid mem access is enough; no need to check value
  400. checker.set_expect_exec_fail([](){});  // NOTE(review): empty callback; exact semantics live in checker.h — confirm
  401. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3},
  402. {1000, 2, 12, 13}, {1000, 3, 3}});
  403. }
  404. }
  // Forward test with BFloat16 on tensors 0 and 2 and Float32 on tensor 1.
  // Sweeps four border modes over one NHWC shape and two NCHW shapes; there
  // is no nan case in this test.
  405. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16)
  406. {
  407. using Param = WarpPerspective::Param;
  408. Checker<WarpPerspectiveForward> checker(handle_cuda());
  409. WarpPerspectiveMatRNG rng;
  410. checker.set_rng(1, &rng);  // tensor 1 is the {N, 3, 3} operand in the execs below
  411. checker.set_dtype(0, dtype::BFloat16())
  412. .set_dtype(1, dtype::Float32())
  413. .set_dtype(2, dtype::BFloat16());
  414. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  415. WarpPerspective::BorderMode::REFLECT,
  416. WarpPerspective::BorderMode::REPLICATE,
  417. WarpPerspective::BorderMode::CONSTANT})
  418. {
  419. WarpPerspective::Param param;
  420. param.border_val = 0.3f;
  421. param.bmode = bmode;
  422. param.imode = Param::InterpolationMode::LINEAR;
  423. param.format = Param::Format::NHWC;
  424. checker.set_param(param);
  // Unlike the FP16 test, epsilon is never tightened, so 2.1 is the only tolerance set here.
  425. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  426. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  427. param.format = Param::Format::NCHW;
  428. checker.set_param(param);
  429. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  430. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  431. }
  432. }
  // Backward-data test with a Float32 tensor 0 (the {N, 3, 3} operand) and
  // BFloat16 tensors 1 and 2. Sweeps four border modes on a single small shape.
  433. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16)
  434. {
  435. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  436. WarpPerspectiveMatRNG rng;
  437. checker.set_rng(0, &rng)
  438. .set_epsilon(1e-1)
  439. .set_dtype(0, dtype::Float32())
  440. .set_dtype(1, dtype::BFloat16())
  441. .set_dtype(2, dtype::BFloat16());
  442. for (int i = 0; i < 1; ++i) {  // runs exactly once; the bound can be raised to repeat the sweep
  443. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  444. WarpPerspective::BorderMode::REFLECT,
  445. WarpPerspective::BorderMode::REPLICATE,
  446. WarpPerspective::BorderMode::CONSTANT})
  447. {
  448. WarpPerspective::Param param;
  449. param.border_val = 0.3f;
  450. param.bmode = bmode;
  451. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  452. checker.set_param(param);
  453. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  454. }
  455. }
  456. }
  // Backward-mat test with BFloat16 on tensors 0 and 2 and Float32 on the two
  // {N, 3, 3} tensors (1 and 3). Sweeps four border modes on a single shape.
  457. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16)
  458. {
  459. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  460. WarpPerspectiveMatRNG rng;
  461. checker.set_rng(1, &rng)
  462. .set_epsilon(1e-2)
  463. .set_dtype(0, dtype::BFloat16())
  464. .set_dtype(1, dtype::Float32())
  465. .set_dtype(2, dtype::BFloat16())
  466. .set_dtype(3, dtype::Float32());
  467. for (int i = 0; i < 1; ++i) {  // runs exactly once; the bound can be raised to repeat the sweep
  468. for (auto bmode: {WarpPerspective::BorderMode::WRAP,
  469. WarpPerspective::BorderMode::REFLECT,
  470. WarpPerspective::BorderMode::REPLICATE,
  471. WarpPerspective::BorderMode::CONSTANT})
  472. {
  473. WarpPerspective::Param param;
  474. param.border_val = 0.3f;
  475. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  476. param.bmode = bmode;
  477. checker.set_param(param);
  478. checker.execs({
  479. {1000, 3, 11, 12}, {1000, 3, 3},
  480. {1000, 3, 10, 11}, {1000, 3, 3}
  481. });
  482. }
  483. }
  484. }
  // Delegates to warp_perspective::run_mat_idx_test with the CUDA handle.
  // The helper is defined outside this file; its cases are not visible here.
  485. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  486. warp_perspective::run_mat_idx_test(handle_cuda());
  487. }
  488. #if MEGDNN_WITH_BENCHMARK
  // Benchmark (built only when MEGDNN_WITH_BENCHMARK is set): times
  // WarpPerspective on the same four shape pairs, first in NCHW with Int8 and
  // then in NCHW4 with QuantizedS8(1.0f), printing one line per run.
  489. TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
  490. Benchmarker<WarpPerspective> benchmarker(handle_cuda());
  491. using Param = param::WarpPerspective;
  492. WarpPerspectiveMatRNG rng;
  493. benchmarker.set_rng(1, &rng);
  494. Param param;
  // Helper: applies the current `param` (captured by reference, so format
  // changes made below take effect), runs one shape set and prints the result.
  495. auto run = [&benchmarker, &param](const megdnn::TensorShapeArray& shapes) {
  496. benchmarker.set_param(param);
  497. auto used = benchmarker.execs(shapes);  // printed as milliseconds below
  // The label is "NCHW4" for any format other than NCHW.
  // Both rates are derived from the output element count (shapes[2]); the
  // "/ used * 1e3" factor converts a per-millisecond figure to per-second.
  498. printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n",
  499. param.format == Param::Format::NCHW ? "NCHW" : "NCHW4",
  500. shapes[0].to_string().c_str(), shapes[2].to_string().c_str(),
  501. used,
  502. shapes[2].total_nr_elems() *
  503. (4.f + 1.f + shapes[1].total_nr_elems()) /
  504. (1024 * 1024 * 1024) / used * 1e3,
  505. shapes[2].total_nr_elems() * (4.f + 3.f) / (1024 * 1024 * 1024) /
  506. used * 1e3);
  507. };
  // NCHW runs with plain Int8 data.
  508. param.format = Param::Format::NCHW;
  509. benchmarker.set_dtype(0, dtype::Int8());
  510. benchmarker.set_dtype(2, dtype::Int8());
  511. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
  512. run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
  513. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
  514. run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});
  // NCHW4 runs: same sizes with the 100 channels expressed as 25 groups of 4.
  515. param.format = Param::Format::NCHW4;
  516. benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
  517. benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
  518. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
  519. run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1,25, 256, 256, 4}});
  520. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
  521. run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  522. }
  523. #endif
  524. } // namespace test
  525. } // namespace megdnn
  526. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台