You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 24 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. /**
  2. * \file dnn/test/cuda/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/warp_perspective.h"
  16. #include "test/common/opr_proxy.h"
  17. #include "test/cuda/utils.h"
  18. namespace {
  19. using namespace megdnn;
  20. using namespace test;
  21. class NanMatRNG : public RNG {
  22. void gen(const TensorND& tensor_) override
  23. {
  24. auto& gen = RandomState::generator();
  25. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  26. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  27. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  28. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  29. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  30. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  31. std::uniform_int_distribution<int> dice(0, 5);
  32. float* ptr = tensor_.ptr<dt_float32>();
  33. auto N = tensor_.layout.shape[0];
  34. for (size_t n = 0; n < N; ++n) {
  35. for (size_t i = 0; i < 9; ++i) {
  36. switch (dice(gen)) {
  37. case 0:
  38. ptr[i] = pdist3(gen);
  39. break;
  40. case 1:
  41. ptr[i] = pdist(gen);
  42. break;
  43. case 2:
  44. ptr[i] = pdisth(gen);
  45. break;
  46. case 3:
  47. ptr[i] = ndist(gen);
  48. break;
  49. case 4:
  50. ptr[i] = ndist3(gen);
  51. break;
  52. case 5:
  53. ptr[i] = ndisth(gen);
  54. break;
  55. }
  56. }
  57. ptr[6] = 1;
  58. ptr[7] = -1;
  59. ptr[8] = 5;
  60. ptr += 9;
  61. }
  62. }
  63. };
  64. } // anonymous namespace
  65. namespace megdnn {
  66. namespace test {
  // FIXME: test WARP_PERSPECTIVE_CV failed here, so it is compiled out.
  #if 0
  TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
      //! format = NHWC
      using BMode = param::WarpPerspective::BorderMode;

      //! Generates one 3x3 float32 matrix per batch entry; every odd entry is
      //! additionally turned into a resize-like matrix by zeroing four entries.
      class ResizeMatRNG : public RNG {
          void gen(const TensorND& tensor_) override {
              using RealDist = std::uniform_real_distribution<dt_float32>;
              // Indexed by the die roll; the order is significant.
              RealDist candidates[] = {
                      RealDist(1.9f, 3.1f),   RealDist(0.9f, 1.1f),
                      RealDist(0.4f, 0.6f),   RealDist(-1.1f, -0.9f),
                      RealDist(-3.1f, -1.9f), RealDist(-0.6f, -0.4f)};
              std::uniform_int_distribution<int> pick(0, 5);

              auto& engine = RandomState::generator();
              float* mat = tensor_.ptr<dt_float32>();
              const auto batch = tensor_.layout.shape[0];
              for (size_t b = 0; b < batch; ++b, mat += 9) {
                  for (size_t k = 0; k < 9; ++k) {
                      const int which = pick(engine);
                      mat[k] = candidates[which](engine);
                  }
                  // is resize?
                  if (b & 1) {
                      mat[1] = 0;
                      mat[3] = 0;
                      mat[7] = 0;
                      mat[6] = 0;
                  }
              }
          }
      } rng;

      Checker<WarpPerspective> checker(handle_cuda());
      param::WarpPerspective param;
      checker.set_rng(1, &rng);
      param.format = param::WarpPerspective::Format::NHWC;

      // naive and cuda use different algorithms and different border handling
      checker.set_epsilon(2.001);
      checker.set_max_avg_error(4e-2);
      const BMode border_modes[] = {BMode::REFLECT_101, BMode::REPLICATE,
                                    BMode::REFLECT, BMode::WRAP,
                                    BMode::CONSTANT};
      for (const BMode border_mode : border_modes) {
          param.bmode = border_mode;
          param.border_val = 1.737;
          checker.set_param(param);
          checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
      }

      // Shared argument list, first run as float32 ...
      auto args = warp_perspective::get_cv_args();
      for (auto&& arg : args) {
          checker.set_param(arg.param)
                  .set_dtype(0, dtype::Float32())
                  .set_dtype(1, dtype::Float32())
                  .set_dtype(2, dtype::Float32())
                  .execs({arg.src, arg.trans, arg.dst});
      }
      // ... then as uint8 images with much looser tolerances.
      for (auto&& arg : args) {
          checker.set_param(arg.param)
                  .set_epsilon(242.001)
                  .set_max_avg_error(3.0)
                  .set_dtype(0, dtype::Uint8())
                  .set_dtype(1, dtype::Float32())
                  .set_dtype(2, dtype::Uint8())
                  .execs({arg.src, arg.trans, arg.dst});
      }

      // resize nan case: all-zero matrices
      UniformFloatRNG rng_zero(0, 0);
      checker.set_rng(1, &rng_zero);
      param.bmode = BMode::CONSTANT;
      param.border_val = 1.737;
      checker.set_param(param)
              .set_dtype(0, dtype::Float32())
              .set_dtype(1, dtype::Float32())
              .set_dtype(2, dtype::Float32());
      // no invalid mem access is enough; no need to check value
      checker.set_expect_exec_fail([]() {});
      checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  }
  #endif
  165. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) {
  166. using Param = WarpPerspective::Param;
  167. Checker<WarpPerspectiveForward> checker(handle_cuda());
  168. WarpPerspectiveMatRNG rng;
  169. checker.set_rng(1, &rng);
  170. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  171. WarpPerspective::BorderMode::REFLECT,
  172. WarpPerspective::BorderMode::REPLICATE,
  173. WarpPerspective::BorderMode::CONSTANT}) {
  174. WarpPerspective::Param param;
  175. param.border_val = 0.3f;
  176. param.bmode = bmode;
  177. param.imode = Param::InterpolationMode::LINEAR;
  178. param.format = Param::Format::NHWC;
  179. checker.set_param(param);
  180. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  181. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  182. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  183. checker.set_epsilon(1e-3);
  184. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  185. param.format = Param::Format::NCHW;
  186. checker.set_param(param);
  187. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  188. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  189. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  190. }
  191. // nan case
  192. NanMatRNG rng_nan;
  193. UniformFloatRNG rng_zero(0, 0);
  194. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  195. param::WarpPerspective param;
  196. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  197. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  198. checker.set_rng(1, rng);
  199. param.border_val = 1.737;
  200. checker.set_param(param);
  201. // no invalid mem access is enough; no need to check value
  202. checker.set_expect_exec_fail([]() {});
  203. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  204. }
  205. }
  206. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) {
  207. require_compute_capability(6, 0);
  208. using Param = WarpPerspective::Param;
  209. Checker<WarpPerspectiveForward> checker(handle_cuda());
  210. WarpPerspectiveMatRNG rng;
  211. checker.set_rng(1, &rng);
  212. for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) {
  213. WarpPerspective::Param param;
  214. param.border_val = 0.3f;
  215. param.bmode = bmode;
  216. param.imode = Param::InterpolationMode::LINEAR;
  217. param.format = Param::Format::NHWC;
  218. checker.set_param(param);
  219. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  220. size_t n = (INT_MAX) / (512 * 512 * 3);
  221. checker.execs(
  222. {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
  223. }
  224. }
  225. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) {
  226. using Param = WarpPerspective::Param;
  227. Checker<WarpPerspectiveForward> checker(handle_cuda());
  228. WarpPerspectiveMatRNG rng;
  229. checker.set_rng(1, &rng);
  230. checker.set_dtype(0, dtype::Float16())
  231. .set_dtype(1, dtype::Float32())
  232. .set_dtype(2, dtype::Float16());
  233. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  234. WarpPerspective::BorderMode::REFLECT,
  235. WarpPerspective::BorderMode::REPLICATE,
  236. WarpPerspective::BorderMode::CONSTANT}) {
  237. WarpPerspective::Param param;
  238. param.border_val = 0.3f;
  239. param.bmode = bmode;
  240. param.imode = Param::InterpolationMode::LINEAR;
  241. param.format = Param::Format::NHWC;
  242. checker.set_param(param);
  243. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  244. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  245. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  246. checker.set_epsilon(1e-3);
  247. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  248. param.format = Param::Format::NCHW;
  249. checker.set_param(param);
  250. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  251. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  252. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  253. }
  254. // nan case
  255. NanMatRNG rng_nan;
  256. UniformFloatRNG rng_zero(0, 0);
  257. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  258. param::WarpPerspective param;
  259. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  260. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  261. checker.set_rng(1, rng);
  262. param.border_val = 1.737;
  263. checker.set_param(param);
  264. // no invalid mem access is enough; no need to check value
  265. checker.set_expect_exec_fail([]() {});
  266. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  267. }
  268. }
  269. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) {
  270. using Param = WarpPerspective::Param;
  271. WarpPerspective::Param param;
  272. Checker<WarpPerspectiveForward> checker(handle_cuda());
  273. WarpPerspectiveMatRNG rng;
  274. checker.set_rng(1, &rng);
  275. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  276. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  277. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  278. WarpPerspective::BorderMode::REFLECT,
  279. WarpPerspective::BorderMode::REPLICATE,
  280. WarpPerspective::BorderMode::CONSTANT}) {
  281. param.border_val = 0.3f;
  282. param.bmode = bmode;
  283. param.imode = Param::InterpolationMode::LINEAR;
  284. param.format = Param::Format::NCHW4;
  285. checker.set_param(param);
  286. checker.set_epsilon(1 + 1e-3);
  287. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  288. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  289. checker.execs(
  290. {{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  291. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  292. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  293. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  294. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  295. }
  296. {
  297. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  298. handle_cuda());
  299. constexpr int N_SRC = 5;
  300. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  301. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  302. checker.set_rng(1, &rng);
  303. checker.set_dtype(2, dtype::Int32());
  304. checker.set_rng(2, &mat_idx_rng);
  305. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  306. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  307. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  308. checker.set_param(param);
  309. checker.set_epsilon(1 + 1e-3);
  310. checker.execs(
  311. {{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  312. checker.execs({{N_SRC, 14, 17, 13, 4},
  313. {123, 3, 3},
  314. {123},
  315. {123, 14, 16, 15, 4}});
  316. }
  317. }
  318. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  319. warp_perspective::run_int8_test(handle_cuda());
  320. }
  321. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) {
  322. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  323. WarpPerspectiveMatRNG rng;
  324. checker.set_rng(0, &rng);
  325. for (int i = 0; i < 1; ++i) {
  326. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  327. WarpPerspective::BorderMode::REFLECT,
  328. WarpPerspective::BorderMode::REPLICATE,
  329. WarpPerspective::BorderMode::CONSTANT}) {
  330. WarpPerspective::Param param;
  331. param.border_val = 0.3f;
  332. param.bmode = bmode;
  333. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  334. checker.set_param(param);
  335. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  336. checker.execs(
  337. {{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  338. }
  339. }
  340. // nan case
  341. NanMatRNG rng_nan;
  342. UniformFloatRNG rng_zero(0, 0);
  343. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  344. param::WarpPerspective param;
  345. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  346. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  347. checker.set_rng(0, rng);
  348. param.border_val = 1.737;
  349. checker.set_param(param);
  350. // no invalid mem access is enough; no need to check value
  351. checker.set_expect_exec_fail([]() {});
  352. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  353. }
  354. {
  355. Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy>
  356. checker(handle_cuda());
  357. constexpr int N_SRC = 5;
  358. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  359. checker.set_rng(0, &rng);
  360. checker.set_dtype(1, dtype::Int32());
  361. checker.set_rng(1, &mat_idx_rng);
  362. param::WarpPerspective param;
  363. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  364. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  365. checker.set_param(param);
  366. checker.set_epsilon(1 + 1e-3);
  367. checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});
  368. checker.execs(
  369. {{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}});
  370. }
  371. }
  372. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) {
  373. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  374. WarpPerspectiveMatRNG rng;
  375. checker.set_rng(1, &rng);
  376. for (int i = 0; i < 1; ++i) {
  377. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  378. WarpPerspective::BorderMode::REFLECT,
  379. WarpPerspective::BorderMode::REPLICATE,
  380. WarpPerspective::BorderMode::CONSTANT}) {
  381. WarpPerspective::Param param;
  382. param.border_val = 0.3f;
  383. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  384. param.bmode = bmode;
  385. checker.set_param(param);
  386. checker.set_epsilon(1e-2);
  387. checker.execs({{1000, 3, 11, 12},
  388. {1000, 3, 3},
  389. {1000, 3, 10, 11},
  390. {1000, 3, 3}});
  391. }
  392. }
  393. // nan case
  394. NanMatRNG rng_nan;
  395. UniformFloatRNG rng_zero(0, 0);
  396. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  397. param::WarpPerspective param;
  398. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  399. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  400. checker.set_rng(1, rng);
  401. param.border_val = 1.737;
  402. checker.set_param(param);
  403. // no invalid mem access is enough; no need to check value
  404. checker.set_expect_exec_fail([]() {});
  405. checker.exec({{1000, 2, 10, 11},
  406. {1000, 3, 3},
  407. {1000, 2, 12, 13},
  408. {1000, 3, 3}});
  409. }
  410. {
  411. Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker(
  412. handle_cuda());
  413. constexpr int N_SRC = 5;
  414. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  415. checker.set_rng(1, &rng);
  416. checker.set_dtype(2, dtype::Int32());
  417. checker.set_rng(2, &mat_idx_rng);
  418. param::WarpPerspective param;
  419. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  420. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  421. checker.set_param(param);
  422. checker.set_epsilon(1 + 1e-3);
  423. checker.execs({{N_SRC, 12, 10, 11},
  424. {2, 3, 3},
  425. {2},
  426. {2, 12, 11, 12},
  427. {2, 3, 3}});
  428. checker.execs({{N_SRC, 56, 17, 13},
  429. {123, 3, 3},
  430. {123},
  431. {123, 56, 16, 15},
  432. {123, 3, 3}});
  433. }
  434. }
  435. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) {
  436. using Param = WarpPerspective::Param;
  437. Checker<WarpPerspectiveForward> checker(handle_cuda());
  438. WarpPerspectiveMatRNG rng;
  439. checker.set_rng(1, &rng);
  440. checker.set_dtype(0, dtype::BFloat16())
  441. .set_dtype(1, dtype::Float32())
  442. .set_dtype(2, dtype::BFloat16());
  443. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  444. WarpPerspective::BorderMode::REFLECT,
  445. WarpPerspective::BorderMode::REPLICATE,
  446. WarpPerspective::BorderMode::CONSTANT}) {
  447. WarpPerspective::Param param;
  448. param.border_val = 0.3f;
  449. param.bmode = bmode;
  450. param.imode = Param::InterpolationMode::LINEAR;
  451. param.format = Param::Format::NHWC;
  452. checker.set_param(param);
  453. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  454. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  455. param.format = Param::Format::NCHW;
  456. checker.set_param(param);
  457. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  458. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  459. }
  460. }
  461. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
  462. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  463. WarpPerspectiveMatRNG rng;
  464. checker.set_rng(0, &rng)
  465. .set_epsilon(1e-1)
  466. .set_dtype(0, dtype::Float32())
  467. .set_dtype(1, dtype::BFloat16())
  468. .set_dtype(2, dtype::BFloat16());
  469. for (int i = 0; i < 1; ++i) {
  470. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  471. WarpPerspective::BorderMode::REFLECT,
  472. WarpPerspective::BorderMode::REPLICATE,
  473. WarpPerspective::BorderMode::CONSTANT}) {
  474. WarpPerspective::Param param;
  475. param.border_val = 0.3f;
  476. param.bmode = bmode;
  477. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  478. checker.set_param(param);
  479. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  480. }
  481. }
  482. }
  483. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
  484. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  485. WarpPerspectiveMatRNG rng;
  486. checker.set_rng(1, &rng)
  487. .set_epsilon(1e-2)
  488. .set_dtype(0, dtype::BFloat16())
  489. .set_dtype(1, dtype::Float32())
  490. .set_dtype(2, dtype::BFloat16())
  491. .set_dtype(3, dtype::Float32());
  492. for (int i = 0; i < 1; ++i) {
  493. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  494. WarpPerspective::BorderMode::REFLECT,
  495. WarpPerspective::BorderMode::REPLICATE,
  496. WarpPerspective::BorderMode::CONSTANT}) {
  497. WarpPerspective::Param param;
  498. param.border_val = 0.3f;
  499. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  500. param.bmode = bmode;
  501. checker.set_param(param);
  502. checker.execs({{1000, 3, 11, 12},
  503. {1000, 3, 3},
  504. {1000, 3, 10, 11},
  505. {1000, 3, 3}});
  506. }
  507. }
  508. }
  509. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  510. warp_perspective::run_mat_idx_test(handle_cuda());
  511. }
  #if MEGDNN_WITH_BENCHMARK
  /*!
   * Benchmarks forward warp perspective on CUDA for Int8 NCHW against
   * QuantizedS8 NCHW4 on matching shapes, printing the time and two derived
   * throughput figures for every run.
   */
  TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
      using Param = param::WarpPerspective;

      Benchmarker<WarpPerspective> benchmarker(handle_cuda());
      WarpPerspectiveMatRNG mat_rng;
      benchmarker.set_rng(1, &mat_rng);

      Param param;
      // Executes one shape set with the current `param` and reports the result.
      auto run = [&](const megdnn::TensorShapeArray& shapes) {
          benchmarker.set_param(param);
          auto used = benchmarker.execs(shapes);
          const char* format_name =
                  param.format == Param::Format::NCHW ? "NCHW" : "NCHW4";
          const auto gbps = shapes[2].total_nr_elems() *
                            (4.f + 1.f + shapes[1].total_nr_elems()) /
                            (1024 * 1024 * 1024) / used * 1e3;
          const auto gflops = shapes[2].total_nr_elems() * (4.f + 3.f) /
                              (1024 * 1024 * 1024) / used * 1e3;
          printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n",
                 format_name, shapes[0].to_string().c_str(),
                 shapes[2].to_string().c_str(), used, gbps, gflops);
      };

      // Plain NCHW with Int8 data.
      param.format = Param::Format::NCHW;
      benchmarker.set_dtype(0, dtype::Int8());
      benchmarker.set_dtype(2, dtype::Int8());
      run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
      run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
      run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
      run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});

      // Same element counts in NCHW4 with QuantizedS8 data.
      param.format = Param::Format::NCHW4;
      benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
      benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
      run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
      run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
      run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
      run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  }
  #endif
  548. } // namespace test
  549. } // namespace megdnn
  550. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台