You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 33 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814
  1. /**
  2. * \file dnn/test/cuda/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/warp_perspective.h"
  16. #include "test/common/opr_proxy.h"
  17. #include "test/cuda/utils.h"
  18. namespace {
  19. using namespace megdnn;
  20. using namespace test;
  21. class NanMatRNG : public RNG {
  22. void gen(const TensorND& tensor_) override {
  23. auto& gen = RandomState::generator();
  24. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  25. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  26. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  27. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  28. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  29. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  30. std::uniform_int_distribution<int> dice(0, 5);
  31. float* ptr = tensor_.ptr<dt_float32>();
  32. auto N = tensor_.layout.shape[0];
  33. for (size_t n = 0; n < N; ++n) {
  34. for (size_t i = 0; i < 9; ++i) {
  35. switch (dice(gen)) {
  36. case 0:
  37. ptr[i] = pdist3(gen);
  38. break;
  39. case 1:
  40. ptr[i] = pdist(gen);
  41. break;
  42. case 2:
  43. ptr[i] = pdisth(gen);
  44. break;
  45. case 3:
  46. ptr[i] = ndist(gen);
  47. break;
  48. case 4:
  49. ptr[i] = ndist3(gen);
  50. break;
  51. case 5:
  52. ptr[i] = ndisth(gen);
  53. break;
  54. }
  55. }
  56. ptr[6] = 1;
  57. ptr[7] = -1;
  58. ptr[8] = 5;
  59. ptr += 9;
  60. }
  61. }
  62. };
  63. } // anonymous namespace
  64. namespace megdnn {
  65. namespace test {
  66. // FIXME test WARP_PERSPECTIVE_CV failed here
  67. #if 0
  68. TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
  69. //! format = NHWC
  70. Checker<WarpPerspective> checker(handle_cuda());
  71. param::WarpPerspective param;
  72. class ResizeMatRNG: public RNG {
  73. void gen(const TensorND &tensor_) override
  74. {
  75. auto &gen = RandomState::generator();
  76. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 3.1f);
  77. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  78. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  79. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  80. std::uniform_real_distribution<dt_float32> ndist3(-3.1f, -1.9f);
  81. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  82. std::uniform_int_distribution<int> dice(0, 5);
  83. float *ptr = tensor_.ptr<dt_float32>();
  84. auto N = tensor_.layout.shape[0];
  85. for (size_t n = 0; n < N; ++n) {
  86. for (size_t i = 0; i < 9; ++i) {
  87. switch (dice(gen)) {
  88. case 0:
  89. ptr[i] = pdist3(gen);
  90. break;
  91. case 1:
  92. ptr[i] = pdist(gen);
  93. break;
  94. case 2:
  95. ptr[i] = pdisth(gen);
  96. break;
  97. case 3:
  98. ptr[i] = ndist(gen);
  99. break;
  100. case 4:
  101. ptr[i] = ndist3(gen);
  102. break;
  103. case 5:
  104. ptr[i] = ndisth(gen);
  105. break;
  106. }
  107. }
  108. // is resize?
  109. if (n & 1) {
  110. ptr[1] = 0;
  111. ptr[3] = 0;
  112. ptr[6] = ptr[7] = 0;
  113. }
  114. ptr += 9;
  115. }
  116. }
  117. } rng;
  118. checker.set_rng(1, &rng);
  119. using BMode = param::WarpPerspective::BorderMode;
  120. param.format = param::WarpPerspective::Format::NHWC;
  121. // naive and cuda uses different algorithms and different border handling
  122. checker.set_epsilon(2.001).set_max_avg_error(4e-2);
  123. for (auto mode: {BMode::REFLECT_101, BMode::REPLICATE, BMode::REFLECT,
  124. BMode::WRAP, BMode::CONSTANT})
  125. {
  126. param.bmode = mode;
  127. param.border_val = 1.737;
  128. checker.set_param(param);
  129. checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  130. }
  131. auto args = warp_perspective::get_cv_args();
  132. for (auto &&arg : args) {
  133. checker.set_param(arg.param)
  134. .set_dtype(0, dtype::Float32())
  135. .set_dtype(1, dtype::Float32())
  136. .set_dtype(2, dtype::Float32())
  137. .execs({arg.src, arg.trans, arg.dst});
  138. }
  139. for (auto &&arg : args) {
  140. checker.set_param(arg.param)
  141. .set_epsilon(242.001)
  142. .set_max_avg_error(3.0)
  143. .set_dtype(0, dtype::Uint8())
  144. .set_dtype(1, dtype::Float32())
  145. .set_dtype(2, dtype::Uint8())
  146. .execs({arg.src, arg.trans, arg.dst});
  147. }
  148. // resize nan case
  149. UniformFloatRNG rng_zero(0, 0);
  150. checker.set_rng(1, &rng_zero);
  151. {
  152. param.bmode = BMode::CONSTANT;
  153. param.border_val = 1.737;
  154. checker.set_param(param)
  155. .set_dtype(0, dtype::Float32())
  156. .set_dtype(1, dtype::Float32())
  157. .set_dtype(2, dtype::Float32());
  158. // no invalid mem access is enough; no need to check value
  159. checker.set_expect_exec_fail([](){});
  160. checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
  161. }
  162. }
  163. #endif
  164. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) {
  165. using Param = WarpPerspective::Param;
  166. Checker<WarpPerspectiveForward> checker(handle_cuda());
  167. WarpPerspectiveMatRNG rng;
  168. checker.set_rng(1, &rng);
  169. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  170. WarpPerspective::BorderMode::REFLECT,
  171. WarpPerspective::BorderMode::REPLICATE,
  172. WarpPerspective::BorderMode::CONSTANT}) {
  173. WarpPerspective::Param param;
  174. param.border_val = 0.3f;
  175. param.bmode = bmode;
  176. param.imode = Param::InterpolationMode::LINEAR;
  177. param.format = Param::Format::NHWC;
  178. checker.set_param(param);
  179. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  180. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  181. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  182. checker.set_epsilon(1e-3);
  183. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  184. param.format = Param::Format::NCHW;
  185. checker.set_param(param);
  186. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  187. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  188. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  189. }
  190. // nan case
  191. NanMatRNG rng_nan;
  192. UniformFloatRNG rng_zero(0, 0);
  193. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  194. param::WarpPerspective param;
  195. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  196. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  197. checker.set_rng(1, rng);
  198. param.border_val = 1.737;
  199. checker.set_param(param);
  200. // no invalid mem access is enough; no need to check value
  201. checker.set_expect_exec_fail([]() {});
  202. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  203. }
  204. }
  205. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) {
  206. require_compute_capability(6, 0);
  207. using Param = WarpPerspective::Param;
  208. Checker<WarpPerspectiveForward> checker(handle_cuda());
  209. WarpPerspectiveMatRNG rng;
  210. checker.set_rng(1, &rng);
  211. for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) {
  212. WarpPerspective::Param param;
  213. param.border_val = 0.3f;
  214. param.bmode = bmode;
  215. param.imode = Param::InterpolationMode::LINEAR;
  216. param.format = Param::Format::NHWC;
  217. checker.set_param(param);
  218. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  219. size_t n = (INT_MAX) / (512 * 512 * 3);
  220. checker.execs(
  221. {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
  222. }
  223. }
  224. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) {
  225. using Param = WarpPerspective::Param;
  226. Checker<WarpPerspectiveForward> checker(handle_cuda());
  227. WarpPerspectiveMatRNG rng;
  228. checker.set_rng(1, &rng);
  229. checker.set_dtype(0, dtype::Float16())
  230. .set_dtype(1, dtype::Float32())
  231. .set_dtype(2, dtype::Float16());
  232. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  233. WarpPerspective::BorderMode::REFLECT,
  234. WarpPerspective::BorderMode::REPLICATE,
  235. WarpPerspective::BorderMode::CONSTANT}) {
  236. WarpPerspective::Param param;
  237. param.border_val = 0.3f;
  238. param.bmode = bmode;
  239. param.imode = Param::InterpolationMode::LINEAR;
  240. param.format = Param::Format::NHWC;
  241. checker.set_param(param);
  242. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  243. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  244. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  245. checker.set_epsilon(1e-3);
  246. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  247. param.format = Param::Format::NCHW;
  248. checker.set_param(param);
  249. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  250. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  251. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  252. }
  253. // nan case
  254. NanMatRNG rng_nan;
  255. UniformFloatRNG rng_zero(0, 0);
  256. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  257. param::WarpPerspective param;
  258. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  259. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  260. checker.set_rng(1, rng);
  261. param.border_val = 1.737;
  262. checker.set_param(param);
  263. // no invalid mem access is enough; no need to check value
  264. checker.set_expect_exec_fail([]() {});
  265. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  266. }
  267. }
  268. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) {
  269. using Param = WarpPerspective::Param;
  270. WarpPerspective::Param param;
  271. Checker<WarpPerspectiveForward> checker(handle_cuda());
  272. WarpPerspectiveMatRNG rng;
  273. checker.set_rng(1, &rng);
  274. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  275. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  276. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  277. WarpPerspective::BorderMode::REFLECT,
  278. WarpPerspective::BorderMode::REPLICATE,
  279. WarpPerspective::BorderMode::CONSTANT}) {
  280. param.border_val = 0.3f;
  281. param.bmode = bmode;
  282. param.imode = Param::InterpolationMode::LINEAR;
  283. param.format = Param::Format::NCHW4;
  284. checker.set_param(param);
  285. checker.set_epsilon(1 + 1e-3);
  286. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  287. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  288. checker.execs(
  289. {{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  290. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  291. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  292. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  293. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  294. }
  295. {
  296. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  297. handle_cuda());
  298. constexpr int N_SRC = 5;
  299. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  300. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  301. checker.set_rng(1, &rng);
  302. checker.set_dtype(2, dtype::Int32());
  303. checker.set_rng(2, &mat_idx_rng);
  304. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  305. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  306. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  307. checker.set_param(param);
  308. checker.set_epsilon(1 + 1e-3);
  309. checker.execs(
  310. {{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  311. checker.execs({{N_SRC, 14, 17, 13, 4},
  312. {123, 3, 3},
  313. {123},
  314. {123, 14, 16, 15, 4}});
  315. }
  316. }
  317. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW_NCHW4_IC_SMALL) {
  318. using Param = WarpPerspective::Param;
  319. WarpPerspective::Param param;
  320. Checker<WarpPerspectiveForward> checker(handle_cuda());
  321. WarpPerspectiveMatRNG rng;
  322. param.format = Param::Format::NCHW_NCHW4_IC_SMALL;
  323. checker.set_rng(1, &rng);
  324. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  325. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  326. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  327. WarpPerspective::BorderMode::REFLECT,
  328. WarpPerspective::BorderMode::REPLICATE,
  329. WarpPerspective::BorderMode::CONSTANT}) {
  330. param.border_val = 0.3f;
  331. param.bmode = bmode;
  332. param.imode = Param::InterpolationMode::LINEAR;
  333. checker.set_param(param);
  334. checker.set_epsilon(1 + 1e-3);
  335. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  336. checker.execs({{1, 3, 25, 510}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  337. checker.execs({{1, 3, 25, 25}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  338. checker.execs({{1, 3, 51, 51}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  339. }
  340. {
  341. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  342. handle_cuda());
  343. constexpr int N_SRC = 5;
  344. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  345. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  346. checker.set_rng(1, &rng);
  347. checker.set_dtype(2, dtype::Int32());
  348. checker.set_rng(2, &mat_idx_rng);
  349. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  350. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  351. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  352. checker.set_param(param);
  353. checker.set_epsilon(1 + 1e-3);
  354. checker.execs({{N_SRC, 3, 10, 11}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  355. checker.execs(
  356. {{N_SRC, 3, 17, 13}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  357. }
  358. }
  359. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW4_IC_SMALL) {
  360. using Param = WarpPerspective::Param;
  361. WarpPerspective::Param param;
  362. Checker<WarpPerspectiveForward> checker(handle_cuda());
  363. WarpPerspectiveMatRNG rng;
  364. param.format = Param::Format::NHWC_NCHW4_IC_SMALL;
  365. checker.set_rng(1, &rng);
  366. checker.set_dtype(0, dtype::Uint8());
  367. checker.set_dtype(2, dtype::QuantizedS8(1.f));
  368. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  369. WarpPerspective::BorderMode::REFLECT,
  370. WarpPerspective::BorderMode::REPLICATE,
  371. WarpPerspective::BorderMode::CONSTANT}) {
  372. param.border_val = 0.3f;
  373. param.bmode = bmode;
  374. param.imode = Param::InterpolationMode::LINEAR;
  375. checker.set_param(param);
  376. checker.set_epsilon(1 + 1e-3);
  377. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  378. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  379. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  380. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  381. }
  382. {
  383. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  384. handle_cuda());
  385. constexpr int N_SRC = 5;
  386. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  387. checker.set_dtype(0, dtype::Uint8());
  388. checker.set_rng(1, &rng);
  389. checker.set_dtype(2, dtype::Int32());
  390. checker.set_rng(2, &mat_idx_rng);
  391. checker.set_dtype(3, dtype::QuantizedS8(1.f));
  392. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  393. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  394. checker.set_param(param);
  395. checker.set_epsilon(1 + 1e-3);
  396. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  397. checker.execs(
  398. {{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  399. }
  400. }
  401. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW) {
  402. using Param = WarpPerspective::Param;
  403. WarpPerspective::Param param;
  404. Checker<WarpPerspectiveForward> checker(handle_cuda());
  405. WarpPerspectiveMatRNG rng;
  406. param.format = Param::Format::NHWC_NCHW;
  407. checker.set_rng(1, &rng);
  408. checker.set_dtype(0, dtype::Uint8());
  409. checker.set_dtype(2, dtype::Float32());
  410. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  411. WarpPerspective::BorderMode::REFLECT,
  412. WarpPerspective::BorderMode::REPLICATE,
  413. WarpPerspective::BorderMode::CONSTANT}) {
  414. param.border_val = 0.3f;
  415. param.bmode = bmode;
  416. param.imode = Param::InterpolationMode::LINEAR;
  417. checker.set_param(param);
  418. checker.set_epsilon(1 + 1e-3);
  419. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 3, 11, 12}});
  420. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  421. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 3, 51, 51}});
  422. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  423. }
  424. {
  425. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  426. handle_cuda());
  427. constexpr int N_SRC = 5;
  428. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  429. checker.set_dtype(0, dtype::Uint8());
  430. checker.set_rng(1, &rng);
  431. checker.set_dtype(2, dtype::Int32());
  432. checker.set_rng(2, &mat_idx_rng);
  433. checker.set_dtype(3, dtype::Float32());
  434. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  435. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  436. checker.set_param(param);
  437. checker.set_epsilon(1 + 1e-3);
  438. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 3, 11, 12}});
  439. checker.execs(
  440. {{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 3, 16, 15}});
  441. }
  442. }
  443. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  444. warp_perspective::run_int8_test(handle_cuda());
  445. }
  446. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) {
  447. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  448. WarpPerspectiveMatRNG rng;
  449. checker.set_rng(0, &rng);
  450. for (int i = 0; i < 1; ++i) {
  451. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  452. WarpPerspective::BorderMode::REFLECT,
  453. WarpPerspective::BorderMode::REPLICATE,
  454. WarpPerspective::BorderMode::CONSTANT}) {
  455. WarpPerspective::Param param;
  456. param.border_val = 0.3f;
  457. param.bmode = bmode;
  458. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  459. checker.set_param(param);
  460. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  461. checker.execs(
  462. {{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  463. }
  464. }
  465. // nan case
  466. NanMatRNG rng_nan;
  467. UniformFloatRNG rng_zero(0, 0);
  468. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  469. param::WarpPerspective param;
  470. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  471. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  472. checker.set_rng(0, rng);
  473. param.border_val = 1.737;
  474. checker.set_param(param);
  475. // no invalid mem access is enough; no need to check value
  476. checker.set_expect_exec_fail([]() {});
  477. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  478. }
  479. {
  480. Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy>
  481. checker(handle_cuda());
  482. constexpr int N_SRC = 5;
  483. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  484. checker.set_rng(0, &rng);
  485. checker.set_dtype(1, dtype::Int32());
  486. checker.set_rng(1, &mat_idx_rng);
  487. param::WarpPerspective param;
  488. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  489. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  490. checker.set_param(param);
  491. checker.set_epsilon(1 + 1e-3);
  492. checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});
  493. checker.execs(
  494. {{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}});
  495. }
  496. }
  497. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) {
  498. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  499. WarpPerspectiveMatRNG rng;
  500. checker.set_rng(1, &rng);
  501. for (int i = 0; i < 1; ++i) {
  502. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  503. WarpPerspective::BorderMode::REFLECT,
  504. WarpPerspective::BorderMode::REPLICATE,
  505. WarpPerspective::BorderMode::CONSTANT}) {
  506. WarpPerspective::Param param;
  507. param.border_val = 0.3f;
  508. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  509. param.bmode = bmode;
  510. checker.set_param(param);
  511. checker.set_epsilon(1e-2);
  512. checker.execs({{1000, 3, 11, 12},
  513. {1000, 3, 3},
  514. {1000, 3, 10, 11},
  515. {1000, 3, 3}});
  516. }
  517. }
  518. // nan case
  519. NanMatRNG rng_nan;
  520. UniformFloatRNG rng_zero(0, 0);
  521. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  522. param::WarpPerspective param;
  523. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  524. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  525. checker.set_rng(1, rng);
  526. param.border_val = 1.737;
  527. checker.set_param(param);
  528. // no invalid mem access is enough; no need to check value
  529. checker.set_expect_exec_fail([]() {});
  530. checker.exec({{1000, 2, 10, 11},
  531. {1000, 3, 3},
  532. {1000, 2, 12, 13},
  533. {1000, 3, 3}});
  534. }
  535. {
  536. Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker(
  537. handle_cuda());
  538. constexpr int N_SRC = 5;
  539. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  540. checker.set_rng(1, &rng);
  541. checker.set_dtype(2, dtype::Int32());
  542. checker.set_rng(2, &mat_idx_rng);
  543. param::WarpPerspective param;
  544. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  545. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  546. checker.set_param(param);
  547. checker.set_epsilon(1 + 1e-3);
  548. checker.execs({{N_SRC, 12, 10, 11},
  549. {2, 3, 3},
  550. {2},
  551. {2, 12, 11, 12},
  552. {2, 3, 3}});
  553. checker.execs({{N_SRC, 56, 17, 13},
  554. {123, 3, 3},
  555. {123},
  556. {123, 56, 16, 15},
  557. {123, 3, 3}});
  558. }
  559. }
  560. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) {
  561. using Param = WarpPerspective::Param;
  562. Checker<WarpPerspectiveForward> checker(handle_cuda());
  563. WarpPerspectiveMatRNG rng;
  564. checker.set_rng(1, &rng);
  565. checker.set_dtype(0, dtype::BFloat16())
  566. .set_dtype(1, dtype::Float32())
  567. .set_dtype(2, dtype::BFloat16());
  568. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  569. WarpPerspective::BorderMode::REFLECT,
  570. WarpPerspective::BorderMode::REPLICATE,
  571. WarpPerspective::BorderMode::CONSTANT}) {
  572. WarpPerspective::Param param;
  573. param.border_val = 0.3f;
  574. param.bmode = bmode;
  575. param.imode = Param::InterpolationMode::LINEAR;
  576. param.format = Param::Format::NHWC;
  577. checker.set_param(param);
  578. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  579. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  580. param.format = Param::Format::NCHW;
  581. checker.set_param(param);
  582. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  583. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  584. }
  585. }
  586. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QINT4) {
  587. using Param = WarpPerspective::Param;
  588. Checker<WarpPerspectiveForward> checker(handle_cuda());
  589. WarpPerspectiveMatRNG rng;
  590. checker.set_rng(1, &rng);
  591. checker.set_dtype(0, dtype::QuantizedS4(1.25f))
  592. .set_dtype(1, dtype::Float32())
  593. .set_dtype(2, dtype::QuantizedS4(1.25f));
  594. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  595. WarpPerspective::BorderMode::REFLECT,
  596. WarpPerspective::BorderMode::REPLICATE,
  597. WarpPerspective::BorderMode::CONSTANT}) {
  598. WarpPerspective::Param param;
  599. param.border_val = 0.3f;
  600. param.bmode = bmode;
  601. param.imode = Param::InterpolationMode::LINEAR;
  602. param.format = Param::Format::NCHW;
  603. checker.set_param(param);
  604. checker.set_epsilon(1 + 1e-3);
  605. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  606. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  607. }
  608. }
  609. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
  610. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  611. WarpPerspectiveMatRNG rng;
  612. checker.set_rng(0, &rng)
  613. .set_epsilon(1e-1)
  614. .set_dtype(0, dtype::Float32())
  615. .set_dtype(1, dtype::BFloat16())
  616. .set_dtype(2, dtype::BFloat16());
  617. for (int i = 0; i < 1; ++i) {
  618. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  619. WarpPerspective::BorderMode::REFLECT,
  620. WarpPerspective::BorderMode::REPLICATE,
  621. WarpPerspective::BorderMode::CONSTANT}) {
  622. WarpPerspective::Param param;
  623. param.border_val = 0.3f;
  624. param.bmode = bmode;
  625. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  626. checker.set_param(param);
  627. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  628. }
  629. }
  630. }
  631. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
  632. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  633. WarpPerspectiveMatRNG rng;
  634. checker.set_rng(1, &rng)
  635. .set_epsilon(1e-2)
  636. .set_dtype(0, dtype::BFloat16())
  637. .set_dtype(1, dtype::Float32())
  638. .set_dtype(2, dtype::BFloat16())
  639. .set_dtype(3, dtype::Float32());
  640. for (int i = 0; i < 1; ++i) {
  641. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  642. WarpPerspective::BorderMode::REFLECT,
  643. WarpPerspective::BorderMode::REPLICATE,
  644. WarpPerspective::BorderMode::CONSTANT}) {
  645. WarpPerspective::Param param;
  646. param.border_val = 0.3f;
  647. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  648. param.bmode = bmode;
  649. checker.set_param(param);
  650. checker.execs({{10, 3, 11, 12},
  651. {10, 3, 3},
  652. {10, 3, 10, 11},
  653. {10, 3, 3}});
  654. }
  655. }
  656. }
  657. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  658. warp_perspective::run_mat_idx_test(handle_cuda());
  659. }
  660. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64) {
  661. using Param = WarpPerspective::Param;
  662. WarpPerspective::Param param;
  663. Checker<WarpPerspectiveForward> checker(handle_cuda());
  664. WarpPerspectiveMatRNG_V2 rng;
  665. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  666. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  667. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  668. WarpPerspective::BorderMode::REFLECT,
  669. WarpPerspective::BorderMode::REPLICATE,
  670. WarpPerspective::BorderMode::CONSTANT}) {
  671. param.border_val = 0.3f;
  672. param.bmode = bmode;
  673. param.imode = Param::InterpolationMode::LINEAR;
  674. param.format = Param::Format::NCHW64;
  675. checker.set_param(param);
  676. checker.set_epsilon(1 + 1e-3);
  677. rng.set_hw(10, 11);
  678. checker.set_rng(1, &rng);
  679. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  680. checker.execs(
  681. {{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  682. checker.execs(
  683. {{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  684. rng.set_hw(25, 25);
  685. checker.set_rng(1, &rng);
  686. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  687. rng.set_hw(25, 510);
  688. checker.set_rng(1, &rng);
  689. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  690. rng.set_hw(25, 25);
  691. checker.set_rng(1, &rng);
  692. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  693. rng.set_hw(51, 51);
  694. checker.set_rng(1, &rng);
  695. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  696. }
  697. {
  698. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  699. handle_cuda());
  700. constexpr int N_SRC = 5;
  701. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  702. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  703. checker.set_rng(1, &rng);
  704. checker.set_dtype(2, dtype::Int32());
  705. checker.set_rng(2, &mat_idx_rng);
  706. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  707. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  708. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  709. checker.set_param(param);
  710. checker.set_epsilon(1 + 1e-3);
  711. rng.set_hw(10, 11);
  712. checker.set_rng(1, &rng);
  713. checker.execs(
  714. {{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  715. rng.set_hw(17, 13);
  716. checker.set_rng(1, &rng);
  717. checker.execs({{N_SRC, 14, 17, 13, 64},
  718. {123, 3, 3},
  719. {123},
  720. {123, 14, 16, 15, 64}});
  721. }
  722. }
  723. #if MEGDNN_WITH_BENCHMARK
  724. TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
  725. Benchmarker<WarpPerspective> benchmarker(handle_cuda());
  726. using Param = param::WarpPerspective;
  727. WarpPerspectiveMatRNG rng;
  728. benchmarker.set_rng(1, &rng);
  729. Param param;
  730. auto run = [&benchmarker, &param](const megdnn::TensorShapeArray& shapes) {
  731. benchmarker.set_param(param);
  732. auto used = benchmarker.execs(shapes);
  733. printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n",
  734. param.format == Param::Format::NCHW ? "NCHW" : "NCHW4",
  735. shapes[0].to_string().c_str(), shapes[2].to_string().c_str(),
  736. used,
  737. shapes[2].total_nr_elems() *
  738. (4.f + 1.f + shapes[1].total_nr_elems()) /
  739. (1024 * 1024 * 1024) / used * 1e3,
  740. shapes[2].total_nr_elems() * (4.f + 3.f) / (1024 * 1024 * 1024) /
  741. used * 1e3);
  742. };
  743. param.format = Param::Format::NCHW;
  744. benchmarker.set_dtype(0, dtype::Int8());
  745. benchmarker.set_dtype(2, dtype::Int8());
  746. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
  747. run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
  748. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
  749. run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});
  750. param.format = Param::Format::NCHW4;
  751. benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
  752. benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
  753. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
  754. run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  755. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
  756. run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  757. }
  758. #endif
  759. } // namespace test
  760. } // namespace megdnn
  761. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台