You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 46 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128
  1. #include "test/cuda/fixture.h"
  2. #include "test/common/benchmarker.h"
  3. #include "test/common/checker.h"
  4. #include "test/common/opr_proxy.h"
  5. #include "test/common/warp_perspective.h"
  6. #include "test/cuda/utils.h"
  7. namespace {
  8. using namespace megdnn;
  9. using namespace test;
  10. class NanMatRNG : public RNG {
  11. void gen(const TensorND& tensor_) override {
  12. auto& gen = RandomState::generator();
  13. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  14. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  15. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  16. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  17. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  18. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  19. std::uniform_int_distribution<int> dice(0, 5);
  20. float* ptr = tensor_.ptr<dt_float32>();
  21. auto N = tensor_.layout.shape[0];
  22. for (size_t n = 0; n < N; ++n) {
  23. for (size_t i = 0; i < 9; ++i) {
  24. switch (dice(gen)) {
  25. case 0:
  26. ptr[i] = pdist3(gen);
  27. break;
  28. case 1:
  29. ptr[i] = pdist(gen);
  30. break;
  31. case 2:
  32. ptr[i] = pdisth(gen);
  33. break;
  34. case 3:
  35. ptr[i] = ndist(gen);
  36. break;
  37. case 4:
  38. ptr[i] = ndist3(gen);
  39. break;
  40. case 5:
  41. ptr[i] = ndisth(gen);
  42. break;
  43. }
  44. }
  45. ptr[6] = 1;
  46. ptr[7] = -1;
  47. ptr[8] = 5;
  48. ptr += 9;
  49. }
  50. }
  51. };
  52. } // anonymous namespace
  53. namespace megdnn {
  54. namespace test {
  // FIXME test WARP_PERSPECTIVE_CV failed here
  #if 0
  // Disabled: compares the CUDA implementation against the reference for the
  // NHWC format, over several border modes, the shared CV argument list
  // (float32 and uint8), and finally an all-zero matrix.
  TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
      //! format = NHWC
      using BMode = param::WarpPerspective::BorderMode;

      Checker<WarpPerspective> checker(handle_cuda());
      param::WarpPerspective param;

      // Fills each group of 9 floats from six narrow uniform ranges chosen by
      // a die roll; for odd batch indices entries 1, 3, 6 and 7 are zeroed.
      class ResizeMatRNG : public RNG {
          void gen(const TensorND& tensor_) override {
              using Dist = std::uniform_real_distribution<dt_float32>;
              Dist ranges[] = {
                      Dist(1.9f, 3.1f),    // 0
                      Dist(0.9f, 1.1f),    // 1
                      Dist(0.4f, 0.6f),    // 2
                      Dist(-1.1f, -0.9f),  // 3
                      Dist(-3.1f, -1.9f),  // 4
                      Dist(-0.6f, -0.4f),  // 5
              };
              std::uniform_int_distribution<int> die(0, 5);

              auto& engine = RandomState::generator();
              float* mat = tensor_.ptr<dt_float32>();
              const auto batch = tensor_.layout.shape[0];
              for (size_t b = 0; b < batch; ++b, mat += 9) {
                  for (size_t k = 0; k < 9; ++k) {
                      Dist& chosen = ranges[die(engine)];
                      mat[k] = chosen(engine);
                  }
                  // is resize?
                  if (b & 1) {
                      mat[1] = 0;
                      mat[3] = 0;
                      mat[7] = 0;
                      mat[6] = 0;
                  }
              }
          }
      };
      ResizeMatRNG rng;
      checker.set_rng(1, &rng);

      param.format = param::WarpPerspective::Format::NHWC;
      // naive and cuda uses different algorithms and different border handling
      checker.set_epsilon(2.001).set_max_avg_error(4e-2);
      for (auto mode :
           {BMode::REFLECT_101, BMode::REPLICATE, BMode::REFLECT, BMode::WRAP,
            BMode::CONSTANT}) {
          param.border_val = 1.737;
          param.bmode = mode;
          checker.set_param(param);
          checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
      }

      auto args = warp_perspective::get_cv_args();

      // Pass 1: float32 source and destination.
      for (auto&& arg : args) {
          checker.set_param(arg.param);
          checker.set_dtype(0, dtype::Float32());
          checker.set_dtype(1, dtype::Float32());
          checker.set_dtype(2, dtype::Float32());
          checker.execs({arg.src, arg.trans, arg.dst});
      }

      // Pass 2: uint8 source and destination with much looser tolerances.
      for (auto&& arg : args) {
          checker.set_param(arg.param);
          checker.set_epsilon(242.001).set_max_avg_error(3.0);
          checker.set_dtype(0, dtype::Uint8());
          checker.set_dtype(1, dtype::Float32());
          checker.set_dtype(2, dtype::Uint8());
          checker.execs({arg.src, arg.trans, arg.dst});
      }

      // resize nan case
      UniformFloatRNG rng_zero(0, 0);
      checker.set_rng(1, &rng_zero);
      {
          param.border_val = 1.737;
          param.bmode = BMode::CONSTANT;
          checker.set_param(param);
          checker.set_dtype(0, dtype::Float32());
          checker.set_dtype(1, dtype::Float32());
          checker.set_dtype(2, dtype::Float32());
          // no invalid mem access is enough; no need to check value
          checker.set_expect_exec_fail([]() {});
          checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
      }
  }
  #endif
  153. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) {
  154. using Param = WarpPerspective::Param;
  155. Checker<WarpPerspectiveForward> checker(handle_cuda());
  156. WarpPerspectiveMatRNG rng;
  157. checker.set_rng(1, &rng);
  158. for (auto bmode :
  159. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  160. WarpPerspective::BorderMode::REPLICATE,
  161. WarpPerspective::BorderMode::CONSTANT}) {
  162. WarpPerspective::Param param;
  163. param.border_val = 0.3f;
  164. param.bmode = bmode;
  165. param.imode = Param::InterpolationMode::LINEAR;
  166. param.format = Param::Format::NHWC;
  167. checker.set_param(param);
  168. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  169. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  170. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  171. checker.set_epsilon(1e-3);
  172. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  173. param.format = Param::Format::NCHW;
  174. checker.set_param(param);
  175. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  176. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  177. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  178. }
  179. // nan case
  180. NanMatRNG rng_nan;
  181. UniformFloatRNG rng_zero(0, 0);
  182. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  183. param::WarpPerspective param;
  184. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  185. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  186. checker.set_rng(1, rng);
  187. param.border_val = 1.737;
  188. checker.set_param(param);
  189. // no invalid mem access is enough; no need to check value
  190. checker.set_expect_exec_fail([]() {});
  191. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  192. }
  193. }
  194. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NHWC) {
  195. using Param = WarpPerspective::Param;
  196. Checker<WarpPerspectiveForward> checker(handle_cuda());
  197. WarpPerspectiveMatRNG_V2 rng;
  198. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  199. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  200. checker.set_rng(1, &rng);
  201. for (auto bmode :
  202. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  203. WarpPerspective::BorderMode::REPLICATE,
  204. WarpPerspective::BorderMode::CONSTANT}) {
  205. WarpPerspective::Param param;
  206. param.border_val = 1.2f;
  207. param.bmode = bmode;
  208. param.imode = Param::InterpolationMode::LINEAR;
  209. param.format = Param::Format::NHWC;
  210. checker.set_param(param);
  211. checker.set_epsilon(1 + 1e-3);
  212. rng.set_hw(10, 11);
  213. checker.execs({{23, 10, 11, 16}, {23, 3, 3}, {23, 11, 12, 16}});
  214. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  215. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  216. rng.set_hw(55, 66);
  217. checker.execs({{20, 55, 66, 32}, {20, 3, 3}, {20, 44, 34, 32}});
  218. }
  219. {
  220. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  221. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  222. checker.set_rng(1, &rng);
  223. for (auto bmode :
  224. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  225. WarpPerspective::BorderMode::REPLICATE,
  226. WarpPerspective::BorderMode::CONSTANT}) {
  227. WarpPerspective::Param param;
  228. param.border_val = 0.3f;
  229. param.bmode = bmode;
  230. param.imode = Param::InterpolationMode::LINEAR;
  231. param.format = Param::Format::NHWC;
  232. checker.set_param(param);
  233. checker.set_epsilon(1 + 1e-3);
  234. rng.set_hw(10, 11);
  235. checker.execs({{23, 10, 11, 16}, {23, 3, 3}, {23, 11, 12, 16}});
  236. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  237. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  238. rng.set_hw(55, 66);
  239. checker.execs({{20, 55, 66, 32}, {20, 3, 3}, {20, 44, 34, 32}});
  240. }
  241. }
  242. {
  243. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  244. constexpr int N_SRC = 5;
  245. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  246. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  247. checker.set_rng(1, &rng);
  248. checker.set_dtype(2, dtype::Int32());
  249. checker.set_rng(2, &mat_idx_rng);
  250. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  251. WarpPerspective::Param param;
  252. param.border_val = 0.3f;
  253. param.format = Param::Format::NHWC;
  254. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  255. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  256. checker.set_param(param);
  257. checker.set_epsilon(1 + 1e-3);
  258. rng.set_hw(10, 11);
  259. checker.set_rng(1, &rng);
  260. checker.execs({{N_SRC, 10, 11, 48}, {2, 3, 3}, {2}, {2, 11, 12, 48}});
  261. rng.set_hw(17, 13);
  262. checker.set_rng(1, &rng);
  263. checker.execs({{N_SRC, 17, 13, 64}, {123, 3, 3}, {123}, {123, 16, 15, 64}});
  264. }
  265. }
  266. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) {
  267. require_compute_capability(6, 0);
  268. using Param = WarpPerspective::Param;
  269. Checker<WarpPerspectiveForward> checker(handle_cuda());
  270. WarpPerspectiveMatRNG rng;
  271. checker.set_rng(1, &rng);
  272. for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) {
  273. WarpPerspective::Param param;
  274. param.border_val = 0.3f;
  275. param.bmode = bmode;
  276. param.imode = Param::InterpolationMode::LINEAR;
  277. param.format = Param::Format::NHWC;
  278. checker.set_param(param);
  279. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  280. size_t n = (INT_MAX) / (512 * 512 * 3);
  281. checker.execs({{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
  282. }
  283. }
  284. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) {
  285. using Param = WarpPerspective::Param;
  286. Checker<WarpPerspectiveForward> checker(handle_cuda());
  287. WarpPerspectiveMatRNG rng;
  288. checker.set_rng(1, &rng);
  289. checker.set_dtype(0, dtype::Float16())
  290. .set_dtype(1, dtype::Float32())
  291. .set_dtype(2, dtype::Float16());
  292. for (auto bmode :
  293. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  294. WarpPerspective::BorderMode::REPLICATE,
  295. WarpPerspective::BorderMode::CONSTANT}) {
  296. WarpPerspective::Param param;
  297. param.border_val = 0.3f;
  298. param.bmode = bmode;
  299. param.imode = Param::InterpolationMode::LINEAR;
  300. param.format = Param::Format::NHWC;
  301. checker.set_param(param);
  302. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  303. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  304. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  305. checker.set_epsilon(1e-3);
  306. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  307. param.format = Param::Format::NCHW;
  308. checker.set_param(param);
  309. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  310. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  311. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  312. }
  313. // nan case
  314. NanMatRNG rng_nan;
  315. UniformFloatRNG rng_zero(0, 0);
  316. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  317. param::WarpPerspective param;
  318. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  319. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  320. checker.set_rng(1, rng);
  321. param.border_val = 1.737;
  322. checker.set_param(param);
  323. // no invalid mem access is enough; no need to check value
  324. checker.set_expect_exec_fail([]() {});
  325. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  326. }
  327. }
  328. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) {
  329. using Param = WarpPerspective::Param;
  330. WarpPerspective::Param param;
  331. Checker<WarpPerspectiveForward> checker(handle_cuda());
  332. WarpPerspectiveMatRNG rng;
  333. checker.set_rng(1, &rng);
  334. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  335. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  336. for (auto bmode :
  337. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  338. WarpPerspective::BorderMode::REPLICATE,
  339. WarpPerspective::BorderMode::CONSTANT}) {
  340. param.border_val = 0.3f;
  341. param.bmode = bmode;
  342. param.imode = Param::InterpolationMode::LINEAR;
  343. param.format = Param::Format::NCHW4;
  344. checker.set_param(param);
  345. checker.set_epsilon(1 + 1e-3);
  346. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  347. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  348. checker.execs({{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  349. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  350. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  351. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  352. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  353. }
  354. {
  355. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  356. constexpr int N_SRC = 5;
  357. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  358. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  359. checker.set_rng(1, &rng);
  360. checker.set_dtype(2, dtype::Int32());
  361. checker.set_rng(2, &mat_idx_rng);
  362. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  363. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  364. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  365. checker.set_param(param);
  366. checker.set_epsilon(1 + 1e-3);
  367. checker.execs({{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  368. checker.execs(
  369. {{N_SRC, 14, 17, 13, 4}, {123, 3, 3}, {123}, {123, 14, 16, 15, 4}});
  370. }
  371. }
  372. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW_NCHW4_IC_SMALL) {
  373. using Param = WarpPerspective::Param;
  374. WarpPerspective::Param param;
  375. Checker<WarpPerspectiveForward> checker(handle_cuda());
  376. WarpPerspectiveMatRNG rng;
  377. param.format = Param::Format::NCHW_NCHW4_IC_SMALL;
  378. checker.set_rng(1, &rng);
  379. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  380. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  381. for (auto bmode :
  382. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  383. WarpPerspective::BorderMode::REPLICATE,
  384. WarpPerspective::BorderMode::CONSTANT}) {
  385. param.border_val = 0.3f;
  386. param.bmode = bmode;
  387. param.imode = Param::InterpolationMode::LINEAR;
  388. checker.set_param(param);
  389. checker.set_epsilon(1 + 1e-3);
  390. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  391. checker.execs({{1, 3, 25, 510}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  392. checker.execs({{1, 3, 25, 25}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  393. checker.execs({{1, 3, 51, 51}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  394. }
  395. {
  396. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  397. constexpr int N_SRC = 5;
  398. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  399. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  400. checker.set_rng(1, &rng);
  401. checker.set_dtype(2, dtype::Int32());
  402. checker.set_rng(2, &mat_idx_rng);
  403. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  404. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  405. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  406. checker.set_param(param);
  407. checker.set_epsilon(1 + 1e-3);
  408. checker.execs({{N_SRC, 3, 10, 11}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  409. checker.execs({{N_SRC, 3, 17, 13}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  410. }
  411. }
  412. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW4_IC_SMALL) {
  413. using Param = WarpPerspective::Param;
  414. WarpPerspective::Param param;
  415. Checker<WarpPerspectiveForward> checker(handle_cuda());
  416. WarpPerspectiveMatRNG rng;
  417. param.format = Param::Format::NHWC_NCHW4_IC_SMALL;
  418. checker.set_rng(1, &rng);
  419. checker.set_dtype(0, dtype::Uint8());
  420. checker.set_dtype(2, dtype::QuantizedS8(1.f));
  421. for (auto bmode :
  422. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  423. WarpPerspective::BorderMode::REPLICATE,
  424. WarpPerspective::BorderMode::CONSTANT}) {
  425. param.border_val = 0.3f;
  426. param.bmode = bmode;
  427. param.imode = Param::InterpolationMode::LINEAR;
  428. checker.set_param(param);
  429. checker.set_epsilon(1 + 1e-3);
  430. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  431. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  432. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  433. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  434. }
  435. {
  436. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  437. constexpr int N_SRC = 5;
  438. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  439. checker.set_dtype(0, dtype::Uint8());
  440. checker.set_rng(1, &rng);
  441. checker.set_dtype(2, dtype::Int32());
  442. checker.set_rng(2, &mat_idx_rng);
  443. checker.set_dtype(3, dtype::QuantizedS8(1.f));
  444. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  445. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  446. checker.set_param(param);
  447. checker.set_epsilon(1 + 1e-3);
  448. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  449. checker.execs({{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  450. }
  451. }
  452. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW) {
  453. using Param = WarpPerspective::Param;
  454. WarpPerspective::Param param;
  455. Checker<WarpPerspectiveForward> checker(handle_cuda());
  456. WarpPerspectiveMatRNG rng;
  457. param.format = Param::Format::NHWC_NCHW;
  458. checker.set_rng(1, &rng);
  459. checker.set_dtype(0, dtype::Uint8());
  460. checker.set_dtype(2, dtype::Float32());
  461. for (auto bmode :
  462. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  463. WarpPerspective::BorderMode::REPLICATE,
  464. WarpPerspective::BorderMode::CONSTANT}) {
  465. param.border_val = 0.3f;
  466. param.bmode = bmode;
  467. param.imode = Param::InterpolationMode::LINEAR;
  468. checker.set_param(param);
  469. checker.set_epsilon(1 + 1e-3);
  470. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 3, 11, 12}});
  471. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  472. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 3, 51, 51}});
  473. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  474. }
  475. {
  476. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  477. constexpr int N_SRC = 5;
  478. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  479. checker.set_dtype(0, dtype::Uint8());
  480. checker.set_rng(1, &rng);
  481. checker.set_dtype(2, dtype::Int32());
  482. checker.set_rng(2, &mat_idx_rng);
  483. checker.set_dtype(3, dtype::Float32());
  484. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  485. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  486. checker.set_param(param);
  487. checker.set_epsilon(1 + 1e-3);
  488. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 3, 11, 12}});
  489. checker.execs({{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 3, 16, 15}});
  490. }
  491. }
  492. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  493. warp_perspective::run_int8_test(handle_cuda());
  494. }
  495. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) {
  496. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  497. WarpPerspectiveMatRNG rng;
  498. checker.set_rng(0, &rng);
  499. for (int i = 0; i < 1; ++i) {
  500. for (auto bmode :
  501. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  502. WarpPerspective::BorderMode::REPLICATE,
  503. WarpPerspective::BorderMode::CONSTANT}) {
  504. WarpPerspective::Param param;
  505. param.border_val = 0.3f;
  506. param.bmode = bmode;
  507. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  508. checker.set_param(param);
  509. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  510. checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  511. }
  512. }
  513. // nan case
  514. NanMatRNG rng_nan;
  515. UniformFloatRNG rng_zero(0, 0);
  516. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  517. param::WarpPerspective param;
  518. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  519. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  520. checker.set_rng(0, rng);
  521. param.border_val = 1.737;
  522. checker.set_param(param);
  523. // no invalid mem access is enough; no need to check value
  524. checker.set_expect_exec_fail([]() {});
  525. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  526. }
  527. {
  528. Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy> checker(
  529. handle_cuda());
  530. constexpr int N_SRC = 5;
  531. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  532. checker.set_rng(0, &rng);
  533. checker.set_dtype(1, dtype::Int32());
  534. checker.set_rng(1, &mat_idx_rng);
  535. param::WarpPerspective param;
  536. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  537. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  538. checker.set_param(param);
  539. checker.set_epsilon(1 + 1e-3);
  540. checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});
  541. checker.execs({{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}});
  542. }
  543. }
  544. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) {
  545. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  546. WarpPerspectiveMatRNG rng;
  547. checker.set_rng(1, &rng);
  548. for (int i = 0; i < 1; ++i) {
  549. for (auto bmode :
  550. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  551. WarpPerspective::BorderMode::REPLICATE,
  552. WarpPerspective::BorderMode::CONSTANT}) {
  553. WarpPerspective::Param param;
  554. param.border_val = 0.3f;
  555. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  556. param.bmode = bmode;
  557. checker.set_param(param);
  558. checker.set_epsilon(1e-2);
  559. checker.execs(
  560. {{1000, 3, 11, 12}, {1000, 3, 3}, {1000, 3, 10, 11}, {1000, 3, 3}});
  561. }
  562. }
  563. // nan case
  564. NanMatRNG rng_nan;
  565. UniformFloatRNG rng_zero(0, 0);
  566. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  567. param::WarpPerspective param;
  568. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  569. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  570. checker.set_rng(1, rng);
  571. param.border_val = 1.737;
  572. checker.set_param(param);
  573. // no invalid mem access is enough; no need to check value
  574. checker.set_expect_exec_fail([]() {});
  575. checker.exec(
  576. {{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}, {1000, 3, 3}});
  577. }
  578. {
  579. Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker(
  580. handle_cuda());
  581. constexpr int N_SRC = 5;
  582. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  583. checker.set_rng(1, &rng);
  584. checker.set_dtype(2, dtype::Int32());
  585. checker.set_rng(2, &mat_idx_rng);
  586. param::WarpPerspective param;
  587. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  588. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  589. checker.set_param(param);
  590. checker.set_epsilon(1 + 1e-3);
  591. checker.execs(
  592. {{N_SRC, 12, 10, 11}, {2, 3, 3}, {2}, {2, 12, 11, 12}, {2, 3, 3}});
  593. checker.execs(
  594. {{N_SRC, 56, 17, 13},
  595. {123, 3, 3},
  596. {123},
  597. {123, 56, 16, 15},
  598. {123, 3, 3}});
  599. }
  600. }
  601. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) {
  602. using Param = WarpPerspective::Param;
  603. Checker<WarpPerspectiveForward> checker(handle_cuda());
  604. WarpPerspectiveMatRNG rng;
  605. checker.set_rng(1, &rng);
  606. checker.set_dtype(0, dtype::BFloat16())
  607. .set_dtype(1, dtype::Float32())
  608. .set_dtype(2, dtype::BFloat16());
  609. for (auto bmode :
  610. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  611. WarpPerspective::BorderMode::REPLICATE,
  612. WarpPerspective::BorderMode::CONSTANT}) {
  613. WarpPerspective::Param param;
  614. param.border_val = 0.3f;
  615. param.bmode = bmode;
  616. param.imode = Param::InterpolationMode::LINEAR;
  617. param.format = Param::Format::NHWC;
  618. checker.set_param(param);
  619. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  620. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  621. param.format = Param::Format::NCHW;
  622. checker.set_param(param);
  623. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  624. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  625. }
  626. }
  627. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QINT4) {
  628. using Param = WarpPerspective::Param;
  629. Checker<WarpPerspectiveForward> checker(handle_cuda());
  630. WarpPerspectiveMatRNG rng;
  631. checker.set_rng(1, &rng);
  632. checker.set_dtype(0, dtype::QuantizedS4(1.25f))
  633. .set_dtype(1, dtype::Float32())
  634. .set_dtype(2, dtype::QuantizedS4(1.25f));
  635. for (auto bmode :
  636. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  637. WarpPerspective::BorderMode::REPLICATE,
  638. WarpPerspective::BorderMode::CONSTANT}) {
  639. WarpPerspective::Param param;
  640. param.border_val = 0.3f;
  641. param.bmode = bmode;
  642. param.imode = Param::InterpolationMode::LINEAR;
  643. param.format = Param::Format::NCHW;
  644. checker.set_param(param);
  645. checker.set_epsilon(1 + 1e-3);
  646. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  647. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  648. }
  649. }
  650. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QUINT4) {
  651. using Param = WarpPerspective::Param;
  652. Checker<WarpPerspectiveForward> checker(handle_cuda());
  653. WarpPerspectiveMatRNG rng;
  654. checker.set_rng(1, &rng);
  655. checker.set_dtype(0, dtype::Quantized4Asymm(1.25f, 0))
  656. .set_dtype(1, dtype::Float32())
  657. .set_dtype(2, dtype::Quantized4Asymm(1.25f, 0));
  658. for (auto bmode :
  659. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  660. WarpPerspective::BorderMode::REPLICATE,
  661. WarpPerspective::BorderMode::CONSTANT}) {
  662. WarpPerspective::Param param;
  663. param.border_val = 0.3f;
  664. param.bmode = bmode;
  665. param.imode = Param::InterpolationMode::LINEAR;
  666. param.format = Param::Format::NCHW;
  667. checker.set_param(param);
  668. checker.set_epsilon(1 + 1e-3);
  669. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  670. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  671. }
  672. }
  673. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
  674. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  675. WarpPerspectiveMatRNG rng;
  676. checker.set_rng(0, &rng)
  677. .set_epsilon(1e-1)
  678. .set_dtype(0, dtype::Float32())
  679. .set_dtype(1, dtype::BFloat16())
  680. .set_dtype(2, dtype::BFloat16());
  681. for (int i = 0; i < 1; ++i) {
  682. for (auto bmode :
  683. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  684. WarpPerspective::BorderMode::REPLICATE,
  685. WarpPerspective::BorderMode::CONSTANT}) {
  686. WarpPerspective::Param param;
  687. param.border_val = 0.3f;
  688. param.bmode = bmode;
  689. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  690. checker.set_param(param);
  691. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  692. }
  693. }
  694. }
  695. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
  696. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  697. WarpPerspectiveMatRNG rng;
  698. checker.set_rng(1, &rng)
  699. .set_epsilon(1e-2)
  700. .set_dtype(0, dtype::BFloat16())
  701. .set_dtype(1, dtype::Float32())
  702. .set_dtype(2, dtype::BFloat16())
  703. .set_dtype(3, dtype::Float32());
  704. for (int i = 0; i < 1; ++i) {
  705. for (auto bmode :
  706. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  707. WarpPerspective::BorderMode::REPLICATE,
  708. WarpPerspective::BorderMode::CONSTANT}) {
  709. WarpPerspective::Param param;
  710. param.border_val = 0.3f;
  711. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  712. param.bmode = bmode;
  713. checker.set_param(param);
  714. checker.execs({{10, 3, 11, 12}, {10, 3, 3}, {10, 3, 10, 11}, {10, 3, 3}});
  715. }
  716. }
  717. }
  718. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  719. warp_perspective::run_mat_idx_test(handle_cuda());
  720. }
  721. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64_QINT4) {
  722. using Param = WarpPerspective::Param;
  723. WarpPerspective::Param param;
  724. Checker<WarpPerspectiveForward> checker(handle_cuda());
  725. WarpPerspectiveMatRNG_V2 rng;
  726. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  727. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  728. for (auto bmode :
  729. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  730. WarpPerspective::BorderMode::REPLICATE,
  731. WarpPerspective::BorderMode::CONSTANT}) {
  732. param.border_val = 0.3f;
  733. param.bmode = bmode;
  734. param.imode = Param::InterpolationMode::LINEAR;
  735. param.format = Param::Format::NCHW64;
  736. checker.set_param(param);
  737. checker.set_epsilon(1 + 1e-3);
  738. rng.set_hw(10, 11);
  739. checker.set_rng(1, &rng);
  740. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  741. checker.execs({{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  742. checker.execs({{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  743. rng.set_hw(25, 25);
  744. checker.set_rng(1, &rng);
  745. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  746. rng.set_hw(25, 510);
  747. checker.set_rng(1, &rng);
  748. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  749. rng.set_hw(25, 25);
  750. checker.set_rng(1, &rng);
  751. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  752. rng.set_hw(51, 51);
  753. checker.set_rng(1, &rng);
  754. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  755. }
  756. {
  757. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  758. constexpr int N_SRC = 5;
  759. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  760. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  761. checker.set_rng(1, &rng);
  762. checker.set_dtype(2, dtype::Int32());
  763. checker.set_rng(2, &mat_idx_rng);
  764. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  765. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  766. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  767. checker.set_param(param);
  768. checker.set_epsilon(1 + 1e-3);
  769. rng.set_hw(10, 11);
  770. checker.set_rng(1, &rng);
  771. checker.execs({{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  772. rng.set_hw(17, 13);
  773. checker.set_rng(1, &rng);
  774. checker.execs(
  775. {{N_SRC, 14, 17, 13, 64}, {123, 3, 3}, {123}, {123, 14, 16, 15, 64}});
  776. }
  777. }
  778. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64_QUINT4) {
  779. using Param = WarpPerspective::Param;
  780. WarpPerspective::Param param;
  781. Checker<WarpPerspectiveForward> checker(handle_cuda());
  782. WarpPerspectiveMatRNG_V2 rng;
  783. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  784. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  785. for (auto bmode :
  786. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  787. WarpPerspective::BorderMode::REPLICATE,
  788. WarpPerspective::BorderMode::CONSTANT}) {
  789. param.border_val = 0.3f;
  790. param.bmode = bmode;
  791. param.imode = Param::InterpolationMode::LINEAR;
  792. param.format = Param::Format::NCHW64;
  793. checker.set_param(param);
  794. checker.set_epsilon(1 + 1e-3);
  795. rng.set_hw(10, 11);
  796. checker.set_rng(1, &rng);
  797. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  798. checker.execs({{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  799. checker.execs({{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  800. rng.set_hw(25, 25);
  801. checker.set_rng(1, &rng);
  802. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  803. rng.set_hw(25, 510);
  804. checker.set_rng(1, &rng);
  805. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  806. rng.set_hw(25, 25);
  807. checker.set_rng(1, &rng);
  808. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  809. rng.set_hw(51, 51);
  810. checker.set_rng(1, &rng);
  811. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  812. }
  813. {
  814. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  815. constexpr int N_SRC = 5;
  816. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  817. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  818. checker.set_rng(1, &rng);
  819. checker.set_dtype(2, dtype::Int32());
  820. checker.set_rng(2, &mat_idx_rng);
  821. checker.set_dtype(3, dtype::Quantized4Asymm(0.1f, 3));
  822. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  823. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  824. checker.set_param(param);
  825. checker.set_epsilon(1 + 1e-3);
  826. rng.set_hw(10, 11);
  827. checker.set_rng(1, &rng);
  828. checker.execs({{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  829. rng.set_hw(17, 13);
  830. checker.set_rng(1, &rng);
  831. checker.execs(
  832. {{N_SRC, 14, 17, 13, 64}, {123, 3, 3}, {123}, {123, 14, 16, 15, 64}});
  833. }
  834. }
  835. TEST_F(CUDA, WARP_PERSPECTIVE_MULTI_SRC_NCHW) {
  836. using Param = WarpPerspective::Param;
  837. Param param;
  838. WarpPerspectiveMatRNG rng;
  839. for (auto bmode :
  840. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  841. WarpPerspective::BorderMode::REPLICATE,
  842. WarpPerspective::BorderMode::CONSTANT}) {
  843. param.border_val = 0.3f;
  844. param.bmode = bmode;
  845. param.imode = Param::InterpolationMode::LINEAR;
  846. param.format = Param::Format::NCHW;
  847. auto run = [&param, &rng, this](
  848. size_t bs, size_t ih, size_t iw, size_t c, size_t oh,
  849. size_t ow, DType dtype) {
  850. Checker<WarpPerspectiveForward, WarpPerspectiveMultiSrcProxy> checker(
  851. handle_cuda());
  852. checker.set_param(param);
  853. TensorShapeArray shapes;
  854. // src
  855. for (size_t i = 0; i < bs; i++) {
  856. shapes.emplace_back(TensorShape{{1, c, ih, iw}});
  857. checker.set_dtype(i, dtype);
  858. }
  859. // mat
  860. shapes.emplace_back(TensorShape{{bs, 3, 3}});
  861. checker.set_rng(bs, &rng);
  862. // dst
  863. shapes.emplace_back(TensorShape{{bs, c, oh, ow}});
  864. checker.set_dtype(bs + 1, dtype);
  865. checker.execs(shapes);
  866. };
  867. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) {
  868. run(1, 20, 18, 4, 6, 6, dtype);
  869. run(2, 100, 110, 10, 50, 50, dtype);
  870. run(20, 10, 11, 123, 15, 16, dtype);
  871. run(2200, 10, 11, 3, 11, 12, dtype);
  872. }
  873. }
  874. }
  875. TEST_F(CUDA, WARP_PERSPECTIVE_MULTI_SRC_NHWC) {
  876. using Param = WarpPerspective::Param;
  877. Param param;
  878. WarpPerspectiveMatRNG rng;
  879. for (auto bmode :
  880. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  881. WarpPerspective::BorderMode::REPLICATE,
  882. WarpPerspective::BorderMode::CONSTANT}) {
  883. param.border_val = 0.3f;
  884. param.bmode = bmode;
  885. param.imode = Param::InterpolationMode::LINEAR;
  886. param.format = Param::Format::NHWC;
  887. auto run = [&param, &rng, this](
  888. size_t bs, size_t ih, size_t iw, size_t c, size_t oh,
  889. size_t ow, DType dtype) {
  890. Checker<WarpPerspectiveForward, WarpPerspectiveMultiSrcProxy> checker(
  891. handle_cuda());
  892. checker.set_param(param);
  893. TensorShapeArray shapes;
  894. // src
  895. for (size_t i = 0; i < bs; i++) {
  896. shapes.emplace_back(TensorShape{{1, ih, iw, c}});
  897. checker.set_dtype(i, dtype);
  898. }
  899. // mat
  900. shapes.emplace_back(TensorShape{{bs, 3, 3}});
  901. checker.set_rng(bs, &rng);
  902. // dst
  903. shapes.emplace_back(TensorShape{{bs, oh, ow, c}});
  904. checker.set_dtype(bs + 1, dtype);
  905. checker.execs(shapes);
  906. };
  907. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) {
  908. run(1, 20, 18, 4, 6, 6, dtype);
  909. run(2, 100, 110, 10, 50, 50, dtype);
  910. run(20, 10, 11, 123, 15, 16, dtype);
  911. run(2200, 10, 11, 3, 11, 12, dtype);
  912. }
  913. }
  914. }
  915. TEST_F(CUDA, WARP_PERSPECTIVE_MULTI_SRC_WITH_IDX_NCHW) {
  916. using Param = WarpPerspective::Param;
  917. Param param;
  918. WarpPerspectiveMatRNG rng;
  919. UniformIntRNG idx_rng{0, 0};
  920. for (auto bmode :
  921. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  922. WarpPerspective::BorderMode::REPLICATE,
  923. WarpPerspective::BorderMode::CONSTANT}) {
  924. param.border_val = 0.3f;
  925. param.bmode = bmode;
  926. param.imode = Param::InterpolationMode::LINEAR;
  927. param.format = Param::Format::NCHW;
  928. auto run = [&param, &rng, &idx_rng, this](
  929. size_t bs, size_t ih, size_t iw, size_t c, size_t oh,
  930. size_t ow, size_t idx, DType dtype) {
  931. Checker<WarpPerspectiveForward, WarpPerspectiveMultiSrcProxy> checker(
  932. handle_cuda());
  933. checker.set_param(param);
  934. TensorShapeArray shapes;
  935. // src
  936. for (size_t i = 0; i < bs; i++) {
  937. shapes.emplace_back(TensorShape{{1, c, ih, iw}});
  938. checker.set_dtype(i, dtype);
  939. }
  940. // mat
  941. shapes.emplace_back(TensorShape{{idx, 3, 3}});
  942. checker.set_rng(bs, &rng);
  943. // mat_idx
  944. shapes.emplace_back(TensorShape{{idx}});
  945. checker.set_dtype(bs + 1, dtype::Int32());
  946. idx_rng = UniformIntRNG{0, (int)bs - 1};
  947. checker.set_rng(bs + 1, &idx_rng);
  948. // dst
  949. shapes.emplace_back(TensorShape{{idx, c, oh, ow}});
  950. checker.set_dtype(bs + 2, dtype);
  951. checker.execs(shapes);
  952. };
  953. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) {
  954. run(1, 20, 18, 4, 6, 6, 1, dtype);
  955. run(2, 100, 110, 10, 50, 50, 1, dtype);
  956. run(20, 10, 11, 123, 15, 16, 10, dtype);
  957. run(2200, 10, 11, 3, 11, 12, 100, dtype);
  958. }
  959. }
  960. }
  961. TEST_F(CUDA, WARP_PERSPECTIVE_MULTI_SRC_WITH_IDX_NHWC) {
  962. using Param = WarpPerspective::Param;
  963. Param param;
  964. WarpPerspectiveMatRNG rng;
  965. UniformIntRNG idx_rng{0, 0};
  966. for (auto bmode :
  967. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  968. WarpPerspective::BorderMode::REPLICATE,
  969. WarpPerspective::BorderMode::CONSTANT}) {
  970. param.border_val = 0.3f;
  971. param.bmode = bmode;
  972. param.imode = Param::InterpolationMode::LINEAR;
  973. param.format = Param::Format::NHWC;
  974. auto run = [&param, &rng, &idx_rng, this](
  975. size_t bs, size_t ih, size_t iw, size_t c, size_t oh,
  976. size_t ow, size_t idx, DType dtype) {
  977. Checker<WarpPerspectiveForward, WarpPerspectiveMultiSrcProxy> checker(
  978. handle_cuda());
  979. checker.set_param(param);
  980. TensorShapeArray shapes;
  981. // src
  982. for (size_t i = 0; i < bs; i++) {
  983. shapes.emplace_back(TensorShape{{1, ih, iw, c}});
  984. checker.set_dtype(i, dtype);
  985. }
  986. // mat
  987. shapes.emplace_back(TensorShape{{idx, 3, 3}});
  988. checker.set_rng(bs, &rng);
  989. // mat_idx
  990. shapes.emplace_back(TensorShape{{idx}});
  991. checker.set_dtype(bs + 1, dtype::Int32());
  992. idx_rng = UniformIntRNG{0, (int)bs - 1};
  993. checker.set_rng(bs + 1, &idx_rng);
  994. // dst
  995. shapes.emplace_back(TensorShape{{idx, oh, ow, c}});
  996. checker.set_dtype(bs + 2, dtype);
  997. checker.execs(shapes);
  998. };
  999. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()}) {
  1000. run(1, 20, 18, 4, 6, 6, 1, dtype);
  1001. run(2, 100, 110, 10, 50, 50, 1, dtype);
  1002. run(20, 10, 11, 123, 15, 16, 10, dtype);
  1003. run(2200, 10, 11, 3, 11, 12, 100, dtype);
  1004. }
  1005. }
  1006. }
  #if MEGDNN_WITH_BENCHMARK
  // Benchmarks forward WarpPerspective on CUDA for three configurations:
  // NCHW with Int8, NCHW4 with QuantizedS8(1.0f), and NHWC with QuantizedS4(1.f).
  // Each run prints the format, the source and destination shapes, the elapsed
  // time returned by the benchmarker and two derived throughput figures.
  TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
      Benchmarker<WarpPerspective> benchmarker(handle_cuda());
      using Param = param::WarpPerspective;
      WarpPerspectiveMatRNG rng;
      benchmarker.set_rng(1, &rng);
      Param param;

      // Runs one benchmark with the current `param` and prints a report line.
      // shapes[0] is the source, shapes[1] the matrices, shapes[2] the destination.
      auto run = [&benchmarker, &param](const megdnn::TensorShapeArray& shapes) {
          benchmarker.set_param(param);
          auto used = benchmarker.execs(shapes);

          // Name the format actually benchmarked; NHWC runs must not be
          // reported under the NCHW4 label.
          const char* format_name =
                  param.format == Param::Format::NCHW    ? "NCHW"
                  : param.format == Param::Format::NCHW4 ? "NCHW4"
                  : param.format == Param::Format::NHWC  ? "NHWC"
                                                         : "UNKNOWN";

          // Throughput figures are derived from the destination element count;
          // `used` is printed as milliseconds, hence the 1e3 factor.
          printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n", format_name,
                 shapes[0].to_string().c_str(), shapes[2].to_string().c_str(), used,
                 shapes[2].total_nr_elems() * (4.f + 1.f + shapes[1].total_nr_elems()) /
                         (1024 * 1024 * 1024) / used * 1e3,
                 shapes[2].total_nr_elems() * (4.f + 3.f) / (1024 * 1024 * 1024) / used *
                         1e3);
      };

      param.format = Param::Format::NCHW;
      benchmarker.set_dtype(0, dtype::Int8());
      benchmarker.set_dtype(2, dtype::Int8());
      run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
      run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
      run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
      run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});

      param.format = Param::Format::NCHW4;
      benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
      benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
      run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
      run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
      run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
      run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});

      param.format = Param::Format::NHWC;
      benchmarker.set_dtype(0, dtype::QuantizedS4(1.f));
      benchmarker.set_dtype(2, dtype::QuantizedS4(1.f));
      run({TensorShape{1, 256, 256, 4 * 24}, {1, 3, 3}, {1, 256, 5120, 4 * 24}});
      run({TensorShape{1, 256, 5120, 4 * 24}, {1, 3, 3}, {1, 256, 256, 4 * 24}});
      run({TensorShape{1, 256, 256, 4 * 24}, {1, 3, 3}, {1, 512, 512, 4 * 24}});
      run({TensorShape{1, 512, 512, 4 * 24}, {1, 3, 3}, {1, 256, 256, 4 * 24}});
  }
  #endif
  1048. } // namespace test
  1049. } // namespace megdnn
  1050. // vim: syntax=cpp.doxygen