You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 39 kB


  1. /**
  2. * \file dnn/test/cuda/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/opr_proxy.h"
  16. #include "test/common/warp_perspective.h"
  17. #include "test/cuda/utils.h"
  18. namespace {
  19. using namespace megdnn;
  20. using namespace test;
  21. class NanMatRNG : public RNG {
  22. void gen(const TensorND& tensor_) override {
  23. auto& gen = RandomState::generator();
  24. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  25. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  26. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  27. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  28. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  29. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  30. std::uniform_int_distribution<int> dice(0, 5);
  31. float* ptr = tensor_.ptr<dt_float32>();
  32. auto N = tensor_.layout.shape[0];
  33. for (size_t n = 0; n < N; ++n) {
  34. for (size_t i = 0; i < 9; ++i) {
  35. switch (dice(gen)) {
  36. case 0:
  37. ptr[i] = pdist3(gen);
  38. break;
  39. case 1:
  40. ptr[i] = pdist(gen);
  41. break;
  42. case 2:
  43. ptr[i] = pdisth(gen);
  44. break;
  45. case 3:
  46. ptr[i] = ndist(gen);
  47. break;
  48. case 4:
  49. ptr[i] = ndist3(gen);
  50. break;
  51. case 5:
  52. ptr[i] = ndisth(gen);
  53. break;
  54. }
  55. }
  56. ptr[6] = 1;
  57. ptr[7] = -1;
  58. ptr[8] = 5;
  59. ptr += 9;
  60. }
  61. }
  62. };
  63. } // anonymous namespace
  64. namespace megdnn {
  65. namespace test {
  // FIXME test WARP_PERSPECTIVE_CV failed here
  #if 0
  // Disabled test: NHWC warp perspective checked with a local matrix RNG,
  // then with the argument sets from warp_perspective::get_cv_args() for
  // float32 and uint8, and finally with an all-zero matrix.
  TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
      //! format = NHWC
      Checker<WarpPerspective> checker(handle_cuda());
      param::WarpPerspective param;

      // Same die-roll scheme as NanMatRNG, but every odd-indexed group has
      // entries 1, 3, 6 and 7 zeroed ("is resize?").
      class ResizeMatRNG : public RNG {
          void gen(const TensorND& tensor_) override {
              using Dist = std::uniform_real_distribution<dt_float32>;
              auto& engine = RandomState::generator();
              Dist ranges[] = {
                      Dist(1.9f, 3.1f),   Dist(0.9f, 1.1f),   Dist(0.4f, 0.6f),
                      Dist(-1.1f, -0.9f), Dist(-3.1f, -1.9f), Dist(-0.6f, -0.4f)};
              std::uniform_int_distribution<int> dice(0, 5);

              float* mat = tensor_.ptr<dt_float32>();
              const auto batch = tensor_.layout.shape[0];
              for (size_t n = 0; n < batch; ++n, mat += 9) {
                  for (size_t i = 0; i < 9; ++i) {
                      const int face = dice(engine);
                      mat[i] = ranges[face](engine);
                  }
                  // is resize?
                  if (n % 2 == 1) {
                      mat[1] = 0;
                      mat[3] = 0;
                      mat[7] = 0;
                      mat[6] = 0;
                  }
              }
          }
      } rng;
      checker.set_rng(1, &rng);

      using BMode = param::WarpPerspective::BorderMode;
      param.format = param::WarpPerspective::Format::NHWC;
      // naive and cuda uses different algorithms and different border handling
      checker.set_epsilon(2.001).set_max_avg_error(4e-2);
      const BMode all_modes[] = {
              BMode::REFLECT_101, BMode::REPLICATE, BMode::REFLECT, BMode::WRAP,
              BMode::CONSTANT};
      for (const BMode mode : all_modes) {
          param.bmode = mode;
          param.border_val = 1.737;
          checker.set_param(param);
          checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
      }

      auto cv_args = warp_perspective::get_cv_args();
      // float32 pass
      for (auto&& cv_arg : cv_args) {
          checker.set_param(cv_arg.param)
                  .set_dtype(0, dtype::Float32())
                  .set_dtype(1, dtype::Float32())
                  .set_dtype(2, dtype::Float32())
                  .execs({cv_arg.src, cv_arg.trans, cv_arg.dst});
      }
      // uint8 pass with much looser tolerances
      for (auto&& cv_arg : cv_args) {
          checker.set_param(cv_arg.param)
                  .set_epsilon(242.001)
                  .set_max_avg_error(3.0)
                  .set_dtype(0, dtype::Uint8())
                  .set_dtype(1, dtype::Float32())
                  .set_dtype(2, dtype::Uint8())
                  .execs({cv_arg.src, cv_arg.trans, cv_arg.dst});
      }

      // resize nan case
      UniformFloatRNG rng_zero(0, 0);
      checker.set_rng(1, &rng_zero);
      {
          param.bmode = BMode::CONSTANT;
          param.border_val = 1.737;
          checker.set_param(param)
                  .set_dtype(0, dtype::Float32())
                  .set_dtype(1, dtype::Float32())
                  .set_dtype(2, dtype::Float32());
          // no invalid mem access is enough; no need to check value
          checker.set_expect_exec_fail([](){});
          checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
      }
  }
  #endif
  164. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) {
  165. using Param = WarpPerspective::Param;
  166. Checker<WarpPerspectiveForward> checker(handle_cuda());
  167. WarpPerspectiveMatRNG rng;
  168. checker.set_rng(1, &rng);
  169. for (auto bmode :
  170. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  171. WarpPerspective::BorderMode::REPLICATE,
  172. WarpPerspective::BorderMode::CONSTANT}) {
  173. WarpPerspective::Param param;
  174. param.border_val = 0.3f;
  175. param.bmode = bmode;
  176. param.imode = Param::InterpolationMode::LINEAR;
  177. param.format = Param::Format::NHWC;
  178. checker.set_param(param);
  179. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  180. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  181. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  182. checker.set_epsilon(1e-3);
  183. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  184. param.format = Param::Format::NCHW;
  185. checker.set_param(param);
  186. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  187. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  188. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  189. }
  190. // nan case
  191. NanMatRNG rng_nan;
  192. UniformFloatRNG rng_zero(0, 0);
  193. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  194. param::WarpPerspective param;
  195. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  196. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  197. checker.set_rng(1, rng);
  198. param.border_val = 1.737;
  199. checker.set_param(param);
  200. // no invalid mem access is enough; no need to check value
  201. checker.set_expect_exec_fail([]() {});
  202. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  203. }
  204. }
  205. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NHWC) {
  206. using Param = WarpPerspective::Param;
  207. Checker<WarpPerspectiveForward> checker(handle_cuda());
  208. WarpPerspectiveMatRNG_V2 rng;
  209. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  210. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  211. checker.set_rng(1, &rng);
  212. for (auto bmode :
  213. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  214. WarpPerspective::BorderMode::REPLICATE,
  215. WarpPerspective::BorderMode::CONSTANT}) {
  216. WarpPerspective::Param param;
  217. param.border_val = 1.2f;
  218. param.bmode = bmode;
  219. param.imode = Param::InterpolationMode::LINEAR;
  220. param.format = Param::Format::NHWC;
  221. checker.set_param(param);
  222. checker.set_epsilon(1 + 1e-3);
  223. rng.set_hw(10, 11);
  224. checker.execs({{23, 10, 11, 16}, {23, 3, 3}, {23, 11, 12, 16}});
  225. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  226. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  227. rng.set_hw(55, 66);
  228. checker.execs({{20, 55, 66, 32}, {20, 3, 3}, {20, 44, 34, 32}});
  229. }
  230. {
  231. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  232. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  233. checker.set_rng(1, &rng);
  234. for (auto bmode :
  235. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  236. WarpPerspective::BorderMode::REPLICATE,
  237. WarpPerspective::BorderMode::CONSTANT}) {
  238. WarpPerspective::Param param;
  239. param.border_val = 0.3f;
  240. param.bmode = bmode;
  241. param.imode = Param::InterpolationMode::LINEAR;
  242. param.format = Param::Format::NHWC;
  243. checker.set_param(param);
  244. checker.set_epsilon(1 + 1e-3);
  245. rng.set_hw(10, 11);
  246. checker.execs({{23, 10, 11, 16}, {23, 3, 3}, {23, 11, 12, 16}});
  247. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  248. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  249. rng.set_hw(55, 66);
  250. checker.execs({{20, 55, 66, 32}, {20, 3, 3}, {20, 44, 34, 32}});
  251. }
  252. }
  253. {
  254. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  255. constexpr int N_SRC = 5;
  256. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  257. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  258. checker.set_rng(1, &rng);
  259. checker.set_dtype(2, dtype::Int32());
  260. checker.set_rng(2, &mat_idx_rng);
  261. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  262. WarpPerspective::Param param;
  263. param.border_val = 0.3f;
  264. param.format = Param::Format::NHWC;
  265. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  266. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  267. checker.set_param(param);
  268. checker.set_epsilon(1 + 1e-3);
  269. rng.set_hw(10, 11);
  270. checker.set_rng(1, &rng);
  271. checker.execs({{N_SRC, 10, 11, 48}, {2, 3, 3}, {2}, {2, 11, 12, 48}});
  272. rng.set_hw(17, 13);
  273. checker.set_rng(1, &rng);
  274. checker.execs({{N_SRC, 17, 13, 64}, {123, 3, 3}, {123}, {123, 16, 15, 64}});
  275. }
  276. }
  277. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) {
  278. require_compute_capability(6, 0);
  279. using Param = WarpPerspective::Param;
  280. Checker<WarpPerspectiveForward> checker(handle_cuda());
  281. WarpPerspectiveMatRNG rng;
  282. checker.set_rng(1, &rng);
  283. for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) {
  284. WarpPerspective::Param param;
  285. param.border_val = 0.3f;
  286. param.bmode = bmode;
  287. param.imode = Param::InterpolationMode::LINEAR;
  288. param.format = Param::Format::NHWC;
  289. checker.set_param(param);
  290. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  291. size_t n = (INT_MAX) / (512 * 512 * 3);
  292. checker.execs({{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
  293. }
  294. }
  295. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) {
  296. using Param = WarpPerspective::Param;
  297. Checker<WarpPerspectiveForward> checker(handle_cuda());
  298. WarpPerspectiveMatRNG rng;
  299. checker.set_rng(1, &rng);
  300. checker.set_dtype(0, dtype::Float16())
  301. .set_dtype(1, dtype::Float32())
  302. .set_dtype(2, dtype::Float16());
  303. for (auto bmode :
  304. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  305. WarpPerspective::BorderMode::REPLICATE,
  306. WarpPerspective::BorderMode::CONSTANT}) {
  307. WarpPerspective::Param param;
  308. param.border_val = 0.3f;
  309. param.bmode = bmode;
  310. param.imode = Param::InterpolationMode::LINEAR;
  311. param.format = Param::Format::NHWC;
  312. checker.set_param(param);
  313. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  314. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  315. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  316. checker.set_epsilon(1e-3);
  317. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  318. param.format = Param::Format::NCHW;
  319. checker.set_param(param);
  320. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  321. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  322. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  323. }
  324. // nan case
  325. NanMatRNG rng_nan;
  326. UniformFloatRNG rng_zero(0, 0);
  327. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  328. param::WarpPerspective param;
  329. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  330. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  331. checker.set_rng(1, rng);
  332. param.border_val = 1.737;
  333. checker.set_param(param);
  334. // no invalid mem access is enough; no need to check value
  335. checker.set_expect_exec_fail([]() {});
  336. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  337. }
  338. }
  339. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) {
  340. using Param = WarpPerspective::Param;
  341. WarpPerspective::Param param;
  342. Checker<WarpPerspectiveForward> checker(handle_cuda());
  343. WarpPerspectiveMatRNG rng;
  344. checker.set_rng(1, &rng);
  345. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  346. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  347. for (auto bmode :
  348. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  349. WarpPerspective::BorderMode::REPLICATE,
  350. WarpPerspective::BorderMode::CONSTANT}) {
  351. param.border_val = 0.3f;
  352. param.bmode = bmode;
  353. param.imode = Param::InterpolationMode::LINEAR;
  354. param.format = Param::Format::NCHW4;
  355. checker.set_param(param);
  356. checker.set_epsilon(1 + 1e-3);
  357. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  358. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  359. checker.execs({{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  360. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  361. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  362. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  363. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  364. }
  365. {
  366. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  367. constexpr int N_SRC = 5;
  368. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  369. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  370. checker.set_rng(1, &rng);
  371. checker.set_dtype(2, dtype::Int32());
  372. checker.set_rng(2, &mat_idx_rng);
  373. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  374. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  375. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  376. checker.set_param(param);
  377. checker.set_epsilon(1 + 1e-3);
  378. checker.execs({{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  379. checker.execs(
  380. {{N_SRC, 14, 17, 13, 4}, {123, 3, 3}, {123}, {123, 14, 16, 15, 4}});
  381. }
  382. }
  383. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW_NCHW4_IC_SMALL) {
  384. using Param = WarpPerspective::Param;
  385. WarpPerspective::Param param;
  386. Checker<WarpPerspectiveForward> checker(handle_cuda());
  387. WarpPerspectiveMatRNG rng;
  388. param.format = Param::Format::NCHW_NCHW4_IC_SMALL;
  389. checker.set_rng(1, &rng);
  390. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  391. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  392. for (auto bmode :
  393. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  394. WarpPerspective::BorderMode::REPLICATE,
  395. WarpPerspective::BorderMode::CONSTANT}) {
  396. param.border_val = 0.3f;
  397. param.bmode = bmode;
  398. param.imode = Param::InterpolationMode::LINEAR;
  399. checker.set_param(param);
  400. checker.set_epsilon(1 + 1e-3);
  401. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  402. checker.execs({{1, 3, 25, 510}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  403. checker.execs({{1, 3, 25, 25}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  404. checker.execs({{1, 3, 51, 51}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  405. }
  406. {
  407. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  408. constexpr int N_SRC = 5;
  409. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  410. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  411. checker.set_rng(1, &rng);
  412. checker.set_dtype(2, dtype::Int32());
  413. checker.set_rng(2, &mat_idx_rng);
  414. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  415. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  416. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  417. checker.set_param(param);
  418. checker.set_epsilon(1 + 1e-3);
  419. checker.execs({{N_SRC, 3, 10, 11}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  420. checker.execs({{N_SRC, 3, 17, 13}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  421. }
  422. }
  423. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW4_IC_SMALL) {
  424. using Param = WarpPerspective::Param;
  425. WarpPerspective::Param param;
  426. Checker<WarpPerspectiveForward> checker(handle_cuda());
  427. WarpPerspectiveMatRNG rng;
  428. param.format = Param::Format::NHWC_NCHW4_IC_SMALL;
  429. checker.set_rng(1, &rng);
  430. checker.set_dtype(0, dtype::Uint8());
  431. checker.set_dtype(2, dtype::QuantizedS8(1.f));
  432. for (auto bmode :
  433. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  434. WarpPerspective::BorderMode::REPLICATE,
  435. WarpPerspective::BorderMode::CONSTANT}) {
  436. param.border_val = 0.3f;
  437. param.bmode = bmode;
  438. param.imode = Param::InterpolationMode::LINEAR;
  439. checker.set_param(param);
  440. checker.set_epsilon(1 + 1e-3);
  441. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  442. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  443. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  444. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  445. }
  446. {
  447. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  448. constexpr int N_SRC = 5;
  449. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  450. checker.set_dtype(0, dtype::Uint8());
  451. checker.set_rng(1, &rng);
  452. checker.set_dtype(2, dtype::Int32());
  453. checker.set_rng(2, &mat_idx_rng);
  454. checker.set_dtype(3, dtype::QuantizedS8(1.f));
  455. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  456. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  457. checker.set_param(param);
  458. checker.set_epsilon(1 + 1e-3);
  459. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  460. checker.execs({{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  461. }
  462. }
  463. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW) {
  464. using Param = WarpPerspective::Param;
  465. WarpPerspective::Param param;
  466. Checker<WarpPerspectiveForward> checker(handle_cuda());
  467. WarpPerspectiveMatRNG rng;
  468. param.format = Param::Format::NHWC_NCHW;
  469. checker.set_rng(1, &rng);
  470. checker.set_dtype(0, dtype::Uint8());
  471. checker.set_dtype(2, dtype::Float32());
  472. for (auto bmode :
  473. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  474. WarpPerspective::BorderMode::REPLICATE,
  475. WarpPerspective::BorderMode::CONSTANT}) {
  476. param.border_val = 0.3f;
  477. param.bmode = bmode;
  478. param.imode = Param::InterpolationMode::LINEAR;
  479. checker.set_param(param);
  480. checker.set_epsilon(1 + 1e-3);
  481. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 3, 11, 12}});
  482. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  483. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 3, 51, 51}});
  484. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  485. }
  486. {
  487. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  488. constexpr int N_SRC = 5;
  489. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  490. checker.set_dtype(0, dtype::Uint8());
  491. checker.set_rng(1, &rng);
  492. checker.set_dtype(2, dtype::Int32());
  493. checker.set_rng(2, &mat_idx_rng);
  494. checker.set_dtype(3, dtype::Float32());
  495. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  496. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  497. checker.set_param(param);
  498. checker.set_epsilon(1 + 1e-3);
  499. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 3, 11, 12}});
  500. checker.execs({{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 3, 16, 15}});
  501. }
  502. }
  503. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  504. warp_perspective::run_int8_test(handle_cuda());
  505. }
  506. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) {
  507. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  508. WarpPerspectiveMatRNG rng;
  509. checker.set_rng(0, &rng);
  510. for (int i = 0; i < 1; ++i) {
  511. for (auto bmode :
  512. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  513. WarpPerspective::BorderMode::REPLICATE,
  514. WarpPerspective::BorderMode::CONSTANT}) {
  515. WarpPerspective::Param param;
  516. param.border_val = 0.3f;
  517. param.bmode = bmode;
  518. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  519. checker.set_param(param);
  520. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  521. checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  522. }
  523. }
  524. // nan case
  525. NanMatRNG rng_nan;
  526. UniformFloatRNG rng_zero(0, 0);
  527. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  528. param::WarpPerspective param;
  529. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  530. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  531. checker.set_rng(0, rng);
  532. param.border_val = 1.737;
  533. checker.set_param(param);
  534. // no invalid mem access is enough; no need to check value
  535. checker.set_expect_exec_fail([]() {});
  536. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  537. }
  538. {
  539. Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy> checker(
  540. handle_cuda());
  541. constexpr int N_SRC = 5;
  542. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  543. checker.set_rng(0, &rng);
  544. checker.set_dtype(1, dtype::Int32());
  545. checker.set_rng(1, &mat_idx_rng);
  546. param::WarpPerspective param;
  547. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  548. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  549. checker.set_param(param);
  550. checker.set_epsilon(1 + 1e-3);
  551. checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});
  552. checker.execs({{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}});
  553. }
  554. }
  555. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) {
  556. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  557. WarpPerspectiveMatRNG rng;
  558. checker.set_rng(1, &rng);
  559. for (int i = 0; i < 1; ++i) {
  560. for (auto bmode :
  561. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  562. WarpPerspective::BorderMode::REPLICATE,
  563. WarpPerspective::BorderMode::CONSTANT}) {
  564. WarpPerspective::Param param;
  565. param.border_val = 0.3f;
  566. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  567. param.bmode = bmode;
  568. checker.set_param(param);
  569. checker.set_epsilon(1e-2);
  570. checker.execs(
  571. {{1000, 3, 11, 12}, {1000, 3, 3}, {1000, 3, 10, 11}, {1000, 3, 3}});
  572. }
  573. }
  574. // nan case
  575. NanMatRNG rng_nan;
  576. UniformFloatRNG rng_zero(0, 0);
  577. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  578. param::WarpPerspective param;
  579. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  580. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  581. checker.set_rng(1, rng);
  582. param.border_val = 1.737;
  583. checker.set_param(param);
  584. // no invalid mem access is enough; no need to check value
  585. checker.set_expect_exec_fail([]() {});
  586. checker.exec(
  587. {{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}, {1000, 3, 3}});
  588. }
  589. {
  590. Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker(
  591. handle_cuda());
  592. constexpr int N_SRC = 5;
  593. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  594. checker.set_rng(1, &rng);
  595. checker.set_dtype(2, dtype::Int32());
  596. checker.set_rng(2, &mat_idx_rng);
  597. param::WarpPerspective param;
  598. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  599. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  600. checker.set_param(param);
  601. checker.set_epsilon(1 + 1e-3);
  602. checker.execs(
  603. {{N_SRC, 12, 10, 11}, {2, 3, 3}, {2}, {2, 12, 11, 12}, {2, 3, 3}});
  604. checker.execs(
  605. {{N_SRC, 56, 17, 13},
  606. {123, 3, 3},
  607. {123},
  608. {123, 56, 16, 15},
  609. {123, 3, 3}});
  610. }
  611. }
  612. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) {
  613. using Param = WarpPerspective::Param;
  614. Checker<WarpPerspectiveForward> checker(handle_cuda());
  615. WarpPerspectiveMatRNG rng;
  616. checker.set_rng(1, &rng);
  617. checker.set_dtype(0, dtype::BFloat16())
  618. .set_dtype(1, dtype::Float32())
  619. .set_dtype(2, dtype::BFloat16());
  620. for (auto bmode :
  621. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  622. WarpPerspective::BorderMode::REPLICATE,
  623. WarpPerspective::BorderMode::CONSTANT}) {
  624. WarpPerspective::Param param;
  625. param.border_val = 0.3f;
  626. param.bmode = bmode;
  627. param.imode = Param::InterpolationMode::LINEAR;
  628. param.format = Param::Format::NHWC;
  629. checker.set_param(param);
  630. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  631. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  632. param.format = Param::Format::NCHW;
  633. checker.set_param(param);
  634. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  635. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  636. }
  637. }
  638. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QINT4) {
  639. using Param = WarpPerspective::Param;
  640. Checker<WarpPerspectiveForward> checker(handle_cuda());
  641. WarpPerspectiveMatRNG rng;
  642. checker.set_rng(1, &rng);
  643. checker.set_dtype(0, dtype::QuantizedS4(1.25f))
  644. .set_dtype(1, dtype::Float32())
  645. .set_dtype(2, dtype::QuantizedS4(1.25f));
  646. for (auto bmode :
  647. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  648. WarpPerspective::BorderMode::REPLICATE,
  649. WarpPerspective::BorderMode::CONSTANT}) {
  650. WarpPerspective::Param param;
  651. param.border_val = 0.3f;
  652. param.bmode = bmode;
  653. param.imode = Param::InterpolationMode::LINEAR;
  654. param.format = Param::Format::NCHW;
  655. checker.set_param(param);
  656. checker.set_epsilon(1 + 1e-3);
  657. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  658. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  659. }
  660. }
  661. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QUINT4) {
  662. using Param = WarpPerspective::Param;
  663. Checker<WarpPerspectiveForward> checker(handle_cuda());
  664. WarpPerspectiveMatRNG rng;
  665. checker.set_rng(1, &rng);
  666. checker.set_dtype(0, dtype::Quantized4Asymm(1.25f, 0))
  667. .set_dtype(1, dtype::Float32())
  668. .set_dtype(2, dtype::Quantized4Asymm(1.25f, 0));
  669. for (auto bmode :
  670. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  671. WarpPerspective::BorderMode::REPLICATE,
  672. WarpPerspective::BorderMode::CONSTANT}) {
  673. WarpPerspective::Param param;
  674. param.border_val = 0.3f;
  675. param.bmode = bmode;
  676. param.imode = Param::InterpolationMode::LINEAR;
  677. param.format = Param::Format::NCHW;
  678. checker.set_param(param);
  679. checker.set_epsilon(1 + 1e-3);
  680. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  681. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  682. }
  683. }
  684. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
  685. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  686. WarpPerspectiveMatRNG rng;
  687. checker.set_rng(0, &rng)
  688. .set_epsilon(1e-1)
  689. .set_dtype(0, dtype::Float32())
  690. .set_dtype(1, dtype::BFloat16())
  691. .set_dtype(2, dtype::BFloat16());
  692. for (int i = 0; i < 1; ++i) {
  693. for (auto bmode :
  694. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  695. WarpPerspective::BorderMode::REPLICATE,
  696. WarpPerspective::BorderMode::CONSTANT}) {
  697. WarpPerspective::Param param;
  698. param.border_val = 0.3f;
  699. param.bmode = bmode;
  700. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  701. checker.set_param(param);
  702. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  703. }
  704. }
  705. }
  706. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
  707. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  708. WarpPerspectiveMatRNG rng;
  709. checker.set_rng(1, &rng)
  710. .set_epsilon(1e-2)
  711. .set_dtype(0, dtype::BFloat16())
  712. .set_dtype(1, dtype::Float32())
  713. .set_dtype(2, dtype::BFloat16())
  714. .set_dtype(3, dtype::Float32());
  715. for (int i = 0; i < 1; ++i) {
  716. for (auto bmode :
  717. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  718. WarpPerspective::BorderMode::REPLICATE,
  719. WarpPerspective::BorderMode::CONSTANT}) {
  720. WarpPerspective::Param param;
  721. param.border_val = 0.3f;
  722. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  723. param.bmode = bmode;
  724. checker.set_param(param);
  725. checker.execs({{10, 3, 11, 12}, {10, 3, 3}, {10, 3, 10, 11}, {10, 3, 3}});
  726. }
  727. }
  728. }
  729. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  730. warp_perspective::run_mat_idx_test(handle_cuda());
  731. }
  732. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64_QINT4) {
  733. using Param = WarpPerspective::Param;
  734. WarpPerspective::Param param;
  735. Checker<WarpPerspectiveForward> checker(handle_cuda());
  736. WarpPerspectiveMatRNG_V2 rng;
  737. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  738. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  739. for (auto bmode :
  740. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  741. WarpPerspective::BorderMode::REPLICATE,
  742. WarpPerspective::BorderMode::CONSTANT}) {
  743. param.border_val = 0.3f;
  744. param.bmode = bmode;
  745. param.imode = Param::InterpolationMode::LINEAR;
  746. param.format = Param::Format::NCHW64;
  747. checker.set_param(param);
  748. checker.set_epsilon(1 + 1e-3);
  749. rng.set_hw(10, 11);
  750. checker.set_rng(1, &rng);
  751. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  752. checker.execs({{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  753. checker.execs({{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  754. rng.set_hw(25, 25);
  755. checker.set_rng(1, &rng);
  756. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  757. rng.set_hw(25, 510);
  758. checker.set_rng(1, &rng);
  759. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  760. rng.set_hw(25, 25);
  761. checker.set_rng(1, &rng);
  762. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  763. rng.set_hw(51, 51);
  764. checker.set_rng(1, &rng);
  765. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  766. }
  767. {
  768. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  769. constexpr int N_SRC = 5;
  770. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  771. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  772. checker.set_rng(1, &rng);
  773. checker.set_dtype(2, dtype::Int32());
  774. checker.set_rng(2, &mat_idx_rng);
  775. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  776. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  777. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  778. checker.set_param(param);
  779. checker.set_epsilon(1 + 1e-3);
  780. rng.set_hw(10, 11);
  781. checker.set_rng(1, &rng);
  782. checker.execs({{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  783. rng.set_hw(17, 13);
  784. checker.set_rng(1, &rng);
  785. checker.execs(
  786. {{N_SRC, 14, 17, 13, 64}, {123, 3, 3}, {123}, {123, 14, 16, 15, 64}});
  787. }
  788. }
  789. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64_QUINT4) {
  790. using Param = WarpPerspective::Param;
  791. WarpPerspective::Param param;
  792. Checker<WarpPerspectiveForward> checker(handle_cuda());
  793. WarpPerspectiveMatRNG_V2 rng;
  794. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  795. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  796. for (auto bmode :
  797. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  798. WarpPerspective::BorderMode::REPLICATE,
  799. WarpPerspective::BorderMode::CONSTANT}) {
  800. param.border_val = 0.3f;
  801. param.bmode = bmode;
  802. param.imode = Param::InterpolationMode::LINEAR;
  803. param.format = Param::Format::NCHW64;
  804. checker.set_param(param);
  805. checker.set_epsilon(1 + 1e-3);
  806. rng.set_hw(10, 11);
  807. checker.set_rng(1, &rng);
  808. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  809. checker.execs({{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  810. checker.execs({{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  811. rng.set_hw(25, 25);
  812. checker.set_rng(1, &rng);
  813. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  814. rng.set_hw(25, 510);
  815. checker.set_rng(1, &rng);
  816. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  817. rng.set_hw(25, 25);
  818. checker.set_rng(1, &rng);
  819. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  820. rng.set_hw(51, 51);
  821. checker.set_rng(1, &rng);
  822. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  823. }
  824. {
  825. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(handle_cuda());
  826. constexpr int N_SRC = 5;
  827. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  828. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  829. checker.set_rng(1, &rng);
  830. checker.set_dtype(2, dtype::Int32());
  831. checker.set_rng(2, &mat_idx_rng);
  832. checker.set_dtype(3, dtype::Quantized4Asymm(0.1f, 3));
  833. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  834. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  835. checker.set_param(param);
  836. checker.set_epsilon(1 + 1e-3);
  837. rng.set_hw(10, 11);
  838. checker.set_rng(1, &rng);
  839. checker.execs({{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  840. rng.set_hw(17, 13);
  841. checker.set_rng(1, &rng);
  842. checker.execs(
  843. {{N_SRC, 14, 17, 13, 64}, {123, 3, 3}, {123}, {123, 14, 16, 15, 64}});
  844. }
  845. }
  846. #if MEGDNN_WITH_BENCHMARK
  847. TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
  848. Benchmarker<WarpPerspective> benchmarker(handle_cuda());
  849. using Param = param::WarpPerspective;
  850. WarpPerspectiveMatRNG rng;
  851. benchmarker.set_rng(1, &rng);
  852. Param param;
  853. auto run = [&benchmarker, &param](const megdnn::TensorShapeArray& shapes) {
  854. benchmarker.set_param(param);
  855. auto used = benchmarker.execs(shapes);
  856. printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n",
  857. param.format == Param::Format::NCHW ? "NCHW" : "NCHW4",
  858. shapes[0].to_string().c_str(), shapes[2].to_string().c_str(), used,
  859. shapes[2].total_nr_elems() * (4.f + 1.f + shapes[1].total_nr_elems()) /
  860. (1024 * 1024 * 1024) / used * 1e3,
  861. shapes[2].total_nr_elems() * (4.f + 3.f) / (1024 * 1024 * 1024) / used *
  862. 1e3);
  863. };
  864. param.format = Param::Format::NCHW;
  865. benchmarker.set_dtype(0, dtype::Int8());
  866. benchmarker.set_dtype(2, dtype::Int8());
  867. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
  868. run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
  869. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
  870. run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});
  871. param.format = Param::Format::NCHW4;
  872. benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
  873. benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
  874. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
  875. run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  876. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
  877. run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  878. param.format = Param::Format::NHWC;
  879. benchmarker.set_dtype(0, dtype::QuantizedS4(1.f));
  880. benchmarker.set_dtype(2, dtype::QuantizedS4(1.f));
  881. run({TensorShape{1, 256, 256, 4 * 24}, {1, 3, 3}, {1, 256, 5120, 4 * 24}});
  882. run({TensorShape{1, 256, 5120, 4 * 24}, {1, 3, 3}, {1, 256, 256, 4 * 24}});
  883. run({TensorShape{1, 256, 256, 4 * 24}, {1, 3, 3}, {1, 512, 512, 4 * 24}});
  884. run({TensorShape{1, 512, 512, 4 * 24}, {1, 3, 3}, {1, 256, 256, 4 * 24}});
  885. }
  886. #endif
  887. } // namespace test
  888. } // namespace megdnn
  889. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境，不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序，请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉，欢迎访问 MegStudio 平台。