You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_perspective.cpp 41 kB


  1. /**
  2. * \file dnn/test/cuda/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "test/cuda/fixture.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/warp_perspective.h"
  16. #include "test/common/opr_proxy.h"
  17. #include "test/cuda/utils.h"
  18. namespace {
  19. using namespace megdnn;
  20. using namespace test;
  21. class NanMatRNG : public RNG {
  22. void gen(const TensorND& tensor_) override {
  23. auto& gen = RandomState::generator();
  24. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  25. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  26. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  27. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  28. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  29. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  30. std::uniform_int_distribution<int> dice(0, 5);
  31. float* ptr = tensor_.ptr<dt_float32>();
  32. auto N = tensor_.layout.shape[0];
  33. for (size_t n = 0; n < N; ++n) {
  34. for (size_t i = 0; i < 9; ++i) {
  35. switch (dice(gen)) {
  36. case 0:
  37. ptr[i] = pdist3(gen);
  38. break;
  39. case 1:
  40. ptr[i] = pdist(gen);
  41. break;
  42. case 2:
  43. ptr[i] = pdisth(gen);
  44. break;
  45. case 3:
  46. ptr[i] = ndist(gen);
  47. break;
  48. case 4:
  49. ptr[i] = ndist3(gen);
  50. break;
  51. case 5:
  52. ptr[i] = ndisth(gen);
  53. break;
  54. }
  55. }
  56. ptr[6] = 1;
  57. ptr[7] = -1;
  58. ptr[8] = 5;
  59. ptr += 9;
  60. }
  61. }
  62. };
  63. } // anonymous namespace
  64. namespace megdnn {
  65. namespace test {
// FIXME: the WARP_PERSPECTIVE_CV test fails here, so the whole test is
// compiled out with `#if 0` until the failure is investigated.
#if 0
TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
    //! format = NHWC
    Checker<WarpPerspective> checker(handle_cuda());
    param::WarpPerspective param;
    // Local generator for the {N, 3, 3} matrix input: nine entries per matrix
    // drawn from six narrow uniform ranges; every odd-indexed matrix then has
    // entries 1, 3, 6 and 7 zeroed.
    class ResizeMatRNG: public RNG {
        void gen(const TensorND &tensor_) override
        {
            auto &gen = RandomState::generator();
            std::uniform_real_distribution<dt_float32> pdist3(1.9f, 3.1f);
            std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
            std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
            std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
            std::uniform_real_distribution<dt_float32> ndist3(-3.1f, -1.9f);
            std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
            std::uniform_int_distribution<int> dice(0, 5);
            float *ptr = tensor_.ptr<dt_float32>();
            auto N = tensor_.layout.shape[0];
            for (size_t n = 0; n < N; ++n) {
                for (size_t i = 0; i < 9; ++i) {
                    switch (dice(gen)) {
                        case 0:
                            ptr[i] = pdist3(gen);
                            break;
                        case 1:
                            ptr[i] = pdist(gen);
                            break;
                        case 2:
                            ptr[i] = pdisth(gen);
                            break;
                        case 3:
                            ptr[i] = ndist(gen);
                            break;
                        case 4:
                            ptr[i] = ndist3(gen);
                            break;
                        case 5:
                            ptr[i] = ndisth(gen);
                            break;
                    }
                }
                // Every odd-indexed matrix is turned into a "resize" matrix
                // by zeroing entries 1, 3, 6 and 7.
                if (n & 1) {
                    ptr[1] = 0;
                    ptr[3] = 0;
                    ptr[6] = ptr[7] = 0;
                }
                ptr += 9;
            }
        }
    } rng;
    checker.set_rng(1, &rng);
    using BMode = param::WarpPerspective::BorderMode;
    param.format = param::WarpPerspective::Format::NHWC;
    // The naive and CUDA implementations use different algorithms and
    // different border handling, hence the loose tolerances.
    checker.set_epsilon(2.001).set_max_avg_error(4e-2);
    for (auto mode: {BMode::REFLECT_101, BMode::REPLICATE, BMode::REFLECT,
                     BMode::WRAP, BMode::CONSTANT})
    {
        param.bmode = mode;
        param.border_val = 1.737;
        checker.set_param(param);
        checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
    }
    // Shared argument list from the common test helpers, run once as Float32
    // and once as Uint8 (the latter with much looser tolerances).
    auto args = warp_perspective::get_cv_args();
    for (auto &&arg : args) {
        checker.set_param(arg.param)
            .set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Float32())
            .execs({arg.src, arg.trans, arg.dst});
    }
    for (auto &&arg : args) {
        checker.set_param(arg.param)
            .set_epsilon(242.001)
            .set_max_avg_error(3.0)
            .set_dtype(0, dtype::Uint8())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Uint8())
            .execs({arg.src, arg.trans, arg.dst});
    }
    // Resize NaN case: the matrix input is filled with zeros.
    UniformFloatRNG rng_zero(0, 0);
    checker.set_rng(1, &rng_zero);
    {
        param.bmode = BMode::CONSTANT;
        param.border_val = 1.737;
        checker.set_param(param)
            .set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Float32());
        // no invalid mem access is enough; no need to check value
        checker.set_expect_exec_fail([](){});
        checker.exec({{1000, 2, 10, 3}, {1000, 3, 3}, {1000, 2, 12, 3}});
    }
}
#endif
  164. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) {
  165. using Param = WarpPerspective::Param;
  166. Checker<WarpPerspectiveForward> checker(handle_cuda());
  167. WarpPerspectiveMatRNG rng;
  168. checker.set_rng(1, &rng);
  169. for (auto bmode : {
  170. WarpPerspective::BorderMode::WRAP,
  171. WarpPerspective::BorderMode::REFLECT,
  172. WarpPerspective::BorderMode::REPLICATE,
  173. WarpPerspective::BorderMode::CONSTANT
  174. }) {
  175. WarpPerspective::Param param;
  176. param.border_val = 0.3f;
  177. param.bmode = bmode;
  178. param.imode = Param::InterpolationMode::LINEAR;
  179. param.format = Param::Format::NHWC;
  180. checker.set_param(param);
  181. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  182. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  183. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  184. checker.set_epsilon(1e-3);
  185. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  186. param.format = Param::Format::NCHW;
  187. checker.set_param(param);
  188. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  189. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  190. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  191. }
  192. // nan case
  193. NanMatRNG rng_nan;
  194. UniformFloatRNG rng_zero(0, 0);
  195. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  196. param::WarpPerspective param;
  197. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  198. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  199. checker.set_rng(1, rng);
  200. param.border_val = 1.737;
  201. checker.set_param(param);
  202. // no invalid mem access is enough; no need to check value
  203. checker.set_expect_exec_fail([]() {});
  204. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  205. }
  206. }
  207. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NHWC) {
  208. using Param = WarpPerspective::Param;
  209. Checker<WarpPerspectiveForward> checker(handle_cuda());
  210. WarpPerspectiveMatRNG_V2 rng;
  211. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  212. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  213. checker.set_rng(1, &rng);
  214. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  215. WarpPerspective::BorderMode::REFLECT,
  216. WarpPerspective::BorderMode::REPLICATE,
  217. WarpPerspective::BorderMode::CONSTANT}) {
  218. WarpPerspective::Param param;
  219. param.border_val = 1.2f;
  220. param.bmode = bmode;
  221. param.imode = Param::InterpolationMode::LINEAR;
  222. param.format = Param::Format::NHWC;
  223. checker.set_param(param);
  224. checker.set_epsilon(1 + 1e-3);
  225. rng.set_hw(10, 11);
  226. checker.execs({{23, 10, 11, 16}, {23, 3, 3}, {23, 11, 12, 16}});
  227. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  228. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  229. rng.set_hw(55, 66);
  230. checker.execs({{20, 55, 66, 32}, {20, 3, 3}, {20, 44, 34, 32}});
  231. }
  232. {
  233. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  234. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  235. checker.set_rng(1, &rng);
  236. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  237. WarpPerspective::BorderMode::REFLECT,
  238. WarpPerspective::BorderMode::REPLICATE,
  239. WarpPerspective::BorderMode::CONSTANT}) {
  240. WarpPerspective::Param param;
  241. param.border_val = 0.3f;
  242. param.bmode = bmode;
  243. param.imode = Param::InterpolationMode::LINEAR;
  244. param.format = Param::Format::NHWC;
  245. checker.set_param(param);
  246. checker.set_epsilon(1 + 1e-3);
  247. rng.set_hw(10, 11);
  248. checker.execs({{23, 10, 11, 16}, {23, 3, 3}, {23, 11, 12, 16}});
  249. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  250. checker.execs({{20, 10, 11, 32}, {20, 3, 3}, {20, 11, 12, 32}});
  251. rng.set_hw(55, 66);
  252. checker.execs({{20, 55, 66, 32}, {20, 3, 3}, {20, 44, 34, 32}});
  253. }
  254. }
  255. {
  256. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  257. handle_cuda());
  258. constexpr int N_SRC = 5;
  259. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  260. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  261. checker.set_rng(1, &rng);
  262. checker.set_dtype(2, dtype::Int32());
  263. checker.set_rng(2, &mat_idx_rng);
  264. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  265. WarpPerspective::Param param;
  266. param.border_val = 0.3f;
  267. param.format = Param::Format::NHWC;
  268. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  269. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  270. checker.set_param(param);
  271. checker.set_epsilon(1 + 1e-3);
  272. rng.set_hw(10, 11);
  273. checker.set_rng(1, &rng);
  274. checker.execs({{N_SRC, 10, 11, 48}, {2, 3, 3}, {2}, {2, 11, 12, 48}});
  275. rng.set_hw(17, 13);
  276. checker.set_rng(1, &rng);
  277. checker.execs(
  278. {{N_SRC, 17, 13, 64}, {123, 3, 3}, {123}, {123, 16, 15, 64}});
  279. }
  280. }
  281. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) {
  282. require_compute_capability(6, 0);
  283. using Param = WarpPerspective::Param;
  284. Checker<WarpPerspectiveForward> checker(handle_cuda());
  285. WarpPerspectiveMatRNG rng;
  286. checker.set_rng(1, &rng);
  287. for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) {
  288. WarpPerspective::Param param;
  289. param.border_val = 0.3f;
  290. param.bmode = bmode;
  291. param.imode = Param::InterpolationMode::LINEAR;
  292. param.format = Param::Format::NHWC;
  293. checker.set_param(param);
  294. checker.set_epsilon(0.15).set_max_avg_error(4e-2);
  295. size_t n = (INT_MAX) / (512 * 512 * 3);
  296. checker.execs(
  297. {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
  298. }
  299. }
  300. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) {
  301. using Param = WarpPerspective::Param;
  302. Checker<WarpPerspectiveForward> checker(handle_cuda());
  303. WarpPerspectiveMatRNG rng;
  304. checker.set_rng(1, &rng);
  305. checker.set_dtype(0, dtype::Float16())
  306. .set_dtype(1, dtype::Float32())
  307. .set_dtype(2, dtype::Float16());
  308. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  309. WarpPerspective::BorderMode::REFLECT,
  310. WarpPerspective::BorderMode::REPLICATE,
  311. WarpPerspective::BorderMode::CONSTANT}) {
  312. WarpPerspective::Param param;
  313. param.border_val = 0.3f;
  314. param.bmode = bmode;
  315. param.imode = Param::InterpolationMode::LINEAR;
  316. param.format = Param::Format::NHWC;
  317. checker.set_param(param);
  318. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  319. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  320. checker.execs({{2200, 10, 11, 3}, {2200, 3, 3}, {2200, 11, 12, 3}});
  321. checker.set_epsilon(1e-3);
  322. checker.execs({{20, 10, 11, 123}, {20, 3, 3}, {20, 11, 12, 123}});
  323. param.format = Param::Format::NCHW;
  324. checker.set_param(param);
  325. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  326. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  327. checker.execs({{22000, 3, 10, 11}, {22000, 3, 3}, {22000, 3, 11, 12}});
  328. }
  329. // nan case
  330. NanMatRNG rng_nan;
  331. UniformFloatRNG rng_zero(0, 0);
  332. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  333. param::WarpPerspective param;
  334. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  335. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  336. checker.set_rng(1, rng);
  337. param.border_val = 1.737;
  338. checker.set_param(param);
  339. // no invalid mem access is enough; no need to check value
  340. checker.set_expect_exec_fail([]() {});
  341. checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  342. }
  343. }
  344. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) {
  345. using Param = WarpPerspective::Param;
  346. WarpPerspective::Param param;
  347. Checker<WarpPerspectiveForward> checker(handle_cuda());
  348. WarpPerspectiveMatRNG rng;
  349. checker.set_rng(1, &rng);
  350. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  351. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  352. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  353. WarpPerspective::BorderMode::REFLECT,
  354. WarpPerspective::BorderMode::REPLICATE,
  355. WarpPerspective::BorderMode::CONSTANT}) {
  356. param.border_val = 0.3f;
  357. param.bmode = bmode;
  358. param.imode = Param::InterpolationMode::LINEAR;
  359. param.format = Param::Format::NCHW4;
  360. checker.set_param(param);
  361. checker.set_epsilon(1 + 1e-3);
  362. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  363. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  364. checker.execs(
  365. {{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  366. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 51, 4}});
  367. checker.execs({{1, 1, 25, 510, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  368. checker.execs({{1, 1, 25, 25, 4}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  369. checker.execs({{1, 1, 51, 51, 4}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  370. }
  371. {
  372. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  373. handle_cuda());
  374. constexpr int N_SRC = 5;
  375. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  376. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  377. checker.set_rng(1, &rng);
  378. checker.set_dtype(2, dtype::Int32());
  379. checker.set_rng(2, &mat_idx_rng);
  380. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  381. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  382. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  383. checker.set_param(param);
  384. checker.set_epsilon(1 + 1e-3);
  385. checker.execs(
  386. {{N_SRC, 3, 10, 11, 4}, {2, 3, 3}, {2}, {2, 3, 11, 12, 4}});
  387. checker.execs({{N_SRC, 14, 17, 13, 4},
  388. {123, 3, 3},
  389. {123},
  390. {123, 14, 16, 15, 4}});
  391. }
  392. }
  393. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW_NCHW4_IC_SMALL) {
  394. using Param = WarpPerspective::Param;
  395. WarpPerspective::Param param;
  396. Checker<WarpPerspectiveForward> checker(handle_cuda());
  397. WarpPerspectiveMatRNG rng;
  398. param.format = Param::Format::NCHW_NCHW4_IC_SMALL;
  399. checker.set_rng(1, &rng);
  400. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  401. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  402. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  403. WarpPerspective::BorderMode::REFLECT,
  404. WarpPerspective::BorderMode::REPLICATE,
  405. WarpPerspective::BorderMode::CONSTANT}) {
  406. param.border_val = 0.3f;
  407. param.bmode = bmode;
  408. param.imode = Param::InterpolationMode::LINEAR;
  409. checker.set_param(param);
  410. checker.set_epsilon(1 + 1e-3);
  411. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  412. checker.execs({{1, 3, 25, 510}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  413. checker.execs({{1, 3, 25, 25}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  414. checker.execs({{1, 3, 51, 51}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  415. }
  416. {
  417. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  418. handle_cuda());
  419. constexpr int N_SRC = 5;
  420. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  421. checker.set_dtype(0, dtype::Quantized8Asymm(0.1f, 128));
  422. checker.set_rng(1, &rng);
  423. checker.set_dtype(2, dtype::Int32());
  424. checker.set_rng(2, &mat_idx_rng);
  425. checker.set_dtype(3, dtype::QuantizedS8(0.1f));
  426. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  427. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  428. checker.set_param(param);
  429. checker.set_epsilon(1 + 1e-3);
  430. checker.execs({{N_SRC, 3, 10, 11}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  431. checker.execs(
  432. {{N_SRC, 3, 17, 13}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  433. }
  434. }
  435. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW4_IC_SMALL) {
  436. using Param = WarpPerspective::Param;
  437. WarpPerspective::Param param;
  438. Checker<WarpPerspectiveForward> checker(handle_cuda());
  439. WarpPerspectiveMatRNG rng;
  440. param.format = Param::Format::NHWC_NCHW4_IC_SMALL;
  441. checker.set_rng(1, &rng);
  442. checker.set_dtype(0, dtype::Uint8());
  443. checker.set_dtype(2, dtype::QuantizedS8(1.f));
  444. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  445. WarpPerspective::BorderMode::REFLECT,
  446. WarpPerspective::BorderMode::REPLICATE,
  447. WarpPerspective::BorderMode::CONSTANT}) {
  448. param.border_val = 0.3f;
  449. param.bmode = bmode;
  450. param.imode = Param::InterpolationMode::LINEAR;
  451. checker.set_param(param);
  452. checker.set_epsilon(1 + 1e-3);
  453. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  454. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  455. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 1, 51, 51, 4}});
  456. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 1, 25, 25, 4}});
  457. }
  458. {
  459. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  460. handle_cuda());
  461. constexpr int N_SRC = 5;
  462. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  463. checker.set_dtype(0, dtype::Uint8());
  464. checker.set_rng(1, &rng);
  465. checker.set_dtype(2, dtype::Int32());
  466. checker.set_rng(2, &mat_idx_rng);
  467. checker.set_dtype(3, dtype::QuantizedS8(1.f));
  468. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  469. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  470. checker.set_param(param);
  471. checker.set_epsilon(1 + 1e-3);
  472. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 1, 11, 12, 4}});
  473. checker.execs(
  474. {{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 1, 16, 15, 4}});
  475. }
  476. }
  477. TEST_F(CUDA, WARP_PERSPECTIVE_NHWC_NCHW) {
  478. using Param = WarpPerspective::Param;
  479. WarpPerspective::Param param;
  480. Checker<WarpPerspectiveForward> checker(handle_cuda());
  481. WarpPerspectiveMatRNG rng;
  482. param.format = Param::Format::NHWC_NCHW;
  483. checker.set_rng(1, &rng);
  484. checker.set_dtype(0, dtype::Uint8());
  485. checker.set_dtype(2, dtype::Float32());
  486. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  487. WarpPerspective::BorderMode::REFLECT,
  488. WarpPerspective::BorderMode::REPLICATE,
  489. WarpPerspective::BorderMode::CONSTANT}) {
  490. param.border_val = 0.3f;
  491. param.bmode = bmode;
  492. param.imode = Param::InterpolationMode::LINEAR;
  493. checker.set_param(param);
  494. checker.set_epsilon(1 + 1e-3);
  495. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 3, 11, 12}});
  496. checker.execs({{1, 25, 510, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  497. checker.execs({{1, 25, 25, 3}, {1, 3, 3}, {1, 3, 51, 51}});
  498. checker.execs({{1, 51, 51, 3}, {1, 3, 3}, {1, 3, 25, 25}});
  499. }
  500. {
  501. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  502. handle_cuda());
  503. constexpr int N_SRC = 5;
  504. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  505. checker.set_dtype(0, dtype::Uint8());
  506. checker.set_rng(1, &rng);
  507. checker.set_dtype(2, dtype::Int32());
  508. checker.set_rng(2, &mat_idx_rng);
  509. checker.set_dtype(3, dtype::Float32());
  510. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  511. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  512. checker.set_param(param);
  513. checker.set_epsilon(1 + 1e-3);
  514. checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 3, 11, 12}});
  515. checker.execs(
  516. {{N_SRC, 17, 13, 3}, {123, 3, 3}, {123}, {123, 3, 16, 15}});
  517. }
  518. }
  519. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
  520. warp_perspective::run_int8_test(handle_cuda());
  521. }
  522. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) {
  523. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  524. WarpPerspectiveMatRNG rng;
  525. checker.set_rng(0, &rng);
  526. for (int i = 0; i < 1; ++i) {
  527. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  528. WarpPerspective::BorderMode::REFLECT,
  529. WarpPerspective::BorderMode::REPLICATE,
  530. WarpPerspective::BorderMode::CONSTANT}) {
  531. WarpPerspective::Param param;
  532. param.border_val = 0.3f;
  533. param.bmode = bmode;
  534. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  535. checker.set_param(param);
  536. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  537. checker.execs(
  538. {{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
  539. }
  540. }
  541. // nan case
  542. NanMatRNG rng_nan;
  543. UniformFloatRNG rng_zero(0, 0);
  544. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  545. param::WarpPerspective param;
  546. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  547. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  548. checker.set_rng(0, rng);
  549. param.border_val = 1.737;
  550. checker.set_param(param);
  551. // no invalid mem access is enough; no need to check value
  552. checker.set_expect_exec_fail([]() {});
  553. checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
  554. }
  555. {
  556. Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy>
  557. checker(handle_cuda());
  558. constexpr int N_SRC = 5;
  559. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  560. checker.set_rng(0, &rng);
  561. checker.set_dtype(1, dtype::Int32());
  562. checker.set_rng(1, &mat_idx_rng);
  563. param::WarpPerspective param;
  564. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  565. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  566. checker.set_param(param);
  567. checker.set_epsilon(1 + 1e-3);
  568. checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});
  569. checker.execs(
  570. {{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}});
  571. }
  572. }
  573. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) {
  574. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  575. WarpPerspectiveMatRNG rng;
  576. checker.set_rng(1, &rng);
  577. for (int i = 0; i < 1; ++i) {
  578. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  579. WarpPerspective::BorderMode::REFLECT,
  580. WarpPerspective::BorderMode::REPLICATE,
  581. WarpPerspective::BorderMode::CONSTANT}) {
  582. WarpPerspective::Param param;
  583. param.border_val = 0.3f;
  584. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  585. param.bmode = bmode;
  586. checker.set_param(param);
  587. checker.set_epsilon(1e-2);
  588. checker.execs({{1000, 3, 11, 12},
  589. {1000, 3, 3},
  590. {1000, 3, 10, 11},
  591. {1000, 3, 3}});
  592. }
  593. }
  594. // nan case
  595. NanMatRNG rng_nan;
  596. UniformFloatRNG rng_zero(0, 0);
  597. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  598. param::WarpPerspective param;
  599. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  600. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  601. checker.set_rng(1, rng);
  602. param.border_val = 1.737;
  603. checker.set_param(param);
  604. // no invalid mem access is enough; no need to check value
  605. checker.set_expect_exec_fail([]() {});
  606. checker.exec({{1000, 2, 10, 11},
  607. {1000, 3, 3},
  608. {1000, 2, 12, 13},
  609. {1000, 3, 3}});
  610. }
  611. {
  612. Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker(
  613. handle_cuda());
  614. constexpr int N_SRC = 5;
  615. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  616. checker.set_rng(1, &rng);
  617. checker.set_dtype(2, dtype::Int32());
  618. checker.set_rng(2, &mat_idx_rng);
  619. param::WarpPerspective param;
  620. param.bmode = param::WarpPerspective::BorderMode::REFLECT;
  621. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  622. checker.set_param(param);
  623. checker.set_epsilon(1 + 1e-3);
  624. checker.execs({{N_SRC, 12, 10, 11},
  625. {2, 3, 3},
  626. {2},
  627. {2, 12, 11, 12},
  628. {2, 3, 3}});
  629. checker.execs({{N_SRC, 56, 17, 13},
  630. {123, 3, 3},
  631. {123},
  632. {123, 56, 16, 15},
  633. {123, 3, 3}});
  634. }
  635. }
  636. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) {
  637. using Param = WarpPerspective::Param;
  638. Checker<WarpPerspectiveForward> checker(handle_cuda());
  639. WarpPerspectiveMatRNG rng;
  640. checker.set_rng(1, &rng);
  641. checker.set_dtype(0, dtype::BFloat16())
  642. .set_dtype(1, dtype::Float32())
  643. .set_dtype(2, dtype::BFloat16());
  644. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  645. WarpPerspective::BorderMode::REFLECT,
  646. WarpPerspective::BorderMode::REPLICATE,
  647. WarpPerspective::BorderMode::CONSTANT}) {
  648. WarpPerspective::Param param;
  649. param.border_val = 0.3f;
  650. param.bmode = bmode;
  651. param.imode = Param::InterpolationMode::LINEAR;
  652. param.format = Param::Format::NHWC;
  653. checker.set_param(param);
  654. checker.set_epsilon(2.1).set_max_avg_error(4e-2);
  655. checker.execs({{2, 10, 11, 3}, {2, 3, 3}, {2, 11, 12, 3}});
  656. param.format = Param::Format::NCHW;
  657. checker.set_param(param);
  658. checker.execs({{2, 3, 10, 11}, {2, 3, 3}, {2, 3, 11, 12}});
  659. checker.execs({{20, 3000, 10, 11}, {20, 3, 3}, {20, 3000, 11, 12}});
  660. }
  661. }
  662. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QINT4) {
  663. using Param = WarpPerspective::Param;
  664. Checker<WarpPerspectiveForward> checker(handle_cuda());
  665. WarpPerspectiveMatRNG rng;
  666. checker.set_rng(1, &rng);
  667. checker.set_dtype(0, dtype::QuantizedS4(1.25f))
  668. .set_dtype(1, dtype::Float32())
  669. .set_dtype(2, dtype::QuantizedS4(1.25f));
  670. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  671. WarpPerspective::BorderMode::REFLECT,
  672. WarpPerspective::BorderMode::REPLICATE,
  673. WarpPerspective::BorderMode::CONSTANT}) {
  674. WarpPerspective::Param param;
  675. param.border_val = 0.3f;
  676. param.bmode = bmode;
  677. param.imode = Param::InterpolationMode::LINEAR;
  678. param.format = Param::Format::NCHW;
  679. checker.set_param(param);
  680. checker.set_epsilon(1 + 1e-3);
  681. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  682. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  683. }
  684. }
  685. TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_QUINT4) {
  686. using Param = WarpPerspective::Param;
  687. Checker<WarpPerspectiveForward> checker(handle_cuda());
  688. WarpPerspectiveMatRNG rng;
  689. checker.set_rng(1, &rng);
  690. checker.set_dtype(0, dtype::Quantized4Asymm(1.25f, 0))
  691. .set_dtype(1, dtype::Float32())
  692. .set_dtype(2, dtype::Quantized4Asymm(1.25f, 0));
  693. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  694. WarpPerspective::BorderMode::REFLECT,
  695. WarpPerspective::BorderMode::REPLICATE,
  696. WarpPerspective::BorderMode::CONSTANT}) {
  697. WarpPerspective::Param param;
  698. param.border_val = 0.3f;
  699. param.bmode = bmode;
  700. param.imode = Param::InterpolationMode::LINEAR;
  701. param.format = Param::Format::NCHW;
  702. checker.set_param(param);
  703. checker.set_epsilon(1 + 1e-3);
  704. checker.execs({{1, 64, 11, 11}, {1, 3, 3}, {1, 64, 11, 11}});
  705. checker.execs({{20, 640, 11, 12}, {20, 3, 3}, {20, 640, 11, 12}});
  706. }
  707. }
  708. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
  709. Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
  710. WarpPerspectiveMatRNG rng;
  711. checker.set_rng(0, &rng)
  712. .set_epsilon(1e-1)
  713. .set_dtype(0, dtype::Float32())
  714. .set_dtype(1, dtype::BFloat16())
  715. .set_dtype(2, dtype::BFloat16());
  716. for (int i = 0; i < 1; ++i) {
  717. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  718. WarpPerspective::BorderMode::REFLECT,
  719. WarpPerspective::BorderMode::REPLICATE,
  720. WarpPerspective::BorderMode::CONSTANT}) {
  721. WarpPerspective::Param param;
  722. param.border_val = 0.3f;
  723. param.bmode = bmode;
  724. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  725. checker.set_param(param);
  726. checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
  727. }
  728. }
  729. }
  730. TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
  731. Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
  732. WarpPerspectiveMatRNG rng;
  733. checker.set_rng(1, &rng)
  734. .set_epsilon(1e-2)
  735. .set_dtype(0, dtype::BFloat16())
  736. .set_dtype(1, dtype::Float32())
  737. .set_dtype(2, dtype::BFloat16())
  738. .set_dtype(3, dtype::Float32());
  739. for (int i = 0; i < 1; ++i) {
  740. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  741. WarpPerspective::BorderMode::REFLECT,
  742. WarpPerspective::BorderMode::REPLICATE,
  743. WarpPerspective::BorderMode::CONSTANT}) {
  744. WarpPerspective::Param param;
  745. param.border_val = 0.3f;
  746. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  747. param.bmode = bmode;
  748. checker.set_param(param);
  749. checker.execs({{10, 3, 11, 12},
  750. {10, 3, 3},
  751. {10, 3, 10, 11},
  752. {10, 3, 3}});
  753. }
  754. }
  755. }
  756. TEST_F(CUDA, WARP_PERSPECTIVE_MAT_IDX) {
  757. warp_perspective::run_mat_idx_test(handle_cuda());
  758. }
  759. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64_QINT4) {
  760. using Param = WarpPerspective::Param;
  761. WarpPerspective::Param param;
  762. Checker<WarpPerspectiveForward> checker(handle_cuda());
  763. WarpPerspectiveMatRNG_V2 rng;
  764. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  765. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  766. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  767. WarpPerspective::BorderMode::REFLECT,
  768. WarpPerspective::BorderMode::REPLICATE,
  769. WarpPerspective::BorderMode::CONSTANT}) {
  770. param.border_val = 0.3f;
  771. param.bmode = bmode;
  772. param.imode = Param::InterpolationMode::LINEAR;
  773. param.format = Param::Format::NCHW64;
  774. checker.set_param(param);
  775. checker.set_epsilon(1 + 1e-3);
  776. rng.set_hw(10, 11);
  777. checker.set_rng(1, &rng);
  778. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  779. checker.execs(
  780. {{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  781. checker.execs(
  782. {{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  783. rng.set_hw(25, 25);
  784. checker.set_rng(1, &rng);
  785. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  786. rng.set_hw(25, 510);
  787. checker.set_rng(1, &rng);
  788. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  789. rng.set_hw(25, 25);
  790. checker.set_rng(1, &rng);
  791. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  792. rng.set_hw(51, 51);
  793. checker.set_rng(1, &rng);
  794. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  795. }
  796. {
  797. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  798. handle_cuda());
  799. constexpr int N_SRC = 5;
  800. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  801. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  802. checker.set_rng(1, &rng);
  803. checker.set_dtype(2, dtype::Int32());
  804. checker.set_rng(2, &mat_idx_rng);
  805. checker.set_dtype(3, dtype::QuantizedS4(0.1f));
  806. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  807. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  808. checker.set_param(param);
  809. checker.set_epsilon(1 + 1e-3);
  810. rng.set_hw(10, 11);
  811. checker.set_rng(1, &rng);
  812. checker.execs(
  813. {{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  814. rng.set_hw(17, 13);
  815. checker.set_rng(1, &rng);
  816. checker.execs({{N_SRC, 14, 17, 13, 64},
  817. {123, 3, 3},
  818. {123},
  819. {123, 14, 16, 15, 64}});
  820. }
  821. }
  822. TEST_F(CUDA, WARP_PERSPECTIVE_NCHW64_QUINT4) {
  823. using Param = WarpPerspective::Param;
  824. WarpPerspective::Param param;
  825. Checker<WarpPerspectiveForward> checker(handle_cuda());
  826. WarpPerspectiveMatRNG_V2 rng;
  827. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  828. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  829. for (auto bmode : {WarpPerspective::BorderMode::WRAP,
  830. WarpPerspective::BorderMode::REFLECT,
  831. WarpPerspective::BorderMode::REPLICATE,
  832. WarpPerspective::BorderMode::CONSTANT}) {
  833. param.border_val = 0.3f;
  834. param.bmode = bmode;
  835. param.imode = Param::InterpolationMode::LINEAR;
  836. param.format = Param::Format::NCHW64;
  837. checker.set_param(param);
  838. checker.set_epsilon(1 + 1e-3);
  839. rng.set_hw(10, 11);
  840. checker.set_rng(1, &rng);
  841. checker.execs({{2, 1, 10, 11, 64}, {2, 3, 3}, {2, 1, 11, 12, 64}});
  842. checker.execs(
  843. {{20, 300, 10, 11, 64}, {20, 3, 3}, {20, 300, 11, 12, 64}});
  844. checker.execs(
  845. {{2200, 3, 10, 11, 64}, {2200, 3, 3}, {2200, 3, 11, 12, 64}});
  846. rng.set_hw(25, 25);
  847. checker.set_rng(1, &rng);
  848. checker.execs({{1, 25, 25, 25, 64}, {1, 3, 3}, {1, 25, 25, 51, 64}});
  849. rng.set_hw(25, 510);
  850. checker.set_rng(1, &rng);
  851. checker.execs({{1, 1, 25, 510, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  852. rng.set_hw(25, 25);
  853. checker.set_rng(1, &rng);
  854. checker.execs({{1, 1, 25, 25, 64}, {1, 3, 3}, {1, 1, 51, 51, 64}});
  855. rng.set_hw(51, 51);
  856. checker.set_rng(1, &rng);
  857. checker.execs({{1, 1, 51, 51, 64}, {1, 3, 3}, {1, 1, 25, 25, 64}});
  858. }
  859. {
  860. Checker<WarpPerspective, WarpPerspectiveMatIdxProxy> checker(
  861. handle_cuda());
  862. constexpr int N_SRC = 5;
  863. UniformIntRNG mat_idx_rng{0, N_SRC - 1};
  864. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  865. checker.set_rng(1, &rng);
  866. checker.set_dtype(2, dtype::Int32());
  867. checker.set_rng(2, &mat_idx_rng);
  868. checker.set_dtype(3, dtype::Quantized4Asymm(0.1f, 3));
  869. param.bmode = WarpPerspective::Param::BorderMode::REFLECT;
  870. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  871. checker.set_param(param);
  872. checker.set_epsilon(1 + 1e-3);
  873. rng.set_hw(10, 11);
  874. checker.set_rng(1, &rng);
  875. checker.execs(
  876. {{N_SRC, 3, 10, 11, 64}, {2, 3, 3}, {2}, {2, 3, 11, 12, 64}});
  877. rng.set_hw(17, 13);
  878. checker.set_rng(1, &rng);
  879. checker.execs({{N_SRC, 14, 17, 13, 64},
  880. {123, 3, 3},
  881. {123},
  882. {123, 14, 16, 15, 64}});
  883. }
  884. }
  885. #if MEGDNN_WITH_BENCHMARK
  886. TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
  887. Benchmarker<WarpPerspective> benchmarker(handle_cuda());
  888. using Param = param::WarpPerspective;
  889. WarpPerspectiveMatRNG rng;
  890. benchmarker.set_rng(1, &rng);
  891. Param param;
  892. auto run = [&benchmarker, &param](const megdnn::TensorShapeArray& shapes) {
  893. benchmarker.set_param(param);
  894. auto used = benchmarker.execs(shapes);
  895. printf("format %s, run %s->%s used: %f ms %f GBPS %f Gflops\n",
  896. param.format == Param::Format::NCHW ? "NCHW" : "NCHW4",
  897. shapes[0].to_string().c_str(), shapes[2].to_string().c_str(),
  898. used,
  899. shapes[2].total_nr_elems() *
  900. (4.f + 1.f + shapes[1].total_nr_elems()) /
  901. (1024 * 1024 * 1024) / used * 1e3,
  902. shapes[2].total_nr_elems() * (4.f + 3.f) / (1024 * 1024 * 1024) /
  903. used * 1e3);
  904. };
  905. param.format = Param::Format::NCHW;
  906. benchmarker.set_dtype(0, dtype::Int8());
  907. benchmarker.set_dtype(2, dtype::Int8());
  908. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 256, 5120}});
  909. run({TensorShape{1, 100, 256, 5120}, {1, 3, 3}, {1, 100, 256, 256}});
  910. run({TensorShape{1, 100, 256, 256}, {1, 3, 3}, {1, 100, 512, 512}});
  911. run({TensorShape{1, 100, 512, 512}, {1, 3, 3}, {1, 100, 256, 256}});
  912. param.format = Param::Format::NCHW4;
  913. benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
  914. benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
  915. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
  916. run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  917. run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
  918. run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
  919. param.format = Param::Format::NHWC;
  920. benchmarker.set_dtype(0, dtype::QuantizedS4(1.f));
  921. benchmarker.set_dtype(2, dtype::QuantizedS4(1.f));
  922. run({TensorShape{1, 256, 256, 4 * 24}, {1, 3, 3}, {1, 256, 5120, 4 * 24}});
  923. run({TensorShape{1, 256, 5120, 4 * 24}, {1, 3, 3}, {1, 256, 256, 4 * 24}});
  924. run({TensorShape{1, 256, 256, 4 * 24}, {1, 3, 3}, {1, 512, 512, 4 * 24}});
  925. run({TensorShape{1, 512, 512, 4 * 24}, {1, 3, 3}, {1, 256, 256, 4 * 24}});
  926. }
  927. #endif
  928. } // namespace test
  929. } // namespace megdnn
  930. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台。