You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

warp_perspective.cpp 31 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744
  1. /**
  2. * \file dnn/test/naive/warp_perspective.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/naive/fixture.h"
  12. #include "megdnn/oprs/cv.h"
  13. #include "megdnn/tensor_format.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/checker.h"
  16. #include "test/common/extra_impl_helper.h"
  17. #include "test/common/warp_perspective.h"
  18. using namespace megdnn;
  19. using namespace test;
  20. namespace {
  21. class NanMatRNG : public RNG {
  22. void gen(const TensorND& tensor_) override {
  23. auto& gen = RandomState::generator();
  24. std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
  25. std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
  26. std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);
  27. std::uniform_real_distribution<dt_float32> ndist(-1.1f, -0.9f);
  28. std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
  29. std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
  30. std::uniform_int_distribution<int> dice(0, 5);
  31. float* ptr = tensor_.ptr<dt_float32>();
  32. auto N = tensor_.layout.shape[0];
  33. for (size_t n = 0; n < N; ++n) {
  34. for (size_t i = 0; i < 9; ++i) {
  35. switch (dice(gen)) {
  36. case 0:
  37. ptr[i] = pdist3(gen);
  38. break;
  39. case 1:
  40. ptr[i] = pdist(gen);
  41. break;
  42. case 2:
  43. ptr[i] = pdisth(gen);
  44. break;
  45. case 3:
  46. ptr[i] = ndist(gen);
  47. break;
  48. case 4:
  49. ptr[i] = ndist3(gen);
  50. break;
  51. case 5:
  52. ptr[i] = ndisth(gen);
  53. break;
  54. }
  55. }
  56. ptr[6] = 1;
  57. ptr[7] = -1;
  58. ptr[8] = 5;
  59. ptr += 9;
  60. }
  61. }
  62. };
  63. } // namespace
  64. TEST_F(NAIVE, WARP_PERSPECTIVE_NCHW4) {
  65. using Param = WarpPerspective::Param;
  66. auto convert_true_format = [](const TensorLayout& layout) {
  67. if (layout.ndim == 4)
  68. return layout.reshape({layout[0], layout[1] / 4, layout[2], layout[3], 4})
  69. .dimshuffle({0, 1, 4, 2, 3});
  70. else
  71. return layout;
  72. };
  73. WarpPerspective::Param param;
  74. auto extra_impl = [&param, this,
  75. convert_true_format](const TensorNDArray& tensors) {
  76. auto warp_perspective = handle()->create_operator<WarpPerspective>();
  77. warp_perspective->param() = param;
  78. warp_perspective->param().format = Param::Format::NCHW;
  79. TensorNDArray nchw_tensors;
  80. for (size_t i = 0; i < tensors.size(); ++i) {
  81. auto layout = tensors[i].layout;
  82. if (layout.dtype.enumv() == DTypeEnum::QuantizedS8)
  83. layout.dtype = dtype::Int8();
  84. if (layout.ndim == 5) {
  85. layout = layout.reshape(
  86. {layout[0], layout[1] * layout[4], layout[2], layout[3]});
  87. }
  88. nchw_tensors.emplace_back(malloc(layout.span().dist_byte()), layout);
  89. }
  90. TensorNDArray nchw4_tensors;
  91. for (size_t i = 0; i < tensors.size(); ++i) {
  92. auto layout = convert_true_format(nchw_tensors[i].layout);
  93. nchw4_tensors.emplace_back(tensors[i].raw_ptr(), std::move(layout));
  94. }
  95. auto workspace_size = warp_perspective->get_workspace_in_bytes(
  96. tensors[0].layout, tensors[1].layout, tensors[2].layout);
  97. dt_byte* workspace_ptr = static_cast<dt_byte*>(malloc(workspace_size));
  98. Workspace workspace{workspace_ptr, workspace_size};
  99. auto relayout = handle()->create_operator<RelayoutForward>();
  100. relayout->exec(nchw4_tensors[0], nchw_tensors[0]);
  101. relayout->exec(nchw4_tensors[1], nchw_tensors[1]);
  102. warp_perspective->exec(
  103. nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], workspace);
  104. relayout->exec(nchw_tensors[2], nchw4_tensors[2]);
  105. free(workspace_ptr);
  106. for (auto&& tensor : nchw_tensors) {
  107. free(tensor.raw_ptr());
  108. }
  109. };
  110. Checker<WarpPerspectiveForward> checker(handle());
  111. WarpPerspectiveMatRNG rng;
  112. checker.set_rng(1, &rng);
  113. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  114. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  115. checker.set_extra_opr_impl(extra_impl);
  116. for (auto bmode :
  117. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  118. WarpPerspective::BorderMode::REPLICATE,
  119. WarpPerspective::BorderMode::CONSTANT}) {
  120. param.border_val = 0.3f;
  121. param.bmode = bmode;
  122. param.imode = Param::InterpolationMode::LINEAR;
  123. param.format = Param::Format::NCHW4;
  124. checker.set_param(param);
  125. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  126. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  127. checker.execs({{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  128. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 510, 4}});
  129. checker.execs({{1, 25, 25, 510, 4}, {1, 3, 3}, {1, 25, 25, 25, 4}});
  130. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 51, 51, 4}});
  131. checker.execs({{1, 25, 51, 51, 4}, {1, 3, 3}, {1, 25, 25, 25, 4}});
  132. }
  133. }
  134. TEST_F(NAIVE, WARP_PERSPECTIVE) {
  135. Checker<WarpPerspective> checker(handle(), false);
  136. WarpPerspective::Param param;
  137. param.bmode = WarpPerspective::Param::BorderMode::BORDER_REFLECT;
  138. param.imode = WarpPerspective::Param::InterpolationMode::LINEAR;
  139. param.format = WarpPerspective::Param::Format::NCHW;
  140. checker.set_param(param).exect(
  141. Testcase{
  142. TensorValue(
  143. {1, 1, 3, 3}, dtype::Uint8{},
  144. {131, 255, 180, 245, 8, 0, 10, 3, 178}),
  145. TensorValue(
  146. {1, 3, 3}, dtype::Float32{},
  147. {1.2f, 1.2f, 0.6f, -1.05f, -2.0f, -0.7f, 1.3f, 1.5f, 3.0f}),
  148. {}},
  149. Testcase{
  150. {},
  151. {},
  152. TensorValue({1, 1, 2, 2}, dtype::Uint8{}, {156, 183, 181, 195})});
  153. checker.set_param(param).exect(
  154. Testcase{
  155. TensorValue(
  156. {1, 1, 3, 3},
  157. dtype::Quantized8Asymm{1.4f, static_cast<uint8_t>(127)},
  158. {131, 255, 180, 245, 8, 0, 10, 3, 178}),
  159. TensorValue(
  160. {1, 3, 3}, dtype::Float32{},
  161. {1.2f, 1.2f, 0.6f, -1.05f, -2.0f, -0.7f, 1.3f, 1.5f, 3.0f}),
  162. {}},
  163. Testcase{
  164. {},
  165. {},
  166. TensorValue(
  167. {1, 1, 2, 2},
  168. dtype::Quantized8Asymm{1.4f, static_cast<uint8_t>(127)},
  169. {156, 183, 181, 195})});
  170. }
  171. TEST_F(NAIVE, WARP_PERSPECTIVE_NCHW_QINT4) {
  172. Checker<WarpPerspective> checker(handle(), false);
  173. WarpPerspective::Param param;
  174. param.bmode = WarpPerspective::Param::BorderMode::BORDER_REFLECT;
  175. param.imode = WarpPerspective::Param::InterpolationMode::LINEAR;
  176. param.format = WarpPerspective::Param::Format::NCHW;
  177. std::vector<int> input_values = {-1, -3, -2, -2, 0, 0, 0, 0, -2},
  178. output_values = {-1, -2, -2, -2};
  179. checker.set_param(param).exect(
  180. Testcase{
  181. TensorValueLowbit4(
  182. {1, 1, 3, 3}, dtype::QuantizedS4(0.1), input_values),
  183. TensorValue(
  184. {1, 3, 3}, dtype::Float32{},
  185. {1.2f, 1.2f, 0.6f, -1.05f, -2.0f, -0.7f, 1.3f, 1.5f, 3.0f}),
  186. {}},
  187. Testcase{
  188. {},
  189. {},
  190. TensorValueLowbit4(
  191. {1, 1, 2, 2}, dtype::QuantizedS4(0.1), output_values)});
  192. }
  193. TEST_F(NAIVE, WARP_PERSPECTIVE_NCHW_QUINT4) {
  194. Checker<WarpPerspective> checker(handle(), false);
  195. WarpPerspective::Param param;
  196. param.bmode = WarpPerspective::Param::BorderMode::BORDER_REFLECT;
  197. param.imode = WarpPerspective::Param::InterpolationMode::LINEAR;
  198. param.format = WarpPerspective::Param::Format::NCHW;
  199. std::vector<int> input_values = {4, 13, 0, 0, 0, 0, 0, 0, 0},
  200. output_values = {6, 8, 8, 9};
  201. checker.set_param(param).exect(
  202. Testcase{
  203. TensorValueLowbit4(
  204. {1, 1, 3, 3}, dtype::Quantized4Asymm(0.1, 3), input_values),
  205. TensorValue(
  206. {1, 3, 3}, dtype::Float32{},
  207. {1.2f, 1.2f, 0.6f, -1.05f, -2.0f, -0.7f, 1.3f, 1.5f, 3.0f}),
  208. {}},
  209. Testcase{
  210. {},
  211. {},
  212. TensorValueLowbit4(
  213. {1, 1, 2, 2}, dtype::Quantized4Asymm(0.1, 3),
  214. output_values)});
  215. }
  216. TEST_F(NAIVE_MULTI_THREADS, WARP_PERSPECTIVE_NCHW4) {
  217. using Param = WarpPerspective::Param;
  218. auto convert_true_format = [](const TensorLayout& layout) {
  219. if (layout.ndim == 4)
  220. return layout.reshape({layout[0], layout[1] / 4, layout[2], layout[3], 4})
  221. .dimshuffle({0, 1, 4, 2, 3});
  222. else
  223. return layout;
  224. };
  225. WarpPerspective::Param param;
  226. auto extra_impl = [&param, this,
  227. convert_true_format](const TensorNDArray& tensors) {
  228. auto warp_perspective = handle()->create_operator<WarpPerspective>();
  229. warp_perspective->param() = param;
  230. warp_perspective->param().format = Param::Format::NCHW;
  231. TensorNDArray nchw_tensors;
  232. for (size_t i = 0; i < tensors.size(); ++i) {
  233. auto layout = tensors[i].layout;
  234. if (layout.dtype.enumv() == DTypeEnum::QuantizedS8)
  235. layout.dtype = dtype::Int8();
  236. if (layout.ndim == 5) {
  237. layout = layout.reshape(
  238. {layout[0], layout[1] * layout[4], layout[2], layout[3]});
  239. }
  240. nchw_tensors.emplace_back(malloc(layout.span().dist_byte()), layout);
  241. }
  242. TensorNDArray nchw4_tensors;
  243. for (size_t i = 0; i < tensors.size(); ++i) {
  244. auto layout = convert_true_format(nchw_tensors[i].layout);
  245. nchw4_tensors.emplace_back(tensors[i].raw_ptr(), std::move(layout));
  246. }
  247. auto workspace_size = warp_perspective->get_workspace_in_bytes(
  248. tensors[0].layout, tensors[1].layout, tensors[2].layout);
  249. dt_byte* workspace_ptr = static_cast<dt_byte*>(malloc(workspace_size));
  250. Workspace workspace{workspace_ptr, workspace_size};
  251. auto relayout = handle()->create_operator<RelayoutForward>();
  252. relayout->exec(nchw4_tensors[0], nchw_tensors[0]);
  253. relayout->exec(nchw4_tensors[1], nchw_tensors[1]);
  254. warp_perspective->exec(
  255. nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], workspace);
  256. relayout->exec(nchw_tensors[2], nchw4_tensors[2]);
  257. free(workspace_ptr);
  258. for (auto&& tensor : nchw_tensors) {
  259. free(tensor.raw_ptr());
  260. }
  261. };
  262. Checker<WarpPerspectiveForward> checker(handle());
  263. WarpPerspectiveMatRNG rng;
  264. checker.set_rng(1, &rng);
  265. checker.set_dtype(0, dtype::QuantizedS8(0.1f));
  266. checker.set_dtype(2, dtype::QuantizedS8(0.1f));
  267. checker.set_extra_opr_impl(extra_impl);
  268. for (auto bmode :
  269. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  270. WarpPerspective::BorderMode::REPLICATE,
  271. WarpPerspective::BorderMode::CONSTANT}) {
  272. param.border_val = 0.3f;
  273. param.bmode = bmode;
  274. param.imode = Param::InterpolationMode::LINEAR;
  275. param.format = Param::Format::NCHW4;
  276. checker.set_param(param);
  277. checker.execs({{2, 1, 10, 11, 4}, {2, 3, 3}, {2, 1, 11, 12, 4}});
  278. checker.execs({{20, 300, 10, 11, 4}, {20, 3, 3}, {20, 300, 11, 12, 4}});
  279. checker.execs({{2200, 3, 10, 11, 4}, {2200, 3, 3}, {2200, 3, 11, 12, 4}});
  280. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 25, 510, 4}});
  281. checker.execs({{1, 25, 25, 510, 4}, {1, 3, 3}, {1, 25, 25, 25, 4}});
  282. checker.execs({{1, 25, 25, 25, 4}, {1, 3, 3}, {1, 25, 51, 51, 4}});
  283. checker.execs({{1, 25, 51, 51, 4}, {1, 3, 3}, {1, 25, 25, 25, 4}});
  284. }
  285. }
  286. TEST_F(NAIVE_MULTI_THREADS, WARP_PERSPECTIVE) {
  287. Checker<WarpPerspective> checker(handle(), false);
  288. WarpPerspective::Param param;
  289. param.bmode = WarpPerspective::Param::BorderMode::BORDER_REFLECT;
  290. param.imode = WarpPerspective::Param::InterpolationMode::LINEAR;
  291. param.format = WarpPerspective::Param::Format::NCHW;
  292. checker.set_param(param).exect(
  293. Testcase{
  294. TensorValue(
  295. {1, 1, 3, 3}, dtype::Uint8{},
  296. {131, 255, 180, 245, 8, 0, 10, 3, 178}),
  297. TensorValue(
  298. {1, 3, 3}, dtype::Float32{},
  299. {1.2f, 1.2f, 0.6f, -1.05f, -2.0f, -0.7f, 1.3f, 1.5f, 3.0f}),
  300. {}},
  301. Testcase{
  302. {},
  303. {},
  304. TensorValue({1, 1, 2, 2}, dtype::Uint8{}, {156, 183, 181, 195})});
  305. checker.set_param(param).exect(
  306. Testcase{
  307. TensorValue(
  308. {1, 1, 3, 3},
  309. dtype::Quantized8Asymm{1.4f, static_cast<uint8_t>(127)},
  310. {131, 255, 180, 245, 8, 0, 10, 3, 178}),
  311. TensorValue(
  312. {1, 3, 3}, dtype::Float32{},
  313. {1.2f, 1.2f, 0.6f, -1.05f, -2.0f, -0.7f, 1.3f, 1.5f, 3.0f}),
  314. {}},
  315. Testcase{
  316. {},
  317. {},
  318. TensorValue(
  319. {1, 1, 2, 2},
  320. dtype::Quantized8Asymm{1.4f, static_cast<uint8_t>(127)},
  321. {156, 183, 181, 195})});
  322. }
  323. TEST_F(NAIVE_MULTI_THREADS, WARP_PERSPECTIVE_FORWARD_HWCD4) {
  324. auto handle_multi_thread = handle();
  325. Checker<WarpPerspective> checker(handle(), false);
  326. TensorFormat img_fmt = Image2DPack4TensorFormat::make(2, handle_multi_thread);
  327. checker.set_fmt(0, img_fmt).set_fmt(2, img_fmt);
  328. for (auto dtype : std::vector<DType>{
  329. dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(4.3f),
  330. dtype::Quantized8Asymm(2.4f, static_cast<uint8_t>(10))}) {
  331. for (auto bmode :
  332. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  333. WarpPerspective::BorderMode::CONSTANT,
  334. WarpPerspective::BorderMode::REPLICATE,
  335. WarpPerspective::BorderMode::CONSTANT}) {
  336. WarpPerspectiveMatRNG rng;
  337. checker.set_rng(1, &rng);
  338. WarpPerspective::Param param;
  339. param.border_val = 0.3f;
  340. param.bmode = bmode;
  341. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  342. param.format = param::WarpPerspective::Format::NHWCD4;
  343. if (dtype == dtype::Float16()) {
  344. //! if exists error, the value of a result pixel maybe another
  345. //! pixel in the origin image, so we just consider the avg error
  346. checker.set_epsilon(2e-1);
  347. checker.set_max_avg_error(1e-2);
  348. }
  349. checker.set_param(param);
  350. checker.set_dtype(0, dtype);
  351. checker.set_dtype(2, dtype);
  352. if (dtype.category() == DTypeCategory::FLOAT) {
  353. checker.set_dtype(1, dtype);
  354. } else {
  355. checker.set_dtype(1, dtype::Float32());
  356. }
  357. checker.execs({{2, 10, 1, 11, 4}, {2, 3, 3}, {2, 11, 1, 12, 4}});
  358. checker.execs({{22, 10, 1, 11, 4}, {22, 3, 3}, {22, 11, 1, 12, 4}});
  359. }
  360. }
  361. #if MEGDNN_TEST_ASAN
  362. //! asan detect nan will make test failed
  363. #else
  364. // nan case
  365. NanMatRNG rng_nan;
  366. UniformFloatRNG rng_zero(0, 0);
  367. //! NanMatRng not support float16, I have to reset dtype to Float32
  368. checker.set_dtype(0, dtype::Float32())
  369. .set_dtype(1, dtype::Float32())
  370. .set_dtype(2, dtype::Float32());
  371. for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
  372. param::WarpPerspective param;
  373. param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
  374. param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
  375. param.format = param::WarpPerspective::Format::NHWCD4;
  376. checker.set_rng(1, rng);
  377. param.border_val = 1.737;
  378. checker.set_param(param);
  379. checker.exec({{10, 10, 1, 11, 4}, {10, 3, 3}, {10, 12, 1, 13, 4}});
  380. }
  381. #endif
  382. }
  383. #if MEGDNN_WITH_BENCHMARK
  384. namespace {
  385. void benchmark_impl(
  386. const typename WarpPerspective::Param& param,
  387. std::vector<SmallVector<TensorShape>> shapes, size_t RUNS,
  388. TaskExecutorConfig&& multi_thread_config,
  389. TaskExecutorConfig&& single_thread_config) {
  390. std::vector<float> multi_thread_times, single_thread_times;
  391. {
  392. auto multi_thread_hanle = create_cpu_handle(0, true, &multi_thread_config);
  393. auto benchmarker = Benchmarker<WarpPerspective>(multi_thread_hanle.get());
  394. benchmarker.set_times(RUNS).set_display(false).set_param(param);
  395. for (auto shape : shapes) {
  396. multi_thread_times.push_back(benchmarker.exec(shape) / RUNS);
  397. }
  398. }
  399. {
  400. auto single_thread_handle = create_cpu_handle(0, true, &single_thread_config);
  401. auto benchmarker = Benchmarker<WarpPerspective>(single_thread_handle.get());
  402. benchmarker.set_times(RUNS).set_display(false).set_param(param);
  403. for (auto shape : shapes) {
  404. single_thread_times.push_back(benchmarker.exec(shape) / RUNS);
  405. }
  406. }
  407. printf("Benchmark : Multi threads %zu, ", multi_thread_config.nr_thread);
  408. printf("core_ids:");
  409. for (size_t i = 0; i < multi_thread_config.affinity_core_set.size(); i++) {
  410. printf("%zu ", multi_thread_config.affinity_core_set[i]);
  411. }
  412. printf(", Single thread core_id %zu\n", single_thread_config.affinity_core_set[0]);
  413. for (size_t i = 0; i < shapes.size(); i++) {
  414. auto shape = shapes[i];
  415. printf("Case: ");
  416. for (auto sh : shape)
  417. printf("%s ", sh.to_string().c_str());
  418. printf("%zu threads time: %f,\n single thread time: "
  419. "%f. spead up = %f, speedup/cores=%f\n",
  420. multi_thread_config.nr_thread, multi_thread_times[i],
  421. single_thread_times[i], single_thread_times[i] / multi_thread_times[i],
  422. single_thread_times[i] / multi_thread_times[i] /
  423. multi_thread_config.nr_thread);
  424. }
  425. }
  426. } // namespace
  427. TEST_F(NAIVE_BENCHMARK_MULTI_THREADS, BENCHMARK_WARP_PERSPECTIVE) {
  428. constexpr size_t RUNS = 50;
  429. using BMode = param::WarpPerspective::BorderMode;
  430. using IMode = param::WarpPerspective::InterpolationMode;
  431. WarpPerspective::Param param;
  432. param.border_val = 0.3f;
  433. param.format = param::WarpPerspective::Format::NCHW;
  434. param.imode = IMode::INTER_LINEAR;
  435. param.bmode = BMode::REPLICATE;
  436. std::vector<SmallVector<TensorShape>> shapes;
  437. auto bench_case = [&](size_t N, size_t H, size_t W, size_t C) {
  438. SmallVector<TensorShape> shape{{N, C, H, W}, {N, 3, 3}, {N, C, 224, 224}};
  439. shapes.push_back(shape);
  440. };
  441. bench_case(1, 700, 490, 10);
  442. bench_case(1, 700, 490, 20);
  443. bench_case(1, 700, 490, 30);
  444. bench_case(1, 500, 334, 10);
  445. bench_case(1, 500, 334, 20);
  446. bench_case(1, 500, 334, 30);
  447. bench_case(1, 140, 144, 10);
  448. bench_case(1, 140, 144, 20);
  449. bench_case(1, 140, 114, 30);
  450. printf("Benchmark warp perspective\n");
  451. benchmark_impl(param, shapes, RUNS, {4, {4, 5, 6, 7}}, {1, {4}});
  452. benchmark_impl(param, shapes, RUNS, {4, {4, 5, 6, 7}}, {1, {7}});
  453. benchmark_impl(param, shapes, RUNS, {2, {4, 5}}, {1, {4}});
  454. }
  455. #endif
  456. TEST_F(NAIVE, WARP_PERSPECTIVE_BFLOAT16) {
  457. Checker<WarpPerspective> checker(handle(), false);
  458. WarpPerspective::Param p;
  459. p.bmode = WarpPerspective::Param::BorderMode::BORDER_REFLECT;
  460. p.imode = WarpPerspective::Param::InterpolationMode::LINEAR;
  461. p.format = WarpPerspective::Param::Format::NCHW;
  462. auto extra_impl = extra_impl_helper<WarpPerspective>(handle(), p);
  463. checker.set_param(p)
  464. .set_epsilon(1e-1)
  465. .set_dtype(0, dtype::BFloat16())
  466. .set_dtype(1, dtype::Float32())
  467. .set_dtype(2, dtype::BFloat16())
  468. .set_extra_opr_impl(extra_impl)
  469. .execs({{1, 1, 3, 3}, {1, 3, 3}, {1, 1, 2, 2}})
  470. .execs({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
  471. }
  472. TEST_F(NAIVE, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
  473. Checker<WarpPerspectiveBackwardData> checker(handle(), false);
  474. WarpPerspectiveBackwardData::Param p;
  475. p.bmode = WarpPerspectiveBackwardData::Param::BorderMode::BORDER_REFLECT;
  476. p.imode = WarpPerspectiveBackwardData::Param::InterpolationMode::LINEAR;
  477. p.format = WarpPerspectiveBackwardData::Param::Format::NCHW;
  478. auto extra_impl = extra_impl_helper<WarpPerspectiveBackwardData>(handle(), p);
  479. checker.set_param(p)
  480. .set_dtype(0, dtype::Float32())
  481. .set_dtype(1, dtype::BFloat16())
  482. .set_dtype(2, dtype::BFloat16())
  483. .set_extra_opr_impl(extra_impl)
  484. .set_epsilon(1e-1)
  485. .execs({{1, 3, 3}, {1, 1, 2, 2}, {1, 1, 3, 3}});
  486. }
  487. TEST_F(NAIVE, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
  488. Checker<WarpPerspectiveBackwardMat> checker(handle(), false);
  489. WarpPerspectiveBackwardMat::Param p;
  490. p.bmode = WarpPerspectiveBackwardMat::Param::BorderMode::BORDER_REFLECT;
  491. p.imode = WarpPerspectiveBackwardMat::Param::InterpolationMode::LINEAR;
  492. p.format = WarpPerspectiveBackwardMat::Param::Format::NCHW;
  493. p.border_val = 0.3f;
  494. auto extra_impl = extra_impl_helper<WarpPerspectiveBackwardMat>(handle(), p);
  495. checker.set_param(p)
  496. .set_dtype(0, dtype::BFloat16())
  497. .set_dtype(1, dtype::Float32())
  498. .set_dtype(2, dtype::BFloat16())
  499. .set_dtype(3, dtype::Float32())
  500. .set_extra_opr_impl(extra_impl)
  501. .set_epsilon(1e-1)
  502. .execs({{1000, 3, 11, 12}, {1000, 3, 3}, {1000, 3, 10, 11}, {1000, 3, 3}});
  503. }
  504. TEST_F(NAIVE, WARP_PERSPECTIVE_NCHW64) {
  505. using Param = WarpPerspective::Param;
  506. auto convert_true_format = [](const TensorLayout& layout) {
  507. if (layout.ndim == 4) {
  508. TensorLayout ret{
  509. {layout[0], layout[1] / 64, layout[2], layout[3], 64},
  510. layout.dtype};
  511. return ret.dimshuffle({0, 1, 4, 2, 3});
  512. } else
  513. return layout;
  514. };
  515. WarpPerspective::Param param;
  516. auto extra_impl = [&param, this,
  517. convert_true_format](const TensorNDArray& tensors) {
  518. auto warp_perspective = handle()->create_operator<WarpPerspective>();
  519. warp_perspective->param() = param;
  520. warp_perspective->param().format = Param::Format::NCHW;
  521. TensorNDArray nchw_tensors;
  522. for (size_t i = 0; i < tensors.size(); ++i) {
  523. TensorLayout ly;
  524. auto layout = tensors[i].layout;
  525. if (tensors[i].layout.ndim == 5) {
  526. ly = TensorLayout{
  527. {layout[0], layout[1] * layout[4], layout[2], layout[3]},
  528. layout.dtype};
  529. } else {
  530. ly = layout;
  531. }
  532. nchw_tensors.emplace_back(malloc(ly.span().dist_byte()), ly);
  533. }
  534. TensorNDArray nchw64_tensors;
  535. for (size_t i = 0; i < tensors.size(); ++i) {
  536. auto layout = convert_true_format(nchw_tensors[i].layout);
  537. nchw64_tensors.emplace_back(tensors[i].raw_ptr(), std::move(layout));
  538. }
  539. auto workspace_size = warp_perspective->get_workspace_in_bytes(
  540. tensors[0].layout, tensors[1].layout, tensors[2].layout);
  541. dt_byte* workspace_ptr = static_cast<dt_byte*>(malloc(workspace_size));
  542. Workspace workspace{workspace_ptr, workspace_size};
  543. auto relayout = handle()->create_operator<RelayoutForward>();
  544. relayout->exec(nchw64_tensors[0], nchw_tensors[0]);
  545. relayout->exec(nchw64_tensors[1], nchw_tensors[1]);
  546. warp_perspective->exec(
  547. nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], workspace);
  548. relayout->exec(nchw_tensors[2], nchw64_tensors[2]);
  549. free(workspace_ptr);
  550. for (auto&& tensor : nchw_tensors) {
  551. free(tensor.raw_ptr());
  552. }
  553. };
  554. Checker<WarpPerspectiveForward> checker(handle());
  555. WarpPerspectiveMatRNG rng;
  556. checker.set_rng(1, &rng);
  557. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  558. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  559. checker.set_extra_opr_impl(extra_impl);
  560. for (auto bmode :
  561. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  562. WarpPerspective::BorderMode::REPLICATE,
  563. WarpPerspective::BorderMode::CONSTANT}) {
  564. param.border_val = 0.3f;
  565. param.bmode = bmode;
  566. param.imode = Param::InterpolationMode::LINEAR;
  567. param.format = Param::Format::NCHW64;
  568. checker.set_param(param);
  569. checker.execs({{2, 1, 10, 10, 64}, {2, 3, 3}, {2, 1, 10, 12, 64}});
  570. checker.execs({{20, 3, 10, 12, 64}, {20, 3, 3}, {20, 3, 11, 12, 64}});
  571. checker.execs({{1, 3, 25, 24, 64}, {1, 3, 3}, {1, 3, 25, 51, 64}});
  572. checker.execs({{1, 3, 25, 51, 64}, {1, 3, 3}, {1, 3, 25, 24, 64}});
  573. checker.execs({{1, 3, 25, 24, 64}, {1, 3, 3}, {1, 3, 51, 50, 64}});
  574. checker.execs({{1, 3, 51, 50, 64}, {1, 3, 3}, {1, 3, 25, 24, 64}});
  575. }
  576. }
  577. TEST_F(NAIVE, WARP_PERSPECTIVE_NHWC) {
  578. using Param = WarpPerspective::Param;
  579. auto convert_true_format = [](const TensorLayout& layout) {
  580. if (layout.ndim == 4) {
  581. TensorLayout ret{
  582. {layout[0], layout[2], layout[3], layout[1]}, layout.dtype};
  583. return ret.dimshuffle({0, 3, 1, 2});
  584. } else
  585. return layout;
  586. };
  587. WarpPerspective::Param param;
  588. auto extra_impl = [&param, this,
  589. convert_true_format](const TensorNDArray& tensors) {
  590. auto warp_perspective = handle()->create_operator<WarpPerspective>();
  591. warp_perspective->param() = param;
  592. warp_perspective->param().format = Param::Format::NCHW;
  593. TensorNDArray nchw_tensors;
  594. for (size_t i = 0; i < tensors.size(); ++i) {
  595. TensorLayout ly;
  596. auto layout = tensors[i].layout;
  597. if (layout.ndim == 4) {
  598. ly = TensorLayout{
  599. {layout[0], layout[3], layout[1], layout[2]}, layout.dtype};
  600. } else {
  601. ly = layout;
  602. }
  603. nchw_tensors.emplace_back(malloc(ly.span().dist_byte()), ly);
  604. }
  605. TensorNDArray nhwc_tensors;
  606. for (size_t i = 0; i < tensors.size(); ++i) {
  607. auto layout = convert_true_format(nchw_tensors[i].layout);
  608. nhwc_tensors.emplace_back(tensors[i].raw_ptr(), std::move(layout));
  609. }
  610. auto workspace_size = warp_perspective->get_workspace_in_bytes(
  611. tensors[0].layout, tensors[1].layout, tensors[2].layout);
  612. dt_byte* workspace_ptr = static_cast<dt_byte*>(malloc(workspace_size));
  613. Workspace workspace{workspace_ptr, workspace_size};
  614. auto relayout = handle()->create_operator<RelayoutForward>();
  615. relayout->exec(nhwc_tensors[0], nchw_tensors[0]);
  616. relayout->exec(nhwc_tensors[1], nchw_tensors[1]);
  617. warp_perspective->exec(
  618. nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], workspace);
  619. relayout->exec(nchw_tensors[2], nhwc_tensors[2]);
  620. free(workspace_ptr);
  621. for (auto&& tensor : nchw_tensors) {
  622. free(tensor.raw_ptr());
  623. }
  624. };
  625. {
  626. Checker<WarpPerspectiveForward> checker(handle());
  627. WarpPerspectiveMatRNG rng;
  628. checker.set_rng(1, &rng);
  629. checker.set_dtype(0, dtype::QuantizedS4(0.1f));
  630. checker.set_dtype(2, dtype::QuantizedS4(0.1f));
  631. checker.set_extra_opr_impl(extra_impl);
  632. for (auto bmode :
  633. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  634. WarpPerspective::BorderMode::REPLICATE,
  635. WarpPerspective::BorderMode::CONSTANT}) {
  636. param.border_val = 0.3f;
  637. param.bmode = bmode;
  638. param.imode = Param::InterpolationMode::LINEAR;
  639. param.format = Param::Format::NHWC;
  640. checker.set_param(param);
  641. checker.execs({{1, 2, 2, 4}, {1, 3, 3}, {1, 2, 2, 4}});
  642. checker.execs({{2, 10, 10, 4}, {2, 3, 3}, {2, 10, 12, 4}});
  643. checker.execs({{3, 25, 24, 8}, {3, 3, 3}, {3, 12, 10, 8}});
  644. checker.execs({{4, 33, 22, 16}, {4, 3, 3}, {4, 9, 12, 16}});
  645. }
  646. }
  647. {
  648. Checker<WarpPerspectiveForward> checker(handle());
  649. WarpPerspectiveMatRNG rng;
  650. checker.set_rng(1, &rng);
  651. checker.set_dtype(0, dtype::Quantized4Asymm(0.1f, 3));
  652. checker.set_dtype(2, dtype::Quantized4Asymm(0.1f, 3));
  653. checker.set_extra_opr_impl(extra_impl);
  654. for (auto bmode :
  655. {WarpPerspective::BorderMode::WRAP, WarpPerspective::BorderMode::REFLECT,
  656. WarpPerspective::BorderMode::REPLICATE,
  657. WarpPerspective::BorderMode::CONSTANT}) {
  658. param.border_val = 0.3f;
  659. param.bmode = bmode;
  660. param.imode = Param::InterpolationMode::LINEAR;
  661. param.format = Param::Format::NHWC;
  662. checker.set_param(param);
  663. checker.execs({{1, 2, 2, 4}, {1, 3, 3}, {1, 2, 2, 4}});
  664. checker.execs({{2, 10, 10, 4}, {2, 3, 3}, {2, 10, 12, 4}});
  665. checker.execs({{3, 25, 24, 8}, {3, 3, 3}, {3, 12, 10, 8}});
  666. checker.execs({{4, 33, 22, 16}, {4, 3, 3}, {4, 9, 12, 16}});
  667. }
  668. }
  669. }
  670. // vim: syntax=cpp.doxygen