You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

pooling.cpp 23 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. /**
  2. * \file dnn/test/fallback/pooling.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2022 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/fallback/fixture.h"
  12. #include "test/common/benchmarker.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/pooling.h"
  15. #include "test/common/rng.h"
  16. #include "test/common/task_record_check.h"
  17. namespace megdnn {
  18. namespace test {
  19. namespace {
  20. std::vector<std::pair<param::Pooling, TensorShapeArray>> get_nchw44_pool_args(
  21. size_t filter, size_t stride) {
  22. constexpr size_t ic_step = 4;
  23. std::vector<std::pair<param::Pooling, TensorShapeArray>> args;
  24. for (size_t n : {1, 2})
  25. for (size_t c : {4, 8})
  26. for (size_t ih : {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13})
  27. for (size_t iw : {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13})
  28. for (size_t ph : {0, 1, 2})
  29. for (size_t pw : {0, 1, 2})
  30. for (auto mode :
  31. {param::Pooling::Mode::MAX,
  32. param::Pooling::Mode::AVERAGE})
  33. if (ih + 2 * ph >= filter && iw + 2 * pw >= filter &&
  34. filter > ph && filter > pw) {
  35. param::Pooling param;
  36. param.mode = mode;
  37. param.format = param::Pooling::Format::NCHW44;
  38. param.pad_h = ph;
  39. param.pad_w = pw;
  40. param.stride_h = param.stride_w = stride;
  41. param.window_h = param.window_w = filter;
  42. args.emplace_back(std::make_pair(
  43. param,
  44. TensorShapeArray{
  45. {n, c / ic_step, ih, iw, ic_step},
  46. {}}));
  47. }
  48. return args;
  49. }
  50. void run_pooling_check(
  51. Handle* handle, std::vector<std::pair<param::Pooling, TensorShapeArray>> args,
  52. bool is_int8) {
  53. Checker<Pooling> checker(handle);
  54. UniformIntRNG rng_int8{INT8_MIN >> 1, INT8_MAX >> 1};
  55. UniformIntRNG rng_fp32{-10, 10};
  56. if (is_int8) {
  57. checker.set_dtype(0, dtype::QuantizedS8(1.1f));
  58. checker.set_rng(0, &rng_int8);
  59. } else {
  60. checker.set_rng(0, &rng_fp32);
  61. }
  62. for (auto arg : args) {
  63. checker.set_param(arg.first).exec(arg.second);
  64. }
  65. }
  66. } // namespace
  67. TEST_F(FALLBACK_MULTI_THREADS, POOLING_GI_NCHW44_FP32) {
  68. for (auto filter : {2, 3, 4, 5})
  69. for (auto stride : {1, 2}) {
  70. run_pooling_check(handle(), get_nchw44_pool_args(filter, stride), false);
  71. }
  72. }
  73. TEST_F(FALLBACK, POOLING_GI) {
  74. using Param = param::Pooling;
  75. // clang-format off
  76. for (size_t ih: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  77. for (size_t iw: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  78. for (size_t p: {1, 2})
  79. {
  80. Param param;
  81. param.mode = Param::Mode::MAX;
  82. param.window_h = param.window_w = 3;
  83. param.stride_h = param.stride_w = 2;
  84. param.pad_h = param.pad_w = p;
  85. Checker<Pooling> checker(handle());
  86. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  87. param.mode = Param::Mode::AVERAGE;
  88. param.window_h = param.window_w = 3;
  89. param.stride_h = param.stride_w = 2;
  90. param.pad_h = param.pad_w = p;
  91. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  92. param.mode = Param::Mode::MAX;
  93. param.window_h = param.window_w = 4;
  94. param.stride_h = param.stride_w = 2;
  95. param.pad_h = param.pad_w = p;
  96. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  97. param.mode = Param::Mode::MAX;
  98. param.window_h = param.window_w = 5;
  99. param.stride_h = param.stride_w = 2;
  100. param.pad_h = param.pad_w = p;
  101. if (ih + p * 2 >= 5 && iw + p * 2 >= 5)
  102. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  103. }
  104. for (size_t ih: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  105. for (size_t iw: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  106. for (size_t p: {1, 2})
  107. {
  108. Param param;
  109. param.mode = Param::Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
  110. param.window_h = param.window_w = 3;
  111. param.stride_h = param.stride_w = 1;
  112. param.pad_h = param.pad_w = p;
  113. Checker<Pooling> checker(handle());
  114. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  115. }
  116. // clang-format on
  117. }
  118. TEST_F(FALLBACK, POOLING_GI_RECORD) {
  119. using Param = param::Pooling;
  120. TaskRecordChecker<Pooling> checker(0);
  121. // clang-format off
  122. for (size_t ih: {2, 3, 5, 7, 11, 13, 17})
  123. for (size_t iw: {2, 3, 5, 7, 11, 13, 17})
  124. for (size_t p: {1, 2})
  125. {
  126. Param param;
  127. param.mode = Param::Mode::MAX;
  128. param.window_h = param.window_w = 3;
  129. param.stride_h = param.stride_w = 2;
  130. param.pad_h = param.pad_w = p;
  131. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  132. param.mode = Param::Mode::AVERAGE;
  133. param.window_h = param.window_w = 3;
  134. param.stride_h = param.stride_w = 2;
  135. param.pad_h = param.pad_w = p;
  136. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  137. param.mode = Param::Mode::MAX;
  138. param.window_h = param.window_w = 4;
  139. param.stride_h = param.stride_w = 2;
  140. param.pad_h = param.pad_w = p;
  141. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  142. param.mode = Param::Mode::MAX;
  143. param.window_h = param.window_w = 5;
  144. param.stride_h = param.stride_w = 2;
  145. param.pad_h = param.pad_w = p;
  146. if (ih + p * 2 >= 5 && iw + p * 2 >= 5)
  147. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  148. }
  149. for (size_t ih: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  150. for (size_t iw: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  151. for (size_t p: {1, 2})
  152. {
  153. Param param;
  154. param.mode = Param::Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
  155. param.window_h = param.window_w = 3;
  156. param.stride_h = param.stride_w = 1;
  157. param.pad_h = param.pad_w = p;
  158. Checker<Pooling> checker(handle());
  159. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  160. }
  161. // clang-format on
  162. }
  163. TEST_F(FALLBACK_MULTI_THREADS, POOLING_GI_RECORD) {
  164. using Param = param::Pooling;
  165. TaskRecordChecker<Pooling> checker(0);
  166. for (size_t ih : {2, 3, 5, 7, 11, 13, 17})
  167. for (size_t iw : {2, 3, 5, 7, 11, 13, 17})
  168. for (size_t p : {1, 2}) {
  169. Param param;
  170. param.mode = Param::Mode::MAX;
  171. param.window_h = param.window_w = 3;
  172. param.stride_h = param.stride_w = 2;
  173. param.pad_h = param.pad_w = p;
  174. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  175. param.mode = Param::Mode::AVERAGE;
  176. param.window_h = param.window_w = 3;
  177. param.stride_h = param.stride_w = 2;
  178. param.pad_h = param.pad_w = p;
  179. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  180. param.mode = Param::Mode::MAX;
  181. param.window_h = param.window_w = 4;
  182. param.stride_h = param.stride_w = 2;
  183. param.pad_h = param.pad_w = p;
  184. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  185. param.mode = Param::Mode::MAX;
  186. param.window_h = param.window_w = 5;
  187. param.stride_h = param.stride_w = 2;
  188. param.pad_h = param.pad_w = p;
  189. if (ih + p * 2 >= 5 && iw + p * 2 >= 5)
  190. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  191. }
  192. }
  193. TEST_F(FALLBACK_MULTI_THREADS, POOLING_GI_W9_w13_NCHW44) {
  194. UniformIntRNG rng{-10, 10};
  195. Checker<Pooling> checker(handle());
  196. checker.set_rng(0, &rng);
  197. // clang-format off
  198. for (size_t ih: {20, 15})
  199. for (size_t iw: {15, 20})
  200. for (size_t kernel: {9, 13})
  201. for (size_t pad: {4, 6})
  202. for(auto mode: {param::Pooling::Mode::MAX, param::Pooling::Mode::AVERAGE})
  203. if (kernel > pad)
  204. {
  205. param::Pooling param;
  206. param.mode = mode;
  207. param.format = param::Pooling::Format::NCHW44;
  208. param.pad_h = pad;
  209. param.pad_w = pad;
  210. param.stride_h = param.stride_w = 1;
  211. param.window_h = param.window_w = kernel ;
  212. checker.set_param(param).exec(TensorShapeArray{{2, 8, ih, iw, 4}, {}});
  213. }
  214. // clang-format on
  215. }
  216. TEST_F(FALLBACK_MULTI_THREADS, POOLING_GI_FALLBACK) {
  217. using Param = param::Pooling;
  218. for (size_t ih : {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  219. for (size_t iw : {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  220. for (size_t p : {1, 2}) {
  221. Param param;
  222. param.mode = Param::Mode::MAX;
  223. param.window_h = param.window_w = 3;
  224. param.stride_h = param.stride_w = 2;
  225. param.pad_h = param.pad_w = p;
  226. Checker<Pooling> checker(handle());
  227. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  228. }
  229. }
  230. TEST_F(FALLBACK_MULTI_THREADS, POOLING_GI) {
  231. using Param = param::Pooling;
  232. for (size_t ih : {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  233. for (size_t iw : {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  234. for (size_t p : {1, 2}) {
  235. Param param;
  236. param.mode = Param::Mode::MAX;
  237. param.window_h = param.window_w = 3;
  238. param.stride_h = param.stride_w = 2;
  239. param.pad_h = param.pad_w = p;
  240. Checker<Pooling> checker(handle());
  241. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  242. param.mode = Param::Mode::AVERAGE;
  243. param.window_h = param.window_w = 3;
  244. param.stride_h = param.stride_w = 2;
  245. param.pad_h = param.pad_w = p;
  246. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  247. param.mode = Param::Mode::MAX;
  248. param.window_h = param.window_w = 4;
  249. param.stride_h = param.stride_w = 2;
  250. param.pad_h = param.pad_w = p;
  251. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  252. param.mode = Param::Mode::MAX;
  253. param.window_h = param.window_w = 5;
  254. param.stride_h = param.stride_w = 2;
  255. param.pad_h = param.pad_w = p;
  256. if (ih + p * 2 >= 5 && iw + p * 2 >= 5)
  257. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  258. }
  259. }
  260. #if MEGDNN_WITH_BENCHMARK
  261. namespace {
// Compare pooling throughput of three variants on the same workload:
// NCHW fp32, NCHW44 QuantizedS8 and NCHW44 fp32. Prints per-variant time and
// Mflops plus the NCHW->NCHW44 fp32 speed-up (t1/t3).
void benchmark_nchw44_fp32(Handle* handle) {
    using Param = param::Pooling;
    auto run = [&](size_t n, size_t c, size_t h, size_t w, size_t filter, size_t stride,
                   size_t pad, Param::Mode mode) {
        Param param;
        param.window_h = param.window_w = filter;
        param.stride_h = param.stride_w = stride;
        param.pad_h = param.pad_w = pad;
        param.format = Param::Format::NCHW;
        param.mode = mode;
        TensorShape nchw_shape = {n, c, h, w};
        // NCHW44 packs 4 channels into the innermost dim; assumes c % 4 == 0
        TensorShape nchw44_shape = {n, c / 4, h, w, 4};
        TensorLayout dst_layout;
        auto opr = handle->create_operator<Pooling>();
        opr->param() = param;
        // deduce output shape from the NCHW layout; the NCHW44 output has the
        // same number of elements, so one calc_amount serves all three runs
        opr->deduce_layout({nchw_shape, dtype::Float32()}, dst_layout);
        // one op per window element per output value
        float calc_amount =
                dst_layout.total_nr_elems() * param.window_h * param.window_w;
        Benchmarker<Pooling> benchmarker_float_nchw(handle);
        Benchmarker<Pooling> benchmarker_float_nchw44(handle);
        Benchmarker<Pooling> benchmarker_int_nchw44(handle);
        size_t RUN = 500;
        // t1: fp32, NCHW layout
        auto t1 = benchmarker_float_nchw.set_display(false)
                          .set_times(RUN)
                          .set_param(param)
                          .exec({nchw_shape, {}});
        // t2 and t3 below both use the NCHW44 format set here
        param.format = Param::Format::NCHW44;
        // t2: quantized int8, NCHW44 layout (execl to pass explicit dtypes)
        auto t2 = benchmarker_int_nchw44.set_display(false)
                          .set_times(RUN)
                          .set_param(param)
                          .execl({{nchw44_shape, dtype::QuantizedS8(1.0)},
                                  {{}, dtype::QuantizedS8(1.0)}});
        // t3: fp32, NCHW44 layout
        auto t3 = benchmarker_float_nchw44.set_display(false)
                          .set_times(RUN)
                          .set_param(param)
                          .exec({nchw44_shape, {}});
        printf("{%zu %zu %zu %zu} filter = %zu, stride = %zu pad = %zu\n"
               "nchw_fp32={%.3f ms, %.3f Mflops}, "
               "nchw44_int={%.3f ms, %.3f Mflops}, "
               "nchw44_fp32={%.3f ms, %.3f Mflops, speed_up %f}\n\n",
               n, c, h, w, filter, stride, pad, t1 / RUN,
               calc_amount / (t1 / RUN * 1000), t2 / RUN,
               calc_amount / (t2 / RUN * 1000), t3 / RUN,
               calc_amount / (t3 / RUN * 1000), t1 / t3);
    };
    // Resnet50
    run(1, 64, 112, 112, 3, 2, 1, param::Pooling::Mode::MAX);
    run(1, 2048, 7, 7, 7, 1, 0, param::Pooling::Mode::AVERAGE);
    // VGG16
    run(1, 64, 224, 224, 2, 2, 0, param::Pooling::Mode::MAX);
    run(1, 128, 112, 112, 2, 2, 0, param::Pooling::Mode::MAX);
    run(1, 256, 56, 56, 2, 2, 0, param::Pooling::Mode::MAX);
    run(1, 512, 28, 28, 2, 2, 0, param::Pooling::Mode::MAX);
    run(1, 512, 14, 14, 2, 2, 0, param::Pooling::Mode::MAX);
}
  317. } // namespace
// Single-thread run of the NCHW vs NCHW44 pooling benchmark.
TEST_F(FALLBACK, BENCHMARK_POOLING_GI_NCHW44_FP32) {
    benchmark_nchw44_fp32(handle());
}
// Multi-thread run of the NCHW vs NCHW44 pooling benchmark.
TEST_F(FALLBACK_MULTI_THREADS, BENCHMARK_POOLING_GI_NCHW44_FP32) {
    benchmark_nchw44_fp32(handle());
}
  324. TEST_F(FALLBACK, BENCHMARK_POOLING_GI_W4x4_S2x2) {
  325. using Param = param::Pooling;
  326. auto run = [&](const TensorShapeArray& shapes, Param param) {
  327. std::cout << "N:" << shapes[0][0] << " "
  328. << "IC:" << shapes[0][1] << " "
  329. << "IH:" << shapes[0][2] << " "
  330. << "IW:" << shapes[0][3] << std::endl;
  331. auto handle_naive = create_cpu_handle(2);
  332. Benchmarker<Pooling> benchmarker_naive(handle_naive.get());
  333. Benchmarker<Pooling> benchmarker_float(handle());
  334. size_t RUN = 10;
  335. auto t1 = benchmarker_naive.set_display(false)
  336. .set_times(RUN)
  337. .set_param(param)
  338. .exec(shapes);
  339. auto t2 = benchmarker_float.set_display(false)
  340. .set_times(RUN)
  341. .set_param(param)
  342. .exec(shapes);
  343. TensorLayout dst_layout;
  344. auto opr = handle()->create_operator<Pooling>();
  345. opr->param() = param;
  346. opr->deduce_layout({shapes[0], dtype::Float32()}, dst_layout);
  347. float calc_amount =
  348. dst_layout.total_nr_elems() * param.window_h * param.window_w;
  349. printf("naive={%.3fms, %.3fMflops}, neon={%.3fms, %.3fMflops}\n", t1 / RUN,
  350. calc_amount / (t1 / RUN * 1000), t2 / RUN,
  351. calc_amount / (t2 / RUN * 1000));
  352. };
  353. Param param;
  354. param.window_h = param.window_w = 4;
  355. param.stride_h = param.stride_w = 2;
  356. param.pad_h = param.pad_w = 1;
  357. std::cout << "4x4 with 2x2 stride max pooling:" << std::endl;
  358. run({{1, 24, 160, 128}, {}}, param);
  359. run({{1, 4, 240, 135}, {}}, param);
  360. run({{1, 32, 120, 67}, {}}, param);
  361. run({{1, 64, 60, 33}, {}}, param);
  362. }
  363. TEST_F(FALLBACK, BENCHMARK_POOLING_GI_W5x5_S2x2) {
  364. using Param = param::Pooling;
  365. auto run = [&](const TensorShapeArray& shapes, Param param) {
  366. std::cout << "N:" << shapes[0][0] << " "
  367. << "IC:" << shapes[0][1] << " "
  368. << "IH:" << shapes[0][2] << " "
  369. << "IW:" << shapes[0][3] << std::endl;
  370. auto handle_naive = create_cpu_handle(2);
  371. Benchmarker<Pooling> benchmarker_naive(handle_naive.get());
  372. Benchmarker<Pooling> benchmarker_float(handle());
  373. size_t RUN = 10;
  374. auto t1 = benchmarker_naive.set_display(false)
  375. .set_times(RUN)
  376. .set_param(param)
  377. .exec(shapes);
  378. auto t2 = benchmarker_float.set_display(false)
  379. .set_times(RUN)
  380. .set_param(param)
  381. .exec(shapes);
  382. TensorLayout dst_layout;
  383. auto opr = handle()->create_operator<Pooling>();
  384. opr->param() = param;
  385. opr->deduce_layout({shapes[0], dtype::Float32()}, dst_layout);
  386. float calc_amount =
  387. dst_layout.total_nr_elems() * param.window_h * param.window_w;
  388. printf("naive={%.3fms, %.3fMflops}, neon={%.3fms, %.3fMflops}\n", t1 / RUN,
  389. calc_amount / (t1 / RUN * 1000), t2 / RUN,
  390. calc_amount / (t2 / RUN * 1000));
  391. };
  392. Param param;
  393. param.window_h = param.window_w = 5;
  394. param.stride_h = param.stride_w = 2;
  395. param.pad_h = param.pad_w = 1;
  396. std::cout << "5x5 with 2x2 stride max pooling:" << std::endl;
  397. run({{1, 24, 160, 128}, {}}, param);
  398. run({{1, 4, 240, 135}, {}}, param);
  399. run({{1, 32, 120, 67}, {}}, param);
  400. run({{1, 64, 60, 33}, {}}, param);
  401. }
  402. namespace {
  403. template <typename Opr>
  404. void benchmark_impl(
  405. const typename Opr::Param& param, std::vector<SmallVector<TensorShape>> shapes,
  406. size_t RUNS, TaskExecutorConfig&& multi_thread_config,
  407. TaskExecutorConfig&& single_thread_config, DType data_type) {
  408. std::vector<float> multi_thread_times, single_thread_times;
  409. {
  410. auto multi_thread_hanle = create_cpu_handle(0, true, &multi_thread_config);
  411. auto benchmarker = Benchmarker<Opr>(multi_thread_hanle.get());
  412. benchmarker.set_times(RUNS).set_display(false).set_param(param);
  413. benchmarker.set_dtype(0, data_type);
  414. for (auto shape : shapes) {
  415. multi_thread_times.push_back(benchmarker.exec(shape) / RUNS);
  416. }
  417. }
  418. {
  419. auto single_thread_handle = create_cpu_handle(0, true, &single_thread_config);
  420. auto benchmarker = Benchmarker<Opr>(single_thread_handle.get());
  421. benchmarker.set_times(RUNS).set_display(false).set_param(param);
  422. benchmarker.set_dtype(0, data_type);
  423. for (auto shape : shapes) {
  424. single_thread_times.push_back(benchmarker.exec(shape) / RUNS);
  425. }
  426. }
  427. printf("Benchmark : Multi threads %zu, ", multi_thread_config.nr_thread);
  428. printf("core_ids:");
  429. for (size_t i = 0; i < multi_thread_config.affinity_core_set.size(); i++) {
  430. printf("%zu ", multi_thread_config.affinity_core_set[i]);
  431. }
  432. printf(", Single thread core_id %zu\n", single_thread_config.affinity_core_set[0]);
  433. for (size_t i = 0; i < shapes.size(); i++) {
  434. auto shape = shapes[i];
  435. printf("Case: ");
  436. for (auto sh : shape)
  437. printf("%s ", sh.to_string().c_str());
  438. printf("%zu threads time: %f,\n single thread time: "
  439. "%f. spead up = %f, speedup/cores=%f\n",
  440. multi_thread_config.nr_thread, multi_thread_times[i],
  441. single_thread_times[i], single_thread_times[i] / multi_thread_times[i],
  442. single_thread_times[i] / multi_thread_times[i] /
  443. multi_thread_config.nr_thread);
  444. }
  445. }
  446. } // namespace
  447. TEST_F(FALLBACK_MULTI_THREADS, BENCHMARK_POOLING_GI) {
  448. constexpr size_t RUNS = 50;
  449. using Param = param::Pooling;
  450. Param param;
  451. param.window_h = param.window_w = 3;
  452. param.stride_h = param.stride_w = 2;
  453. param.pad_h = param.pad_w = 1;
  454. std::vector<SmallVector<TensorShape>> shapes;
  455. shapes.push_back({{32, 32, 215, 215}, {}});
  456. shapes.push_back({{32, 32, 128, 128}, {}});
  457. shapes.push_back({{8, 256, 100, 100}, {}});
  458. shapes.push_back({{1, 256, 100, 100}, {}});
  459. shapes.push_back({{1, 32, 100, 100}, {}});
  460. shapes.push_back({{1, 256, 80, 80}, {}});
  461. shapes.push_back({{1, 256, 60, 60}, {}});
  462. shapes.push_back({{1, 256, 30, 30}, {}});
  463. param.window_h = param.window_w = 3;
  464. param.stride_h = param.stride_w = 2;
  465. param.pad_h = param.pad_w = 1;
  466. printf("Benchmark POOLING kernel:%d*%d stride:%d,mode %d\n", param.window_h,
  467. param.window_w, param.stride_h, static_cast<int>(param.mode));
  468. benchmark_impl<Pooling>(
  469. param, shapes, RUNS, {4, {0, 1, 2, 3}}, {1, {0}}, dtype::Float32());
  470. benchmark_impl<Pooling>(
  471. param, shapes, RUNS, {4, {4, 5, 6, 7}}, {1, {4}}, dtype::Float32());
  472. benchmark_impl<Pooling>(
  473. param, shapes, RUNS, {2, {0, 1}}, {1, {0}}, dtype::Float32());
  474. }
  475. TEST_F(FALLBACK_MULTI_THREADS, BENCHMARK_POOLING_GI_NCHW44) {
  476. constexpr size_t RUNS = 50;
  477. using Param = param::Pooling;
  478. Param param;
  479. param.pad_h = param.pad_w = 0;
  480. param.mode = Param::Mode::MAX;
  481. std::vector<SmallVector<TensorShape>> shapes;
  482. std::vector<std::vector<size_t>> filter_and_stride = {
  483. {2, 1}, {2, 2}, {3, 1}, {3, 2}, {4, 1}, {4, 2}, {5, 1}, {5, 2}};
  484. for (auto mode : {param::Pooling::Mode::MAX, param::Pooling::Mode::AVERAGE}) {
  485. for (auto filter : filter_and_stride) {
  486. shapes.push_back({{1, 32 * 4, 215, 215}, {}});
  487. shapes.push_back({{1, 32 * 4, 128, 128}, {}});
  488. shapes.push_back({{1, 16 * 4, 56, 56}, {}});
  489. param.mode = mode;
  490. param.window_h = param.window_w = filter[0];
  491. param.stride_h = param.stride_w = filter[1];
  492. param.format = Param::Format::NCHW;
  493. printf("NCHW Benchmark POOLING kernel:%d*%d stride:%d,mode %d\n",
  494. param.window_h, param.window_h, param.stride_h,
  495. static_cast<int>(param.mode));
  496. benchmark_impl<Pooling>(
  497. param, shapes, RUNS, {4, {4, 5, 6, 7}}, {1, {4}},
  498. dtype::QuantizedS8(1.1f));
  499. shapes.clear();
  500. shapes.push_back({{1, 32, 215, 215, 4}, {}});
  501. shapes.push_back({{1, 32, 128, 128, 4}, {}});
  502. shapes.push_back({{1, 16, 56, 56, 4}, {}});
  503. param.format = Param::Format::NCHW44;
  504. printf("NCHW44 Benchmark POOLING kernel:%d*%d stride:%d,mode %d\n",
  505. param.window_h, param.window_w, param.stride_h,
  506. static_cast<int>(param.mode));
  507. benchmark_impl<Pooling>(
  508. param, shapes, RUNS, {4, {4, 5, 6, 7}}, {1, {4}},
  509. dtype::QuantizedS8(1.1f));
  510. shapes.clear();
  511. }
  512. }
  513. }
  514. #endif
  515. } // namespace test
  516. } // namespace megdnn
  517. // vim: syntax=cpp.doxygen