You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

pooling_multi_thread.cpp 14 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. /**
  2. * \file dnn/test/arm_common/pooling_multi_thread.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/arm_common/fixture.h"
  12. #include "test/common/pooling.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/rng.h"
  16. namespace megdnn {
  17. namespace test {
  18. /*********************** mutli threads *********************************/
  19. TEST_F(ARM_COMMON_MULTI_THREADS, POOLING) {
  20. using Param = param::Pooling;
  21. for (size_t ih: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  22. for (size_t iw: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  23. for (size_t p: {1, 2})
  24. {
  25. Param param;
  26. param.mode = Param::Mode::MAX;
  27. param.window_h = param.window_w = 3;
  28. param.stride_h = param.stride_w = 2;
  29. param.pad_h = param.pad_w = p;
  30. Checker<Pooling> checker(handle());
  31. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  32. param.mode = Param::Mode::AVERAGE;
  33. param.window_h = param.window_w = 3;
  34. param.stride_h = param.stride_w = 2;
  35. param.pad_h = param.pad_w = p;
  36. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  37. param.mode = Param::Mode::MAX;
  38. param.window_h = param.window_w = 4;
  39. param.stride_h = param.stride_w = 2;
  40. param.pad_h = param.pad_w = p;
  41. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  42. param.mode = Param::Mode::MAX;
  43. param.window_h = param.window_w = 5;
  44. param.stride_h = param.stride_w = 2;
  45. param.pad_h = param.pad_w = p;
  46. if (ih + p * 2 >= 5 && iw + p * 2 >= 5)
  47. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  48. }
  49. }
  50. TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_INT8_W3x3_S2x2)
  51. {
  52. for (size_t ih: {2, 3, 7, 13, 52, 53, 54, 55})
  53. for (size_t iw: {2, 3, 6, 14, 53, 54, 55, 56})
  54. for (size_t ph: {0, 1, 2})
  55. for (size_t pw: {0, 1, 2})
  56. if (ih+2*ph >= 3 && iw+2*pw >= 3)
  57. {
  58. Checker<Pooling> checker(handle());
  59. checker.set_dtype(0, dtype::Int8());
  60. param::Pooling param;
  61. param.mode = param::Pooling::Mode::MAX;
  62. param.pad_h = ph;
  63. param.pad_w = pw;
  64. param.stride_h = param.stride_w = 2;
  65. param.window_h = param.window_w = 3;
  66. checker.set_param(param).exec(TensorShapeArray{
  67. {2, 3, ih, iw}, {}});
  68. }
  69. }
  70. TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_INT8_W2x2_S2x2)
  71. {
  72. for (size_t ih: {2, 3, 7, 13, 52, 53, 54, 55})
  73. for (size_t iw: {2, 3, 6, 14, 53, 54, 55, 56})
  74. for (size_t ph: {0, 1})
  75. for (size_t pw: {0, 1})
  76. if (ih+2*ph >= 3 && iw+2*pw >= 3)
  77. {
  78. Checker<Pooling> checker(handle());
  79. checker.set_dtype(0, dtype::Int8());
  80. param::Pooling param;
  81. param.mode = param::Pooling::Mode::MAX;
  82. param.pad_h = ph;
  83. param.pad_w = pw;
  84. param.stride_h = param.stride_w = 2;
  85. param.window_h = param.window_w = 2;
  86. checker.set_param(param).exec(TensorShapeArray{
  87. {2, 3, ih, iw}, {}});
  88. }
  89. }
  90. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  91. TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_FP16) {
  92. Checker<Pooling> checker(handle());
  93. checker.set_dtype(0, dtype::Float16{})
  94. .set_dtype(1, dtype::Float16{})
  95. .set_epsilon(3e-3);
  96. using Param = param::Pooling;
  97. for (size_t ih : {2, 3, 5, 7, 11, 13, 17, 19, 23})
  98. for (size_t iw : {2, 3, 5, 7, 11, 13, 17, 19, 23})
  99. for (auto mode : {Param::Mode::AVERAGE, Param::Mode::MAX}) {
  100. for (size_t window : {2, 3}) {
  101. Param param;
  102. param.mode = mode;
  103. param.window_h = param.window_w = window;
  104. param.stride_h = param.stride_w = 1;
  105. param.pad_h = param.pad_w = window / 2;
  106. //! test for SH == 1 && SW == 1 && FH == FW (FH == 2 || FH
  107. //! == 3)
  108. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  109. //! test for SH = SW = 2 && FH = FW = 2
  110. param.stride_h = param.stride_w = 2;
  111. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  112. }
  113. }
  114. //! test for SH == 2 && SW == 2 && FH == FW == 3 max pooling
  115. for (size_t ih : {2, 3, 7, 13, 52, 53, 54, 55})
  116. for (size_t iw : {2, 3, 6, 14, 53, 54, 55, 56})
  117. for (size_t ph : {0, 1, 2})
  118. for (size_t pw : {0, 1, 2})
  119. if (ih + 2 * ph >= 3 && iw + 2 * pw >= 3) {
  120. param::Pooling param;
  121. param.mode = param::Pooling::Mode::MAX;
  122. param.pad_h = ph;
  123. param.pad_w = pw;
  124. param.stride_h = param.stride_w = 2;
  125. param.window_h = param.window_w = 3;
  126. checker.set_param(param).exec(
  127. TensorShapeArray{{2, 3, ih, iw}, {}});
  128. }
  129. //! test for SH == 2 && SW == 2 && FH = FW = 4 max pooling
  130. for (size_t ih :
  131. {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  132. for (size_t iw :
  133. {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  134. for (size_t p : {1, 2}) {
  135. Param param;
  136. param.mode = Param::Mode::MAX;
  137. param.window_h = param.window_w = 4;
  138. param.stride_h = param.stride_w = 2;
  139. param.pad_h = param.pad_w = p;
  140. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  141. }
  142. //! test for SH == 2 && SW == 2 && FH = FW = 5 max pooling
  143. for (size_t ih :
  144. {3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  145. for (size_t iw :
  146. {3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  147. for (size_t p : {1, 2}) {
  148. Param param;
  149. param.mode = Param::Mode::MAX;
  150. param.window_h = param.window_w = 5;
  151. param.stride_h = param.stride_w = 2;
  152. param.pad_h = param.pad_w = p;
  153. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  154. }
  155. }
  156. #endif
  157. TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_QUANTIZED) {
  158. Checker<Pooling> checker(handle());
  159. UniformIntRNG rng1{INT8_MIN >> 1, INT8_MAX >> 1};
  160. UniformIntRNG rng2{0, UINT8_MAX >> 1};
  161. using Param = param::Pooling;
  162. for (auto type : std::vector<DType>{
  163. dtype::QuantizedS8(1.1f),
  164. dtype::Quantized8Asymm(1.1f, static_cast<uint8_t>(3))}) {
  165. if (type.enumv() == DTypeEnum::QuantizedS8) {
  166. checker.set_rng(0, &rng1);
  167. } else {
  168. megdnn_assert(type.enumv() == DTypeEnum::Quantized8Asymm);
  169. checker.set_rng(0, &rng2);
  170. }
  171. for (size_t ih : {2, 3, 5, 7, 11, 13, 17, 19, 23, 33, 49})
  172. for (size_t iw : {2, 3, 5, 7, 11, 13, 17, 19, 23, 33, 49})
  173. for (auto mode : {Param::Mode::AVERAGE, Param::Mode::MAX}) {
  174. for (size_t window : {2, 3}) {
  175. Param param;
  176. param.mode = mode;
  177. param.window_h = param.window_w = window;
  178. param.stride_h = param.stride_w = 1;
  179. param.pad_h = param.pad_w = window / 2;
  180. //! test for SH == 1 && SW == 1 && FH == FW (FH == 2 ||
  181. //! FH
  182. //! == 3)
  183. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  184. //! test for SH = SW = 2 && FH = FW = 2
  185. param.stride_h = param.stride_w = 2;
  186. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  187. }
  188. }
  189. //! test for SH == 2 && SW == 2 && FH == FW == 3 max pooling
  190. for (size_t ih : {2, 3, 7, 13, 52, 53, 54, 55})
  191. for (size_t iw : {2, 3, 6, 14, 53, 54, 55, 56})
  192. for (size_t ph : {0, 1, 2})
  193. for (size_t pw : {0, 1, 2})
  194. if (ih + 2 * ph >= 3 && iw + 2 * pw >= 3) {
  195. param::Pooling param;
  196. param.mode = param::Pooling::Mode::MAX;
  197. param.pad_h = ph;
  198. param.pad_w = pw;
  199. param.window_h = param.window_w = 3;
  200. param.stride_h = param.stride_w = 2;
  201. checker.set_param(param).exec(
  202. TensorShapeArray{{2, 3, ih, iw}, {}});
  203. }
  204. //! test for SH == 2 && SW == 2 && FH == FW == 4 max pooling
  205. for (size_t ih :
  206. {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  207. for (size_t iw :
  208. {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  209. for (size_t p : {1, 2}) {
  210. Param param;
  211. param.mode = Param::Mode::MAX;
  212. param.window_h = param.window_w = 4;
  213. param.stride_h = param.stride_w = 2;
  214. param.pad_h = param.pad_w = p;
  215. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  216. }
  217. //! test for SH == 2 && SW == 2 && FH == FW == 5 max pooling
  218. for (size_t ih :
  219. {3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  220. for (size_t iw :
  221. {3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  222. for (size_t p : {1, 2}) {
  223. Param param;
  224. param.mode = Param::Mode::MAX;
  225. param.window_h = param.window_w = 5;
  226. param.stride_h = param.stride_w = 2;
  227. param.pad_h = param.pad_w = p;
  228. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  229. }
  230. }
  231. }
  232. TEST_F(ARM_COMMON_MULTI_THREADS, POOLING_FALLBACK) {
  233. using Param = param::Pooling;
  234. for (size_t ih: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  235. for (size_t iw: {2, 3, 5, 7, 11, 13, 17, 19, 23, 24, 25, 26, 27, 28, 29, 30})
  236. for (size_t p: {1, 2})
  237. {
  238. Param param;
  239. param.mode = Param::Mode::MAX;
  240. param.window_h = param.window_w = 3;
  241. param.stride_h = param.stride_w = 2;
  242. param.pad_h = param.pad_w = p;
  243. Checker<Pooling> checker(handle());
  244. checker.set_param(param).exec({{2, 3, ih, iw}, {}});
  245. }
  246. }
  247. #if MEGDNN_WITH_BENCHMARK
  248. namespace {
  249. template <typename Opr>
  250. void benchmark_impl(const typename Opr::Param& param,
  251. std::vector<SmallVector<TensorShape>> shapes, size_t RUNS,
  252. TaskExecutorConfig&& multi_thread_config,
  253. TaskExecutorConfig&& single_thread_config) {
  254. std::vector<float> multi_thread_times, single_thread_times;
  255. {
  256. auto multi_thread_hanle =
  257. create_cpu_handle(0, true, &multi_thread_config);
  258. auto benchmarker = Benchmarker<Opr>(multi_thread_hanle.get());
  259. benchmarker.set_times(RUNS).set_display(false).set_param(param);
  260. for (auto shape : shapes) {
  261. multi_thread_times.push_back(benchmarker.exec(shape) / RUNS);
  262. }
  263. }
  264. {
  265. auto single_thread_handle =
  266. create_cpu_handle(0, true, &single_thread_config);
  267. auto benchmarker = Benchmarker<Opr>(single_thread_handle.get());
  268. benchmarker.set_times(RUNS).set_display(false).set_param(param);
  269. for (auto shape : shapes) {
  270. single_thread_times.push_back(benchmarker.exec(shape) / RUNS);
  271. }
  272. }
  273. printf("Benchmark : Multi threads %zu, ", multi_thread_config.nr_thread);
  274. printf("core_ids:");
  275. for (size_t i = 0; i < multi_thread_config.affinity_core_set.size(); i++) {
  276. printf("%zu ", multi_thread_config.affinity_core_set[i]);
  277. }
  278. printf(", Single thread core_id %zu\n",
  279. single_thread_config.affinity_core_set[0]);
  280. for (size_t i = 0; i < shapes.size(); i++) {
  281. auto shape = shapes[i];
  282. printf("Case: ");
  283. for (auto sh : shape)
  284. printf("%s ", sh.to_string().c_str());
  285. printf("%zu threads time: %f,\n single thread time: "
  286. "%f. spead up = %f, speedup/cores=%f\n",
  287. multi_thread_config.nr_thread, multi_thread_times[i],
  288. single_thread_times[i],
  289. single_thread_times[i] / multi_thread_times[i],
  290. single_thread_times[i] / multi_thread_times[i] /
  291. multi_thread_config.nr_thread);
  292. }
  293. }
  294. } // namespace
  295. TEST_F(ARM_COMMON_BENCHMARK_MULTI_THREADS, BENCHMARK_POOLING) {
  296. constexpr size_t RUNS = 50;
  297. using Param = param::Pooling;
  298. Param param;
  299. param.window_h = param.window_w = 3;
  300. param.stride_h = param.stride_w = 2;
  301. param.pad_h = param.pad_w = 1;
  302. std::vector<SmallVector<TensorShape>> shapes;
  303. shapes.push_back({{32, 32, 215, 215}, {}});
  304. shapes.push_back({{32, 32, 128, 128}, {}});
  305. shapes.push_back({{8, 256, 100, 100}, {}});
  306. shapes.push_back({{1, 256, 100, 100}, {}});
  307. shapes.push_back({{1, 32, 100, 100}, {}});
  308. shapes.push_back({{1, 256, 80, 80}, {}});
  309. shapes.push_back({{1, 256, 60, 60}, {}});
  310. shapes.push_back({{1, 256, 30, 30}, {}});
  311. param.window_h = param.window_w = 3;
  312. param.stride_h = param.stride_w = 2;
  313. param.pad_h = param.pad_w = 1;
  314. printf("Benchmark POOLING kernel:%d*%d stride:%d,mode %d\n", param.window_h,
  315. param.stride_h, param.pad_h, static_cast<int>(param.mode));
  316. benchmark_impl<Pooling>(param, shapes, RUNS, {4, {0, 1, 2, 3}}, {1, {0}});
  317. benchmark_impl<Pooling>(param, shapes, RUNS, {4, {4, 5, 6, 7}}, {1, {4}});
  318. benchmark_impl<Pooling>(param, shapes, RUNS, {2, {0, 1}}, {1, {0}});
  319. }
  320. #endif
  321. } // namespace test
  322. } // namespace megdnn
  323. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台