
pooling.cpp

/**
 * \file dnn/test/x86/pooling.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/common/pooling.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/task_record_check.h"
#include "test/x86/fixture.h"

namespace megdnn {
namespace test {
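// Basic correctness checks: run every standard pooling test case against the
// x86 handle, then repeat with TaskRecordChecker to cover the recorded-task path.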
TEST_F(X86, POOLING) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<Pooling> checker(handle());
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86, POOLING_RECORD) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        TaskRecordChecker<Pooling> checker(0);
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}
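// Stride-1 max pooling in NCHW88 layout with large windows (10x10 up to 14x14),
// exercised on both the single-threaded and multi-threaded x86 handles.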
TEST_F(X86, S1POOLING88) {
    Checker<Pooling> checker(handle());
    auto run = [&](size_t WH, size_t WW, size_t PH, size_t PW, size_t SH, size_t SW,
                   size_t N, size_t C, size_t H, size_t W) {
        Pooling::Param param;
        param.format = param::Pooling::Format::NCHW88;
        param.window_h = WH;
        param.window_w = WW;
        param.pad_h = PH;
        param.pad_w = PW;
        param.stride_w = SW;
        param.stride_h = SH;
        param.mode = param::Pooling::Mode::MAX;
        checker.set_param(param);
        checker.execs({{N, C, H, W, 8}, {}});
    };

    for (size_t wh = 10; wh < 15; ++wh) {
        for (size_t ww = 10; ww < 15; ++ww) {
            for (size_t n : {1, 2, 4}) {
                for (size_t c : {1, 4}) {
                    for (size_t h : {10, 13, 20}) {
                        for (size_t w : {10, 13, 20}) {
                            run(wh, ww, wh / 2, ww / 2, 1, 1, n, c, h, w);
                        }
                    }
                }
            }
        }
    }
}

TEST_F(X86_MULTI_THREADS, S1POOLING88) {
    Checker<Pooling> checker(handle());
    auto run = [&](size_t WH, size_t WW, size_t PH, size_t PW, size_t SH, size_t SW,
                   size_t N, size_t C, size_t H, size_t W) {
        Pooling::Param param;
        param.format = param::Pooling::Format::NCHW88;
        param.window_h = WH;
        param.window_w = WW;
        param.pad_h = PH;
        param.pad_w = PW;
        param.stride_w = SW;
        param.stride_h = SH;
        param.mode = param::Pooling::Mode::MAX;
        checker.set_param(param);
        checker.execs({{N, C, H, W, 8}, {}});
    };

    for (size_t wh = 10; wh < 15; ++wh) {
        for (size_t ww = 10; ww < 15; ++ww) {
            for (size_t n : {1, 2, 4}) {
                for (size_t c : {1, 4}) {
                    for (size_t h : {10, 13, 20}) {
                        for (size_t w : {10, 13, 20}) {
                            run(wh, ww, wh / 2, ww / 2, 1, 1, n, c, h, w);
                        }
                    }
                }
            }
        }
    }
}
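// MKL-DNN backed NCHW88 tests: the standard NCHW cases are reshaped into
// 5-dimensional NCHW88 blocks (channel count rounded up to a multiple of 8).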
#if MEGDNN_X86_WITH_MKL_DNN
TEST_F(X86, POOLING88) {
    Checker<Pooling> checker(handle());
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        arg.ishape.ndim = 5;
        arg.ishape[1] = (arg.ishape[1] + 7) / 8;
        arg.ishape[4] = 8;
        arg.param.format = param::Pooling::Format::NCHW88;
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86, POOLING88_RECORD) {
    TaskRecordChecker<Pooling> checker(0);
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        arg.ishape.ndim = 5;
        arg.ishape[1] = (arg.ishape[1] + 7) / 8;
        arg.ishape[4] = 8;
        arg.param.format = param::Pooling::Format::NCHW88;
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86_MULTI_THREADS, POOLING88) {
    Checker<Pooling> checker(handle());
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        arg.ishape.ndim = 5;
        arg.ishape[1] = (arg.ishape[1] + 7) / 8;
        arg.ishape[4] = 8;
        arg.param.format = param::Pooling::Format::NCHW88;
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}
#endif
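// Benchmarks: time MAX pooling over RUNS iterations and report throughput,
// counting one op per window element of every output element
// (computation = output elements * window_h * window_w, scaled to Gops).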
#if MEGDNN_WITH_BENCHMARK
static void test_x86_megdnn_pooling(Handle* handle) {
    constexpr size_t RUNS = 50;
    auto rng = std::make_unique<UniformIntRNG>(-127, 127);
    Benchmarker<Pooling> benchmarker_pooling(handle);
    benchmarker_pooling.set_times(RUNS)
            .set_dtype(0, dtype::QuantizedS8(1.2))
            .set_display(false)
            .set_rng(0, rng.get());

    auto run = [&](uint32_t pad, uint32_t stride, uint32_t window_size,
                   size_t in_number, size_t in_channel, size_t in_height,
                   size_t in_width) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<Pooling>();
        opr->param() = {param::Pooling::Mode::MAX,
                        pad,
                        pad,
                        stride,
                        stride,
                        window_size,
                        window_size};
        TensorShape shape{in_number, in_channel, in_height, in_width};
        opr->deduce_layout({shape, dtype::Int8{}}, dst_layout);
        float computation =
                dst_layout.total_nr_elems() * window_size * window_size * 1e-9;
        auto pooling_used = benchmarker_pooling
                                    .set_param(
                                            {param::Pooling::Mode::MAX, pad, pad,
                                             stride, stride, window_size, window_size})
                                    .exec(TensorShapeArray{shape, {}}) /
                            RUNS;
        float through_put = computation / pooling_used * 1e3;
        std::cout << "{" << pad << "," << stride << "," << window_size << ","
                  << in_number << "," << in_channel << "," << in_height << ","
                  << in_width << "} "
                  << "use time " << pooling_used << "ms, "
                  << "through_put " << through_put << "Gops, " << std::endl;
    };

    for (auto window_size : {2, 3})
        for (auto stride : {2})
            for (auto pad : {2})
                for (auto n : {1, 3, 4})
                    for (auto c : {1, 32, 64})
                        for (auto h_w : {12, 32, 64}) {
                            run(pad, stride, window_size, n, c, h_w, h_w);
                        }
}

TEST_F(X86, BENCHMARK_POOLING) {
    test_x86_megdnn_pooling(handle());
}

TEST_F(X86_MULTI_THREADS, BENCHMARK_POOLING) {
    test_x86_megdnn_pooling(handle());
}
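// Profile a single large case: 13x13 stride-1 max pooling on a float32 NCHW88
// tensor of shape {1, 32, 20, 20, 8}, printing elapsed time and Gflops.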
TEST_F(X86, BENCHMARK_POOLING_MAX_S1_NCHW88) {
    constexpr size_t RUNS = 50;
    auto x86_handle = handle();
    Benchmarker<Pooling> benchmarker_pooling(x86_handle);
    benchmarker_pooling.set_times(RUNS);

    auto run = [&](uint32_t pad, uint32_t stride, uint32_t window_size,
                   size_t in_number, size_t in_channel, size_t in_height,
                   size_t in_width) {
        auto opr = x86_handle->create_operator<Pooling>();
        opr->param() = {param::Pooling::Mode::MAX,
                        pad,
                        pad,
                        stride,
                        stride,
                        window_size,
                        window_size};
        opr->param().format = param::Pooling::Format::NCHW88;
        TensorShape shape{in_number, in_channel / 8, in_height, in_width, 8};
        TensorLayout dst_layout;
        opr->deduce_layout({shape, dtype::Float32()}, dst_layout);
        float computation =
                dst_layout.total_nr_elems() * window_size * window_size * 1e-9;
        auto pooling_used = benchmarker_pooling.set_param(opr->param())
                                    .exec(TensorShapeArray{shape, {}}) /
                            RUNS;
        float through_put = computation / pooling_used * 1e3;
        printf("profiling max pooling NCHW88 {%zu,%zu,%zu,%zu,8}\nuse time : "
               "%f ms\nthrough_put : %f Gflops\n",
               in_number, in_channel / 8, in_height, in_width, pooling_used,
               through_put);
    };
    run(6, 1, 13, 1, 32 * 8, 20, 20);
}
#endif
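// Int8 correctness tests (MKL-DNN builds only): the same pooling cases run with
// dtype Int8 and inputs drawn uniformly from [-127, 127].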
#if MEGDNN_X86_WITH_MKL_DNN
TEST_F(X86, POOLING_INT8) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<Pooling> checker(handle());
        auto rng = std::make_unique<UniformIntRNG>(-127, 127);
        checker.set_dtype(0, dtype::Int8()).set_rng(0, rng.get());
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86, POOLING_INT8_RECORD) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<Pooling> checker(handle());
        auto rng = std::make_unique<UniformIntRNG>(-127, 127);
        checker.set_dtype(0, dtype::Int8()).set_rng(0, rng.get());
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}

TEST_F(X86_MULTI_THREADS, POOLING_INT8) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<Pooling> checker(handle());
        auto rng = std::make_unique<UniformIntRNG>(-127, 127);
        checker.set_dtype(0, dtype::Int8()).set_rng(0, rng.get());
        checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
    }
}
#endif
}  // namespace test
}  // namespace megdnn
// vim: syntax=cpp.doxygen

The MegEngine installation package already bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU and that its driver is installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.