You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

pooling.cpp 8.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
/**
 * \file dnn/test/x86/pooling.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
  11. #include "test/common/pooling.h"
  12. #include "test/common/benchmarker.h"
  13. #include "test/common/checker.h"
  14. #include "test/x86/fixture.h"
  15. namespace megdnn {
  16. namespace test {
  17. TEST_F(X86, POOLING) {
  18. auto args = pooling::get_args();
  19. for (auto&& arg : args) {
  20. Checker<Pooling> checker(handle());
  21. checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
  22. }
  23. }
  24. TEST_F(X86, S1POOLING88) {
  25. Checker<Pooling> checker(handle());
  26. auto run = [&](size_t WH, size_t WW, size_t PH, size_t PW, size_t SH,
  27. size_t SW, size_t N, size_t C, size_t H, size_t W) {
  28. Pooling::Param param;
  29. param.format = param::Pooling::Format::NCHW88;
  30. param.window_h = WH;
  31. param.window_w = WW;
  32. param.pad_h = PH;
  33. param.pad_w = PW;
  34. param.stride_w = SW;
  35. param.stride_h = SH;
  36. param.mode = param::Pooling::Mode::MAX;
  37. checker.set_param(param);
  38. checker.execs({{N, C, H, W, 8}, {}});
  39. };
  40. for (size_t wh = 10; wh < 15; ++wh) {
  41. for (size_t ww = 10; ww < 15; ++ww) {
  42. for (size_t n : {1, 2, 4}) {
  43. for (size_t c : {1, 4}) {
  44. for (size_t h : {10, 13, 20}) {
  45. for (size_t w : {10, 13, 20}) {
  46. run(wh, ww, wh / 2, ww / 2, 1, 1, n, c, h, w);
  47. }
  48. }
  49. }
  50. }
  51. }
  52. }
  53. }
  54. TEST_F(X86_MULTI_THREADS, S1POOLING88) {
  55. Checker<Pooling> checker(handle());
  56. auto run = [&](size_t WH, size_t WW, size_t PH, size_t PW, size_t SH,
  57. size_t SW, size_t N, size_t C, size_t H, size_t W) {
  58. Pooling::Param param;
  59. param.format = param::Pooling::Format::NCHW88;
  60. param.window_h = WH;
  61. param.window_w = WW;
  62. param.pad_h = PH;
  63. param.pad_w = PW;
  64. param.stride_w = SW;
  65. param.stride_h = SH;
  66. param.mode = param::Pooling::Mode::MAX;
  67. checker.set_param(param);
  68. checker.execs({{N, C, H, W, 8}, {}});
  69. };
  70. for (size_t wh = 10; wh < 15; ++wh) {
  71. for (size_t ww = 10; ww < 15; ++ww) {
  72. for (size_t n : {1, 2, 4}) {
  73. for (size_t c : {1, 4}) {
  74. for (size_t h : {10, 13, 20}) {
  75. for (size_t w : {10, 13, 20}) {
  76. run(wh, ww, wh / 2, ww / 2, 1, 1, n, c, h, w);
  77. }
  78. }
  79. }
  80. }
  81. }
  82. }
  83. }
  84. #if MEGDNN_X86_WITH_MKL_DNN
  85. TEST_F(X86, POOLING88) {
  86. Checker<Pooling> checker(handle());
  87. auto args = pooling::get_args();
  88. for (auto&& arg : args) {
  89. arg.ishape.ndim = 5;
  90. arg.ishape[1] = (arg.ishape[1] + 7) / 8;
  91. arg.ishape[4] = 8;
  92. arg.param.format = param::Pooling::Format::NCHW88;
  93. checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
  94. }
  95. }
  96. TEST_F(X86_MULTI_THREADS, POOLING88) {
  97. Checker<Pooling> checker(handle());
  98. auto args = pooling::get_args();
  99. for (auto&& arg : args) {
  100. arg.ishape.ndim = 5;
  101. arg.ishape[1] = (arg.ishape[1] + 7) / 8;
  102. arg.ishape[4] = 8;
  103. arg.param.format = param::Pooling::Format::NCHW88;
  104. checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
  105. }
  106. }
  107. #endif
  108. #if MEGDNN_WITH_BENCHMARK
  109. static void test_x86_megdnn_pooling(Handle* handle) {
  110. constexpr size_t RUNS = 50;
  111. auto rng = std::make_unique<UniformIntRNG>(-127, 127);
  112. Benchmarker<Pooling> benchmarker_pooling(handle);
  113. benchmarker_pooling.set_times(RUNS)
  114. .set_dtype(0, dtype::QuantizedS8(1.2))
  115. .set_display(false)
  116. .set_rng(0, rng.get());
  117. auto run = [&](uint32_t pad, uint32_t stride, uint32_t window_size,
  118. size_t in_number, size_t in_channel, size_t in_height,
  119. size_t in_width) {
  120. TensorLayout dst_layout;
  121. auto opr = handle->create_operator<Pooling>();
  122. opr->param() = {param::Pooling::Mode::MAX,
  123. pad,
  124. pad,
  125. stride,
  126. stride,
  127. window_size,
  128. window_size};
  129. TensorShape shape{in_number, in_channel, in_height, in_width};
  130. opr->deduce_layout({shape, dtype::Int8{}}, dst_layout);
  131. float computation =
  132. dst_layout.total_nr_elems() * window_size * window_size * 1e-9;
  133. auto pooling_used =
  134. benchmarker_pooling
  135. .set_param({param::Pooling::Mode::MAX, pad, pad, stride,
  136. stride, window_size, window_size})
  137. .exec(TensorShapeArray{shape, {}}) /
  138. RUNS;
  139. float through_put = computation / pooling_used * 1e3;
  140. std::cout << "{" << pad << "," << stride << "," << window_size << ","
  141. << in_number << "," << in_channel << "," << in_height << ","
  142. << in_width << "} "
  143. << "use time " << pooling_used << "ms, "
  144. << "through_put " << through_put << "Gops, " << std::endl;
  145. };
  146. for (auto widows_size : {2, 3})
  147. for (auto stride : {2})
  148. for (auto pad : {2})
  149. for (auto n : {1, 3, 4})
  150. for (auto c : {1, 32, 64})
  151. for (auto h_w : {12, 32, 64}) {
  152. run(pad, stride, widows_size, n, c, h_w, h_w);
  153. }
  154. }
  155. TEST_F(X86, BENCHMARK_POOLING) {
  156. test_x86_megdnn_pooling(handle());
  157. }
  158. TEST_F(X86_MULTI_THREADS, BENCHMARK_POOLING) {
  159. test_x86_megdnn_pooling(handle());
  160. }
  161. TEST_F(X86, BENCHMARK_POOLING_MAX_S1_NCHW88) {
  162. constexpr size_t RUNS = 50;
  163. auto x86_handle = handle();
  164. Benchmarker<Pooling> benchmarker_pooling(x86_handle);
  165. benchmarker_pooling.set_times(RUNS);
  166. auto run = [&](uint32_t pad, uint32_t stride, uint32_t window_size,
  167. size_t in_number, size_t in_channel, size_t in_height,
  168. size_t in_width) {
  169. auto opr = x86_handle->create_operator<Pooling>();
  170. opr->param() = {param::Pooling::Mode::MAX,
  171. pad,
  172. pad,
  173. stride,
  174. stride,
  175. window_size,
  176. window_size};
  177. opr->param().format = param::Pooling::Format::NCHW88;
  178. TensorShape shape{in_number, in_channel / 8, in_height, in_width, 8};
  179. TensorLayout dst_layout;
  180. opr->deduce_layout({shape, dtype::Float32()}, dst_layout);
  181. float computation =
  182. dst_layout.total_nr_elems() * window_size * window_size * 1e-9;
  183. auto pooling_used = benchmarker_pooling.set_param(opr->param())
  184. .exec(TensorShapeArray{shape, {}}) /
  185. RUNS;
  186. float through_put = computation / pooling_used * 1e3;
  187. printf("profiling max pooling NCHW88 {%zu,%zu,%zu,%zu,8}\nuse time : "
  188. "%f ms\nthrough_put : %f Gflops\n",
  189. in_number, in_channel / 8, in_height, in_width, pooling_used,
  190. through_put);
  191. };
  192. run(6, 1, 13, 1, 32 * 8, 20, 20);
  193. }
  194. #endif
  195. #if MEGDNN_X86_WITH_MKL_DNN
  196. TEST_F(X86, POOLING_INT8) {
  197. auto args = pooling::get_args();
  198. for (auto&& arg : args) {
  199. Checker<Pooling> checker(handle());
  200. auto rng = std::make_unique<UniformIntRNG>(-127, 127);
  201. checker.set_dtype(0, dtype::Int8()).set_rng(0, rng.get());
  202. checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
  203. }
  204. }
  205. TEST_F(X86_MULTI_THREADS, POOLING_INT8) {
  206. auto args = pooling::get_args();
  207. for (auto&& arg : args) {
  208. Checker<Pooling> checker(handle());
  209. auto rng = std::make_unique<UniformIntRNG>(-127, 127);
  210. checker.set_dtype(0, dtype::Int8()).set_rng(0, rng.get());
  211. checker.set_param(arg.param).exec(TensorShapeArray{arg.ishape, {}});
  212. }
  213. }
  214. #endif
  215. } // namespace test
  216. } // namespace megdnn
  217. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台