
pooling.cpp

/**
 * \file dnn/test/rocm/pooling.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "hcc_detail/hcc_defs_prologue.h"
#include "test/rocm/fixture.h"

#include "megdnn/tensor_iter.h"
#include "test/common/checker.h"
#include "test/common/pooling.h"
#include "test/rocm/benchmarker.h"

#include "src/common/utils.h"
#include "src/rocm/utils.h"

namespace megdnn {
namespace test {
TEST_F(ROCM, POOLING_FORWARD) {
    auto args = pooling::get_args();
    using Format = param::Pooling::Format;
    std::vector<DType> dtypes{MEGDNN_INC_FLOAT16(dtype::Float16() MEGDNN_COMMA)
                                      dtype::Float32()};
    for (auto dtype : dtypes)
        for (auto format : {Format::NCHW})
            for (auto&& arg : args) {
                auto param = arg.param;
                if (param.mode == param::Pooling::Mode::AVERAGE) {
                    param.mode =
                            param::Pooling::Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
                }
                auto src = arg.ishape;
                param.format = format;
                Checker<Pooling> checker(handle_rocm());
                checker.set_epsilon(1e-2);
                checker.set_param(param)
                        .set_dtype(0, dtype)
                        .set_dtype(1, dtype)
                        .exec(TensorShapeArray{src, {}});
            }
}
TEST_F(ROCM, POOLING_BACKWARD) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<PoolingBackward> checker(handle_rocm());
        TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32());
        TensorLayout olayout;

        auto& param = arg.param;
        if (param.mode == param::Pooling::Mode::AVERAGE) {
            param.mode = param::Pooling::Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
        }

        // The backward op consumes the forward output, so this constraint
        // recomputes dst with a forward pass on the ROCm handle instead of
        // feeding the checker random dst values.
        auto constraint = [this,
                           arg](CheckerHelper::TensorValueArray& tensors_orig) {
            megdnn_assert(tensors_orig.size() == 4);
            auto opr = handle_rocm()->create_operator<PoolingForward>();
            opr->param() = arg.param;

            auto tensors_rocm_storage = CheckerHelper::alloc_tensors(
                    handle_rocm(),
                    {tensors_orig[0].layout, tensors_orig[1].layout}, 0);
            auto&& tensors_rocm = *tensors_rocm_storage;

            // copy the host-generated src tensor to the device
            auto span = tensors_rocm[0].layout.span();
            auto dst = static_cast<dt_byte*>(tensors_rocm[0].raw_ptr) +
                       span.low_byte;
            auto src = static_cast<const dt_byte*>(tensors_orig[0].raw_ptr) +
                       span.low_byte;
            megdnn_memcpy_H2D(handle_rocm(), dst, src, span.dist_byte());

            auto workspace_size = opr->get_workspace_in_bytes(
                    tensors_rocm[0].layout, tensors_rocm[1].layout);
            auto workspace_rocm = megdnn_malloc(handle_rocm(), workspace_size);
            Workspace workspace{static_cast<dt_byte*>(workspace_rocm),
                                workspace_size};
            opr->exec(tensors_rocm[0], tensors_rocm[1], workspace);
            megdnn_free(handle_rocm(), workspace_rocm);

            // copy the freshly computed forward output back to the host
            // tensors so backward sees a consistent (src, dst) pair
            span = tensors_rocm[1].layout.span();
            dst = static_cast<dt_byte*>(tensors_orig[1].raw_ptr) +
                  span.low_byte;
            src = static_cast<const dt_byte*>(tensors_rocm[1].raw_ptr) +
                  span.low_byte;
            megdnn_memcpy_D2H(handle_rocm(), dst, src, span.dist_byte());
        };

        {
            auto opr = handle_rocm()->create_operator<PoolingForward>();
            opr->param() = arg.param;
            opr->deduce_layout(ilayout, olayout);
        }
        auto set_dtype = [&checker](DType dtype) {
            checker.set_dtype(0, dtype)
                    .set_dtype(1, dtype)
                    .set_dtype(2, dtype)
                    .set_dtype(3, dtype);
        };

        checker.set_tensors_constraint(constraint);
        set_dtype(dtype::Float32());
        checker.set_param(arg.param).exec(
                TensorShapeArray{ilayout, olayout, olayout, ilayout});
#if !MEGDNN_DISABLE_FLOAT16
        //! FIXME: MIOpen pooling backward for fp16 has a bug
#if 0
        Float16PeriodicalRNG rng;
        set_dtype(dtype::Float16());
        checker.set_param(arg.param).set_rng(0, &rng).set_epsilon(1e-2).exec(
                TensorShapeArray{ilayout, olayout, olayout, ilayout});
#endif
#endif
    }
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(ROCM, POOLING_FWD_BENCHMARK) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    auto benchmarker =
            ROCMBenchmarker<PoolingForward>(handle_rocm(), handle_naive(false));
    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t SH = 1,
                   size_t SW = 1, size_t FH = 1, size_t FW = 1, size_t PH = 0,
                   size_t PW = 0, DType dtype = dtype::Float32()) {
        benchmarker.set_dtype(0, dtype).set_dtype(1, dtype);
        benchmarker.set_display(true);
        PoolingForward::Param param;
        param.mode = param::Pooling::Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
        param.stride_h = SH;
        param.stride_w = SW;
        param.pad_h = PH;
        param.pad_w = PW;
        param.window_h = FH;
        param.window_w = FW;
        benchmarker.set_param(param);
        size_t OH = infer_conv_shape(IH, FH, SH, PH);
        size_t OW = infer_conv_shape(IW, FW, SW, PW);
        // warm up
        benchmarker.execs({{N, IC, IH, IW}, {N, IC, OH, OW}});
        // do actual benchmark
        auto time_ms = benchmarker.execs({{N, IC, IH, IW}, {N, IC, OH, OW}});
        time_ms = benchmarker.execs({{N, IC, IH, IW}, {N, IC, OH, OW}});
        // each output element reads FH * FW inputs and writes one output
        auto io = (double)N * IC * OH * OW * (1 + FH * FW) * dtype.size();
        auto gbps = io / (time_ms * 1e6);
        printf("io %.2fGB, bandwidth %.3fGB/s\n", io / 1e9, gbps);
    };
    run(32, 128, 80, 64, 2, 2, 2, 2, 0, 0);
    run(32, 128, 40, 128, 2, 2, 2, 2, 0, 0);
    run(32, 224, 40, 32, 2, 2, 2, 2, 0, 0);
    run(32, 24, 160, 128, 2, 2, 4, 4, 0, 0);
    run(32, 24, 160, 128, 2, 2, 4, 4, 1, 1);
}
TEST_F(ROCM, POOLING_BWD_BENCHMARK) {
    using Mode = param::Pooling::Mode;
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    auto benchmarker = ROCMBenchmarker<PoolingBackward>(handle_rocm(),
                                                        handle_naive(false));
    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t SH = 1,
                   size_t SW = 1, size_t FH = 1, size_t FW = 1, size_t PH = 0,
                   size_t PW = 0,
                   Mode mode = Mode::AVERAGE_COUNT_EXCLUDE_PADDING,
                   DType dtype = dtype::Float32()) {
        benchmarker.set_dtype(0, dtype).set_dtype(1, dtype);
        benchmarker.set_display(true);
        PoolingForward::Param param;
        param.mode = mode;
        param.stride_h = SH;
        param.stride_w = SW;
        param.pad_h = PH;
        param.pad_w = PW;
        param.window_h = FH;
        param.window_w = FW;
        benchmarker.set_param(param);
        size_t OH = infer_conv_shape(IH, FH, SH, PH);
        size_t OW = infer_conv_shape(IW, FW, SW, PW);
        // warm up
        benchmarker.execs({{N, IC, IH, IW},
                           {N, IC, OH, OW},
                           {N, IC, OH, OW},
                           {N, IC, IH, IW}});
        // do actual benchmark
        auto time_ms = benchmarker.execs({{N, IC, IH, IW},
                                          {N, IC, OH, OW},
                                          {N, IC, OH, OW},
                                          {N, IC, IH, IW}});
        time_ms = benchmarker.execs({{N, IC, IH, IW},
                                     {N, IC, OH, OW},
                                     {N, IC, OH, OW},
                                     {N, IC, IH, IW}});
        double io = 0.;
        double gbps = 0.;
        if (mode == Mode::AVERAGE_COUNT_EXCLUDE_PADDING) {
            io = (double)N * IC * OH * OW * FH * FW * 2 * dtype.size();
            gbps = io / (time_ms * 1e6);
        } else {
            io = (double)N * IC * OH * OW * 2 * dtype.size();
            gbps = io / (time_ms * 1e6);
        }
        printf("Mode = %s, io %.2fGB, bandwidth %.3fGB/s\n",
               mode == Mode::AVERAGE_COUNT_EXCLUDE_PADDING ? "AVERAGE" : "MAX",
               io / 1e9, gbps);
    };
    Mode mode = Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
    run(32, 128, 80, 64, 2, 2, 2, 2, 0, 0, mode);
    run(32, 128, 40, 128, 2, 2, 2, 2, 0, 0, mode);
    run(32, 224, 40, 32, 2, 2, 2, 2, 0, 0, mode);
    run(32, 24, 160, 128, 2, 2, 4, 4, 0, 0, mode);
    run(32, 24, 160, 128, 2, 2, 4, 4, 1, 1, mode);
    mode = Mode::MAX;
    run(32, 128, 80, 64, 2, 2, 2, 2, 0, 0, mode);
    run(32, 128, 40, 128, 2, 2, 2, 2, 0, 0, mode);
    run(32, 224, 40, 32, 2, 2, 2, 2, 0, 0, mode);
    run(32, 24, 160, 128, 2, 2, 4, 4, 0, 0, mode);
    run(32, 24, 160, 128, 2, 2, 4, 4, 1, 1, mode);
}
#endif
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU and that its driver is installed. If you would like to try deep-learning development on a cloud GPU platform, visit the MegStudio platform.
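As a quick sanity check before running GPU code, a minimal sketch along these lines can probe whether MegEngine sees a usable device (assuming the megengine package is installed; is_cuda_available() and get_device_count() are taken from its Python device API):

# Minimal sketch: probe for a usable GPU before running GPU code.
# Assumes MegEngine is installed and importable.
import megengine as mge

if mge.is_cuda_available():
    print("CUDA available, %d GPU device(s) found" % mge.get_device_count("gpu"))
else:
    print("no GPU found; computations will run on the CPU")

If no GPU or driver is present, MegEngine falls back to CPU execution, so the same installation package works in both cases.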