You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

pooling.cpp 9.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /**
  2. * \file dnn/test/rocm/pooling.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "hcc_detail/hcc_defs_prologue.h"
  12. #include "test/rocm/fixture.h"
  13. #include "megdnn/tensor_iter.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/pooling.h"
  16. #include "test/rocm/benchmarker.h"
  17. #include "src/rocm/utils.h"
  18. #include "src/common/utils.h"
  19. namespace megdnn {
  20. namespace test {
  21. TEST_F(ROCM, POOLING_FORWARD) {
  22. auto args = pooling::get_args();
  23. using Format = param::Pooling::Format;
  24. std::vector<DType> dtypes{MEGDNN_INC_FLOAT16(dtype::Float16() MEGDNN_COMMA)
  25. dtype::Float32()};
  26. for (auto dtype : dtypes)
  27. for (auto format : {Format::NCHW})
  28. for (auto&& arg : args) {
  29. auto param = arg.param;
  30. auto src = arg.ishape;
  31. param.format = format;
  32. Checker<Pooling> checker(handle_rocm());
  33. checker.set_epsilon(1e-2);
  34. checker.set_param(param)
  35. .set_dtype(0, dtype)
  36. .set_dtype(1, dtype)
  37. .exec(TensorShapeArray{src, {}});
  38. }
  39. }
// Check ROCm pooling backward. Backward needs a consistent (src, dst) pair
// from a real forward pass, so a tensor constraint runs PoolingForward on the
// device to fill the dst operand before the backward checker executes.
TEST_F(ROCM, POOLING_BACKWARD) {
    auto args = pooling::get_args();
    for (auto&& arg : args) {
        Checker<PoolingBackward> checker(handle_rocm());
        TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32());
        TensorLayout olayout;
        // Constraint invoked on the checker's input tensors before exec:
        // tensors_orig layout is {src, dst, diff, grad} (asserted size 4).
        // It copies src to the device, runs forward there, and copies the
        // produced dst back into tensors_orig[1].
        auto constraint = [this,
                           arg](CheckerHelper::TensorValueArray& tensors_orig) {
            megdnn_assert(tensors_orig.size() == 4);
            auto opr = handle_rocm()->create_operator<PoolingForward>();
            opr->param() = arg.param;
            // Device-side scratch tensors matching src/dst layouts.
            auto tensors_rocm_storage = CheckerHelper::alloc_tensors(
                    handle_rocm(),
                    {tensors_orig[0].layout, tensors_orig[1].layout}, 0);
            auto&& tensors_rocm = *tensors_rocm_storage;
            // Upload src (host -> device), honoring the layout's byte span.
            auto span = tensors_rocm[0].layout.span();
            auto dst = static_cast<dt_byte*>(tensors_rocm[0].raw_ptr) +
                       span.low_byte;
            auto src = static_cast<const dt_byte*>(tensors_orig[0].raw_ptr) +
                       span.low_byte;
            megdnn_memcpy_H2D(handle_rocm(), dst, src, span.dist_byte());
            // Run the forward pass on device with its required workspace.
            auto workspace_size = opr->get_workspace_in_bytes(
                    tensors_rocm[0].layout, tensors_rocm[1].layout);
            auto workspace_rocm = megdnn_malloc(handle_rocm(), workspace_size);
            Workspace workspace{static_cast<dt_byte*>(workspace_rocm),
                                workspace_size};
            opr->exec(tensors_rocm[0], tensors_rocm[1], workspace);
            megdnn_free(handle_rocm(), workspace_rocm);
            // Download the computed dst (device -> host) into tensors_orig[1].
            span = tensors_rocm[1].layout.span();
            dst = static_cast<dt_byte*>(tensors_orig[1].raw_ptr) +
                  span.low_byte;
            src = static_cast<const dt_byte*>(tensors_rocm[1].raw_ptr) +
                  span.low_byte;
            megdnn_memcpy_D2H(handle_rocm(), dst, src, span.dist_byte());
        };
        {
            // Deduce the forward output layout; scoped so the temporary
            // operator is released immediately.
            auto opr = handle_rocm()->create_operator<PoolingForward>();
            opr->param() = arg.param;
            opr->deduce_layout(ilayout, olayout);
        }
        // All four operands (src, dst, diff, grad) share one dtype.
        auto set_dtype = [&checker](DType dtype) {
            checker.set_dtype(0, dtype)
                    .set_dtype(1, dtype)
                    .set_dtype(2, dtype)
                    .set_dtype(3, dtype);
        };
        checker.set_tensors_constraint(constraint);
        set_dtype(dtype::Float32());
        checker.set_param(arg.param).exec(
                TensorShapeArray{ilayout, olayout, olayout, ilayout});
#if !MEGDNN_DISABLE_FLOAT16
        //! FIXME: MIOpen pooling backward for fp16 with bug
#if 0
        Float16PeriodicalRNG rng;
        set_dtype(dtype::Float16());
        checker.set_param(arg.param).set_rng(0, &rng).set_epsilon(1e-2).exec(
                TensorShapeArray{ilayout, olayout, olayout, ilayout});
#endif
#endif
    }
}
  101. #if MEGDNN_WITH_BENCHMARK
  102. TEST_F(ROCM, POOLING_FWD_BENCHMARK) {
  103. megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
  104. auto benchmarker =
  105. ROCMBenchmarker<PoolingForward>(handle_rocm(), handle_naive(false));
  106. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t SH = 1,
  107. size_t SW = 1, size_t FH = 1, size_t FW = 1, size_t PH = 0,
  108. size_t PW = 0, DType dtype = dtype::Float32()) {
  109. benchmarker.set_dtype(0, dtype).set_dtype(1, dtype);
  110. benchmarker.set_display(true);
  111. PoolingForward::Param param;
  112. param.mode = param::Pooling::Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
  113. param.stride_h = SH;
  114. param.stride_w = SW;
  115. param.pad_h = PH;
  116. param.pad_w = PW;
  117. param.window_h = FH;
  118. param.window_w = FW;
  119. benchmarker.set_param(param);
  120. size_t OH = infer_conv_shape(IH, FH, SH, PH);
  121. size_t OW = infer_conv_shape(IW, FW, SW, PW);
  122. // warm up
  123. benchmarker.execs({{N, IC, IH, IW}, {N, IC, OH, OW}});
  124. // do actual benchmark
  125. auto time_ms = benchmarker.execs({{N, IC, IH, IW}, {N, IC, OH, OW}});
  126. time_ms = benchmarker.execs({{N, IC, IH, IW}, {N, IC, OH, OW}});
  127. auto io = (double)N * IC * OH * OW * (1 + FH * FW) * dtype.size();
  128. auto gbps = io / (time_ms * 1e6);
  129. printf("io %.2fGB, flops %.3fGB/s\n", io / 1e9, gbps);
  130. };
  131. run(32, 128, 80, 64, 2, 2, 2, 2, 0, 0);
  132. run(32, 128, 40, 128, 2, 2, 2, 2, 0, 0);
  133. run(32, 224, 40, 32, 2, 2, 2, 2, 0, 0);
  134. run(32, 24, 160, 128, 2, 2, 4, 4, 0, 0);
  135. run(32, 24, 160, 128, 2, 2, 4, 4, 1, 1);
  136. }
  137. TEST_F(ROCM, POOLING_BWD_BENCHMARK) {
  138. using Mode = param::Pooling::Mode;
  139. megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
  140. auto benchmarker = ROCMBenchmarker<PoolingBackward>(handle_rocm(),
  141. handle_naive(false));
  142. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t SH = 1,
  143. size_t SW = 1, size_t FH = 1, size_t FW = 1, size_t PH = 0,
  144. size_t PW = 0,
  145. Mode mode = Mode::AVERAGE_COUNT_EXCLUDE_PADDING,
  146. DType dtype = dtype::Float32()) {
  147. benchmarker.set_dtype(0, dtype).set_dtype(1, dtype);
  148. benchmarker.set_display(true);
  149. PoolingForward::Param param;
  150. param.mode = mode;
  151. param.stride_h = SH;
  152. param.stride_w = SW;
  153. param.pad_h = PH;
  154. param.pad_w = PW;
  155. param.window_h = FH;
  156. param.window_w = FW;
  157. benchmarker.set_param(param);
  158. size_t OH = infer_conv_shape(IH, FH, SH, PH);
  159. size_t OW = infer_conv_shape(IW, FW, SW, PW);
  160. // warm up
  161. benchmarker.execs({{N, IC, IH, IW},
  162. {N, IC, OH, OW},
  163. {N, IC, OH, OW},
  164. {N, IC, IH, IW}});
  165. // do actual benchmark
  166. auto time_ms = benchmarker.execs({{N, IC, IH, IW},
  167. {N, IC, OH, OW},
  168. {N, IC, OH, OW},
  169. {N, IC, IH, IW}});
  170. time_ms = benchmarker.execs({{N, IC, IH, IW},
  171. {N, IC, OH, OW},
  172. {N, IC, OH, OW},
  173. {N, IC, IH, IW}});
  174. double io = 0.;
  175. double gbps = 0.;
  176. if (mode == Mode::AVERAGE_COUNT_EXCLUDE_PADDING) {
  177. io = (double)N * IC * OH * OW * FH * FW * 2 * dtype.size();
  178. gbps = io / (time_ms * 1e6);
  179. } else {
  180. io = (double)N * IC * OH * OW * 2 * dtype.size();
  181. gbps = io / (time_ms * 1e6);
  182. }
  183. printf("Mode = %s, io %.2fGB, flops %.3fGB/s\n",
  184. mode == Mode::AVERAGE_COUNT_EXCLUDE_PADDING ? "AVERAGE" : "MAX",
  185. io / 1e9, gbps);
  186. };
  187. Mode mode = Mode::AVERAGE_COUNT_EXCLUDE_PADDING;
  188. run(32, 128, 80, 64, 2, 2, 2, 2, 0, 0, mode);
  189. run(32, 128, 40, 128, 2, 2, 2, 2, 0, 0, mode);
  190. run(32, 224, 40, 32, 2, 2, 2, 2, 0, 0, mode);
  191. run(32, 24, 160, 128, 2, 2, 4, 4, 0, 0, mode);
  192. run(32, 24, 160, 128, 2, 2, 4, 4, 1, 1, mode);
  193. mode = Mode::MAX;
  194. run(32, 128, 80, 64, 2, 2, 2, 2, 0, 0, mode);
  195. run(32, 128, 40, 128, 2, 2, 2, 2, 0, 0, mode);
  196. run(32, 224, 40, 32, 2, 2, 2, 2, 0, 0, mode);
  197. run(32, 24, 160, 128, 2, 2, 4, 4, 0, 0, mode);
  198. run(32, 24, 160, 128, 2, 2, 4, 4, 1, 1, mode);
  199. }
  200. #endif
  201. } // namespace test
  202. } // namespace megdnn
  203. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台