You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

reduce.cpp 9.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. /**
  2. * \file dnn/test/arm_common/reduce.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/arm_common/fixture.h"
  12. #include "megdnn/oprs.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/task_record_check.h"
  16. using namespace megdnn;
  17. using namespace test;
  18. TEST_F(ARM_COMMON, REDUCE) {
  19. using Param = Reduce::Param;
  20. using Mode = Param::Mode;
  21. Checker<Reduce> checker(handle());
  22. UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
  23. checker.set_rng(0, &rng);
  24. struct Config {
  25. Param param;
  26. DType dtype;
  27. TensorShape shape;
  28. Config(Param param, DType dtype, TensorShape shape)
  29. : param(param), dtype(dtype), shape(shape) {}
  30. };
  31. std::vector<Config> configs;
  32. for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
  33. for (auto dtype : std::vector<DType>{
  34. dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
  35. dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
  36. for (int32_t axis : {0, 1, 2}) {
  37. for (size_t A : {1, 3, 5}) {
  38. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  39. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  40. TensorShape shape{A, B, C};
  41. Param param(mode, axis);
  42. Config config(param, dtype, shape);
  43. configs.push_back(config);
  44. }
  45. }
  46. }
  47. }
  48. for (auto&& config : configs) {
  49. auto&& dtype = config.dtype;
  50. auto&& param = config.param;
  51. auto&& shape = config.shape;
  52. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  53. }
  54. configs.clear();
  55. for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
  56. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
  57. for (int32_t axis : {0, 1, 2}) {
  58. for (size_t A : {1, 3, 5}) {
  59. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  60. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  61. TensorShape shape{A, B, C};
  62. Param param(mode, axis);
  63. Config config(param, dtype, shape);
  64. configs.push_back(config);
  65. }
  66. }
  67. }
  68. }
  69. UniformFloatRNG rng_float(-2, 2);
  70. checker.set_rng(0, &rng_float);
  71. checker.set_epsilon(1e-1);
  72. for (auto&& config : configs) {
  73. auto&& dtype = config.dtype;
  74. auto&& param = config.param;
  75. auto&& shape = config.shape;
  76. if (dtype == dtype::Float16())
  77. checker.set_epsilon(1e-1);
  78. else
  79. checker.set_epsilon(1e-3);
  80. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  81. }
  82. }
  83. TEST_F(ARM_COMMON, REDUCE_RECORD) {
  84. using Param = Reduce::Param;
  85. using Mode = Param::Mode;
  86. TaskRecordChecker<Reduce> checker(0);
  87. UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
  88. checker.set_rng(0, &rng);
  89. struct Config {
  90. Param param;
  91. DType dtype;
  92. TensorShape shape;
  93. Config(Param param, DType dtype, TensorShape shape)
  94. : param(param), dtype(dtype), shape(shape) {}
  95. };
  96. std::vector<Config> configs;
  97. for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
  98. for (auto dtype : std::vector<DType>{
  99. dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
  100. dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
  101. for (int32_t axis : {0, 1, 2}) {
  102. for (size_t A : {1, 3, 5}) {
  103. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  104. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  105. TensorShape shape{A, B, C};
  106. Param param(mode, axis);
  107. Config config(param, dtype, shape);
  108. configs.push_back(config);
  109. }
  110. }
  111. }
  112. }
  113. for (auto&& config : configs) {
  114. auto&& dtype = config.dtype;
  115. auto&& param = config.param;
  116. auto&& shape = config.shape;
  117. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  118. }
  119. configs.clear();
  120. for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
  121. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
  122. for (int32_t axis : {0, 1, 2}) {
  123. for (size_t A : {1, 3, 5}) {
  124. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  125. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  126. TensorShape shape{A, B, C};
  127. Param param(mode, axis);
  128. Config config(param, dtype, shape);
  129. configs.push_back(config);
  130. }
  131. }
  132. }
  133. }
  134. UniformFloatRNG rng_float(-2, 2);
  135. checker.set_rng(0, &rng_float);
  136. checker.set_epsilon(1e-1);
  137. for (auto&& config : configs) {
  138. auto&& dtype = config.dtype;
  139. auto&& param = config.param;
  140. auto&& shape = config.shape;
  141. if (dtype == dtype::Float16())
  142. checker.set_epsilon(1e-1);
  143. else
  144. checker.set_epsilon(1e-3);
  145. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  146. }
  147. }
  148. #if MEGDNN_WITH_BENCHMARK
  149. TEST_F(ARM_COMMON, BENCHMARK_REDUCE) {
  150. auto run = [&](size_t A, size_t B, size_t C, size_t axis,
  151. megdnn::param::Reduce::Mode mode, megdnn::DType& dtype) {
  152. auto handle_fallback = create_cpu_handle(1);
  153. Benchmarker<Reduce> benchmarker(handle());
  154. Benchmarker<Reduce> benchmarker_fallback(handle_fallback.get());
  155. benchmarker_fallback.set_display(false);
  156. benchmarker.set_display(false);
  157. constexpr size_t RUNS = 50;
  158. benchmarker_fallback.set_times(RUNS);
  159. benchmarker.set_times(RUNS);
  160. param::Reduce param;
  161. param.axis = axis;
  162. param.mode = mode;
  163. benchmarker.set_param(param);
  164. benchmarker_fallback.set_param(param);
  165. TensorLayout src({A, B, C}, dtype), dst;
  166. auto opr = handle()->create_operator<Reduce>();
  167. opr->param() = param;
  168. opr->deduce_layout(src, dst);
  169. auto bench = [&](const char* msg) {
  170. auto cur = benchmarker.execs({src, dst}) / RUNS;
  171. auto fallback = benchmarker_fallback.execs({src, dst}) / RUNS;
  172. float computation = src.total_nr_elems() / 1024.0 / 1024.0 / 1024.0 * 1e3;
  173. printf("run %s->%s %s: fallback: %fms %fGflops "
  174. "cur: %fms %fGflops speedup=%f\n",
  175. src.to_string().c_str(), dst.to_string().c_str(), msg, fallback,
  176. computation / fallback, cur, computation / cur, fallback / cur);
  177. };
  178. benchmarker_fallback.set_dtype(0, dtype);
  179. benchmarker.set_dtype(0, dtype);
  180. bench(dtype.name());
  181. };
  182. for (auto mode :
  183. {param::Reduce::Mode::MEAN, param::Reduce::Mode::MAX,
  184. param::Reduce::Mode::MIN})
  185. for (int32_t axis : {1, 2}) {
  186. if (mode == param::Reduce::Mode::MEAN)
  187. printf("testcase mean %s\n", axis == 2 ? "c == 1" : "c > 1");
  188. else if (mode == param::Reduce::Mode::MAX)
  189. printf("testcase max %s\n", axis == 2 ? "c == 1" : "c > 1");
  190. else if (mode == param::Reduce::Mode::MIN)
  191. printf("testcase min %s\n", axis == 2 ? "c == 1" : "c > 1");
  192. for (auto dtype : std::vector<megdnn::DType>{
  193. dtype::Float16(), dtype::Float32(), dtype::QuantizedS8(4.2f),
  194. dtype::Quantized8Asymm(3.2f, static_cast<uint8_t>(10))}) {
  195. run(1, 1024, 49, axis, mode, dtype);
  196. run(2, 10, 10000, axis, mode, dtype);
  197. run(2, 100, 10000, axis, mode, dtype);
  198. run(2, 10, 100000, axis, mode, dtype);
  199. }
  200. }
  201. for (auto mode :
  202. {param::Reduce::Mode::SUM, param::Reduce::Mode::PRODUCT,
  203. param::Reduce::Mode::SUM_SQR})
  204. for (int32_t axis : {1, 2}) {
  205. if (mode == param::Reduce::Mode::SUM)
  206. printf("testcase sum %s\n", axis == 2 ? "c == 1" : "c > 1");
  207. else if (mode == param::Reduce::Mode::PRODUCT)
  208. printf("testcase product %s\n", axis == 2 ? "c == 1" : "c > 1");
  209. else if (mode == param::Reduce::Mode::SUM_SQR)
  210. printf("testcase sum SumSqr %s\n", axis == 2 ? "c == 1" : "c > 1");
  211. for (auto dtype :
  212. std::vector<megdnn::DType>{dtype::Float16(), dtype::Float32()}) {
  213. run(1, 1024, 49, axis, mode, dtype);
  214. run(2, 10, 10000, axis, mode, dtype);
  215. run(2, 100, 10000, axis, mode, dtype);
  216. run(2, 10, 100000, axis, mode, dtype);
  217. }
  218. }
  219. }
  220. #endif
  221. // vim: syntax=cpp.doxygen