You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

reduce.cpp 9.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #include "test/arm_common/fixture.h"
  2. #include "megdnn/oprs.h"
  3. #include "test/common/benchmarker.h"
  4. #include "test/common/checker.h"
  5. #include "test/common/task_record_check.h"
  6. using namespace megdnn;
  7. using namespace test;
  8. TEST_F(ARM_COMMON, REDUCE) {
  9. using Param = Reduce::Param;
  10. using Mode = Param::Mode;
  11. Checker<Reduce> checker(handle());
  12. UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
  13. checker.set_rng(0, &rng);
  14. struct Config {
  15. Param param;
  16. DType dtype;
  17. TensorShape shape;
  18. Config(Param param, DType dtype, TensorShape shape)
  19. : param(param), dtype(dtype), shape(shape) {}
  20. };
  21. std::vector<Config> configs;
  22. for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
  23. for (auto dtype : std::vector<DType>{
  24. dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
  25. dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
  26. for (int32_t axis : {0, 1, 2}) {
  27. for (size_t A : {1, 3, 5}) {
  28. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  29. for (size_t C : {2, 3, 4, 6, 9, 16, 33, 45}) {
  30. TensorShape shape{A, B, C};
  31. Param param(mode, axis);
  32. Config config(param, dtype, shape);
  33. configs.push_back(config);
  34. }
  35. }
  36. }
  37. }
  38. for (auto&& config : configs) {
  39. auto&& dtype = config.dtype;
  40. auto&& param = config.param;
  41. auto&& shape = config.shape;
  42. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  43. }
  44. configs.clear();
  45. for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
  46. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
  47. for (int32_t axis : {0, 1, 2}) {
  48. for (size_t A : {1, 3, 5}) {
  49. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  50. for (size_t C : {2, 3, 4, 6, 9, 16, 33, 45}) {
  51. TensorShape shape{A, B, C};
  52. Param param(mode, axis);
  53. Config config(param, dtype, shape);
  54. configs.push_back(config);
  55. }
  56. }
  57. }
  58. }
  59. UniformFloatRNG rng_float(-2, 2);
  60. checker.set_rng(0, &rng_float);
  61. checker.set_epsilon(1e-1);
  62. for (auto&& config : configs) {
  63. auto&& dtype = config.dtype;
  64. auto&& param = config.param;
  65. auto&& shape = config.shape;
  66. if (dtype == dtype::Float16())
  67. checker.set_epsilon(1e-1);
  68. else
  69. checker.set_epsilon(1e-3);
  70. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  71. }
  72. }
  73. TEST_F(ARM_COMMON, REDUCE_RECORD) {
  74. using Param = Reduce::Param;
  75. using Mode = Param::Mode;
  76. TaskRecordChecker<Reduce> checker(0);
  77. UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
  78. checker.set_rng(0, &rng);
  79. struct Config {
  80. Param param;
  81. DType dtype;
  82. TensorShape shape;
  83. Config(Param param, DType dtype, TensorShape shape)
  84. : param(param), dtype(dtype), shape(shape) {}
  85. };
  86. std::vector<Config> configs;
  87. for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
  88. for (auto dtype : std::vector<DType>{
  89. dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
  90. dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
  91. for (int32_t axis : {0, 1, 2}) {
  92. for (size_t A : {1, 3, 5}) {
  93. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  94. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  95. TensorShape shape{A, B, C};
  96. Param param(mode, axis);
  97. Config config(param, dtype, shape);
  98. configs.push_back(config);
  99. }
  100. }
  101. }
  102. }
  103. for (auto&& config : configs) {
  104. auto&& dtype = config.dtype;
  105. auto&& param = config.param;
  106. auto&& shape = config.shape;
  107. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  108. }
  109. configs.clear();
  110. for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
  111. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
  112. for (int32_t axis : {0, 1, 2}) {
  113. for (size_t A : {1, 3, 5}) {
  114. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  115. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  116. TensorShape shape{A, B, C};
  117. Param param(mode, axis);
  118. Config config(param, dtype, shape);
  119. configs.push_back(config);
  120. }
  121. }
  122. }
  123. }
  124. UniformFloatRNG rng_float(-2, 2);
  125. checker.set_rng(0, &rng_float);
  126. checker.set_epsilon(1e-1);
  127. for (auto&& config : configs) {
  128. auto&& dtype = config.dtype;
  129. auto&& param = config.param;
  130. auto&& shape = config.shape;
  131. if (dtype == dtype::Float16())
  132. checker.set_epsilon(1e-1);
  133. else
  134. checker.set_epsilon(1e-3);
  135. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  136. }
  137. }
  138. #if MEGDNN_WITH_BENCHMARK
  139. TEST_F(ARM_COMMON, BENCHMARK_REDUCE) {
  140. auto run = [&](size_t A, size_t B, size_t C, size_t axis,
  141. megdnn::param::Reduce::Mode mode, megdnn::DType& dtype) {
  142. auto handle_fallback = create_cpu_handle(1);
  143. Benchmarker<Reduce> benchmarker(handle());
  144. Benchmarker<Reduce> benchmarker_fallback(handle_fallback.get());
  145. benchmarker_fallback.set_display(false);
  146. benchmarker.set_display(false);
  147. constexpr size_t RUNS = 50;
  148. benchmarker_fallback.set_times(RUNS);
  149. benchmarker.set_times(RUNS);
  150. param::Reduce param;
  151. param.axis = axis;
  152. param.mode = mode;
  153. benchmarker.set_param(param);
  154. benchmarker_fallback.set_param(param);
  155. TensorLayout src({A, B, C}, dtype), dst;
  156. auto opr = handle()->create_operator<Reduce>();
  157. opr->param() = param;
  158. opr->deduce_layout(src, dst);
  159. auto bench = [&](const char* msg) {
  160. auto cur = benchmarker.execs({src, dst}) / RUNS;
  161. auto fallback = benchmarker_fallback.execs({src, dst}) / RUNS;
  162. float computation = src.total_nr_elems() / 1024.0 / 1024.0 / 1024.0 * 1e3;
  163. printf("run %s->%s %s: fallback: %fms %fGflops "
  164. "cur: %fms %fGflops speedup=%f\n",
  165. src.to_string().c_str(), dst.to_string().c_str(), msg, fallback,
  166. computation / fallback, cur, computation / cur, fallback / cur);
  167. };
  168. benchmarker_fallback.set_dtype(0, dtype);
  169. benchmarker.set_dtype(0, dtype);
  170. bench(dtype.name());
  171. };
  172. for (auto mode :
  173. {param::Reduce::Mode::MEAN, param::Reduce::Mode::MAX,
  174. param::Reduce::Mode::MIN})
  175. for (int32_t axis : {1, 2}) {
  176. if (mode == param::Reduce::Mode::MEAN)
  177. printf("testcase mean %s\n", axis == 2 ? "c == 1" : "c > 1");
  178. else if (mode == param::Reduce::Mode::MAX)
  179. printf("testcase max %s\n", axis == 2 ? "c == 1" : "c > 1");
  180. else if (mode == param::Reduce::Mode::MIN)
  181. printf("testcase min %s\n", axis == 2 ? "c == 1" : "c > 1");
  182. for (auto dtype : std::vector<megdnn::DType>{
  183. dtype::Float16(), dtype::Float32(), dtype::QuantizedS8(4.2f),
  184. dtype::Quantized8Asymm(3.2f, static_cast<uint8_t>(10))}) {
  185. run(1, 1024, 49, axis, mode, dtype);
  186. run(2, 10, 10000, axis, mode, dtype);
  187. run(2, 100, 10000, axis, mode, dtype);
  188. run(2, 10, 100000, axis, mode, dtype);
  189. }
  190. }
  191. for (auto mode :
  192. {param::Reduce::Mode::SUM, param::Reduce::Mode::PRODUCT,
  193. param::Reduce::Mode::SUM_SQR})
  194. for (int32_t axis : {1, 2}) {
  195. if (mode == param::Reduce::Mode::SUM)
  196. printf("testcase sum %s\n", axis == 2 ? "c == 1" : "c > 1");
  197. else if (mode == param::Reduce::Mode::PRODUCT)
  198. printf("testcase product %s\n", axis == 2 ? "c == 1" : "c > 1");
  199. else if (mode == param::Reduce::Mode::SUM_SQR)
  200. printf("testcase sum SumSqr %s\n", axis == 2 ? "c == 1" : "c > 1");
  201. for (auto dtype :
  202. std::vector<megdnn::DType>{dtype::Float16(), dtype::Float32()}) {
  203. run(1, 1024, 49, axis, mode, dtype);
  204. run(2, 10, 10000, axis, mode, dtype);
  205. run(2, 100, 10000, axis, mode, dtype);
  206. run(2, 10, 100000, axis, mode, dtype);
  207. }
  208. }
  209. }
  210. #endif
  211. // vim: syntax=cpp.doxygen