You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

reduce.cpp 7.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /**
  2. * \file dnn/test/arm_common/reduce.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/arm_common/fixture.h"
  12. #include "megdnn/oprs.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/benchmarker.h"
  15. using namespace megdnn;
  16. using namespace test;
  17. TEST_F(ARM_COMMON, REDUCE) {
  18. using Param = Reduce::Param;
  19. using Mode = Param::Mode;
  20. Checker<Reduce> checker(handle());
  21. UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
  22. checker.set_rng(0, &rng);
  23. struct Config {
  24. Param param;
  25. DType dtype;
  26. TensorShape shape;
  27. Config(Param param, DType dtype, TensorShape shape)
  28. : param(param), dtype(dtype), shape(shape) {}
  29. };
  30. std::vector<Config> configs;
  31. for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
  32. for (auto dtype : std::vector<DType>{
  33. dtype::Float32(), dtype::Float16(),
  34. dtype::QuantizedS8(1.3f),
  35. dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
  36. for (int32_t axis : {0, 1, 2}) {
  37. for (size_t A : {1, 3, 5}) {
  38. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  39. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  40. TensorShape shape{A, B, C};
  41. Param param(mode, axis);
  42. Config config(param, dtype, shape);
  43. configs.push_back(config);
  44. }
  45. }
  46. }
  47. }
  48. for (auto&& config : configs) {
  49. auto&& dtype = config.dtype;
  50. auto&& param = config.param;
  51. auto&& shape = config.shape;
  52. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  53. }
  54. configs.clear();
  55. for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
  56. for (auto dtype :
  57. std::vector<DType>{dtype::Float32(), dtype::Float16()})
  58. for (int32_t axis : {0, 1, 2}) {
  59. for (size_t A : {1, 3, 5}) {
  60. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  61. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  62. TensorShape shape{A, B, C};
  63. Param param(mode, axis);
  64. Config config(param, dtype, shape);
  65. configs.push_back(config);
  66. }
  67. }
  68. }
  69. }
  70. UniformFloatRNG rng_float(-2, 2);
  71. checker.set_rng(0, &rng_float);
  72. checker.set_epsilon(1e-1);
  73. for (auto&& config : configs) {
  74. auto&& dtype = config.dtype;
  75. auto&& param = config.param;
  76. auto&& shape = config.shape;
  77. if(dtype == dtype::Float16())
  78. checker.set_epsilon(1e-1);
  79. else
  80. checker.set_epsilon(1e-3);
  81. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  82. }
  83. }
  84. #if MEGDNN_WITH_BENCHMARK
  85. TEST_F(ARM_COMMON, BENCHMARK_REDUCE) {
  86. auto run = [&](size_t A, size_t B, size_t C, size_t axis,
  87. megdnn::param::Reduce::Mode mode, megdnn::DType& dtype) {
  88. auto handle_fallback = create_cpu_handle(1);
  89. Benchmarker<Reduce> benchmarker(handle());
  90. Benchmarker<Reduce> benchmarker_fallback(handle_fallback.get());
  91. benchmarker_fallback.set_display(false);
  92. benchmarker.set_display(false);
  93. constexpr size_t RUNS = 50;
  94. benchmarker_fallback.set_times(RUNS);
  95. benchmarker.set_times(RUNS);
  96. param::Reduce param;
  97. param.axis = axis;
  98. param.mode = mode;
  99. benchmarker.set_param(param);
  100. benchmarker_fallback.set_param(param);
  101. TensorLayout src({A, B, C}, dtype), dst;
  102. auto opr = handle()->create_operator<Reduce>();
  103. opr->param() = param;
  104. opr->deduce_layout(src, dst);
  105. auto bench = [&](const char* msg) {
  106. auto cur = benchmarker.execs({src, dst}) / RUNS;
  107. auto fallback =
  108. benchmarker_fallback.execs({src, dst}) / RUNS;
  109. float computation =
  110. src.total_nr_elems() / 1024.0 / 1024.0 / 1024.0 * 1e3;
  111. printf("run %s->%s %s: fallback: %fms %fGflops "
  112. "cur: %fms %fGflops speedup=%f\n",
  113. src.to_string().c_str(), dst.to_string().c_str(), msg,
  114. fallback, computation / fallback, cur, computation / cur,
  115. fallback / cur);
  116. };
  117. benchmarker_fallback.set_dtype(0, dtype);
  118. benchmarker.set_dtype(0, dtype);
  119. bench(dtype.name());
  120. };
  121. for (auto mode : {param::Reduce::Mode::MEAN, param::Reduce::Mode::MAX,
  122. param::Reduce::Mode::MIN})
  123. for (int32_t axis : {1, 2}) {
  124. if (mode == param::Reduce::Mode::MEAN)
  125. printf("testcase mean %s\n", axis == 2 ? "c == 1" : "c > 1");
  126. else if (mode == param::Reduce::Mode::MAX)
  127. printf("testcase max %s\n", axis == 2 ? "c == 1" : "c > 1");
  128. else if (mode == param::Reduce::Mode::MIN)
  129. printf("testcase min %s\n", axis == 2 ? "c == 1" : "c > 1");
  130. for (auto dtype :
  131. std::vector<megdnn::DType>{dtype::Float16(), dtype::Float32(),
  132. dtype::QuantizedS8(4.2f),
  133. dtype::Quantized8Asymm(3.2f, static_cast<uint8_t>(10))}) {
  134. run(1, 1024, 49, axis, mode, dtype);
  135. run(2, 10, 10000, axis, mode, dtype);
  136. run(2, 100, 10000, axis, mode, dtype);
  137. run(2, 10, 100000, axis, mode, dtype);
  138. }
  139. }
  140. for (auto mode : {param::Reduce::Mode::SUM, param::Reduce::Mode::PRODUCT,
  141. param::Reduce::Mode::SUM_SQR})
  142. for (int32_t axis : {1, 2}) {
  143. if (mode == param::Reduce::Mode::SUM)
  144. printf("testcase sum %s\n", axis == 2 ? "c == 1" : "c > 1");
  145. else if (mode == param::Reduce::Mode::PRODUCT)
  146. printf("testcase product %s\n", axis == 2 ? "c == 1" : "c > 1");
  147. else if (mode == param::Reduce::Mode::SUM_SQR)
  148. printf("testcase sum SumSqr %s\n",
  149. axis == 2 ? "c == 1" : "c > 1");
  150. for (auto dtype : std::vector<megdnn::DType>{dtype::Float16(),
  151. dtype::Float32()}) {
  152. run(1, 1024, 49, axis, mode, dtype);
  153. run(2, 10, 10000, axis, mode, dtype);
  154. run(2, 100, 10000, axis, mode, dtype);
  155. run(2, 10, 100000, axis, mode, dtype);
  156. }
  157. }
  158. }
  159. #endif
  160. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台