You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

reduce.cpp 6.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
/**
 * \file dnn/test/arm_common/reduce.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
  11. #include "test/arm_common/fixture.h"
  12. #include "megdnn/oprs.h"
  13. #include "test/common/benchmarker.h"
  14. #include "test/common/checker.h"
  15. using namespace megdnn;
  16. using namespace test;
  17. TEST_F(ARM_COMMON, REDUCE) {
  18. using Param = Reduce::Param;
  19. using Mode = Param::Mode;
  20. Checker<Reduce> checker(handle());
  21. UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
  22. checker.set_rng(0, &rng);
  23. struct Config {
  24. Param param;
  25. DType dtype;
  26. TensorShape shape;
  27. Config(Param param, DType dtype, TensorShape shape)
  28. : param(param), dtype(dtype), shape(shape) {}
  29. };
  30. std::vector<Config> configs;
  31. for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
  32. for (auto dtype : std::vector<DType>{
  33. dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
  34. dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
  35. for (int32_t axis : {0, 1, 2}) {
  36. for (size_t A : {1, 3, 5}) {
  37. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  38. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  39. TensorShape shape{A, B, C};
  40. Param param(mode, axis);
  41. Config config(param, dtype, shape);
  42. configs.push_back(config);
  43. }
  44. }
  45. }
  46. }
  47. for (auto&& config : configs) {
  48. auto&& dtype = config.dtype;
  49. auto&& param = config.param;
  50. auto&& shape = config.shape;
  51. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  52. }
  53. configs.clear();
  54. for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
  55. for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
  56. for (int32_t axis : {0, 1, 2}) {
  57. for (size_t A : {1, 3, 5}) {
  58. for (size_t B : {4, 6, 9, 16, 33, 45}) {
  59. for (size_t C : {4, 6, 9, 16, 33, 45}) {
  60. TensorShape shape{A, B, C};
  61. Param param(mode, axis);
  62. Config config(param, dtype, shape);
  63. configs.push_back(config);
  64. }
  65. }
  66. }
  67. }
  68. UniformFloatRNG rng_float(-2, 2);
  69. checker.set_rng(0, &rng_float);
  70. checker.set_epsilon(1e-1);
  71. for (auto&& config : configs) {
  72. auto&& dtype = config.dtype;
  73. auto&& param = config.param;
  74. auto&& shape = config.shape;
  75. if (dtype == dtype::Float16())
  76. checker.set_epsilon(1e-1);
  77. else
  78. checker.set_epsilon(1e-3);
  79. checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
  80. }
  81. }
  82. #if MEGDNN_WITH_BENCHMARK
  83. TEST_F(ARM_COMMON, BENCHMARK_REDUCE) {
  84. auto run = [&](size_t A, size_t B, size_t C, size_t axis,
  85. megdnn::param::Reduce::Mode mode, megdnn::DType& dtype) {
  86. auto handle_fallback = create_cpu_handle(1);
  87. Benchmarker<Reduce> benchmarker(handle());
  88. Benchmarker<Reduce> benchmarker_fallback(handle_fallback.get());
  89. benchmarker_fallback.set_display(false);
  90. benchmarker.set_display(false);
  91. constexpr size_t RUNS = 50;
  92. benchmarker_fallback.set_times(RUNS);
  93. benchmarker.set_times(RUNS);
  94. param::Reduce param;
  95. param.axis = axis;
  96. param.mode = mode;
  97. benchmarker.set_param(param);
  98. benchmarker_fallback.set_param(param);
  99. TensorLayout src({A, B, C}, dtype), dst;
  100. auto opr = handle()->create_operator<Reduce>();
  101. opr->param() = param;
  102. opr->deduce_layout(src, dst);
  103. auto bench = [&](const char* msg) {
  104. auto cur = benchmarker.execs({src, dst}) / RUNS;
  105. auto fallback = benchmarker_fallback.execs({src, dst}) / RUNS;
  106. float computation = src.total_nr_elems() / 1024.0 / 1024.0 / 1024.0 * 1e3;
  107. printf("run %s->%s %s: fallback: %fms %fGflops "
  108. "cur: %fms %fGflops speedup=%f\n",
  109. src.to_string().c_str(), dst.to_string().c_str(), msg, fallback,
  110. computation / fallback, cur, computation / cur, fallback / cur);
  111. };
  112. benchmarker_fallback.set_dtype(0, dtype);
  113. benchmarker.set_dtype(0, dtype);
  114. bench(dtype.name());
  115. };
  116. for (auto mode :
  117. {param::Reduce::Mode::MEAN, param::Reduce::Mode::MAX,
  118. param::Reduce::Mode::MIN})
  119. for (int32_t axis : {1, 2}) {
  120. if (mode == param::Reduce::Mode::MEAN)
  121. printf("testcase mean %s\n", axis == 2 ? "c == 1" : "c > 1");
  122. else if (mode == param::Reduce::Mode::MAX)
  123. printf("testcase max %s\n", axis == 2 ? "c == 1" : "c > 1");
  124. else if (mode == param::Reduce::Mode::MIN)
  125. printf("testcase min %s\n", axis == 2 ? "c == 1" : "c > 1");
  126. for (auto dtype : std::vector<megdnn::DType>{
  127. dtype::Float16(), dtype::Float32(), dtype::QuantizedS8(4.2f),
  128. dtype::Quantized8Asymm(3.2f, static_cast<uint8_t>(10))}) {
  129. run(1, 1024, 49, axis, mode, dtype);
  130. run(2, 10, 10000, axis, mode, dtype);
  131. run(2, 100, 10000, axis, mode, dtype);
  132. run(2, 10, 100000, axis, mode, dtype);
  133. }
  134. }
  135. for (auto mode :
  136. {param::Reduce::Mode::SUM, param::Reduce::Mode::PRODUCT,
  137. param::Reduce::Mode::SUM_SQR})
  138. for (int32_t axis : {1, 2}) {
  139. if (mode == param::Reduce::Mode::SUM)
  140. printf("testcase sum %s\n", axis == 2 ? "c == 1" : "c > 1");
  141. else if (mode == param::Reduce::Mode::PRODUCT)
  142. printf("testcase product %s\n", axis == 2 ? "c == 1" : "c > 1");
  143. else if (mode == param::Reduce::Mode::SUM_SQR)
  144. printf("testcase sum SumSqr %s\n", axis == 2 ? "c == 1" : "c > 1");
  145. for (auto dtype :
  146. std::vector<megdnn::DType>{dtype::Float16(), dtype::Float32()}) {
  147. run(1, 1024, 49, axis, mode, dtype);
  148. run(2, 10, 10000, axis, mode, dtype);
  149. run(2, 100, 10000, axis, mode, dtype);
  150. run(2, 10, 100000, axis, mode, dtype);
  151. }
  152. }
  153. }
  154. #endif
// vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台