You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

local.cpp 2.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. /**
  2. * \file dnn/test/cpu/local.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/cpu/fixture.h"
  12. #include "test/common/benchmarker.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/local.h"
  15. #include "test/common/timer.h"
  16. namespace megdnn {
  17. namespace test {
  18. TEST_F(CPU, LOCAL) {
  19. auto args = local::get_args();
  20. for (auto&& arg : args) {
  21. Checker<Local> checker(handle());
  22. checker.set_param(arg.param).exec(
  23. TensorShapeArray{arg.sshape(), arg.fshape(), arg.dshape()});
  24. }
  25. }
  26. #if MEGDNN_WITH_BENCHMARK
  27. TEST_F(CPU, BENCHMARK_LOCAL) {
  28. size_t T = 10;
  29. float memcpy_bandwidth, local_bandwidth;
  30. {
  31. std::vector<float> src(1000000), dst(1000000);
  32. auto total_mem = (src.size() + dst.size()) * sizeof(float) * T;
  33. Timer timer;
  34. timer.start();
  35. for (size_t t = 0; t < T; ++t) {
  36. std::memcpy(dst.data(), src.data(), sizeof(float) * src.size());
  37. // to prevent compiler optimizing out memcpy above.
  38. asm volatile("");
  39. }
  40. timer.stop();
  41. auto time_in_ms = timer.get_time_in_us() / 1e3;
  42. auto bandwidth = total_mem / (time_in_ms / 1000.0f);
  43. std::cout << "Copy from src(" << src.data() << ") to dst(" << dst.data() << ")"
  44. << std::endl;
  45. std::cout << "Memcpy bandwidth is " << bandwidth / 1e9 << "GB/s" << std::endl;
  46. memcpy_bandwidth = bandwidth;
  47. }
  48. {
  49. Benchmarker<Local> benchmarker(handle());
  50. TensorShape src{2, 64, 7, 7}, filter{5, 5, 64, 3, 3, 64}, dst{2, 64, 5, 5};
  51. Local::Param param;
  52. param.pad_h = param.pad_w = 0;
  53. auto time_in_ms =
  54. benchmarker.set_times(T).set_param(param).set_display(false).exec(
  55. {src, filter, dst});
  56. auto total_mem = (src.total_nr_elems() + filter.total_nr_elems() +
  57. dst.total_nr_elems()) *
  58. sizeof(float) * T;
  59. auto bandwidth = total_mem / (time_in_ms / 1000.0f);
  60. std::cout << "Bandwidth is " << bandwidth / 1e9 << "GB/s" << std::endl;
  61. local_bandwidth = bandwidth;
  62. }
  63. float ratio = local_bandwidth / memcpy_bandwidth;
  64. ASSERT_GE(ratio, 0.05);
  65. }
  66. #endif
  67. } // namespace test
  68. } // namespace megdnn
  69. // vim: syntax=cpp.doxygen