You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

local.cpp 2.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. /**
  2. * \file dnn/test/cpu/local.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/cpu/fixture.h"
  12. #include "test/common/checker.h"
  13. #include "test/common/local.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/timer.h"
  16. namespace megdnn {
  17. namespace test {
  18. TEST_F(CPU, LOCAL)
  19. {
  20. auto args = local::get_args();
  21. for (auto &&arg: args) {
  22. Checker<Local> checker(handle());
  23. checker.set_param(arg.param).exec(TensorShapeArray{
  24. arg.sshape(), arg.fshape(), arg.dshape()});
  25. }
  26. }
  27. #if MEGDNN_WITH_BENCHMARK
  28. TEST_F(CPU, BENCHMARK_LOCAL)
  29. {
  30. size_t T = 10;
  31. float memcpy_bandwidth, local_bandwidth;
  32. {
  33. std::vector<float> src(1000000), dst(1000000);
  34. auto total_mem = (src.size() + dst.size()) * sizeof(float) * T;
  35. Timer timer;
  36. timer.start();
  37. for (size_t t = 0; t < T; ++t) {
  38. std::memcpy(dst.data(), src.data(), sizeof(float) * src.size());
  39. // to prevent compiler optimizing out memcpy above.
  40. asm volatile ("");
  41. }
  42. timer.stop();
  43. auto time_in_ms = timer.get_time_in_us() / 1e3;
  44. auto bandwidth = total_mem / (time_in_ms/1000.0f);
  45. std::cout << "Copy from src(" << src.data()
  46. << ") to dst(" << dst.data()
  47. << ")" << std::endl;
  48. std::cout << "Memcpy bandwidth is " << bandwidth / 1e9 << "GB/s" << std::endl;
  49. memcpy_bandwidth = bandwidth;
  50. }
  51. {
  52. Benchmarker<Local> benchmarker(handle());
  53. TensorShape src{2, 64, 7, 7},
  54. filter{5, 5, 64, 3, 3, 64},
  55. dst{2, 64, 5, 5};
  56. Local::Param param;
  57. param.pad_h = param.pad_w = 0;
  58. auto time_in_ms = benchmarker.set_times(T).
  59. set_param(param).
  60. set_display(false).
  61. exec({src, filter, dst});
  62. auto total_mem = (src.total_nr_elems() +
  63. filter.total_nr_elems() +
  64. dst.total_nr_elems()) * sizeof(float)*T;
  65. auto bandwidth = total_mem / (time_in_ms/1000.0f);
  66. std::cout << "Bandwidth is " << bandwidth / 1e9 << "GB/s" << std::endl;
  67. local_bandwidth = bandwidth;
  68. }
  69. float ratio = local_bandwidth / memcpy_bandwidth;
  70. ASSERT_GE(ratio, 0.05);
  71. }
  72. #endif
  73. } // namespace test
  74. } // namespace megdnn
  75. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台