You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

benchmarker.inl 3.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. /**
  2. * \file dnn/test/rocm/benchmarker.inl
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include "test/rocm/benchmarker.h"
  13. #include <gtest/gtest.h>
  14. #include "test/common/timer.h"
  15. namespace megdnn {
  16. namespace test {
/// \brief Benchmarker that measures \p Opr execution time on a ROCm handle.
///
/// \param handle_rocm handle the benchmarked operator is created and run on;
///        the device timer is bound to this handle's stream.
/// \param handle_naive host-side handle used to stage input tensors.
template <typename Opr>
ROCMBenchmarker<Opr>::ROCMBenchmarker(Handle* handle_rocm, Handle* handle_naive)
        : m_handle_naive{handle_naive},
          m_handle_rocm{handle_rocm},
          // inputs default to a normal distribution unless a per-tensor RNG
          // is registered in m_rng (see exec())
          m_default_rng{new NormalRNG()},
          m_param{Param()},
          // the timer is created on the ROCm handle's stream, so timings are
          // taken on that stream
          m_device_timer{
                  megdnn::rocm::concrete_handle(m_handle_rocm)->stream()} {}
  25. template <typename Opr>
  26. float ROCMBenchmarker<Opr>::exec(const TensorShapeArray& shapes) {
  27. return exec(make_layouts(shapes));
  28. }
  29. template <typename Opr>
  30. float ROCMBenchmarker<Opr>::exec(TensorLayoutArray layouts) {
  31. auto opr = m_handle_rocm->create_operator<Opr>();
  32. opr->param() = m_param;
  33. auto user_layouts = layouts;
  34. m_proxy.deduce_layout(opr.get(), layouts);
  35. for (size_t i = 0; i < layouts.size(); ++i)
  36. if (user_layouts[i].ndim > 0) {
  37. auto run = [&]() {
  38. ASSERT_TRUE(layouts[i].eq_shape(user_layouts[i]))
  39. << "User provided shape is "
  40. << user_layouts[i].TensorShape::to_string()
  41. << "\nExpected shape is "
  42. << layouts[i].TensorShape::to_string();
  43. };
  44. run();
  45. }
  46. auto allocate = [&layouts](Handle* handle) {
  47. TensorNDArray tensors(layouts.size());
  48. auto trans_func = [handle](const TensorLayout& layout) {
  49. auto span = layout.span();
  50. TensorND res;
  51. res.raw_ptr = static_cast<uint8_t*>(
  52. megdnn_malloc(handle, span.dist_byte())) +
  53. span.low_byte;
  54. res.layout = layout;
  55. return res;
  56. };
  57. std::transform(layouts.begin(), layouts.end(), tensors.begin(),
  58. trans_func);
  59. return tensors;
  60. };
  61. auto tensors_cur = allocate(m_handle_rocm);
  62. auto tensors_cur_host = allocate(m_handle_naive);
  63. // init
  64. for (size_t i = 0; i < tensors_cur_host.size(); ++i) {
  65. TensorND& tensor = tensors_cur_host[i];
  66. auto rng = m_rng[i];
  67. if (!rng)
  68. rng = m_default_rng.get();
  69. auto size = tensor.layout.span().high_byte;
  70. rng->gen(tensor);
  71. megdnn_memcpy_H2D(m_handle_rocm, tensors_cur[i].raw_ptr, tensor.raw_ptr,
  72. size);
  73. }
  74. m_device_timer.reset();
  75. m_device_timer.start();
  76. m_proxy.exec(opr.get(), tensors_cur);
  77. m_device_timer.stop();
  78. auto time_in_ms = m_device_timer.get_time_in_ms();
  79. if (m_display) {
  80. std::cout << "Total time is " << time_in_ms << "ms " << std::endl;
  81. }
  82. auto free = [](Handle* handle, TensorNDArray& tensors) {
  83. std::for_each(tensors.begin(), tensors.end(),
  84. [handle](const TensorND& tensor) {
  85. megdnn_free(handle, tensor.raw_ptr);
  86. });
  87. };
  88. free(m_handle_rocm, tensors_cur);
  89. free(m_handle_naive, tensors_cur_host);
  90. return time_in_ms;
  91. }
  92. } // namespace test
  93. } // namespace megdnn
  94. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台