You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

benchmarker.h 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
/**
 * \file dnn/test/rocm/benchmarker.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#pragma once

#include "hip_header.h"

#include "megdnn/basic_types.h"
#include "megdnn/tensor_format.h"

#include "src/rocm/utils.h"
#include "test/common/opr_proxy.h"
#include "test/common/rng.h"
#include "test/rocm/fixture.h"

#include <map>
#include <memory>
  20. namespace megdnn {
  21. namespace test {
  22. template <typename Opr>
  23. class ROCMBenchmarker {
  24. public:
  25. using Param = typename Opr::Param;
  26. ROCMBenchmarker(Handle* handle_rocm, Handle* handle_naive);
  27. const Handle* handle() const { return m_handle_rocm; }
  28. /*!
  29. * \brief benchmark opr on current param/dtype/rng config
  30. * \returns elapsed time in ms
  31. *
  32. * ROCMBenchmarker would construct TensorLayout vectors from shapes and
  33. * dtypes and call exec(TensorLayoutArray &).
  34. */
  35. float exec(const TensorShapeArray& shapes);
  36. float exec(TensorLayoutArray layouts);
  37. //! disabiguate overloaded exec
  38. float execs(const TensorShapeArray& shapes) {
  39. return exec(make_layouts(shapes));
  40. }
  41. float execl(const TensorLayoutArray& layouts) { return exec(layouts); }
  42. ROCMBenchmarker& set_param(Param param) {
  43. m_param = param;
  44. return *this;
  45. }
  46. ROCMBenchmarker& set_dtype(size_t idx, DType dtype) {
  47. m_dtype[idx] = dtype;
  48. return *this;
  49. }
  50. ROCMBenchmarker& set_rng(size_t idx, RNG* rng) {
  51. m_rng[idx] = rng;
  52. return *this;
  53. }
  54. ROCMBenchmarker& set_proxy(const OprProxy<Opr>& proxy) {
  55. m_proxy = proxy;
  56. return *this;
  57. }
  58. ROCMBenchmarker& set_display(bool display) {
  59. m_display = display;
  60. return *this;
  61. }
  62. ROCMBenchmarker& set_fmt(size_t idx, TensorFormat fmt) {
  63. m_fmt[idx] = fmt;
  64. return *this;
  65. }
  66. TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
  67. TensorLayoutArray layouts(shapes.size());
  68. for (size_t i = 0; i < shapes.size(); ++i) {
  69. DType dt = (m_dtype.find(i) != m_dtype.end() ? m_dtype[i]
  70. : dtype::Float32());
  71. TensorFormat fmt = (m_fmt.find(i) != m_fmt.end()
  72. ? m_fmt[i]
  73. : DefaultTensorFormat::make());
  74. layouts[i] = TensorLayout(shapes[i], dt, fmt);
  75. }
  76. return layouts;
  77. }
  78. private:
  79. class ROCMTimer {
  80. private:
  81. bool m_started, m_stopped;
  82. hipEvent_t m_event_start, m_event_end;
  83. hipStream_t m_stream;
  84. public:
  85. ROCMTimer() = delete;
  86. ROCMTimer(hipStream_t strm) : m_stream{strm} {
  87. hip_check(hipEventCreate(&m_event_start));
  88. hip_check(hipEventCreate(&m_event_end));
  89. reset();
  90. }
  91. ~ROCMTimer() {
  92. hip_check(hipEventDestroy(m_event_start));
  93. hip_check(hipEventDestroy(m_event_end));
  94. }
  95. void start() {
  96. megdnn_assert(!m_started);
  97. megdnn_assert(!m_stopped);
  98. m_started = true;
  99. hip_check(hipEventRecord(m_event_start, m_stream));
  100. }
  101. void stop() {
  102. megdnn_assert(m_started);
  103. megdnn_assert(!m_stopped);
  104. m_stopped = true;
  105. hip_check(hipEventRecord(m_event_end, m_stream));
  106. }
  107. float get_time_in_ms() const {
  108. megdnn_assert(m_started);
  109. megdnn_assert(m_stopped);
  110. hip_check(hipEventSynchronize(m_event_end));
  111. float ms;
  112. hip_check(hipEventElapsedTime(&ms, m_event_start, m_event_end));
  113. return ms;
  114. }
  115. void reset() {
  116. m_started = false;
  117. m_stopped = false;
  118. }
  119. };
  120. bool m_display = true;
  121. Handle* m_handle_naive;
  122. Handle* m_handle_rocm;
  123. std::unique_ptr<RNG> m_default_rng;
  124. std::map<size_t, RNG*> m_rng;
  125. std::map<size_t, DType> m_dtype;
  126. std::map<size_t, TensorFormat> m_fmt;
  127. Param m_param;
  128. OprProxy<Opr> m_proxy;
  129. ROCMTimer m_device_timer;
  130. };
  131. } // namespace test
  132. } // namespace megdnn
  133. #include "test/rocm/benchmarker.inl"
  134. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台