You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

benchmarker.h 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. /**
  2. * \file dnn/test/rocm/benchmarker.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include "megdnn/basic_types.h"
  13. #include "megdnn/tensor_format.h"
  14. #include "src/rocm/utils.h"
  15. #include "test/common/opr_proxy.h"
  16. #include "test/common/rng.h"
  17. #include "test/rocm/fixture.h"
  18. #include "hip_header.h"
  19. #include <map>
  20. namespace megdnn {
  21. namespace test {
  22. template <typename Opr>
  23. class ROCMBenchmarker {
  24. public:
  25. using Param = typename Opr::Param;
  26. ROCMBenchmarker(Handle* handle_rocm, Handle* handle_naive);
  27. const Handle* handle() const { return m_handle_rocm; }
  28. /*!
  29. * \brief benchmark opr on current param/dtype/rng config
  30. * \returns elapsed time in ms
  31. *
  32. * ROCMBenchmarker would construct TensorLayout vectors from shapes and
  33. * dtypes and call exec(TensorLayoutArray &).
  34. */
  35. float exec(const TensorShapeArray& shapes);
  36. float exec(TensorLayoutArray layouts);
  37. //! disabiguate overloaded exec
  38. float execs(const TensorShapeArray& shapes) { return exec(make_layouts(shapes)); }
  39. float execl(const TensorLayoutArray& layouts) { return exec(layouts); }
  40. ROCMBenchmarker& set_param(Param param) {
  41. m_param = param;
  42. return *this;
  43. }
  44. ROCMBenchmarker& set_dtype(size_t idx, DType dtype) {
  45. m_dtype[idx] = dtype;
  46. return *this;
  47. }
  48. ROCMBenchmarker& set_rng(size_t idx, RNG* rng) {
  49. m_rng[idx] = rng;
  50. return *this;
  51. }
  52. ROCMBenchmarker& set_proxy(const OprProxy<Opr>& proxy) {
  53. m_proxy = proxy;
  54. return *this;
  55. }
  56. ROCMBenchmarker& set_display(bool display) {
  57. m_display = display;
  58. return *this;
  59. }
  60. ROCMBenchmarker& set_fmt(size_t idx, TensorFormat fmt) {
  61. m_fmt[idx] = fmt;
  62. return *this;
  63. }
  64. TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
  65. TensorLayoutArray layouts(shapes.size());
  66. for (size_t i = 0; i < shapes.size(); ++i) {
  67. DType dt =
  68. (m_dtype.find(i) != m_dtype.end() ? m_dtype[i] : dtype::Float32());
  69. TensorFormat fmt =
  70. (m_fmt.find(i) != m_fmt.end() ? m_fmt[i]
  71. : DefaultTensorFormat::make());
  72. layouts[i] = TensorLayout(shapes[i], dt, fmt);
  73. }
  74. return layouts;
  75. }
  76. private:
  77. class ROCMTimer {
  78. private:
  79. bool m_started, m_stopped;
  80. hipEvent_t m_event_start, m_event_end;
  81. hipStream_t m_stream;
  82. public:
  83. ROCMTimer() = delete;
  84. ROCMTimer(hipStream_t strm) : m_stream{strm} {
  85. hip_check(hipEventCreate(&m_event_start));
  86. hip_check(hipEventCreate(&m_event_end));
  87. reset();
  88. }
  89. ~ROCMTimer() {
  90. hip_check(hipEventDestroy(m_event_start));
  91. hip_check(hipEventDestroy(m_event_end));
  92. }
  93. void start() {
  94. megdnn_assert(!m_started);
  95. megdnn_assert(!m_stopped);
  96. m_started = true;
  97. hip_check(hipEventRecord(m_event_start, m_stream));
  98. }
  99. void stop() {
  100. megdnn_assert(m_started);
  101. megdnn_assert(!m_stopped);
  102. m_stopped = true;
  103. hip_check(hipEventRecord(m_event_end, m_stream));
  104. }
  105. float get_time_in_ms() const {
  106. megdnn_assert(m_started);
  107. megdnn_assert(m_stopped);
  108. hip_check(hipEventSynchronize(m_event_end));
  109. float ms;
  110. hip_check(hipEventElapsedTime(&ms, m_event_start, m_event_end));
  111. return ms;
  112. }
  113. void reset() {
  114. m_started = false;
  115. m_stopped = false;
  116. }
  117. };
  118. bool m_display = true;
  119. Handle* m_handle_naive;
  120. Handle* m_handle_rocm;
  121. std::unique_ptr<RNG> m_default_rng;
  122. std::map<size_t, RNG*> m_rng;
  123. std::map<size_t, DType> m_dtype;
  124. std::map<size_t, TensorFormat> m_fmt;
  125. Param m_param;
  126. OprProxy<Opr> m_proxy;
  127. ROCMTimer m_device_timer;
  128. };
  129. } // namespace test
  130. } // namespace megdnn
  131. #include "test/rocm/benchmarker.inl"
  132. // vim: syntax=cpp.doxygen