You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

benchmarker.inl 3.1 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. #pragma once
  2. #include "test/rocm/benchmarker.h"
  3. #include <gtest/gtest.h>
  4. #include "test/common/timer.h"
  5. namespace megdnn {
  6. namespace test {
  7. template <typename Opr>
  8. ROCMBenchmarker<Opr>::ROCMBenchmarker(Handle* handle_rocm, Handle* handle_naive)
  9. : m_handle_naive{handle_naive},
  10. m_handle_rocm{handle_rocm},
  11. m_default_rng{new NormalRNG()},
  12. m_param{Param()},
  13. m_device_timer{megdnn::rocm::concrete_handle(m_handle_rocm)->stream()} {}
  14. template <typename Opr>
  15. float ROCMBenchmarker<Opr>::exec(const TensorShapeArray& shapes) {
  16. return exec(make_layouts(shapes));
  17. }
  18. template <typename Opr>
  19. float ROCMBenchmarker<Opr>::exec(TensorLayoutArray layouts) {
  20. auto opr = m_handle_rocm->create_operator<Opr>();
  21. opr->param() = m_param;
  22. auto user_layouts = layouts;
  23. m_proxy.deduce_layout(opr.get(), layouts);
  24. for (size_t i = 0; i < layouts.size(); ++i)
  25. if (user_layouts[i].ndim > 0) {
  26. auto run = [&]() {
  27. ASSERT_TRUE(layouts[i].eq_shape(user_layouts[i]))
  28. << "User provided shape is "
  29. << user_layouts[i].TensorShape::to_string()
  30. << "\nExpected shape is "
  31. << layouts[i].TensorShape::to_string();
  32. };
  33. run();
  34. }
  35. auto allocate = [&layouts](Handle* handle) {
  36. TensorNDArray tensors(layouts.size());
  37. auto trans_func = [handle](const TensorLayout& layout) {
  38. auto span = layout.span();
  39. TensorND res;
  40. res.reset_ptr(
  41. (static_cast<uint8_t*>(megdnn_malloc(handle, span.dist_byte())) +
  42. span.low_byte));
  43. res.layout = layout;
  44. return res;
  45. };
  46. std::transform(layouts.begin(), layouts.end(), tensors.begin(), trans_func);
  47. return tensors;
  48. };
  49. auto tensors_cur = allocate(m_handle_rocm);
  50. auto tensors_cur_host = allocate(m_handle_naive);
  51. // init
  52. for (size_t i = 0; i < tensors_cur_host.size(); ++i) {
  53. TensorND& tensor = tensors_cur_host[i];
  54. auto rng = m_rng[i];
  55. if (!rng)
  56. rng = m_default_rng.get();
  57. auto size = tensor.layout.span().high_byte;
  58. rng->gen(tensor);
  59. megdnn_memcpy_H2D(
  60. m_handle_rocm, tensors_cur[i].raw_ptr(), tensor.raw_ptr(), size);
  61. }
  62. m_device_timer.reset();
  63. m_device_timer.start();
  64. m_proxy.exec(opr.get(), tensors_cur);
  65. m_device_timer.stop();
  66. auto time_in_ms = m_device_timer.get_time_in_ms();
  67. if (m_display) {
  68. std::cout << "Total time is " << time_in_ms << "ms " << std::endl;
  69. }
  70. auto free = [](Handle* handle, TensorNDArray& tensors) {
  71. std::for_each(tensors.begin(), tensors.end(), [handle](const TensorND& tensor) {
  72. megdnn_free(handle, tensor.raw_ptr());
  73. });
  74. };
  75. free(m_handle_rocm, tensors_cur);
  76. free(m_handle_naive, tensors_cur_host);
  77. return time_in_ms;
  78. }
  79. } // namespace test
  80. } // namespace megdnn
  81. // vim: syntax=cpp.doxygen