
param_pack.cpp 5.6 kB

/**
 * \file dnn/test/rocm/param_pack.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "hcc_detail/hcc_defs_prologue.h"

#include "test/rocm/fixture.h"
#include "test/common/checker.h"
#include "test/common/utils.h"

using namespace megdnn;
using namespace test;

namespace {
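// Compute each tensor's [begin, end) offsets (in elements) inside the packed
// buffer, rounding every begin offset up to the handle's alignment
// requirement; the bit trick in get_aligned assumes the per-element alignment
// is a power of two.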
template <class T>
std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
                                    size_t alignment) {
    size_t dtype_size = sizeof(T);
    if (alignment < dtype_size)
        alignment = dtype_size;
    alignment /= dtype_size;

    auto get_aligned = [alignment](size_t v) {
        auto mod = v & (alignment - 1);
        return v + ((alignment - mod) & (alignment - 1));
    };

    std::vector<dt_int32> offsets(shapes.size() << 1);
    size_t offset = 0;
    for (size_t i = 0; i < shapes.size(); i++) {
        offset = get_aligned(offset);
        offsets[i << 1] = offset;
        offset += shapes[i].total_nr_elems();
        offsets[(i << 1) + 1] = offset;
    }
    return offsets;
}
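// Build the expected packed buffer on the host by copying each parameter into
// its [begin, end) slot; padding elements introduced by alignment stay zero.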
template <class T>
std::vector<T> create_pack(size_t pack_size,
                           const std::vector<int32_t>& offsets,
                           const std::vector<std::vector<T>>& ptr) {
    megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
    std::vector<T> data(pack_size, 0);
    for (size_t i = 0; i * 2 < offsets.size(); ++i) {
        size_t begin = offsets[i * 2], end = offsets[i * 2 + 1];
        for (size_t j = 0; j < end - begin; j++)
            data[begin + j] = ptr[i][j];
    }
    return data;
}
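// Generate random host-side data for every parameter tensor.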
template <class T>
std::vector<std::vector<T>> create_params(size_t nr_params,
                                          const TensorShapeArray& shapes) {
    std::vector<std::vector<T>> params;
    for (size_t i = 0; i < nr_params; ++i) {
        std::vector<T> expected_data;
        for (size_t x = 0; x < shapes[i].total_nr_elems(); ++x) {
            expected_data.push_back(rand());
        }
        params.push_back(std::move(expected_data));
    }
    return params;
}
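// Allocate a device buffer of `size` elements and, when host data is given,
// copy it to the device.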
template <class T>
T* create_device_data(Handle* handle, const T* data, size_t size) {
    T* data_device =
            static_cast<T*>(test::megdnn_malloc(handle, size * sizeof(T)));
    if (data)
        test::megdnn_memcpy_H2D(handle, data_device, data, size * sizeof(T));
    return data_device;
}
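// Run ParamPackConcat on the device and compare the packed result with the
// host-side reference produced by create_pack().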
template <class T>
void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
                            DType type) {
    auto concat = handle->create_operator<ParamPackConcat>();
    size_t nr_params = shapes.size();
    std::vector<T*> param_ptrs;
    std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
    for (size_t i = 0; i < nr_params; ++i) {
        param_ptrs.push_back(create_device_data<T>(handle, params[i].data(),
                                                   shapes[i].total_nr_elems()));
    }
    std::vector<int32_t> offsets =
            create_offsets<T>(shapes, handle->alignment_requirement());
    size_t pack_size = offsets.back();
    int32_t* offsets_gpu =
            create_device_data<int32_t>(handle, offsets.data(), offsets.size());
    std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
    T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());

    TensorLayout dst_layout({pack_size}, type);
    TensorND dst_tensor(pack_gpu, dst_layout);
    TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
    TensorND offsets_tensor(offsets_gpu, offsets_layout);

    test::WorkspaceWrapper workspace(
            handle, concat->get_workspace_in_bytes(shapes, offsets_layout,
                                                   {pack_size}));
    TensorND src_tensor(param_ptrs.data(),
                        TensorLayout({nr_params}, dtype::Int32()));
    concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());

    // check
    T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
    test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu,
                            sizeof(T) * pack_size);
    for (size_t i = 0; i < pack_size; ++i) {
        ASSERT_EQ(actual_pack[i], expected_pack[i]);
    }
    free(actual_pack);
    test::megdnn_free(handle, pack_gpu);
    test::megdnn_free(handle, offsets_gpu);
    for (auto ptr : param_ptrs) {
        test::megdnn_free(handle, ptr);
    }
}
}  // namespace

TEST_F(ROCM, PARAM_PACK) {
    SmallVector<TensorShapeArray> shapes_vec;
    shapes_vec.push_back({{1}});
    shapes_vec.push_back({{129}, {21}});
    shapes_vec.push_back({{15}, {21}, {34}});
    shapes_vec.push_back({{1, 2}, {3, 5}, {5, 8}, {7, 11}, {9, 14}});
    shapes_vec.push_back({{1, 2},
                          {3, 5},
                          {1},
                          {3, 3, 3, 4},
                          {71},
                          {9, 14},
                          {111, 111, 111},
                          {128, 128, 128}});
    for (auto shapes : shapes_vec) {
        test_param_pack_concat<int32_t>(handle_rocm(), shapes, dtype::Int32());
        test_param_pack_concat<int16_t>(handle_rocm(), shapes, dtype::Int16());
        test_param_pack_concat<float>(handle_rocm(), shapes, dtype::Float32());
    }
}

// vim: syntax=cpp.doxygen
