You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

param_pack.cpp 8.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. /**
  2. * \file dnn/test/cuda/param_pack.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
#include "test/common/checker.h"
#include "test/common/utils.h"
#include "test/cuda/fixture.h"

#include <cstdint>
#include <vector>
  14. using namespace megdnn;
  15. using namespace test;
  16. namespace {
  17. template<class T>
  18. std::vector<int32_t> create_table(const TensorShapeArray& shapes,
  19. size_t align) {
  20. size_t dtype_size = sizeof(T);
  21. if (align < dtype_size)
  22. align = dtype_size;
  23. align /= dtype_size;
  24. size_t offset = shapes[0].total_nr_elems();
  25. for (size_t i = 1; i < shapes.size(); i++) {
  26. auto d = offset & (align - 1);
  27. offset += (align - d) & (align - 1);
  28. offset += shapes[i].total_nr_elems();
  29. }
  30. std::vector<int32_t> table(offset * 2);
  31. int32_t* outer_table = table.data();
  32. int32_t* inner_table = outer_table + offset;
  33. offset = 0;
  34. for (size_t i = 0; i < shapes.size(); i++) {
  35. for (; (offset & (align - 1)) != 0; offset++) {
  36. outer_table[offset] = inner_table[offset] = -1;
  37. }
  38. size_t j = 0;
  39. for (; j < shapes[i].total_nr_elems(); j++) {
  40. outer_table[offset + j] = i;
  41. inner_table[offset + j] = j;
  42. }
  43. offset += j;
  44. }
  45. return table;
  46. }
  47. template<class T>
  48. std::vector<T> create_pack(size_t pack_size, const std::vector<int32_t>& table,
  49. const std::vector<std::vector<T>>& ptr) {
  50. assert(pack_size == table.size() / 2);
  51. const int32_t* outer_table = table.data();
  52. const int32_t* inner_table = outer_table + pack_size;
  53. std::vector<T> data(pack_size);
  54. for (size_t idx = 0; idx < pack_size; ++idx) {
  55. int32_t out_idx = outer_table[idx];
  56. int32_t in_idx = inner_table[idx];
  57. if (in_idx != -1) {
  58. data[idx] = ptr[out_idx][in_idx];
  59. }
  60. }
  61. return data;
  62. }
  63. template <class T>
  64. std::vector<std::vector<T>> create_params(size_t nr_params,
  65. const TensorShapeArray& shapes) {
  66. std::vector<std::vector<T>> params;
  67. for (size_t i = 0; i < nr_params; ++i) {
  68. std::vector<T> expected_data;
  69. for (size_t x = 0; x < shapes[i].total_nr_elems(); ++x) {
  70. expected_data.push_back(rand());
  71. }
  72. params.push_back(std::move(expected_data));
  73. }
  74. return params;
  75. }
  76. template <class T>
  77. T* create_device_data(Handle* handle, const T* data, size_t size) {
  78. T* data_device =
  79. static_cast<T*>(test::megdnn_malloc(handle, size * sizeof(T)));
  80. if (data)
  81. test::megdnn_memcpy_H2D(handle, data_device, data, size * sizeof(T));
  82. return data_device;
  83. }
  84. template<class T>
  85. void test_param_pack_split(Handle* handle, const TensorShapeArray& shapes,
  86. DType type) {
  87. auto split = handle->create_operator<ParamPackSplit>();
  88. size_t nr_params = shapes.size();
  89. std::vector<T*> param_ptrs;
  90. for (size_t i = 0; i < nr_params; ++i) {
  91. param_ptrs.push_back(create_device_data<T>(handle,
  92. nullptr, shapes[i].total_nr_elems()));
  93. }
  94. std::vector<std::vector<T>> expected_param = create_params<T>(nr_params,
  95. shapes);
  96. std::vector<int32_t> table =
  97. create_table<T>(shapes, handle->alignment_requirement());
  98. ASSERT_EQ(table,
  99. ParamPackSplit::gen_table(shapes, handle->alignment_requirement(),
  100. sizeof(T)));
  101. size_t pack_size = table.size() / 2;
  102. int32_t* table_gpu = create_device_data<int32_t>(handle, table.data(),
  103. table.size());
  104. std::vector<T> pack =
  105. create_pack<T>(pack_size, table, expected_param);
  106. T* pack_gpu = create_device_data<T>(handle, pack.data(), pack.size());
  107. TensorLayout src_layout({pack_size}, type);
  108. TensorND src_tensor(pack_gpu, src_layout);
  109. TensorLayout table_layout({table.size()}, dtype::Int32());
  110. TensorND table_tensor(table_gpu, table_layout);
  111. test::WorkspaceWrapper workspace(handle, split->get_workspace_in_bytes(
  112. {pack_size}, table_layout, shapes));
  113. TensorND dst_tensor(param_ptrs.data(),
  114. TensorLayout({nr_params}, dtype::Int32()));
  115. split->exec(src_tensor, table_tensor, dst_tensor, workspace.workspace());
  116. // check
  117. for (size_t i = 0; i < nr_params; ++i) {
  118. T* actual_param = static_cast<T*>(malloc(shapes[i].total_nr_elems()
  119. * sizeof(T)));
  120. test::megdnn_memcpy_D2H(handle, actual_param, param_ptrs[i],
  121. shapes[i].total_nr_elems() * sizeof(T));
  122. for (size_t idx = 0; idx < shapes[i].total_nr_elems(); ++idx) {
  123. ASSERT_EQ(actual_param[idx], expected_param[i][idx]);
  124. }
  125. free(actual_param);
  126. }
  127. test::megdnn_free(handle, pack_gpu);
  128. test::megdnn_free(handle, table_gpu);
  129. for (auto ptr : param_ptrs) {
  130. test::megdnn_free(handle, ptr);
  131. }
  132. }
  133. template <class T>
  134. void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
  135. DType type) {
  136. auto concat = handle->create_operator<ParamPackConcat>();
  137. size_t nr_params = shapes.size();
  138. std::vector<T*> param_ptrs;
  139. std::vector<std::vector<T>> params = create_params<T>(nr_params,
  140. shapes);
  141. for (size_t i = 0; i < nr_params; ++i) {
  142. param_ptrs.push_back(create_device_data<T>(handle,
  143. params[i].data(), shapes[i].total_nr_elems()));
  144. }
  145. std::vector<int32_t> table =
  146. create_table<T>(shapes, handle->alignment_requirement());
  147. size_t pack_size = table.size() / 2;
  148. int32_t* table_gpu = create_device_data<int32_t>(handle, table.data(),
  149. table.size());
  150. std::vector<T> expected_pack =
  151. create_pack<T>(pack_size, table, params);
  152. T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());
  153. TensorLayout dst_layout({pack_size}, type);
  154. TensorND dst_tensor(pack_gpu, dst_layout);
  155. TensorLayout table_layout({table.size()}, dtype::Int32());
  156. TensorND table_tensor(table_gpu, table_layout);
  157. test::WorkspaceWrapper workspace(handle, concat->get_workspace_in_bytes(
  158. shapes, table_layout, {pack_size}));
  159. TensorND src_tensor(param_ptrs.data(),
  160. TensorLayout({nr_params}, dtype::Int32()));
  161. concat->exec(src_tensor, table_tensor, dst_tensor, workspace.workspace());
  162. // check
  163. T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
  164. test::megdnn_memcpy_D2H(handle, actual_pack,
  165. pack_gpu, sizeof(T) * pack_size);
  166. for (size_t i = 0; i < pack_size; ++i) {
  167. ASSERT_EQ(actual_pack[i], expected_pack[i]);
  168. }
  169. free(actual_pack);
  170. test::megdnn_free(handle, pack_gpu);
  171. test::megdnn_free(handle, table_gpu);
  172. for (auto ptr : param_ptrs) {
  173. test::megdnn_free(handle, ptr);
  174. }
  175. }
  176. } // namespace
  177. TEST_F(CUDA, PARAM_PACK) {
  178. SmallVector<TensorShapeArray> shapes_vec;
  179. shapes_vec.push_back({{1}});
  180. shapes_vec.push_back({{129}, {21}});
  181. shapes_vec.push_back({{15}, {21}, {34}});
  182. shapes_vec.push_back({{1, 2}, {3, 5}, {5, 8}, {7, 11}, {9, 14}});
  183. shapes_vec.push_back({{1, 2},
  184. {3, 5},
  185. {1},
  186. {3, 3, 3, 4},
  187. {71},
  188. {9, 14},
  189. {111, 111, 111},
  190. {128, 128, 128}});
  191. for (auto shapes : shapes_vec) {
  192. test_param_pack_split<int32_t>(handle_cuda(), shapes, dtype::Int32());
  193. test_param_pack_split<int16_t>(handle_cuda(), shapes, dtype::Int16());
  194. test_param_pack_split<float>(handle_cuda(), shapes, dtype::Float32());
  195. test_param_pack_concat<int32_t>(handle_cuda(), shapes, dtype::Int32());
  196. test_param_pack_concat<int16_t>(handle_cuda(), shapes, dtype::Int16());
  197. test_param_pack_concat<float>(handle_cuda(), shapes, dtype::Float32());
  198. }
  199. }
  200. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台