You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param_pack.cpp 4.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. #include "test/common/checker.h"
  2. #include "test/common/utils.h"
  3. #include "test/cuda/fixture.h"
  4. using namespace megdnn;
  5. using namespace test;
  6. namespace {
  7. template <class T>
  8. std::vector<int32_t> create_offsets(const TensorShapeArray& shapes, size_t alignment) {
  9. size_t dtype_size = sizeof(T);
  10. if (alignment < dtype_size)
  11. alignment = dtype_size;
  12. alignment /= dtype_size;
  13. auto get_aligned = [alignment](size_t v) {
  14. auto mod = v & (alignment - 1);
  15. return v + ((alignment - mod) & (alignment - 1));
  16. };
  17. std::vector<dt_int32> offsets(shapes.size() << 1);
  18. size_t offset = 0;
  19. for (size_t i = 0; i < shapes.size(); i++) {
  20. offset = get_aligned(offset);
  21. offsets[i << 1] = offset;
  22. offset += shapes[i].total_nr_elems();
  23. offsets[(i << 1) + 1] = offset;
  24. }
  25. return offsets;
  26. }
  27. template <class T>
  28. std::vector<T> create_pack(
  29. size_t pack_size, const std::vector<int32_t>& offsets,
  30. const std::vector<std::vector<T>>& ptr) {
  31. megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
  32. std::vector<T> data(pack_size, 0);
  33. for (size_t i = 0; i * 2 < offsets.size(); ++i) {
  34. size_t begin = offsets[i * 2], end = offsets[i * 2 + 1];
  35. for (size_t j = 0; j < end - begin; j++)
  36. data[begin + j] = ptr[i][j];
  37. }
  38. return data;
  39. }
  40. template <class T>
  41. std::vector<std::vector<T>> create_params(
  42. size_t nr_params, const TensorShapeArray& shapes) {
  43. std::vector<std::vector<T>> params;
  44. for (size_t i = 0; i < nr_params; ++i) {
  45. std::vector<T> expected_data;
  46. for (size_t x = 0; x < shapes[i].total_nr_elems(); ++x) {
  47. expected_data.push_back(rand());
  48. }
  49. params.push_back(std::move(expected_data));
  50. }
  51. return params;
  52. }
  53. template <class T>
  54. T* create_device_data(Handle* handle, const T* data, size_t size) {
  55. T* data_device = static_cast<T*>(test::megdnn_malloc(handle, size * sizeof(T)));
  56. if (data)
  57. test::megdnn_memcpy_H2D(handle, data_device, data, size * sizeof(T));
  58. return data_device;
  59. }
  60. template <class T>
  61. void test_param_pack_concat(
  62. Handle* handle, const TensorShapeArray& shapes, DType type) {
  63. auto concat = handle->create_operator<ParamPackConcat>();
  64. size_t nr_params = shapes.size();
  65. std::vector<T*> param_ptrs;
  66. std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
  67. for (size_t i = 0; i < nr_params; ++i) {
  68. param_ptrs.push_back(create_device_data<T>(
  69. handle, params[i].data(), shapes[i].total_nr_elems()));
  70. }
  71. std::vector<int32_t> offsets =
  72. create_offsets<T>(shapes, handle->alignment_requirement());
  73. size_t pack_size = offsets.back();
  74. int32_t* offsets_gpu =
  75. create_device_data<int32_t>(handle, offsets.data(), offsets.size());
  76. std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
  77. T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());
  78. TensorLayout dst_layout({pack_size}, type);
  79. TensorND dst_tensor(pack_gpu, dst_layout);
  80. TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
  81. TensorND offsets_tensor(offsets_gpu, offsets_layout);
  82. test::WorkspaceWrapper workspace(
  83. handle,
  84. concat->get_workspace_in_bytes({nr_params}, offsets_layout, {pack_size}));
  85. TensorND src_tensor(param_ptrs.data(), TensorLayout({nr_params}, dtype::Int32()));
  86. concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());
  87. // check
  88. T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
  89. test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu, sizeof(T) * pack_size);
  90. for (size_t i = 0; i < pack_size; ++i) {
  91. ASSERT_EQ(actual_pack[i], expected_pack[i]);
  92. }
  93. free(actual_pack);
  94. test::megdnn_free(handle, pack_gpu);
  95. test::megdnn_free(handle, offsets_gpu);
  96. for (auto ptr : param_ptrs) {
  97. test::megdnn_free(handle, ptr);
  98. }
  99. }
  100. } // namespace
  101. TEST_F(CUDA, PARAM_PACK) {
  102. SmallVector<TensorShapeArray> shapes_vec;
  103. shapes_vec.push_back({{1}});
  104. shapes_vec.push_back({{129}, {21}});
  105. shapes_vec.push_back({{15}, {21}, {34}});
  106. shapes_vec.push_back({{1, 2}, {3, 5}, {5, 8}, {7, 11}, {9, 14}});
  107. shapes_vec.push_back(
  108. {{1, 2},
  109. {3, 5},
  110. {1},
  111. {3, 3, 3, 4},
  112. {71},
  113. {9, 14},
  114. {111, 111, 111},
  115. {128, 128, 128}});
  116. for (auto shapes : shapes_vec) {
  117. test_param_pack_concat<int32_t>(handle_cuda(), shapes, dtype::Int32());
  118. test_param_pack_concat<int16_t>(handle_cuda(), shapes, dtype::Int16());
  119. test_param_pack_concat<float>(handle_cuda(), shapes, dtype::Float32());
  120. }
  121. }
  122. // vim: syntax=cpp.doxygen