
svd.cpp 5.7 kB

/**
 * \file dnn/test/common/svd.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "test/common/svd.h"

#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
#include "test/common/utils.h"
#include "test/common/workspace_wrapper.h"

using namespace megdnn;
using namespace test;

using Param = SVDForward::Param;

namespace {
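
// fill_diag: scatter the singular values in `v` onto the main diagonal of each
// trailing (m x n) block of `diag`, zeroing every other entry first.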
template <typename T>
void fill_diag(const TensorND& v, TensorND& diag) {
    const auto& layout = diag.layout;
    megdnn_assert_contiguous(layout);
    megdnn_assert(layout.ndim >= 2);
    size_t block_cnt = 1;
    for (size_t i = 0; i < layout.ndim - 2; i++) {
        block_cnt *= layout[i];
    }
    size_t m = layout[layout.ndim - 2];
    size_t n = layout[layout.ndim - 1];
    size_t mn = std::min(m, n);
    auto v_ptr = v.ptr<T>();
    auto ptr = diag.ptr<T>();
    memset(ptr, 0, diag.layout.span().dist_byte());
    auto ld = layout.stride[layout.ndim - 2];
    for (size_t blk = 0; blk < block_cnt; blk++) {
        size_t offset(0), s_offset(0);
        if (block_cnt > 1) {
            offset = blk * layout.stride[layout.ndim - 3];
            s_offset = blk * v.layout.stride[v.layout.ndim - 2];
        }
        for (size_t i = 0; i < mn; i++) {
            ptr[offset + i * ld + i] = v_ptr[s_offset + i];
        }
    }
}
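
// Helper: batched matrix multiply A * B on the given handle, deducing the
// result layout and allocating the output tensor and workspace internally.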
std::shared_ptr<Tensor<>> matmul(Handle* handle, const TensorND& A,
                                 const TensorND& B) {
    auto matmul_opr = handle->create_operator<BatchedMatrixMul>();
    TensorLayout result_layout;
    matmul_opr->deduce_layout(A.layout, B.layout, result_layout);
    std::shared_ptr<Tensor<>> result(new Tensor<>(handle, result_layout));
    WorkspaceWrapper ws(handle,
                        matmul_opr->get_workspace_in_bytes(A.layout, B.layout,
                                                           result->layout()));
    matmul_opr->exec(A, B, result->tensornd(), ws.workspace());
    return result;
}

}  // namespace
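
// Build testcases covering square, wide and tall inputs across batch sizes,
// toggling compute_uv and full_matrices.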
std::vector<SVDTestcase> SVDTestcase::make() {
    std::vector<SVDTestcase> ret;
    auto param = Param(false /* compute_uv */, false /* full_matrices */);
    auto add_shape = [&ret, &param](const TensorShape& shape) {
        ret.push_back({param, TensorLayout{shape, dtype::Float32()}});
    };
    add_shape({1, 7, 7});
    add_shape({1, 3, 7});
    add_shape({1, 7, 3});
    for (size_t rows : {2, 3, 5, 7, 10, 32, 100}) {
        for (size_t cols : {2, 3, 5, 7, 10, 32, 100}) {
            param.compute_uv = false;
            param.full_matrices = false;
            add_shape({3, rows, cols});
            param.compute_uv = true;
            add_shape({2, rows, cols});
            param.full_matrices = true;
            add_shape({3, rows, cols});
        }
    }
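
    // Fill every testcase's input matrix with standard normal random data.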
    NormalRNG data_rng;
    auto fill_data = [&](TensorND& data) {
        auto sz = data.layout.span().dist_byte(), szf = sz / sizeof(dt_float32);
        auto pf = static_cast<dt_float32*>(data.raw_ptr);
        data_rng.fill_fast_float32(pf, szf);
    };
    for (auto&& i : ret) {
        i.m_mem.reset(new dt_float32[i.m_mat.layout.span().dist_elem()]);
        i.m_mat.raw_ptr = i.m_mem.get();
        fill_data(i.m_mat);
    }
    return ret;
}
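
// Run the SVD operator on the testcase input; when compute_uv is set, also
// reconstruct the input as u * diag(s) * vt so callers can check the result.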
SVDTestcase::Result SVDTestcase::run(SVDForward* opr) {
    auto handle = opr->handle();
    auto src = make_tensor_h2d(handle, m_mat);

    // Deduce layout
    TensorLayout u_layout, s_layout, vt_layout;
    opr->param() = m_param;
    opr->deduce_layout(m_mat.layout, u_layout, s_layout, vt_layout);

    // Alloc tensor on device
    Tensor<> u{handle, u_layout}, s{handle, s_layout}, vt{handle, vt_layout};
    WorkspaceWrapper ws(handle, opr->get_workspace_in_bytes(
                                        m_mat.layout, u_layout, s_layout,
                                        vt_layout));
    opr->exec(*src, u.tensornd(), s.tensornd(), vt.tensornd(), ws.workspace());
    auto u_host = make_tensor_d2h(handle, u.tensornd());

// Defined in wsdk8/Include/shared/inaddr.h. Surprise! It's Windows.
#undef s_host
    auto s_host = make_tensor_d2h(handle, s.tensornd());
    auto vt_host = make_tensor_d2h(handle, vt.tensornd());

    if (m_param.compute_uv) {
        // Copy back singular values, build diag(s)
        std::unique_ptr<dt_float32[]> diag_s_host_mem(
                new dt_float32[m_mat.layout.span().dist_elem()]);
        TensorLayout diag_layout = m_mat.layout;
        if (!m_param.full_matrices) {
            // Reduced SVD: diag(s) is a square (min(m, n), min(m, n)) block.
            SmallVector<size_t> shape;
            for (int i = 0; i < (int)diag_layout.ndim - 2; i++) {
                shape.push_back(diag_layout[i]);
            }
            size_t x = std::min(diag_layout[diag_layout.ndim - 1],
                                diag_layout[diag_layout.ndim - 2]);
            shape.push_back(x);
            shape.push_back(x);
            diag_layout = {shape, diag_layout.dtype};
        }
        TensorND diag_s_host{diag_s_host_mem.get(), diag_layout};
        fill_diag<dt_float32>(*s_host, diag_s_host);

        // Try to recover the original matrix by u * diag(s) * vt
        auto diag_s_dev = make_tensor_h2d(handle, diag_s_host);
        auto tmp = matmul(handle, u.tensornd(), *diag_s_dev);
        auto recovered = matmul(handle, tmp->tensornd(), vt.tensornd());
        return {u_host, s_host, vt_host,
                make_tensor_d2h(handle, recovered->tensornd())};
    }
    return {u_host, s_host, vt_host, nullptr};
}

// vim: syntax=cpp.doxygen
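
For reference, a test driver built on these helpers would create an SVDForward operator on a handle and feed it each generated case. A minimal sketch follows; the handle setup and the exact member names of SVDTestcase::Result are assumptions for illustration, not defined in this file:

    // Hypothetical driver sketch: Handle construction and the members of
    // SVDTestcase::Result are assumptions, not part of svd.cpp.
    auto opr = handle->create_operator<megdnn::SVDForward>();
    for (auto&& testcase : SVDTestcase::make()) {
        auto result = testcase.run(opr.get());
        // With compute_uv set, compare the recovered matrix against the
        // original input under a suitable floating-point epsilon.
    }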

