
dnn/test/cuda/rng.cpp (9.6 kB)

/**
 * \file dnn/test/cuda/rng.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/naive/rng.h"
#include "megdnn/oprs.h"
#include "test/common/tensor.h"
#include "test/cuda/fixture.h"

namespace megdnn {
namespace test {
namespace {
template <typename T>
void run_gamma(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto opr = handle->create_operator<GammaRNG>();
    TensorLayout ly{TensorShape{2000000 * 5}, T()};
    SyncedTensor<ctype> out(handle, ly);
    SyncedTensor<ctype> shape(handle, ly);
    SyncedTensor<ctype> scale(handle, ly);
    auto shape_ptr = shape.ptr_mutable_host();
    auto scale_ptr = scale.ptr_mutable_host();
    // five groups of 2000000 samples, each with its own shape/scale pair
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 2000000; ++j) {
            shape_ptr[i * 2000000 + j] = 2 * 0.3 * i + 0.3;
            scale_ptr[i * 2000000 + j] = i * 0.2 + 0.1;
        }
    }
    opr->exec(shape.tensornd_dev(), scale.tensornd_dev(), out.tensornd_dev(), {});
    auto ptr = out.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        // Gamma(shape a, scale b): mean = a * b, variance = a * b^2
        float a = 2 * 0.3 * i + 0.3, b = i * 0.2 + 0.1;
        float mean = a * b;
        float std = a * (b * b);
        auto stat = get_mean_var(ptr + i * 2000000, 2000000, ctype(mean));
        ASSERT_LE(std::abs(stat.first - mean), 0.01);
        ASSERT_LE(std::abs(stat.second - std), 0.01);
    }
}

template <typename T>
void run_poisson(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto opr = handle->create_operator<PoissonRNG>();
    TensorLayout ly{TensorShape{200000 * 5}, T()};
    SyncedTensor<ctype> out(handle, ly);
    SyncedTensor<ctype> lam(handle, ly);
    auto lam_ptr = lam.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 200000; ++j) {
            lam_ptr[i * 200000 + j] = ctype(i + 1);
        }
    }
    opr->exec(lam.tensornd_dev(), out.tensornd_dev(), {});
    auto ptr = out.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        // Poisson(lambda): both mean and variance equal lambda
        auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(i + 1));
        ASSERT_LE(std::abs(stat.first - ctype(i + 1)), 0.01);
        ASSERT_LE(std::abs(stat.second - ctype(i + 1)), 0.01);
    }
}

template <typename T>
void run_beta(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto opr = handle->create_operator<BetaRNG>();
    TensorLayout ly{TensorShape{200000 * 5}, T()};
    SyncedTensor<ctype> out(handle, ly);
    SyncedTensor<ctype> alpha(handle, ly);
    SyncedTensor<ctype> beta(handle, ly);
    auto alpha_ptr = alpha.ptr_mutable_host();
    auto beta_ptr = beta.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 200000; ++j) {
            alpha_ptr[i * 200000 + j] = 0.3 * i + 0.1;
            beta_ptr[i * 200000 + j] = 2 * i * 0.3 + 0.1;
        }
    }
    opr->exec(alpha.tensornd_dev(), beta.tensornd_dev(), out.tensornd_dev(), {});
    auto ptr = out.ptr_mutable_host();
    for (int i = 0; i < 5; ++i) {
        // Beta(a, b): mean = a / (a + b), variance = a * b / ((a + b)^2 * (a + b + 1))
        float a = 0.3 * i + 0.1, b = 2 * i * 0.3 + 0.1;
        float mean = a / (a + b);
        float std = a * b / ((a + b) * (a + b) * (a + b + 1));
        auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(mean));
        ASSERT_LE(std::abs(stat.first - mean), 0.01);
        ASSERT_LE(std::abs(stat.second - std), 0.01);
    }
}

template <typename T>
void run_permutation(Handle* handle) {
    using ctype = typename DTypeTrait<T>::ctype;
    size_t sample_num =
            std::min(200000, static_cast<int>(DTypeTrait<T>::max()) - 10);
    auto opr = handle->create_operator<PermutationRNG>();
    opr->param().dtype = DTypeTrait<T>::enumv;
    TensorLayout ly{TensorShape{sample_num}, T()};
    Tensor<dt_byte> workspace(
            handle,
            {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
    SyncedTensor<ctype> t(handle, ly);
    opr->exec(t.tensornd_dev(),
              {workspace.ptr(), workspace.layout().total_nr_elems()});
    auto ptr = t.ptr_mutable_host();
    auto size = t.layout().total_nr_elems();
    // the output must be a genuine permutation of 0 .. size-1, not the identity
    std::vector<ctype> res(size);
    int not_same = 0;
    for (size_t i = 0; i < size; ++i) {
        if ((ptr[i] - ctype(i)) >= ctype(1)) not_same++;
        res[i] = ptr[i];
    }
    ASSERT_GT(not_same, 5000);
    std::sort(res.begin(), res.end());
    for (size_t i = 0; i < size; ++i) {
        ASSERT_LE(std::abs(res[i] - ctype(i)), 1e-8);
    }
}

template <typename T>
void run_shuffle(Handle* handle, bool bwd_flag) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto run = [&](TensorShape shape) {
        auto opr = handle->create_operator<ShuffleRNGForward>();
        TensorLayout srclay{shape, T()};
        TensorLayout dstlay{shape, T()};
        TensorLayout indexlay{TensorShape{shape[0]}, dtype::Int32()};
        Tensor<dt_byte> workspace(
                handle,
                {TensorShape{opr->get_workspace_in_bytes(srclay, dstlay, indexlay)},
                 dtype::Byte()});
        SyncedTensor<ctype> src(handle, srclay);
        SyncedTensor<ctype> dst(handle, dstlay);
        SyncedTensor<DTypeTrait<dt_int32>::ctype> index(handle, indexlay);
        auto sptr = src.ptr_mutable_host();
        size_t size = src.layout().total_nr_elems();
        for (size_t j = 0; j < size; ++j) {
            sptr[j] = j;
        }
        opr->exec(src.tensornd_dev(), dst.tensornd_dev(), index.tensornd_dev(),
                  {workspace.ptr(), workspace.layout().total_nr_elems()});
        auto dptr = dst.ptr_mutable_host();
        auto iptr = index.ptr_mutable_host();
        size_t len = index.layout().total_nr_elems();
        size_t step = size / len;
        // forward: output row i must equal input row index[i]
        for (size_t i = 0; i < len; ++i) {
            for (size_t j = 0; j < step; ++j) {
                ASSERT_EQ(dptr[i * step + j], sptr[iptr[i] * step + j]);
            }
        }
        if (bwd_flag) {
            for (size_t j = 0; j < size; ++j) {
                sptr[j] = 0;
            }
            auto oprbwd = handle->create_operator<ShuffleRNGBackward>();
            oprbwd->exec(dst.tensornd_dev(), index.tensornd_dev(), src.tensornd_dev(),
                         {workspace.ptr(), workspace.layout().total_nr_elems()});
            auto sptr_bwd = src.ptr_mutable_host();
            // backward: scattering the shuffled rows back must reproduce the source
            for (size_t i = 0; i < len; ++i) {
                for (size_t j = 0; j < step; ++j) {
                    ASSERT_EQ(dptr[i * step + j], sptr_bwd[iptr[i] * step + j]);
                }
            }
        }
    };
    run({10});
    run({6, 3});
}
}  // anonymous namespace

TEST_F(CUDA, UNIFORM_RNG_F32) {
    auto opr = handle_cuda()->create_operator<UniformRNG>();
    opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
    SyncedTensor<> t(handle_cuda(), {TensorShape{200000}, dtype::Float32()});
    opr->exec(t.tensornd_dev(), {});
    assert_uniform_correct(t.ptr_mutable_host(), t.layout().total_nr_elems());
}

TEST_F(CUDA, GAUSSIAN_RNG_F32) {
    auto opr = handle_cuda()->create_operator<GaussianRNG>();
    opr->param().mean = 0.8;
    opr->param().std = 2.3;
    opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
    for (size_t size : {1, 200000, 200001}) {
        TensorLayout ly{{size}, dtype::Float32()};
        Tensor<dt_byte> workspace(
                handle_cuda(),
                {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
        SyncedTensor<> t(handle_cuda(), ly);
        opr->exec(t.tensornd_dev(),
                  {workspace.ptr(), workspace.layout().total_nr_elems()});
        auto ptr = t.ptr_mutable_host();
        ASSERT_LE(std::abs(ptr[0] - 0.8), 2.3);
        if (size >= 1000) {
            auto stat = get_mean_var(ptr, size, 0.8f);
            ASSERT_LE(std::abs(stat.first - 0.8), 5e-3);
            ASSERT_LE(std::abs(stat.second - 2.3 * 2.3), 5e-2);
        }
    }
}

TEST_F(CUDA, GAMMA_RNG_F32) {
    run_gamma<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, GAMMA_RNG_F16) {
    run_gamma<dtype::Float16>(handle_cuda());
}

TEST_F(CUDA, POISSON_RNG_F32) {
    run_poisson<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, POISSON_RNG_F16) {
    run_poisson<dtype::Float16>(handle_cuda());
}

TEST_F(CUDA, BETA_RNG_F32) {
    run_beta<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, BETA_RNG_F16) {
    run_beta<dtype::Float16>(handle_cuda());
}

TEST_F(CUDA, PERMUTATION_RNG_F32) {
    run_permutation<dtype::Float32>(handle_cuda());
}

TEST_F(CUDA, PERMUTATION_RNG_INT32) {
    run_permutation<dtype::Int32>(handle_cuda());
}

TEST_F(CUDA, PERMUTATION_RNG_INT16) {
    run_permutation<dtype::Int16>(handle_cuda());
}

TEST_F(CUDA, SHUFFLE_RNG_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), false);
}

TEST_F(CUDA, SHUFFLE_RNG_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), false);
}

TEST_F(CUDA, SHUFFLE_RNG_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), false);
}

TEST_F(CUDA, SHUFFLE_RNG_BWD_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), true);
}

TEST_F(CUDA, SHUFFLE_RNG_BWD_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), true);
}

TEST_F(CUDA, SHUFFLE_RNG_BWD_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), true);
}

}  // namespace test
}  // namespace megdnn
// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU device and that its driver is properly installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
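As a quick post-install sanity check, a minimal sketch along the following lines can confirm that the bundled CUDA environment actually sees a GPU. It assumes the `megengine` Python package is installed and that `megengine.is_cuda_available()` is present in your version; both are assumptions, not guarantees of this page.

# Minimal sketch: verify the bundled CUDA environment can see a GPU.
# Assumes the `megengine` package is installed and that
# `is_cuda_available()` exists in this MegEngine version.
import megengine

if megengine.is_cuda_available():
    print("CUDA device detected: GPU kernels (such as the RNG tests above) can run.")
else:
    print("No CUDA device found: check the GPU hardware and driver installation.")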