You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

rng.cpp 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
/**
 * \file dnn/test/cuda/rng.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/naive/rng.h"

#include <algorithm>
#include <cmath>
#include <vector>

#include "megdnn/oprs.h"
#include "test/common/tensor.h"
#include "test/cuda/fixture.h"
  15. namespace megdnn {
  16. namespace test {
  17. namespace {
  18. template <typename T>
  19. void run_gamma(Handle* handle) {
  20. using ctype = typename DTypeTrait<T>::ctype;
  21. auto opr = handle->create_operator<GammaRNG>();
  22. TensorLayout ly{TensorShape{2000000 * 5}, T()};
  23. SyncedTensor<ctype> out(handle, ly);
  24. SyncedTensor<ctype> shape(handle, ly);
  25. SyncedTensor<ctype> scale(handle, ly);
  26. auto shape_ptr = shape.ptr_mutable_host();
  27. auto scale_ptr = scale.ptr_mutable_host();
  28. for (int i = 0; i < 5; ++i) {
  29. for (int j = 0; j < 2000000; ++j) {
  30. shape_ptr[i * 2000000 + j] = 2 * 0.3 * i + 0.3;
  31. scale_ptr[i * 2000000 + j] = i * 0.2 + 0.1;
  32. }
  33. }
  34. opr->exec(shape.tensornd_dev(), scale.tensornd_dev(), out.tensornd_dev(), {});
  35. auto ptr = out.ptr_mutable_host();
  36. for (int i = 0; i < 5; ++i) {
  37. float a = 2 * 0.3 * i + 0.3, b = i * 0.2 + 0.1;
  38. float mean = a * b;
  39. float std = a * (b * b);
  40. auto stat = get_mean_var(ptr + i * 2000000, 2000000, ctype(mean));
  41. ASSERT_LE(std::abs(stat.first - mean), 0.01);
  42. ASSERT_LE(std::abs(stat.second - std), 0.01);
  43. }
  44. }
  45. template <typename T>
  46. void run_poisson(Handle* handle) {
  47. using ctype = typename DTypeTrait<T>::ctype;
  48. auto opr = handle->create_operator<PoissonRNG>();
  49. TensorLayout ly{TensorShape{200000 * 5}, T()};
  50. SyncedTensor<ctype> out(handle, ly);
  51. SyncedTensor<ctype> lam(handle, ly);
  52. auto lam_ptr = lam.ptr_mutable_host();
  53. for (int i = 0; i < 5; ++i) {
  54. for (int j = 0; j < 200000; ++j) {
  55. lam_ptr[i * 200000 + j] = ctype(i + 1);
  56. }
  57. }
  58. opr->exec(lam.tensornd_dev(), out.tensornd_dev(), {});
  59. auto ptr = out.ptr_mutable_host();
  60. for (int i = 0; i < 5; ++i) {
  61. auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(i + 1));
  62. ASSERT_LE(std::abs(stat.first - ctype(i + 1)), 0.01);
  63. ASSERT_LE(std::abs(stat.second - ctype(i + 1)), 0.01);
  64. }
  65. }
  66. template <typename T>
  67. void run_beta(Handle* handle) {
  68. using ctype = typename DTypeTrait<T>::ctype;
  69. auto opr = handle->create_operator<BetaRNG>();
  70. TensorLayout ly{TensorShape{200000 * 5}, T()};
  71. SyncedTensor<ctype> out(handle, ly);
  72. SyncedTensor<ctype> alpha(handle, ly);
  73. SyncedTensor<ctype> beta(handle, ly);
  74. auto alpha_ptr = alpha.ptr_mutable_host();
  75. auto beta_ptr = beta.ptr_mutable_host();
  76. for (int i = 0; i < 5; ++i) {
  77. for (int j = 0; j < 200000; ++j) {
  78. alpha_ptr[i * 200000 + j] = 0.3 * i + 0.1;
  79. beta_ptr[i * 200000 + j] = 2 * i * 0.3 + 0.1;
  80. }
  81. }
  82. opr->exec(alpha.tensornd_dev(), beta.tensornd_dev(), out.tensornd_dev(), {});
  83. auto ptr = out.ptr_mutable_host();
  84. for (int i = 0; i < 5; ++i) {
  85. float a = 0.3 * i + 0.1, b = 2 * i * 0.3 + 0.1;
  86. float mean = a / (a + b);
  87. float std = a * b / ((a + b) * (a + b) * (a + b + 1));
  88. auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(mean));
  89. ASSERT_LE(std::abs(stat.first - mean), 0.01);
  90. ASSERT_LE(std::abs(stat.second - std), 0.01);
  91. }
  92. }
  93. template <typename T>
  94. void run_permutation(Handle* handle) {
  95. using ctype = typename DTypeTrait<T>::ctype;
  96. size_t sample_num = std::min(200000, static_cast<int>(DTypeTrait<T>::max()) - 10);
  97. auto opr = handle->create_operator<PermutationRNG>();
  98. opr->param().dtype = DTypeTrait<T>::enumv;
  99. TensorLayout ly{TensorShape{sample_num}, T()};
  100. Tensor<dt_byte> workspace(
  101. handle, {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
  102. SyncedTensor<ctype> t(handle, ly);
  103. opr->exec(t.tensornd_dev(), {workspace.ptr(), workspace.layout().total_nr_elems()});
  104. auto ptr = t.ptr_mutable_host();
  105. auto size = t.layout().total_nr_elems();
  106. std::vector<ctype> res(size);
  107. int not_same = 0;
  108. for (size_t i = 0; i < size; ++i) {
  109. if ((ptr[i] - ctype(i)) >= ctype(1))
  110. not_same++;
  111. res[i] = ptr[i];
  112. }
  113. ASSERT_GT(not_same, 5000);
  114. std::sort(res.begin(), res.end());
  115. for (size_t i = 0; i < size; ++i) {
  116. ASSERT_LE(std::abs(res[i] - ctype(i)), 1e-8);
  117. }
  118. }
template <typename T>
// Checks ShuffleRNGForward (and optionally ShuffleRNGBackward) on CUDA:
// forward must permute src along axis 0 into dst, reporting the permutation
// in `index`; backward must scatter dst back through `index`.
// NOTE(review): statement order matters here — host pointers are taken and
// written before each exec, presumably so SyncedTensor uploads them to the
// device; do not reorder these accesses.
void run_shuffle(Handle* handle, bool bwd_flag) {
    using ctype = typename DTypeTrait<T>::ctype;
    auto run = [&](TensorShape shape) {
        auto opr = handle->create_operator<ShuffleRNGForward>();
        TensorLayout srclay{shape, T()};
        TensorLayout dstlay{shape, T()};
        // One index entry per row (axis-0 slice) of src.
        TensorLayout indexlay{TensorShape{shape[0]}, dtype::Int32()};
        Tensor<dt_byte> workspace(
                handle,
                {TensorShape{opr->get_workspace_in_bytes(srclay, dstlay, indexlay)},
                 dtype::Byte()});
        SyncedTensor<ctype> src(handle, srclay);
        SyncedTensor<ctype> dst(handle, dstlay);
        SyncedTensor<DTypeTrait<dt_int32>::ctype> index(handle, indexlay);
        auto sptr = src.ptr_mutable_host();
        size_t size = src.layout().total_nr_elems();
        // Fill src with 0..size-1 so each element identifies its origin.
        for (size_t j = 0; j < size; ++j) {
            sptr[j] = j;
        }
        opr->exec(
                src.tensornd_dev(), dst.tensornd_dev(), index.tensornd_dev(),
                {workspace.ptr(), workspace.layout().total_nr_elems()});
        auto dptr = dst.ptr_mutable_host();
        auto iptr = index.ptr_mutable_host();
        size_t len = index.layout().total_nr_elems();
        // `step` elements per shuffled row (product of trailing dims).
        size_t step = size / len;
        // Forward check: row i of dst equals row index[i] of src.
        for (size_t i = 0; i < len; ++i) {
            for (size_t j = 0; j < step; ++j) {
                ASSERT_EQ(dptr[i * step + j], sptr[iptr[i] * step + j]);
            }
        }
        if (bwd_flag) {
            // Clear src so the backward result cannot pass by accident.
            for (size_t j = 0; j < size; ++j) {
                sptr[j] = 0;
            }
            auto oprbwd = handle->create_operator<ShuffleRNGBackward>();
            oprbwd->exec(
                    dst.tensornd_dev(), index.tensornd_dev(), src.tensornd_dev(),
                    {workspace.ptr(), workspace.layout().total_nr_elems()});
            auto sptr_bwd = src.ptr_mutable_host();
            // Backward check: scattering dst through index reproduces the
            // same row correspondence as the forward pass.
            for (size_t i = 0; i < len; ++i) {
                for (size_t j = 0; j < step; ++j) {
                    ASSERT_EQ(dptr[i * step + j], sptr_bwd[iptr[i] * step + j]);
                }
            }
        }
    };
    run({10});
    run({6, 3});
}
  170. template <typename T>
  171. void run_dropout(Handle* handle) {
  172. using ctype = typename DTypeTrait<T>::ctype;
  173. auto run = [&](TensorShape shape, float drop_prob) {
  174. auto fwd = handle->create_operator<DropoutForward>();
  175. auto bwd = handle->create_operator<DropoutBackward>();
  176. fwd->param().drop_prob = drop_prob;
  177. bwd->param().drop_prob = drop_prob;
  178. double scale = 1.0 / (1.0 - drop_prob);
  179. TensorLayout inp_lay{shape, T()};
  180. TensorLayout oup_lay{shape, T()};
  181. TensorLayout mask_lay{{fwd->get_mask_size_in_bytes(inp_lay)}, dtype::Byte()};
  182. TensorLayout doup_lay{shape, T()};
  183. TensorLayout dinp_lay{shape, T()};
  184. TensorLayout fwd_ws_lay{
  185. {fwd->get_workspace_in_bytes(inp_lay, oup_lay, mask_lay)},
  186. dtype::Byte()};
  187. TensorLayout bwd_ws_lay{
  188. {bwd->get_workspace_in_bytes(doup_lay, mask_lay, dinp_lay)},
  189. dtype::Byte()};
  190. SyncedTensor<ctype> inp(handle, inp_lay);
  191. SyncedTensor<ctype> oup(handle, oup_lay);
  192. SyncedTensor<DTypeTrait<dt_byte>::ctype> mask(handle, mask_lay);
  193. SyncedTensor<ctype> doup(handle, doup_lay);
  194. SyncedTensor<ctype> dinp(handle, dinp_lay);
  195. SyncedTensor<DTypeTrait<dt_byte>::ctype> fwd_ws(handle, fwd_ws_lay);
  196. SyncedTensor<DTypeTrait<dt_byte>::ctype> bwd_ws(handle, bwd_ws_lay);
  197. for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
  198. inp.ptr_mutable_host()[i] = 1;
  199. doup.ptr_mutable_host()[i] = 1;
  200. }
  201. fwd->exec(
  202. inp.tensornd_dev(), oup.tensornd_dev(), mask.tensornd_dev(),
  203. {fwd_ws.ptr_mutable_dev(), fwd_ws.layout().total_nr_elems()});
  204. size_t droped_cnt = 0;
  205. for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
  206. ASSERT_TRUE(
  207. oup.ptr_host()[i] == 0 ||
  208. oup.ptr_host()[i] == static_cast<ctype>(scale));
  209. if (oup.ptr_host()[i] == 0) {
  210. droped_cnt++;
  211. }
  212. }
  213. float real_drop = droped_cnt * 1.0 / inp.layout().total_nr_elems();
  214. ASSERT_LT(abs(drop_prob - real_drop), 1e-2);
  215. #if CUDNN_VERSION >= 7000
  216. bwd->exec(
  217. doup.tensornd_dev(), mask.tensornd_dev(), dinp.tensornd_dev(),
  218. {bwd_ws.ptr_mutable_dev(), bwd_ws.layout().total_nr_elems()});
  219. for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
  220. ASSERT_TRUE(oup.ptr_host()[i] == dinp.ptr_host()[i]);
  221. }
  222. #endif
  223. };
  224. run({32, 32, 32, 32}, 0.2);
  225. run({100000}, 0.3);
  226. }
  227. } // anonymous namespace
  228. TEST_F(CUDA, UNIFORM_RNG_F32) {
  229. auto opr = handle_cuda()->create_operator<UniformRNG>();
  230. opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
  231. SyncedTensor<> t(handle_cuda(), {TensorShape{200000}, dtype::Float32()});
  232. opr->exec(t.tensornd_dev(), {});
  233. assert_uniform_correct(t.ptr_mutable_host(), t.layout().total_nr_elems());
  234. }
  235. TEST_F(CUDA, GAUSSIAN_RNG_F32) {
  236. auto opr = handle_cuda()->create_operator<GaussianRNG>();
  237. opr->param().mean = 0.8;
  238. opr->param().std = 2.3;
  239. opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
  240. for (size_t size : {1, 200000, 200001}) {
  241. TensorLayout ly{{size}, dtype::Float32()};
  242. Tensor<dt_byte> workspace(
  243. handle_cuda(),
  244. {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
  245. SyncedTensor<> t(handle_cuda(), ly);
  246. opr->exec(
  247. t.tensornd_dev(),
  248. {workspace.ptr(), workspace.layout().total_nr_elems()});
  249. auto ptr = t.ptr_mutable_host();
  250. ASSERT_LE(std::abs(ptr[0] - 0.8), 2.3);
  251. if (size >= 1000) {
  252. auto stat = get_mean_var(ptr, size, 0.8f);
  253. ASSERT_LE(std::abs(stat.first - 0.8), 5e-3);
  254. ASSERT_LE(std::abs(stat.second - 2.3 * 2.3), 5e-2);
  255. }
  256. }
  257. }
// Per-dtype instantiations of the helpers defined in the anonymous namespace
// above. Each test only forwards the CUDA handle to the matching run_* helper.

// Gamma distribution sampling.
TEST_F(CUDA, GAMMA_RNG_F32) {
    run_gamma<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, GAMMA_RNG_F16) {
    run_gamma<dtype::Float16>(handle_cuda());
}

// Poisson distribution sampling.
TEST_F(CUDA, POISSON_RNG_F32) {
    run_poisson<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, POISSON_RNG_F16) {
    run_poisson<dtype::Float16>(handle_cuda());
}

// Beta distribution sampling.
TEST_F(CUDA, BETA_RNG_F32) {
    run_beta<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, BETA_RNG_F16) {
    run_beta<dtype::Float16>(handle_cuda());
}

// Random permutation generation (no Float16: index range is too small).
TEST_F(CUDA, PERMUTATION_RNG_F32) {
    run_permutation<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, PERMUTATION_RNG_INT32) {
    run_permutation<dtype::Int32>(handle_cuda());
}
TEST_F(CUDA, PERMUTATION_RNG_INT16) {
    run_permutation<dtype::Int16>(handle_cuda());
}

// Shuffle forward only (bwd_flag = false).
TEST_F(CUDA, SHUFFLE_RNG_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), false);
}
TEST_F(CUDA, SHUFFLE_RNG_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), false);
}
TEST_F(CUDA, SHUFFLE_RNG_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), false);
}

// Shuffle forward + backward (bwd_flag = true).
TEST_F(CUDA, SHUFFLE_RNG_BWD_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), true);
}
TEST_F(CUDA, SHUFFLE_RNG_BWD_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), true);
}
TEST_F(CUDA, SHUFFLE_RNG_BWD_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), true);
}

// Dropout forward/backward.
TEST_F(CUDA, DROPOUT_F32) {
    run_dropout<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, DROPOUT_F16) {
    run_dropout<dtype::Float16>(handle_cuda());
}
  309. } // namespace test
  310. } // namespace megdnn
  311. // vim: syntax=cpp.doxygen