You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

#include "megdnn/oprs.h"

#include "test/common/tensor.h"
#include "test/cuda/fixture.h"
#include "test/naive/rng.h"

#include <cmath>
  5. namespace megdnn {
  6. namespace test {
  7. namespace {
  8. template <typename T>
  9. void run_gamma(Handle* handle) {
  10. using ctype = typename DTypeTrait<T>::ctype;
  11. auto opr = handle->create_operator<GammaRNG>();
  12. TensorLayout ly{TensorShape{2000000 * 5}, T()};
  13. SyncedTensor<ctype> out(handle, ly);
  14. SyncedTensor<ctype> shape(handle, ly);
  15. SyncedTensor<ctype> scale(handle, ly);
  16. auto shape_ptr = shape.ptr_mutable_host();
  17. auto scale_ptr = scale.ptr_mutable_host();
  18. for (int i = 0; i < 5; ++i) {
  19. for (int j = 0; j < 2000000; ++j) {
  20. shape_ptr[i * 2000000 + j] = 2 * 0.3 * i + 0.3;
  21. scale_ptr[i * 2000000 + j] = i * 0.2 + 0.1;
  22. }
  23. }
  24. opr->exec(shape.tensornd_dev(), scale.tensornd_dev(), out.tensornd_dev(), {});
  25. auto ptr = out.ptr_mutable_host();
  26. for (int i = 0; i < 5; ++i) {
  27. float a = 2 * 0.3 * i + 0.3, b = i * 0.2 + 0.1;
  28. float mean = a * b;
  29. float std = a * (b * b);
  30. auto stat = get_mean_var(ptr + i * 2000000, 2000000, ctype(mean));
  31. ASSERT_LE(std::abs(stat.first - mean), 0.01);
  32. ASSERT_LE(std::abs(stat.second - std), 0.01);
  33. }
  34. }
  35. template <typename T>
  36. void run_poisson(Handle* handle) {
  37. using ctype = typename DTypeTrait<T>::ctype;
  38. auto opr = handle->create_operator<PoissonRNG>();
  39. TensorLayout ly{TensorShape{200000 * 5}, T()};
  40. SyncedTensor<ctype> out(handle, ly);
  41. SyncedTensor<ctype> lam(handle, ly);
  42. auto lam_ptr = lam.ptr_mutable_host();
  43. for (int i = 0; i < 5; ++i) {
  44. for (int j = 0; j < 200000; ++j) {
  45. lam_ptr[i * 200000 + j] = ctype(i + 1);
  46. }
  47. }
  48. opr->exec(lam.tensornd_dev(), out.tensornd_dev(), {});
  49. auto ptr = out.ptr_mutable_host();
  50. for (int i = 0; i < 5; ++i) {
  51. auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(i + 1));
  52. ASSERT_LE(std::abs(stat.first - ctype(i + 1)), 0.01);
  53. ASSERT_LE(std::abs(stat.second - ctype(i + 1)), 0.01);
  54. }
  55. }
  56. template <typename T>
  57. void run_beta(Handle* handle) {
  58. using ctype = typename DTypeTrait<T>::ctype;
  59. auto opr = handle->create_operator<BetaRNG>();
  60. TensorLayout ly{TensorShape{200000 * 5}, T()};
  61. SyncedTensor<ctype> out(handle, ly);
  62. SyncedTensor<ctype> alpha(handle, ly);
  63. SyncedTensor<ctype> beta(handle, ly);
  64. auto alpha_ptr = alpha.ptr_mutable_host();
  65. auto beta_ptr = beta.ptr_mutable_host();
  66. for (int i = 0; i < 5; ++i) {
  67. for (int j = 0; j < 200000; ++j) {
  68. alpha_ptr[i * 200000 + j] = 0.3 * i + 0.1;
  69. beta_ptr[i * 200000 + j] = 2 * i * 0.3 + 0.1;
  70. }
  71. }
  72. opr->exec(alpha.tensornd_dev(), beta.tensornd_dev(), out.tensornd_dev(), {});
  73. auto ptr = out.ptr_mutable_host();
  74. for (int i = 0; i < 5; ++i) {
  75. float a = 0.3 * i + 0.1, b = 2 * i * 0.3 + 0.1;
  76. float mean = a / (a + b);
  77. float std = a * b / ((a + b) * (a + b) * (a + b + 1));
  78. auto stat = get_mean_var(ptr + i * 200000, 200000, ctype(mean));
  79. ASSERT_LE(std::abs(stat.first - mean), 0.01);
  80. ASSERT_LE(std::abs(stat.second - std), 0.01);
  81. }
  82. }
  83. template <typename T>
  84. void run_permutation(Handle* handle) {
  85. using ctype = typename DTypeTrait<T>::ctype;
  86. size_t sample_num = std::min(200000, static_cast<int>(DTypeTrait<T>::max()) - 10);
  87. auto opr = handle->create_operator<PermutationRNG>();
  88. opr->param().dtype = DTypeTrait<T>::enumv;
  89. TensorLayout ly{TensorShape{sample_num}, T()};
  90. Tensor<dt_byte> workspace(
  91. handle, {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
  92. SyncedTensor<ctype> t(handle, ly);
  93. opr->exec(t.tensornd_dev(), {workspace.ptr(), workspace.layout().total_nr_elems()});
  94. auto ptr = t.ptr_mutable_host();
  95. auto size = t.layout().total_nr_elems();
  96. std::vector<ctype> res(size);
  97. int not_same = 0;
  98. for (size_t i = 0; i < size; ++i) {
  99. if ((ptr[i] - ctype(i)) >= ctype(1))
  100. not_same++;
  101. res[i] = ptr[i];
  102. }
  103. ASSERT_GT(not_same, 5000);
  104. std::sort(res.begin(), res.end());
  105. for (size_t i = 0; i < size; ++i) {
  106. ASSERT_LE(std::abs(res[i] - ctype(i)), 1e-8);
  107. }
  108. }
  109. template <typename T>
  110. void run_shuffle(Handle* handle, bool bwd_flag) {
  111. using ctype = typename DTypeTrait<T>::ctype;
  112. auto run = [&](TensorShape shape) {
  113. auto opr = handle->create_operator<ShuffleRNGForward>();
  114. TensorLayout srclay{shape, T()};
  115. TensorLayout dstlay{shape, T()};
  116. TensorLayout indexlay{TensorShape{shape[0]}, dtype::Int32()};
  117. Tensor<dt_byte> workspace(
  118. handle,
  119. {TensorShape{opr->get_workspace_in_bytes(srclay, dstlay, indexlay)},
  120. dtype::Byte()});
  121. SyncedTensor<ctype> src(handle, srclay);
  122. SyncedTensor<ctype> dst(handle, dstlay);
  123. SyncedTensor<DTypeTrait<dt_int32>::ctype> index(handle, indexlay);
  124. auto sptr = src.ptr_mutable_host();
  125. size_t size = src.layout().total_nr_elems();
  126. for (size_t j = 0; j < size; ++j) {
  127. sptr[j] = j;
  128. }
  129. opr->exec(
  130. src.tensornd_dev(), dst.tensornd_dev(), index.tensornd_dev(),
  131. {workspace.ptr(), workspace.layout().total_nr_elems()});
  132. auto dptr = dst.ptr_mutable_host();
  133. auto iptr = index.ptr_mutable_host();
  134. size_t len = index.layout().total_nr_elems();
  135. size_t step = size / len;
  136. for (size_t i = 0; i < len; ++i) {
  137. for (size_t j = 0; j < step; ++j) {
  138. ASSERT_EQ(dptr[i * step + j], sptr[iptr[i] * step + j]);
  139. }
  140. }
  141. if (bwd_flag) {
  142. for (size_t j = 0; j < size; ++j) {
  143. sptr[j] = 0;
  144. }
  145. auto oprbwd = handle->create_operator<ShuffleRNGBackward>();
  146. oprbwd->exec(
  147. dst.tensornd_dev(), index.tensornd_dev(), src.tensornd_dev(),
  148. {workspace.ptr(), workspace.layout().total_nr_elems()});
  149. auto sptr_bwd = src.ptr_mutable_host();
  150. for (size_t i = 0; i < len; ++i) {
  151. for (size_t j = 0; j < step; ++j) {
  152. ASSERT_EQ(dptr[i * step + j], sptr_bwd[iptr[i] * step + j]);
  153. }
  154. }
  155. }
  156. };
  157. run({10});
  158. run({6, 3});
  159. }
  160. template <typename T>
  161. void run_dropout(Handle* handle) {
  162. using ctype = typename DTypeTrait<T>::ctype;
  163. auto run = [&](TensorShape shape, float drop_prob) {
  164. auto fwd = handle->create_operator<DropoutForward>();
  165. auto bwd = handle->create_operator<DropoutBackward>();
  166. fwd->param().drop_prob = drop_prob;
  167. bwd->param().drop_prob = drop_prob;
  168. double scale = 1.0 / (1.0 - drop_prob);
  169. TensorLayout inp_lay{shape, T()};
  170. TensorLayout oup_lay{shape, T()};
  171. TensorLayout mask_lay{{fwd->get_mask_size_in_bytes(inp_lay)}, dtype::Byte()};
  172. TensorLayout doup_lay{shape, T()};
  173. TensorLayout dinp_lay{shape, T()};
  174. TensorLayout fwd_ws_lay{
  175. {fwd->get_workspace_in_bytes(inp_lay, oup_lay, mask_lay)},
  176. dtype::Byte()};
  177. TensorLayout bwd_ws_lay{
  178. {bwd->get_workspace_in_bytes(doup_lay, mask_lay, dinp_lay)},
  179. dtype::Byte()};
  180. SyncedTensor<ctype> inp(handle, inp_lay);
  181. SyncedTensor<ctype> oup(handle, oup_lay);
  182. SyncedTensor<DTypeTrait<dt_byte>::ctype> mask(handle, mask_lay);
  183. SyncedTensor<ctype> doup(handle, doup_lay);
  184. SyncedTensor<ctype> dinp(handle, dinp_lay);
  185. SyncedTensor<DTypeTrait<dt_byte>::ctype> fwd_ws(handle, fwd_ws_lay);
  186. SyncedTensor<DTypeTrait<dt_byte>::ctype> bwd_ws(handle, bwd_ws_lay);
  187. for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
  188. inp.ptr_mutable_host()[i] = 1;
  189. doup.ptr_mutable_host()[i] = 1;
  190. }
  191. fwd->exec(
  192. inp.tensornd_dev(), oup.tensornd_dev(), mask.tensornd_dev(),
  193. {fwd_ws.ptr_mutable_dev(), fwd_ws.layout().total_nr_elems()});
  194. size_t droped_cnt = 0;
  195. for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
  196. ASSERT_TRUE(
  197. oup.ptr_host()[i] == 0 ||
  198. oup.ptr_host()[i] == static_cast<ctype>(scale));
  199. if (oup.ptr_host()[i] == 0) {
  200. droped_cnt++;
  201. }
  202. }
  203. float real_drop = droped_cnt * 1.0 / inp.layout().total_nr_elems();
  204. ASSERT_LT(abs(drop_prob - real_drop), 1e-2);
  205. #if CUDNN_VERSION >= 7000
  206. bwd->exec(
  207. doup.tensornd_dev(), mask.tensornd_dev(), dinp.tensornd_dev(),
  208. {bwd_ws.ptr_mutable_dev(), bwd_ws.layout().total_nr_elems()});
  209. for (size_t i = 0; i < inp.layout().total_nr_elems(); ++i) {
  210. ASSERT_TRUE(oup.ptr_host()[i] == dinp.ptr_host()[i]);
  211. }
  212. #endif
  213. };
  214. run({32, 32, 32, 32}, 0.2);
  215. run({100000}, 0.3);
  216. }
  217. } // anonymous namespace
  218. TEST_F(CUDA, UNIFORM_RNG_F32) {
  219. auto opr = handle_cuda()->create_operator<UniformRNG>();
  220. opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
  221. SyncedTensor<> t(handle_cuda(), {TensorShape{200000}, dtype::Float32()});
  222. opr->exec(t.tensornd_dev(), {});
  223. assert_uniform_correct(t.ptr_mutable_host(), t.layout().total_nr_elems());
  224. }
  225. TEST_F(CUDA, GAUSSIAN_RNG_F32) {
  226. auto opr = handle_cuda()->create_operator<GaussianRNG>();
  227. opr->param().mean = 0.8;
  228. opr->param().std = 2.3;
  229. opr->param().dtype = DTypeTrait<dtype::Float32>::enumv;
  230. for (size_t size : {1, 200000, 200001}) {
  231. TensorLayout ly{{size}, dtype::Float32()};
  232. Tensor<dt_byte> workspace(
  233. handle_cuda(),
  234. {TensorShape{opr->get_workspace_in_bytes(ly)}, dtype::Byte()});
  235. SyncedTensor<> t(handle_cuda(), ly);
  236. opr->exec(
  237. t.tensornd_dev(),
  238. {workspace.ptr(), workspace.layout().total_nr_elems()});
  239. auto ptr = t.ptr_mutable_host();
  240. ASSERT_LE(std::abs(ptr[0] - 0.8), 2.3);
  241. if (size >= 1000) {
  242. auto stat = get_mean_var(ptr, size, 0.8f);
  243. ASSERT_LE(std::abs(stat.first - 0.8), 5e-3);
  244. ASSERT_LE(std::abs(stat.second - 2.3 * 2.3), 5e-2);
  245. }
  246. }
  247. }
// Instantiate the parameterized checks above for each dtype supported by the
// CUDA backend.

// GammaRNG: statistics per (shape, scale) group.
TEST_F(CUDA, GAMMA_RNG_F32) {
    run_gamma<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, GAMMA_RNG_F16) {
    run_gamma<dtype::Float16>(handle_cuda());
}
// PoissonRNG: mean and variance both equal lambda.
TEST_F(CUDA, POISSON_RNG_F32) {
    run_poisson<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, POISSON_RNG_F16) {
    run_poisson<dtype::Float16>(handle_cuda());
}
// BetaRNG: statistics per (alpha, beta) group.
TEST_F(CUDA, BETA_RNG_F32) {
    run_beta<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, BETA_RNG_F16) {
    run_beta<dtype::Float16>(handle_cuda());
}
// PermutationRNG: output is a non-trivial permutation of [0, n).
TEST_F(CUDA, PERMUTATION_RNG_F32) {
    run_permutation<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, PERMUTATION_RNG_INT32) {
    run_permutation<dtype::Int32>(handle_cuda());
}
TEST_F(CUDA, PERMUTATION_RNG_INT16) {
    run_permutation<dtype::Int16>(handle_cuda());
}
// ShuffleRNG forward only (bwd_flag = false).
TEST_F(CUDA, SHUFFLE_RNG_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), false);
}
TEST_F(CUDA, SHUFFLE_RNG_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), false);
}
TEST_F(CUDA, SHUFFLE_RNG_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), false);
}
// ShuffleRNG forward + backward (bwd_flag = true).
TEST_F(CUDA, SHUFFLE_RNG_BWD_F32) {
    run_shuffle<dtype::Float32>(handle_cuda(), true);
}
TEST_F(CUDA, SHUFFLE_RNG_BWD_INT32) {
    run_shuffle<dtype::Int32>(handle_cuda(), true);
}
TEST_F(CUDA, SHUFFLE_RNG_BWD_F16) {
    run_shuffle<dtype::Float16>(handle_cuda(), true);
}
// Dropout forward (and backward when cuDNN >= 7).
TEST_F(CUDA, DROPOUT_F32) {
    run_dropout<dtype::Float32>(handle_cuda());
}
TEST_F(CUDA, DROPOUT_F16) {
    run_dropout<dtype::Float16>(handle_cuda());
}
  299. } // namespace test
  300. } // namespace megdnn
  301. // vim: syntax=cpp.doxygen