
rand.cpp 17 kB

/**
 * \file src/opr/test/rand.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megbrain/opr/rand.h"
#include "megbrain/opr/io.h"
#include "megbrain/test/helper.h"
#include "megbrain/utils/arith_helper.h"

#include <cmath>

using namespace mgb;

namespace {
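// Computes mean / std / min / max of a float buffer. `mean_expect` is
// subtracted before accumulation to reduce floating-point cancellation error.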
struct BasicStat {
    double mean, std, min, max;

    static BasicStat make(const float* ptr, size_t size, double mean_expect = 0) {
        double sum = 0, sum2 = 0, min = std::numeric_limits<double>::max(),
               max = std::numeric_limits<double>::lowest();
        for (size_t i = 0; i < size; ++i) {
            double cur = ptr[i];
            min = std::min(min, cur);
            max = std::max(max, cur);
            cur -= mean_expect;
            sum += cur;
            sum2 += cur * cur;
        }
        double mean = sum / size + mean_expect,
               std = sqrt((sum2 - sum * sum / size) / (size - 1));
        return {mean, std, min, max};
    }
};
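
// Compiles two identical functions from `make(seed)` and runs each twice:
// corresponding outputs must be bitwise equal across the two functions, while
// outputs from different seeds or different runs of the same function must
// differ noticeably.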
void check_reproducibility(std::shared_ptr<ComputingGraph> graph, size_t size,
                           thin_function<SymbolVar(uint64_t seed)> make) {
    // out[func][opr][run]
    HostTensorND out[2][2][2];
    auto run = [&](int fid) {
        SymbolVar o0 = make(0), o1 = make(1);
        HostTensorND host_o0, host_o1;
        auto func = graph->compile({make_callback_copy(o0, host_o0),
                                    make_callback_copy(o1, host_o1)});
        for (int i = 0; i < 2; ++i) {
            func->execute();
            out[fid][0][i].copy_from(host_o0);
            out[fid][1][i].copy_from(host_o1);
        }
    };
    run(0);
    run(1);
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
            MGB_ASSERT_TENSOR_EQ(out[0][i][j], out[1][i][j]);
    }

    auto max_diff = [&](int off0, int off1) {
        float diff = 0;
        auto p0 = out[0][off0 / 2][off0 % 2].ptr<float>(),
             p1 = out[0][off1 / 2][off1 % 2].ptr<float>();
        for (size_t i = 0; i < size; ++i) {
            update_max(diff, std::abs(p0[i] - p1[i]));
        }
        return diff;
    };
    for (int i = 0; i < 4; ++i) {
        for (int j = i + 1; j < 4; ++j)
            ASSERT_GT(max_diff(i, j), 0.3) << i << " " << j;
    }
}

}  // anonymous namespace
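
// UniformRNG: samples should have mean ~0.5, std ~sqrt(1/12), and lie in (0, 1].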
TEST(TestOprRand, Uniform) {
    static constexpr size_t M = 128, N = 64;
    auto graph = ComputingGraph::make();
    SymbolVar dev_out = opr::UniformRNG::make(
            *graph, {M, N}, {CompNode::load("xpu0")}, {23, DTypeEnum::Float32});
    HostTensorND host_out;
    auto func = graph->compile({make_callback_copy(dev_out, host_out)});
    func->execute();
    ASSERT_EQ(host_out.shape(), TensorShape({M, N}));
    auto stat = BasicStat::make(host_out.ptr<float>(), M * N, 0.5);
    ASSERT_LT(fabs(stat.mean - 0.5), 0.01);
    ASSERT_LT(fabs(stat.std - sqrt(1 / 12.0)), 0.1);
    ASSERT_GT(stat.min, 0);
    ASSERT_LE(stat.max, 1);
}
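
// GaussianRNG: sample mean and std should match the requested MEAN and STD.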
TEST(TestOprRand, Gaussian) {
    static constexpr size_t SIZE = 123451;
    constexpr float MEAN = 1, STD = 2;
    auto graph = ComputingGraph::make();
    auto y = opr::GaussianRNG::make(
            SymbolVar::make_scalar(int(SIZE), *graph, {CompNode::load("xpu0")}),
            {23, MEAN, STD, DTypeEnum::Float32});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    ASSERT_EQ(TensorShape({SIZE}), host_y.shape());
    auto stat = BasicStat::make(host_y.ptr<float>(), SIZE, MEAN);
    ASSERT_LT(fabs(stat.mean - MEAN), 0.01);
    ASSERT_LT(fabs(stat.std - STD), 0.1);
}
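
// GammaRNG with shape a and scale b: mean a*b, variance a*b^2.
// Five (shape, scale) pairs, 2,000,000 samples each.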
TEST(TestOprRand, Gamma) {
    std::shared_ptr<HostTensorND> shape_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{2000000 * 5}, dtype::Float32()});
    std::shared_ptr<HostTensorND> scale_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{2000000 * 5}, dtype::Float32()});
    auto shape_ptr = shape_host->ptr<float>();
    auto scale_ptr = scale_host->ptr<float>();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 2000000; ++j) {
            shape_ptr[i * 2000000 + j] = 2 * 0.3 * i + 0.5;
            scale_ptr[i * 2000000 + j] = i * 0.3 + 0.5;
        }
    }
    auto graph = ComputingGraph::make();
    auto shape_sym = opr::Host2DeviceCopy::make(*graph, shape_host);
    auto scale_sym = opr::Host2DeviceCopy::make(*graph, scale_host);
    auto y = opr::GammaRNG::make(shape_sym, scale_sym, {10});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    ASSERT_EQ(TensorShape({2000000 * 5}), host_y.shape());
    for (int i = 0; i < 5; ++i) {
        float a = 2 * 0.3 * i + 0.5, b = i * 0.3 + 0.5;
        float mean = a * b;
        float std = a * (b * b);
        auto stat = BasicStat::make(host_y.ptr<float>() + 2000000 * i, 2000000, mean);
        ASSERT_LT(fabs(stat.mean - mean), 0.01);
        ASSERT_LT(fabs(stat.std - sqrt(std)), 0.01);
    }
}
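
// PoissonRNG: mean and variance both equal lambda. Five lambda values,
// 200,000 samples each.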
TEST(TestOprRand, Poisson) {
    std::shared_ptr<HostTensorND> lam_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{200000 * 5}, dtype::Float32()});
    auto lam_ptr = lam_host->ptr<float>();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 200000; ++j) {
            lam_ptr[i * 200000 + j] = i + 1;
        }
    }
    auto graph = ComputingGraph::make();
    auto lam_sym = opr::Host2DeviceCopy::make(*graph, lam_host);
    auto y = opr::PoissonRNG::make(lam_sym, {10});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    ASSERT_EQ(TensorShape({200000 * 5}), host_y.shape());
    for (int i = 0; i < 5; ++i) {
        float lambda = i + 1;
        auto stat = BasicStat::make(host_y.ptr<float>() + 200000 * i, 200000, lambda);
        ASSERT_LT(fabs(stat.mean - lambda), 0.01);
        ASSERT_LT(fabs(stat.std - sqrt(lambda)), 0.1);
    }
}
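
// BetaRNG with parameters a, b: mean a/(a+b), variance a*b/((a+b)^2*(a+b+1)).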
TEST(TestOprRand, Beta) {
    std::shared_ptr<HostTensorND> alpha_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{200000 * 5}, dtype::Float32()});
    std::shared_ptr<HostTensorND> beta_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{200000 * 5}, dtype::Float32()});
    auto alpha_ptr = alpha_host->ptr<float>();
    auto beta_ptr = beta_host->ptr<float>();
    for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 200000; ++j) {
            alpha_ptr[i * 200000 + j] = 0.3 * i + 0.1;
            beta_ptr[i * 200000 + j] = 2 * i * 0.3 + 0.1;
        }
    }
    auto graph = ComputingGraph::make();
    auto alpha_sym = opr::Host2DeviceCopy::make(*graph, alpha_host);
    auto beta_sym = opr::Host2DeviceCopy::make(*graph, beta_host);
    auto y = opr::BetaRNG::make(alpha_sym, beta_sym, {10});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    ASSERT_EQ(TensorShape({200000 * 5}), host_y.shape());
    for (int i = 0; i < 5; ++i) {
        float a = 0.3 * i + 0.1, b = 2 * i * 0.3 + 0.1;
        float mean = a / (a + b);
        float std = a * b / ((a + b) * (a + b) * (a + b + 1));
        auto stat = BasicStat::make(host_y.ptr<float>() + 200000 * i, 200000, mean);
        ASSERT_LT(fabs(stat.mean - mean), 0.01);
        ASSERT_LT(fabs(stat.std - sqrt(std)), 0.01);
    }
}
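
// PermutationRNG: the output must be a permutation of [0, SIZE) (sorting it
// yields the identity) and must differ from the identity in many positions.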
TEST(TestOprRand, PermutationRNG) {
    static constexpr size_t SIZE = 123451;
    auto graph = ComputingGraph::make();
    auto y = opr::PermutationRNG::make(
            SymbolVar::make_scalar(int(SIZE), *graph, {CompNode::load("xpu0")}),
            {23, DTypeEnum::Int32});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    ASSERT_EQ(TensorShape({SIZE}), host_y.shape());
    auto ptr = host_y.ptr<int32_t>();
    std::vector<int32_t> res(SIZE);
    int not_same = 0;
    for (size_t i = 0; i < SIZE; ++i) {
        if ((ptr[i] - int32_t(i)) >= 1) not_same++;
        res[i] = ptr[i];
    }
    ASSERT_GT(not_same, 5000);
    std::sort(res.begin(), res.end());
    for (size_t i = 0; i < SIZE; ++i) {
        ASSERT_LE(std::abs(res[i] - int32_t(i)), 1e-8);
    }
}
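
// Every RNG opr should accept empty input shapes and produce an output of the
// same (empty) shape without crashing.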
TEST(TestOprRand, EmptyShape) {
    auto test_uniform = []() {
        static constexpr size_t M = 128, N = 0;
        auto graph = ComputingGraph::make();
        SymbolVar dev_out = opr::UniformRNG::make(
                *graph, {M, N}, {CompNode::load("xpu0")}, {23, DTypeEnum::Float32});
        HostTensorND host_out;
        auto func = graph->compile({make_callback_copy(dev_out, host_out)});
        func->execute();
        ASSERT_EQ(host_out.shape(), TensorShape({M, N}));
    };
    auto test_gaussian = []() {
        size_t SIZE = 0;
        constexpr float MEAN = 1, STD = 2;
        auto graph = ComputingGraph::make();
        auto y = opr::GaussianRNG::make(
                SymbolVar::make_scalar(int(SIZE), *graph, {CompNode::load("xpu0")}),
                {23, MEAN, STD, DTypeEnum::Float32});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(TensorShape({SIZE}), host_y.shape());
    };
    auto test_gamma = []() {
        std::shared_ptr<HostTensorND> shape_host(new HostTensorND{
                CompNode::load("xpux"), TensorShape{10, 0}, dtype::Float32()});
        std::shared_ptr<HostTensorND> scale_host(new HostTensorND{
                CompNode::load("xpux"), TensorShape{10, 0}, dtype::Float32()});
        auto graph = ComputingGraph::make();
        auto shape_sym = opr::Host2DeviceCopy::make(*graph, shape_host);
        auto scale_sym = opr::Host2DeviceCopy::make(*graph, scale_host);
        auto y = opr::GammaRNG::make(shape_sym, scale_sym, {10});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(TensorShape({10, 0}), host_y.shape());
    };
    auto test_poisson = []() {
        std::shared_ptr<HostTensorND> lam_host(new HostTensorND{
                CompNode::load("xpux"), TensorShape{10, 0}, dtype::Float32()});
        auto graph = ComputingGraph::make();
        auto lam_sym = opr::Host2DeviceCopy::make(*graph, lam_host);
        auto y = opr::PoissonRNG::make(lam_sym, {10});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(TensorShape({10, 0}), host_y.shape());
    };
    auto test_beta = []() {
        std::shared_ptr<HostTensorND> alpha_host(new HostTensorND{
                CompNode::load("xpux"), TensorShape{10, 0}, dtype::Float32()});
        std::shared_ptr<HostTensorND> beta_host(new HostTensorND{
                CompNode::load("xpux"), TensorShape{10, 0}, dtype::Float32()});
        auto graph = ComputingGraph::make();
        auto alpha_sym = opr::Host2DeviceCopy::make(*graph, alpha_host);
        auto beta_sym = opr::Host2DeviceCopy::make(*graph, beta_host);
        auto y = opr::BetaRNG::make(alpha_sym, beta_sym, {10});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(TensorShape({10, 0}), host_y.shape());
    };
    auto test_permutation = []() {
        static constexpr size_t SIZE = 0;
        auto graph = ComputingGraph::make();
        auto y = opr::PermutationRNG::make(
                SymbolVar::make_scalar(int(SIZE), *graph, {CompNode::load("xpu0")}),
                {23, DTypeEnum::Int32});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(TensorShape({SIZE}), host_y.shape());
    };
    test_uniform();
    test_gaussian();
    test_gamma();
    test_poisson();
    test_beta();
    test_permutation();
}
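
// ShuffleRNG returns the shuffled tensor and the permutation index; row i of
// the output must equal row index[i] of the input.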
TEST(TestOprRand, ShuffleForward) {
    auto run = [&](TensorShape shape) {
        std::shared_ptr<HostTensorND> src_host(new HostTensorND{
                CompNode::load("xpux"), shape, dtype::Float32()});
        auto sptr = src_host->ptr<dt_float32>();
        auto size = shape.total_nr_elems();
        for (size_t i = 0; i < size; ++i) {
            sptr[i] = i;
        }
        auto graph = ComputingGraph::make();
        auto src_sym = opr::Host2DeviceCopy::make(*graph, src_host);
        auto rec = opr::ShuffleRNG::make(src_sym, {10});
        HostTensorND host_y, host_index;
        auto func = graph->compile({make_callback_copy(rec[0], host_y),
                                    make_callback_copy(rec[1], host_index)});
        func->execute();
        auto dptr = host_y.ptr<dt_float32>();
        auto iptr = host_index.ptr<dt_int32>();
        size_t len = shape[0];
        size_t step = size / len;
        for (size_t i = 0; i < len; ++i) {
            for (size_t j = 0; j < step; ++j) {
                assert(dptr[i * step + j] == sptr[iptr[i] * step + j]);
            }
        }
    };
    run({10});
    run({6, 3});
    run({1, 1});
}
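
// Reproducibility tests: via check_reproducibility, each RNG opr must produce
// identical results for the same seed across separately compiled functions,
// and clearly different results for different seeds or repeated executions.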
TEST(TestOprRand, UniformReprod) {
    static constexpr size_t SIZE = 123;
    auto graph = ComputingGraph::make();
    auto shp = cg::var_from_tensor_shape(*graph, {CompNode::load("xpu0")},
                                         "shp0", {SIZE});
    check_reproducibility(graph, SIZE, [&shp](uint64_t seed) {
        return opr::UniformRNG::make(shp, {seed});
    });
}

TEST(TestOprRand, GaussianReprod) {
    static constexpr size_t SIZE = 123;
    auto graph = ComputingGraph::make();
    auto shp = cg::var_from_tensor_shape(*graph, {CompNode::load("xpu0")},
                                         "shp0", {SIZE});
    check_reproducibility(graph, SIZE, [&shp](uint64_t seed) {
        return opr::GaussianRNG::make(shp, {seed});
    });
}

TEST(TestOprRand, GammaReprod) {
    static constexpr size_t SIZE = 123;
    std::shared_ptr<HostTensorND> shape_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{SIZE}, dtype::Float32()});
    std::shared_ptr<HostTensorND> scale_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{SIZE}, dtype::Float32()});
    auto shape_ptr = shape_host->ptr<float>();
    auto scale_ptr = scale_host->ptr<float>();
    for (size_t i = 0; i < SIZE; ++i) {
        shape_ptr[i] = 0.5;
        scale_ptr[i] = 1.2;
    }
    auto graph = ComputingGraph::make();
    auto shape_sym = opr::Host2DeviceCopy::make(*graph, shape_host);
    auto scale_sym = opr::Host2DeviceCopy::make(*graph, scale_host);
    check_reproducibility(graph, SIZE, [&shape_sym, &scale_sym](uint64_t seed) {
        return opr::GammaRNG::make(shape_sym, scale_sym, {seed});
    });
}

TEST(TestOprRand, PoissonReprod) {
    static constexpr size_t SIZE = 123;
    std::shared_ptr<HostTensorND> lam_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{SIZE}, dtype::Float32()});
    auto lam_ptr = lam_host->ptr<float>();
    for (size_t i = 0; i < SIZE; ++i)
        lam_ptr[i] = 2;
    auto graph = ComputingGraph::make();
    auto lam_sym = opr::Host2DeviceCopy::make(*graph, lam_host);
    check_reproducibility(graph, SIZE, [&lam_sym](uint64_t seed) {
        return opr::PoissonRNG::make(lam_sym, {seed});
    });
}

TEST(TestOprRand, BetaReprod) {
    static constexpr size_t SIZE = 123;
    std::shared_ptr<HostTensorND> alpha_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{SIZE}, dtype::Float32()});
    std::shared_ptr<HostTensorND> beta_host(new HostTensorND{
            CompNode::load("xpux"), TensorShape{SIZE}, dtype::Float32()});
    auto alpha_ptr = alpha_host->ptr<float>();
    auto beta_ptr = beta_host->ptr<float>();
    for (size_t i = 0; i < SIZE; ++i) {
        alpha_ptr[i] = 0.5;
        beta_ptr[i] = 1.2;
    }
    auto graph = ComputingGraph::make();
    auto alpha_sym = opr::Host2DeviceCopy::make(*graph, alpha_host);
    auto beta_sym = opr::Host2DeviceCopy::make(*graph, beta_host);
    check_reproducibility(graph, SIZE, [&alpha_sym, &beta_sym](uint64_t seed) {
        return opr::BetaRNG::make(alpha_sym, beta_sym, {seed});
    });
}

TEST(TestOprRand, PermutationReprod) {
    static constexpr size_t SIZE = 123;
    auto graph = ComputingGraph::make();
    auto shp = cg::var_from_tensor_shape(*graph, {CompNode::load("xpu0")},
                                         "shp0", {SIZE});
    check_reproducibility(graph, SIZE, [&shp](uint64_t seed) {
        return opr::PermutationRNG::make(shp, {seed, DTypeEnum::Float32});
    });
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package bundles the CUDA environment needed to run code on GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has GPU hardware and the driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.