
rng.cpp 13 kB

/**
 * \file dnn/test/common/rng.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
  11. #include "test/common/rng.h"
  12. #include "test/common/random_state.h"
  13. #include "test/common/tensor.h"
  14. #include <gtest/gtest.h>
  15. using namespace megdnn;
  16. using namespace test;
  17. /*!
  18. * \brief xorshift+ RNG, which is very fast
  19. *
  20. * see https://en.wikipedia.org/wiki/Xorshift#xorshift.2B
  21. */
class RNG::RNGxorshf {
    uint64_t s[2];

public:
    using result_type = uint64_t;

#ifdef WIN32
    static uint64_t min() { return 0; }
    static uint64_t max() { return std::numeric_limits<uint64_t>::max(); }
#else
    static constexpr uint64_t min() { return 0; }
    static constexpr uint64_t max() { return std::numeric_limits<uint64_t>::max(); }
#endif

    template <typename T>
    explicit RNGxorshf(T&& gen) {
        s[0] = gen();
        s[1] = gen();
    }

    uint64_t operator()() {
        uint64_t x = s[0];
        uint64_t const y = s[1];
        s[0] = y;
        x ^= x << 23;                          // a
        s[1] = x ^ y ^ (x >> 17) ^ (y >> 26);  // b, c
        return s[1] + y;
    }
};

Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) {
    for (size_t x = 0; x < (1u << 16); ++x) {
        size_t exponent = (x >> 10) & 0x1F;
        if (exponent == 0x1F) {
            // +inf, -inf, NaN
            continue;
        }
        union U {
            U() {}
            uint16_t i;
            dt_float16 f;
        } i2f;
        i2f.i = static_cast<uint16_t>(x);
        m_sequence.push_back(i2f.f);
    }
    COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
}

Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
    union U {
        U() {}
        uint16_t i;
        dt_float16 f;
    } i2f;
    size_t x = 0;
    i2f.i = static_cast<uint16_t>(x);
    for (size_t i = 0; i < range; i++) {
        x += 1;
        i2f.i = static_cast<uint16_t>(x);
        m_sequence.push_back(i2f.f);
    }
    x = 1u << 15;
    i2f.i = static_cast<uint16_t>(x);
    for (size_t i = 0; i < range; i++) {
        x += 1;
        i2f.i = static_cast<uint16_t>(x);
        m_sequence.push_back(i2f.f);
    }
    COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
}

  94. void Float16PeriodicalRNG::gen(const TensorND& tensor) {
  95. megdnn_assert(tensor.layout.dtype == dtype::Float16());
  96. size_t nr_elems = tensor.layout.span().dist_elem();
  97. auto offset = tensor.layout.span().low_elem;
  98. for (size_t i = 0; i < nr_elems; ++i) {
  99. tensor.ptr<dt_float16>()[offset+i] = get_single_val();
  100. }
  101. }
  102. dt_float16 Float16PeriodicalRNG::get_single_val() {
  103. if (m_offset >= m_sequence.size()) {
  104. m_offset = 0;
  105. }
  106. return m_sequence[m_offset++];
  107. }
  108. void IIDRNG::gen(const TensorND& tensor) {
  109. if (tensor.layout.dtype == dtype::Float32() && has_fast_float32() &&
  110. tensor.layout.is_physical_contiguous()) {
  111. fill_fast_float32(tensor.ptr<dt_float32>(),
  112. tensor.layout.total_nr_elems());
  113. return;
  114. }
  115. auto offset = tensor.layout.span().low_elem;
  116. auto nr_elems = tensor.layout.span().dist_elem();
  117. #define cb(DType) \
  118. if (tensor.layout.dtype == DType()) { \
  119. using ctype = typename DTypeTrait<DType>::ctype; \
  120. auto ptr = tensor.ptr<ctype>(); \
  121. for (size_t i = 0; i < nr_elems; ++i) { \
  122. ptr[offset + i] = static_cast<ctype>(gen_single_val()); \
  123. } \
  124. return; \
  125. }
  126. MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
  127. #undef cb
  128. #define cb(DType) \
  129. if (tensor.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
  130. using ctype = typename DTypeTrait<DType>::ctype; \
  131. auto ptr = tensor.ptr<ctype>(); \
  132. if (output_is_float()) { \
  133. for (size_t i = 0; i < nr_elems; ++i) { \
  134. ptr[offset + i] = tensor.layout.dtype.param<DType>().quantize( \
  135. static_cast<float>(gen_single_val())); \
  136. } \
  137. } else { \
  138. for (size_t i = 0; i < nr_elems; ++i) { \
  139. ptr[offset + i] = static_cast<ctype>(gen_single_val()); \
  140. } \
  141. } \
  142. return; \
  143. }
  144. MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
  145. //! In order to avoid an unnecessary increase in binary size, we just
  146. //! use QuantizedS16 dtype in winograd_filter_preprocess now.
  147. cb(::megdnn::dtype::QuantizedS16)
  148. #undef cb
    if (tensor.layout.dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        // two 4-bit values are packed into each byte: val0 in the low nibble,
        // val1 in the high nibble
        auto ptr = static_cast<uint8_t*>(tensor.raw_ptr);
        if (output_is_float()) {
            for (size_t i = 0; i < nr_elems; i += 2) {
                uint8_t val0 = tensor.layout.dtype.param<dt_quint4>()
                                       .quantize(static_cast<float>(gen_single_val()))
                                       .as_uint8();
                uint8_t val1 = tensor.layout.dtype.param<dt_quint4>()
                                       .quantize(static_cast<float>(gen_single_val()))
                                       .as_uint8();
                ptr[(offset + i) / 2] = (val1 << 4) | val0;
            }
        } else {
            for (size_t i = 0; i < nr_elems; i += 2) {
                uint8_t val0 = static_cast<uint8_t>(gen_single_val());
                uint8_t val1 = static_cast<uint8_t>(gen_single_val());
                ptr[(offset + i) / 2] = (val1 << 4) | val0;
            }
        }
        return;
    }
    megdnn_assert(0, "IIDRNG does not know how to generate value for DType %s",
                  tensor.layout.dtype.name());
}

bool IIDRNG::has_fast_float32() {
    return false;
}

void IIDRNG::fill_fast_float32(dt_float32*, size_t) {
    megdnn_assert(0);
}

dt_float32 NormalRNG::gen_single_val() {
    auto&& gen = RandomState::generator();
    return m_dist(gen);
}

bool NormalRNG::has_fast_float32() {
    return true;
}

void NormalRNG::fill_fast_float32(dt_float32* dest, size_t size) {
    RNGxorshf gen{RandomState::generator()};
    for (size_t i = 0; i < size; ++i) {
        dest[i] = m_dist(gen);
    }
}

void ConstValue::fill_fast_float32(dt_float32* dest, size_t size) {
    for (size_t i = 0; i < size; ++i)
        dest[i] = value_;
}

dt_float32 UniformIntRNG::gen_single_val() {
    auto&& gen = RandomState::generator();
    return static_cast<dt_float32>(m_dist(gen));
}

dt_float32 UniformIntNonZeroRNG::gen_single_val() {
    auto&& gen = RandomState::generator();
    auto ret = UniformIntRNG::gen_single_val();
    if (m_dist_flip(gen)) {
        ret = -ret;
    }
    megdnn_assert(ret != 0);
    return ret;
}

dt_float32 UniformFloatRNG::gen_single_val() {
    auto&& gen = RandomState::generator();
    return m_dist(gen);
}

bool UniformFloatRNG::has_fast_float32() {
    return true;
}

void UniformFloatRNG::fill_fast_float32(dt_float32* dest, size_t size) {
    RNGxorshf gen{RandomState::generator()};
    auto k = double(m_dist.b() - m_dist.a()) /
             double(RNGxorshf::max() - RNGxorshf::min() + 1.0);
    auto b = m_dist.a() - RNGxorshf::min() * k;
    for (size_t i = 0; i < size; ++i) {
        dest[i] = gen() * k + b;
    }
}

dt_float32 UniformFloatNonZeroRNG::gen_single_val() {
    auto&& gen = RandomState::generator();
    auto ret = UniformFloatRNG::gen_single_val();
    if (m_dist_flip(gen)) {
        ret = -ret;
    }
    megdnn_assert(ret != 0);
    return ret;
}

void UniformFloatNonZeroRNG::fill_fast_float32(dt_float32* dest, size_t size) {
    RNGxorshf gen{RandomState::generator()};
    UniformFloatRNG::fill_fast_float32(dest, size);
    for (size_t i = 0; i < size; ++i) {
        if (m_dist_flip(gen)) {
            dest[i] = -dest[i];
        }
    }
}

void UniformFloatWithZeroRNG::fill_fast_float32(dt_float32* dest, size_t size) {
    RNGxorshf gen{RandomState::generator()};
    printf("a %f, b %f \n", m_dist.a(), m_dist.b());
    auto k = double(m_dist.b() - m_dist.a()) /
             double(RNGxorshf::max() - RNGxorshf::min() + 1.0);
    auto b = m_dist.a() - RNGxorshf::min() * k;
    auto p = 1.0 / double(RNGxorshf::max() - RNGxorshf::min() + 1.0);
    auto pb = 0.f - RNGxorshf::min() * p;
    for (size_t i = 0; i < size; ++i) {
        float rnd = gen() * p + pb;
        // printf("%.3f \n", rnd);
        if (rnd < zero_val_proportion_) {
            dest[i] = 0.f;
        } else {
            dest[i] = gen() * k + b;
        }
    }
}

BernoulliRNG::BernoulliRNG(float probability_) : m_dist(0, 1) {
    megdnn_assert(0.0f <= probability_ && probability_ < 1.0f);
    m_probability = probability_;
}

dt_float32 BernoulliRNG::gen_single_val() {
    auto&& gen = RandomState::generator();
    return m_dist(gen) < m_probability ? 1.0 : 0.0;
}

void NoReplacementRNG::gen(const TensorND& tensor) {
    auto offset = tensor.layout.span().low_elem;
    auto nr_elems = tensor.layout.span().dist_elem();
#define cb(DType)                                                      \
    if (tensor.layout.dtype == DType()) {                              \
        using ctype = typename DTypeTrait<DType>::ctype;               \
        std::set<ctype> values;                                        \
        auto ptr = tensor.ptr<ctype>();                                \
        for (size_t i = 0; i < nr_elems; ++i) {                        \
            ctype val;                                                 \
            do {                                                       \
                val = static_cast<ctype>(m_iid_rng->gen_single_val()); \
            } while (!values.insert(val).second);                      \
            ptr[offset + i] = val;                                     \
        }                                                              \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
#undef cb
}

InvertibleMatrixRNG::InvertibleMatrixRNG()
        : m_rng{new RNGxorshf{RandomState::generator()}} {}

InvertibleMatrixRNG::~InvertibleMatrixRNG() noexcept = default;

template <typename ctype>
void InvertibleMatrixRNG::do_gen(ctype* ptr, size_t batch, size_t n) {
    auto&& gen = *m_rng;
    std::vector<size_t> perm(n);
    for (size_t i = 0; i < n; ++i) {
        perm[i] = i;
    }
    for (size_t i = 0; i < batch; ++i, ptr += n * n) {
        for (size_t j = 0; j < n; ++j) {
            for (size_t k = 0; k < n; ++k) {
                ptr[j * n + k] = static_cast<ctype>(
                        gen() / (RNGxorshf::max() + 1.0) * 2 - 0.5);
            }
        }
        for (size_t i = 0; i < n; ++i) {
            auto idx = gen() % (n - i) + i;
            ptr[i * n + perm[idx]] +=
                    static_cast<ctype>(gen() / (RNGxorshf::max() + 1.0) + 3);
            std::swap(perm[idx], perm[i]);
        }
    }
}

void InvertibleMatrixRNG::gen(const TensorND& tensor) {
#define cb(DType)                                                  \
    if (tensor.layout.dtype == DType()) {                          \
        using ctype = typename DTypeTrait<DType>::ctype;           \
        auto ptr = tensor.ptr<ctype>();                            \
        megdnn_assert(tensor.layout.ndim >= 2 &&                   \
                      tensor.layout.is_physical_contiguous());     \
        size_t batch = 1;                                          \
        for (size_t i = 0; i < tensor.layout.ndim - 2; ++i) {      \
            batch *= tensor.layout[i];                             \
        }                                                          \
        size_t n = tensor.layout[tensor.layout.ndim - 1];          \
        megdnn_assert(n == tensor.layout[tensor.layout.ndim - 2]); \
        do_gen<ctype>(ptr, batch, n);                              \
        return;                                                    \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}

void ConsecutiveRNG::fill_fast_float32(dt_float32* dest, size_t size) {
    for (size_t i = 0; i < size; ++i)
        dest[i] = value_ + i * delta_;
}

TEST(RNG, NO_REPLACEMENT_RNG) {
    static const size_t N = 10, TIMES = 100;
    UniformIntRNG base_rng(0, N - 1);
    NoReplacementRNG rng(&base_rng);
    auto handle = create_cpu_handle(2, false);
    for (size_t t = 0; t < TIMES; ++t) {
        TensorLayout layout({N}, dtype::Float32());
        Tensor<> tensor(handle.get(), layout);
        rng.gen(tensor.tensornd());
        std::vector<float> vals;
        for (size_t i = 0; i < N; ++i)
            vals.push_back(tensor.ptr()[i]);
        std::sort(vals.begin(), vals.end());
        for (size_t i = 0; i < N; ++i)
            ASSERT_EQ(static_cast<float>(i), vals[i]);
    }
}

// vim: syntax=cpp.doxygen
