You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

rng.cpp 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. /**
  2. * \file dnn/test/common/rng.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/common/rng.h"
  12. #include <gtest/gtest.h>
  13. #include "test/common/random_state.h"
  14. #include "test/common/tensor.h"
  15. using namespace megdnn;
  16. using namespace test;
  17. /*!
  18. * \brief xorshift+ RNG, which is very fast
  19. *
  20. * see https://en.wikipedia.org/wiki/Xorshift#xorshift.2B
  21. */
  22. class RNG::RNGxorshf {
  23. uint64_t s[2];
  24. public:
  25. using result_type = uint64_t;
  26. #ifdef WIN32
  27. static uint64_t min() { return 0; }
  28. static uint64_t max() { return std::numeric_limits<uint64_t>::max(); }
  29. #else
  30. static constexpr uint64_t min() { return 0; }
  31. static constexpr uint64_t max() { return std::numeric_limits<uint64_t>::max(); }
  32. #endif
  33. template <typename T>
  34. explicit RNGxorshf(T&& gen) {
  35. s[0] = gen();
  36. s[1] = gen();
  37. }
  38. uint64_t operator()() {
  39. uint64_t x = s[0];
  40. uint64_t const y = s[1];
  41. s[0] = y;
  42. x ^= x << 23; // a
  43. s[1] = x ^ y ^ (x >> 17) ^ (y >> 26); // b, c
  44. return s[1] + y;
  45. }
  46. };
  47. Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) {
  48. for (size_t x = 0; x < (1u << 16); ++x) {
  49. size_t exponent = (x >> 10) & 0x1F;
  50. if (exponent == 0x1F) {
  51. // +inf, -inf, NaN
  52. continue;
  53. }
  54. union U {
  55. U() {}
  56. uint16_t i;
  57. dt_float16 f;
  58. } i2f;
  59. i2f.i = static_cast<uint16_t>(x);
  60. m_sequence.push_back(i2f.f);
  61. }
  62. COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
  63. }
  64. Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
  65. union U {
  66. U() {}
  67. uint16_t i;
  68. dt_float16 f;
  69. } i2f;
  70. size_t x = 0;
  71. i2f.i = static_cast<uint16_t>(x);
  72. for (size_t i = 0; i < range; i++) {
  73. x += 1;
  74. i2f.i = static_cast<uint16_t>(x);
  75. m_sequence.push_back(i2f.f);
  76. }
  77. x = 1u << 15;
  78. i2f.i = static_cast<uint16_t>(x);
  79. for (size_t i = 0; i < range; i++) {
  80. x += 1;
  81. i2f.i = static_cast<uint16_t>(x);
  82. m_sequence.push_back(i2f.f);
  83. }
  84. COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
  85. }
  86. void Float16PeriodicalRNG::gen(const TensorND& tensor) {
  87. megdnn_assert(tensor.layout.dtype == dtype::Float16());
  88. size_t nr_elems = tensor.layout.span().dist_elem();
  89. auto offset = tensor.layout.span().low_elem;
  90. for (size_t i = 0; i < nr_elems; ++i) {
  91. tensor.ptr<dt_float16>()[offset + i] = get_single_val();
  92. }
  93. }
  94. dt_float16 Float16PeriodicalRNG::get_single_val() {
  95. if (m_offset >= m_sequence.size()) {
  96. m_offset = 0;
  97. }
  98. return m_sequence[m_offset++];
  99. }
/*!
 * \brief fill \p tensor with i.i.d. samples obtained from gen_single_val()
 *
 * Dispatch order: fast contiguous-float32 path, plain computing dtypes,
 * quantized dtypes (quantizing a float sample when output_is_float()),
 * packed 4-bit dtypes (two values per byte), then special cases:
 * Byte tensors are zero-filled and Uint16 tensors are left untouched.
 * Any other dtype triggers an assertion failure.
 *
 * NOTE: comments are kept outside the cb() macros — a `//` comment would
 * interact with the line-continuation backslashes and break the macro.
 */
void IIDRNG::gen(const TensorND& tensor) {
    // vectorized path, only valid for physically contiguous float32
    if (tensor.layout.dtype == dtype::Float32() && has_fast_float32() &&
        tensor.layout.is_physical_contiguous()) {
        fill_fast_float32(tensor.ptr<dt_float32>(), tensor.layout.total_nr_elems());
        return;
    }
    auto offset = tensor.layout.span().low_elem;
    auto nr_elems = tensor.layout.span().dist_elem();
    // plain computing dtypes: each sample is cast to the storage ctype
#define cb(DType)                                                   \
    if (tensor.layout.dtype == DType()) {                           \
        using ctype = typename DTypeTrait<DType>::ctype;            \
        auto ptr = tensor.ptr<ctype>();                             \
        for (size_t i = 0; i < nr_elems; ++i) {                     \
            ptr[offset + i] = static_cast<ctype>(gen_single_val()); \
        }                                                           \
        return;                                                     \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
#undef cb
    // quantized dtypes: either quantize a float sample (output_is_float())
    // or reinterpret the raw sample as the quantized storage type
#define cb(DType)                                                              \
    if (tensor.layout.dtype.enumv() == DTypeTrait<DType>::enumv) {             \
        using ctype = typename DTypeTrait<DType>::ctype;                       \
        auto ptr = tensor.ptr<ctype>();                                        \
        if (output_is_float()) {                                               \
            for (size_t i = 0; i < nr_elems; ++i) {                            \
                ptr[offset + i] = tensor.layout.dtype.param<DType>().quantize( \
                        static_cast<float>(gen_single_val()));                 \
            }                                                                  \
        } else {                                                               \
            for (size_t i = 0; i < nr_elems; ++i) {                            \
                ptr[offset + i] = static_cast<ctype>(gen_single_val());        \
            }                                                                  \
        }                                                                      \
        return;                                                                \
    }
    MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
    //! In order to avoid an unnecessary increase in binary size, we just
    //! use QuantizedS16 dtype in winograd_filter_preprocess now.
    cb(::megdnn::dtype::QuantizedS16) cb(::megdnn::dtype::QuantizedS1)
#undef cb
    // Quantized4Asymm: two unsigned 4-bit values per byte
    // (val0 in the low nibble, val1 in the high nibble)
    if (tensor.layout.dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        auto ptr = static_cast<uint8_t*>(tensor.raw_ptr());
        if (output_is_float()) {
            for (size_t i = 0; i < nr_elems; i += 2) {
                uint8_t val0 = tensor.layout.dtype.param<dt_quint4>()
                                       .quantize(static_cast<float>(gen_single_val()))
                                       .as_uint8();
                uint8_t val1 = tensor.layout.dtype.param<dt_quint4>()
                                       .quantize(static_cast<float>(gen_single_val()))
                                       .as_uint8();
                ptr[(offset + i) / 2] = (val1 << 4) | val0;
            }
        } else {
            for (size_t i = 0; i < nr_elems; i += 2) {
                uint8_t val0 = static_cast<uint8_t>(gen_single_val());
                uint8_t val1 = static_cast<uint8_t>(gen_single_val());
                ptr[(offset + i) / 2] = (val1 << 4) | val0;
            }
        }
        return;
    }
    // QuantizedS4: two signed 4-bit values per byte; the non-float branch
    // clamps the raw samples into the representable range
    if (tensor.layout.dtype.enumv() == DTypeEnum::QuantizedS4) {
        auto ptr = static_cast<int8_t*>(tensor.raw_ptr());
        if (output_is_float()) {
            for (size_t i = 0; i < nr_elems; i += 2) {
                int8_t val0 = tensor.layout.dtype.param<dt_qint4>()
                                      .quantize(static_cast<float>(gen_single_val()))
                                      .as_int8();
                int8_t val1 = tensor.layout.dtype.param<dt_qint4>()
                                      .quantize(static_cast<float>(gen_single_val()))
                                      .as_int8();
                ptr[(offset + i) / 2] = (val0 & 0xF) | (val1 << 4);
            }
        } else {
            for (size_t i = 0; i < nr_elems; i += 2) {
                int8_t val0 = static_cast<int8_t>(gen_single_val());
                int8_t val1 = static_cast<int8_t>(gen_single_val());
                val0 = std::min(val0, DTypeTrait<dtype::QuantizedS4>::max());
                val0 = std::max(val0, DTypeTrait<dtype::QuantizedS4>::min());
                val1 = std::min(val1, DTypeTrait<dtype::QuantizedS4>::max());
                val1 = std::max(val1, DTypeTrait<dtype::QuantizedS4>::min());
                ptr[(offset + i) / 2] = (val0 & 0xF) | (val1 << 4);
            }
        }
        return;
    }
    // Byte tensors are simply zero-filled
    if (tensor.layout.dtype.enumv() == DTypeEnum::Byte) {
        memset(tensor.raw_ptr(), 0, tensor.layout.access_bytes());
        return;
    }
    // Uint16 tensors are intentionally left untouched
    if (tensor.layout.dtype.enumv() == DTypeEnum::Uint16) {
        return;
    }
    megdnn_assert(
            0, "IIDRNG does not know how to generate value for DType %s",
            tensor.layout.dtype.name());
}
//! base implementation: no vectorized float32 path; subclasses opt in
bool IIDRNG::has_fast_float32() {
    return false;
}
//! must be overridden by subclasses whose has_fast_float32() returns true;
//! reaching this base implementation is a programming error
void IIDRNG::fill_fast_float32(dt_float32*, size_t) {
    megdnn_assert(0);
}
  203. dt_float32 NormalRNG::gen_single_val() {
  204. auto&& gen = RandomState::generator();
  205. return m_dist(gen);
  206. }
//! enable the vectorized float32 path (see fill_fast_float32 below)
bool NormalRNG::has_fast_float32() {
    return true;
}
  210. void NormalRNG::fill_fast_float32(dt_float32* dest, size_t size) {
  211. RNGxorshf gen{RandomState::generator()};
  212. for (size_t i = 0; i < size; ++i) {
  213. dest[i] = m_dist(gen);
  214. }
  215. }
  216. void ConstValue::fill_fast_float32(dt_float32* dest, size_t size) {
  217. for (size_t i = 0; i < size; ++i)
  218. dest[i] = value_;
  219. }
  220. dt_float32 UniformIntRNG::gen_single_val() {
  221. auto&& gen = RandomState::generator();
  222. return static_cast<dt_float32>(m_dist(gen));
  223. }
  224. dt_float32 UniformIntNonZeroRNG::gen_single_val() {
  225. auto&& gen = RandomState::generator();
  226. auto ret = UniformIntRNG::gen_single_val();
  227. if (m_dist_flip(gen)) {
  228. ret = -ret;
  229. }
  230. megdnn_assert(ret != 0);
  231. return ret;
  232. }
  233. dt_float32 UniformFloatRNG::gen_single_val() {
  234. auto&& gen = RandomState::generator();
  235. return m_dist(gen);
  236. }
//! enable the vectorized float32 path (see fill_fast_float32 below)
bool UniformFloatRNG::has_fast_float32() {
    return true;
}
  240. void UniformFloatRNG::fill_fast_float32(dt_float32* dest, size_t size) {
  241. RNGxorshf gen{RandomState::generator()};
  242. auto k = double(m_dist.b() - m_dist.a()) /
  243. double(RNGxorshf::max() - RNGxorshf::min() + 1.0);
  244. auto b = m_dist.a() - RNGxorshf::min() * k;
  245. for (size_t i = 0; i < size; ++i) {
  246. dest[i] = gen() * k + b;
  247. }
  248. }
  249. dt_float32 UniformFloatNonZeroRNG::gen_single_val() {
  250. auto&& gen = RandomState::generator();
  251. auto ret = UniformFloatRNG::gen_single_val();
  252. if (m_dist_flip(gen)) {
  253. ret = -ret;
  254. }
  255. megdnn_assert(ret != 0);
  256. return ret;
  257. }
  258. void UniformFloatNonZeroRNG::fill_fast_float32(dt_float32* dest, size_t size) {
  259. RNGxorshf gen{RandomState::generator()};
  260. UniformFloatRNG::fill_fast_float32(dest, size);
  261. for (size_t i = 0; i < size; ++i) {
  262. if (m_dist_flip(gen)) {
  263. dest[i] = -dest[i];
  264. }
  265. }
  266. }
  267. void UniformFloatWithValueRNG::fill_fast_float32(dt_float32* dest, size_t size) {
  268. RNGxorshf gen{RandomState::generator()};
  269. auto k = double(m_dist.b() - m_dist.a()) /
  270. double(RNGxorshf::max() - RNGxorshf::min() + 1.0);
  271. auto b = m_dist.a() - RNGxorshf::min() * k;
  272. auto p = 1.0 / double(RNGxorshf::max() - RNGxorshf::min() + 1.0);
  273. auto pb = 0.f - RNGxorshf::min() * p;
  274. for (size_t i = 0; i < size; ++i) {
  275. float rnd = gen() * p + pb;
  276. if (rnd < val_proportion_) {
  277. dest[i] = val_;
  278. } else {
  279. dest[i] = gen() * k + b;
  280. }
  281. }
  282. }
  283. BernoulliRNG::BernoulliRNG(float probability_) : m_dist(0, 1) {
  284. megdnn_assert(0.0f <= probability_ && probability_ < 1.0f);
  285. m_probability = probability_;
  286. }
  287. dt_float32 BernoulliRNG::gen_single_val() {
  288. auto&& gen = RandomState::generator();
  289. return m_dist(gen) < m_probability ? 1.0 : 0.0;
  290. }
/*!
 * \brief fill \p tensor with pairwise-distinct values from the wrapped RNG
 *
 * Rejection sampling: each candidate from m_iid_rng is redrawn until it has
 * not been seen before (membership tracked in a std::set).
 * NOTE(review): this loops forever if the underlying RNG cannot produce
 * at least dist_elem() distinct values — callers must size the range
 * accordingly (as the test below does). Comments are kept outside the cb()
 * macro because of the line-continuation backslashes.
 */
void NoReplacementRNG::gen(const TensorND& tensor) {
    auto offset = tensor.layout.span().low_elem;
    auto nr_elems = tensor.layout.span().dist_elem();
#define cb(DType)                                                      \
    if (tensor.layout.dtype == DType()) {                              \
        using ctype = typename DTypeTrait<DType>::ctype;               \
        std::set<ctype> values;                                        \
        auto ptr = tensor.ptr<ctype>();                                \
        for (size_t i = 0; i < nr_elems; ++i) {                        \
            ctype val;                                                 \
            do {                                                       \
                val = static_cast<ctype>(m_iid_rng->gen_single_val()); \
            } while (!values.insert(val).second);                      \
            ptr[offset + i] = val;                                     \
        }                                                              \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
#undef cb
}
//! seed a heap-allocated xorshift+ generator from the global RNG state
InvertibleMatrixRNG::InvertibleMatrixRNG()
        : m_rng{new RNGxorshf{RandomState::generator()}} {}
//! defined out-of-line so the smart-pointer deleter sees the complete
//! RNGxorshf type here — presumably m_rng is a unique_ptr whose pointee
//! is only forward-declared in the header; TODO confirm against rng.h
InvertibleMatrixRNG::~InvertibleMatrixRNG() noexcept = default;
  313. template <typename ctype>
  314. void InvertibleMatrixRNG::do_gen(ctype* ptr, size_t batch, size_t n) {
  315. auto&& gen = *m_rng;
  316. std::vector<size_t> perm(n);
  317. for (size_t i = 0; i < n; ++i) {
  318. perm[i] = i;
  319. }
  320. for (size_t i = 0; i < batch; ++i, ptr += n * n) {
  321. for (size_t j = 0; j < n; ++j) {
  322. for (size_t k = 0; k < n; ++k) {
  323. ptr[j * n + k] =
  324. static_cast<ctype>(gen() / (RNGxorshf::max() + 1.0) * 2 - 0.5);
  325. }
  326. }
  327. for (size_t i = 0; i < n; ++i) {
  328. auto idx = gen() % (n - i) + i;
  329. ptr[i * n + perm[idx]] +=
  330. static_cast<ctype>(gen() / (RNGxorshf::max() + 1.0) + 3);
  331. std::swap(perm[idx], perm[i]);
  332. }
  333. }
  334. }
/*!
 * \brief generate batches of random invertible n x n matrices
 *
 * Requires a physically contiguous tensor with ndim >= 2 whose last two
 * dimensions are equal (n x n); all leading dimensions are folded into the
 * batch count. Only float computing dtypes are dispatched; other dtypes
 * fall through without filling the tensor.
 */
void InvertibleMatrixRNG::gen(const TensorND& tensor) {
#define cb(DType)                                                                   \
    if (tensor.layout.dtype == DType()) {                                           \
        using ctype = typename DTypeTrait<DType>::ctype;                            \
        auto ptr = tensor.ptr<ctype>();                                             \
        megdnn_assert(                                                              \
                tensor.layout.ndim >= 2 && tensor.layout.is_physical_contiguous()); \
        size_t batch = 1;                                                           \
        for (size_t i = 0; i < tensor.layout.ndim - 2; ++i) {                       \
            batch *= tensor.layout[i];                                              \
        }                                                                           \
        size_t n = tensor.layout[tensor.layout.ndim - 1];                           \
        megdnn_assert(n == tensor.layout[tensor.layout.ndim - 2]);                  \
        do_gen<ctype>(ptr, batch, n);                                               \
        return;                                                                     \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb)
#undef cb
}
  354. void ConsecutiveRNG::fill_fast_float32(dt_float32* dest, size_t size) {
  355. for (size_t i = 0; i < size; ++i)
  356. dest[i] = value_ + i * delta_;
  357. }
  358. TEST(RNG, NO_REPLACEMENT_RNG) {
  359. static const size_t N = 10, TIMES = 100;
  360. UniformIntRNG base_rng(0, N - 1);
  361. NoReplacementRNG rng(&base_rng);
  362. auto handle = create_cpu_handle(2, false);
  363. for (size_t t = 0; t < TIMES; ++t) {
  364. TensorLayout layout({N}, dtype::Float32());
  365. Tensor<> tensor(handle.get(), layout);
  366. rng.gen(tensor.tensornd());
  367. std::vector<float> vals;
  368. for (size_t i = 0; i < N; ++i)
  369. vals.push_back(tensor.ptr()[i]);
  370. std::sort(vals.begin(), vals.end());
  371. for (size_t i = 0; i < N; ++i)
  372. ASSERT_EQ(static_cast<float>(i), vals[i]);
  373. }
  374. }
  375. // vim: syntax=cpp.doxygen