
misc.cpp

#include "megbrain/opr/misc.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"

#include <numeric>
#include <random>

using namespace mgb;

namespace {
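// Split \p shape at \p axis into A (product of leading dims), B (the axis
// itself) and C (product of trailing dims), so element (a, b, c) maps to the
// flat index (a * B + b) * C + c.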
void shape_abc(const TensorShape& shape, size_t axis, size_t& A, size_t& B, size_t& C) {
    auto acc_mul = [](const size_t* first, const size_t* last) {
        return std::accumulate(first, last, 1u, std::multiplies<size_t>());
    };
    A = acc_mul(shape.shape, shape.shape + axis);
    B = shape.shape[axis];
    C = acc_mul(shape.shape + axis + 1, shape.shape + shape.ndim);
}
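// Fill a 2-D tensor so that each row contains distinct values (strictly
// increasing before shuffling); this keeps argsort/top-k results and their
// numeric gradients well defined.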
void argsort_data_gen(HostTensorND& dest) {
    mgb_assert(dest.layout().ndim == 2 && dest.layout().is_contiguous());
    size_t m = dest.layout()[0], n = dest.layout()[1];
    auto ptr = dest.ptr<float>();
    RNGxorshf rng{next_rand_seed()};
    std::uniform_real_distribution<float> dist_base{-10.f, 10.f},
            dist_delta{0.1f, 1.2f};
    for (size_t i = 0; i < m; ++i) {
        auto v = dist_base(rng);
        for (size_t j = 0; j < n; ++j) {
            ptr[j] = v;
            v += dist_delta(rng);
        }
        std::shuffle(ptr, ptr + n, rng);
        ptr += n;
    }
}

}  // namespace
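// Argmax/Argmin along every axis, checked against a naive scan over the
// reduced axis; outputs are integer indices, so gradients are disabled.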
TEST(TestOprMisc, Argmxx) {
    auto run = [](bool is_max, int32_t axis, TensorShape sshape) {
        auto dshape = sshape;
        dshape.shape[axis] = 1;
        using Checker = AutoOprChecker<1, 1>;
        auto make_graph =
                [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
            if (is_max)
                return {opr::Argmax::make(inputs[0], {axis})};
            else
                return {opr::Argmin::make(inputs[0], {axis})};
        };
        auto better_than = [&](float curr, float best) {
            if (is_max)
                return curr > best;
            else
                return curr < best;
        };
        auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
            out[0].dtype(dtype::Int32()).resize(dshape);
            size_t A, B, C;
            shape_abc(sshape, axis, A, B, C);
            for (size_t a = 0; a < A; ++a)
                for (size_t c = 0; c < C; ++c) {
                    float best_val;
                    size_t best_arg = -1;
                    if (is_max)
                        best_val = std::numeric_limits<float>::lowest();
                    else
                        best_val = std::numeric_limits<float>::max();
                    for (size_t b = 0; b < B; ++b) {
                        float curr_val = inp[0]->ptr<float>()[(a * B + b) * C + c];
                        if (better_than(curr_val, best_val)) {
                            best_val = curr_val;
                            best_arg = b;
                        }
                    }
                    out[0].ptr<int>()[a * C + c] = best_arg;
                }
        };
        Checker{make_graph, fwd}
                .set_input_allow_grad(0, false)
                .set_output_allow_grad(0, false)
                .run({sshape})
                .run({sshape})
                .run({sshape});
    };
    run(true, 0, {5});
    run(true, 1, {2, 3, 4, 5});
    run(true, 2, {2, 3, 4, 5});
    run(true, 3, {2, 3, 4, 5});
    run(false, 0, {3, 4, 5});
    run(false, 1, {2, 3, 4, 5});
    run(false, 2, {2, 3, 4, 5});
    run(false, 3, {2, 3, 4, 5});
}
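// Argsort in both orders, checked against a per-row std::sort on
// (value, index) pairs; gradients are only checked for the value output.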
TEST(TestOprMisc, Argsort) {
    using Order = opr::Argsort::Param::Order;
    auto run = [](Order order) {
        using Checker = AutoOprChecker<1, 2>;
        auto make_graph =
                [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
            return opr::Argsort::make(inputs[0], order);
        };
        auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
            size_t m = inp[0]->shape()[0], n = inp[0]->shape()[1];
            auto pi = inp[0]->ptr<float>();
            auto poval = out[0].resize({m, n}).ptr<float>();
            auto poidx = out[1].resize({m, n}).ptr<int>();
            using KV = std::pair<float, int>;
            std::vector<KV> row(n);
            for (size_t i = 0; i < m; ++i) {
                for (size_t j = 0; j < n; ++j) {
                    row[j].first = pi[i * n + j];
                    row[j].second = j;
                }
                if (order == Order::ASCENDING) {
                    std::sort(row.begin(), row.end());
                } else {
                    std::sort(row.begin(), row.end(), std::greater<KV>{});
                }
                for (size_t j = 0; j < n; ++j) {
                    poval[i * n + j] = row[j].first;
                    poidx[i * n + j] = row[j].second;
                }
            }
        };
        Checker::RunOptions opt;
        opt.numdiff_eps = 0.045;
        Checker{make_graph, fwd}
                .set_input_generator(0, argsort_data_gen)
                .set_output_allow_grad(1, false)
                .run({TensorShape{1, 1}}, opt)
                .run({TensorShape{5, 3}}, opt)
                .run({TensorShape{10, 24}}, opt);
    };
    run(Order::ASCENDING);
    run(Order::DESCENDING);
}
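// Cumsum on 3-D inputs with every combination of the exclusive/reverse flags
// and both positive and negative axes, checked against a strided reference sum.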
TEST(TestOprMisc, Cumsum) {
    using Param = opr::Cumsum::Param;
    auto run = [](const Param& param) {
        using Checker = AutoOprChecker<1, 1>;
        auto make_graph =
                [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
            return {opr::Cumsum::make(inputs[0], param)};
        };
        auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
            out[0].resize(inp[0]->shape());
            auto pin = inp[0]->ptr<float>(), pout = out[0].ptr<float>();
            size_t A, B, C;
            int real_axis = param.axis;
            if (real_axis < 0)
                real_axis += 3;
            shape_abc(inp[0]->shape(), real_axis, A, B, C);
            ptrdiff_t stride = C;
            if (param.reverse)
                stride = -stride;
            for (size_t i = 0; i < A; ++i) {
                for (size_t k = 0; k < C; ++k) {
                    auto pi = pin + i * B * C + k, po = pout + i * B * C + k;
                    if (param.reverse) {
                        pi += (B - 1) * C;
                        po += (B - 1) * C;
                    }
                    if (param.exclusive) {
                        *po = 0;
                        po += stride;
                    }
                    float sum = 0;
                    for (size_t j = 0; j < B - 1; ++j) {
                        sum += pi[j * stride];
                        po[j * stride] = sum;
                    }
                    if (!param.exclusive) {
                        po[(B - 1) * stride] = sum + pi[(B - 1) * stride];
                    }
                }
            }
        };
        Checker{make_graph, fwd}
                .run({TensorShape{2, 3, 4}})
                .run({TensorShape{3, 1, 2}})
                .run({TensorShape{4, 2, 3}});
    };
    // test negative axis
    for (int32_t axis = -3; axis < 3; ++axis)
        for (int mask = 0; mask < 4; ++mask)
            run({axis, bool(mask >> 1), bool(mask & 1)});
}
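// CondTake with Mode::LT: keep data elements whose mask value is negative; the
// input coordinator forces at least one mask element to match so the output is
// never empty.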
TEST(TestOprMisc, CondTake) {
    using Param = opr::CondTake::Param;
    using Checker = AutoOprChecker<2, 1>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        return {opr::CondTake::make(inputs[0], inputs[1], {Param::Mode::LT})[0]};
    };
    auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
        std::vector<float> values;
        auto data = inp[0]->ptr<float>(), mask = inp[1]->ptr<float>();
        auto isize = inp[0]->shape().total_nr_elems();
        for (size_t i = 0; i < isize; ++i) {
            if (mask[i] < 0) {
                values.push_back(data[i]);
            }
        }
        out[0].resize({values.size()});
        memcpy(out[0].ptr<float>(), values.data(), sizeof(float) * values.size());
    };
    auto ensure_nonempty = [](Checker::NumInpArray inp) {
        auto mask = inp[1]->ptr<float>();
        auto isize = inp[1]->shape().total_nr_elems();
        for (size_t i = 0; i < isize; ++i) {
            if (mask[i] < 0)
                return;
        }
        mask[isize - 1] = -1;
    };
    auto mki = [](const TensorShape& shp) -> Checker::ShapeInpArray {
        return {shp, shp};
    };
    Checker{make_graph, fwd}
            .set_input_allow_grad(1, false)
            .set_input_coordinator(ensure_nonempty)
            .run(mki({2}))
            .run(mki({3, 5, 8}))
            .run(mki({100}));
}
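// CondTake on empty inputs, and with a condition (x == x + 1) that can never
// hold, must produce empty outputs for both values and indices.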
TEST(TestOprMisc, CondTakeEmptyIO) {
    using Param = opr::CondTake::Param;
    HostTensorGenerator<> gen;
    auto check = [&](const TensorShape& shp) {
        auto host_x = gen(shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto y = x + 1;
        auto out = opr::CondTake::make(x, y, {Param::Mode::EQ});
        HostTensorND host_out0, host_out1;
        auto func = graph->compile(
                {make_callback_copy(out[0], host_out0),
                 make_callback_copy(out[1], host_out1)});
        func->execute();
        ASSERT_EQ(TensorShape{0}, host_out0.shape());
        ASSERT_EQ(TensorShape{0}, host_out1.shape());
    };
    check({1});
    check({0});
    check({1, 0});
}
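// TopK in KTH_ONLY mode with static/dynamic k and contiguous/non-contiguous
// input, checked against the naive megdnn::TopK implementation; in the
// non-contiguous case the Subtensor output must alias the original buffer
// (prev_dev_ptr comparison), i.e. no copy is made.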
TEST(TestOprMisc, TopKValueOnly) {
    auto run = [](bool dyn_k, bool non_contig) {
        using Checker = AutoOprChecker<1, 1>;
        std::shared_ptr<HostTensorND> host_k;
        SymbolVar var_x0, var_x1;
        auto make_graph =
                [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
            auto k = opr::Host2DeviceCopy::make(
                    *inputs[0].node()->owner_graph(), host_k);
            if (dyn_k) {
                k = opr::MarkDynamicVar::make(k);
            }
            auto x = inputs[0];
            if (non_contig) {
                var_x0 = x;
                x = opr::Subtensor::make(
                        x, {opr::Subtensor::AxisIndexer::make_interval(
                                   1, None, opr::GetVarShape::make(x, 1) / 2, None)});
                var_x1 = x;
            }
            auto outs = opr::TopK::make(x, k, opr::TopK::Param::Mode::KTH_ONLY);
            return {outs[0]};
        };
        auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
            auto opr = megdnn_naive_handle()->create_operator<megdnn::TopK>();
            int k = host_k->ptr<int>()[0];
            HostTensorND x = *inp[0];
            if (non_contig) {
                auto layout = x.layout();
                layout.shape[1] /= 2;
                x = x.sub(SubTensorSpec::make_from_layout(layout));
            }
            TensorLayout outl0, outl1;
            opr->deduce_layout(k, x.layout(), outl0, outl1);
            size_t wk_size = opr->get_workspace_in_bytes(k, x.layout(), outl0, outl1);
            std::unique_ptr<dt_byte[]> wk_store{new dt_byte[wk_size]};
            opr->exec(
                    k, x.as_megdnn(), out[0].resize(outl0).as_megdnn(), {},
                    {wk_store.get(), wk_size});
        };
        Checker checker{make_graph, fwd};
        checker.set_input_generator(0, argsort_data_gen);
        host_k = std::make_shared<HostTensorND>(
                checker.comp_node(), TensorShape{1}, dtype::Int32{});
        host_k->ptr<int>()[0] = 1;
        Checker::RunOptions opt;
        opt.numdiff_eps = 0.047;
        auto invoke = [&](int k, size_t m, size_t n) {
            host_k->ptr<int>()[0] = k;
            checker.run({TensorShape{m, n}}, opt);
        };
        if (!non_contig) {
            invoke(1, 1, 1);
        }
        invoke(-2, 3, 2);
        invoke(-1, 4, 5);
        invoke(3, 10, 33);
        invoke(-8, 23, 35);
        if (non_contig) {
            ASSERT_EQ(prev_dev_ptr(var_x0), prev_dev_ptr(var_x1));
        }
    };
    for (auto i : {false, true}) {
        for (auto j : {false, true}) {
            run(i, j);
        }
    }
}
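// TopK in VALUE_IDX_SORTED mode, checked against the naive megdnn::TopK
// implementation; gradients are only checked for the value output.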
TEST(TestOprMisc, TopKSorted) {
    using Checker = AutoOprChecker<1, 2>;
    std::shared_ptr<HostTensorND> host_k;
    auto constexpr mode = opr::TopK::Param::Mode::VALUE_IDX_SORTED;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto k = opr::Host2DeviceCopy::make(*inputs[0].node()->owner_graph(), host_k);
        auto x = inputs[0];
        return opr::TopK::make(x, k, mode);
    };
    auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
        auto opr = megdnn_naive_handle()->create_operator<megdnn::TopK>();
        opr->param().mode = mode;
        int k = host_k->ptr<int>()[0];
        TensorLayout outl0, outl1;
        opr->deduce_layout(k, inp[0]->layout(), outl0, outl1);
        size_t wk_size = opr->get_workspace_in_bytes(k, inp[0]->layout(), outl0, outl1);
        std::unique_ptr<dt_byte[]> wk_store{new dt_byte[wk_size]};
        opr->exec(
                k, inp[0]->as_megdnn(), out[0].resize(outl0).as_megdnn(),
                out[1].resize(outl1).as_megdnn(), {wk_store.get(), wk_size});
    };
    Checker checker{make_graph, fwd};
    checker.set_input_generator(0, argsort_data_gen).set_output_allow_grad(1, false);
    host_k = std::make_shared<HostTensorND>(
            checker.comp_node(), TensorShape{1}, dtype::Int32{});
    host_k->ptr<int>()[0] = 1;
    Checker::RunOptions opt;
    opt.numdiff_eps = 0.047;
    auto invoke = [&](int k, size_t m, size_t n) {
        host_k->ptr<int>()[0] = k;
        checker.run({TensorShape{m, n}}, opt);
    };
    invoke(1, 1, 1);
    invoke(-1, 3, 5);
    invoke(5, 13, 23);
    invoke(-8, 35, 4);
}
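// A gradient flowing only through the index output of TopK must be all zeros.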
TEST(TestOprMisc, TopKSortedIdxOnly) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    std::shared_ptr<HostTensorND> host_x = gen({2, 5});
    std::shared_ptr<HostTensorND> host_y = gen({2, 5});
    for (size_t i = 0; i < 10; ++i) {
        host_y->ptr<float>()[i] = 0.0f;
    }
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx = opr::TopK::make(
                 x, x.make_scalar(3), opr::TopK::Param::Mode::VALUE_IDX_SORTED)[1],
         y = opr::TypeCvt::make(idx, dtype::Float32{}),
         gx = cg::grad(opr::reduce_sum(y, y.make_scalar(1)), x);
    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(host_gx, *host_y);
}
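// There is no gradient of TopK w.r.t. the k input; cg::grad with
// return_zero_grad disabled is expected to return a null var.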
TEST(TestOprMisc, TopKGrad) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    std::shared_ptr<HostTensorND> host_x = gen({2, 5});
    std::shared_ptr<HostTensorND> host_k = gen({1});
    host_k->ptr<float>()[0] = 3;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         k = opr::Host2DeviceCopy::make(*graph, host_k),
         ki = opr::TypeCvt::make(k, dtype::Int32{}),
         val = opr::TopK::make(x, ki, opr::TopK::Param::Mode::VALUE_IDX_SORTED)[0],
         gk = cg::grad(opr::reduce_sum(val, val.make_scalar(1)), ki, true, false);
    EXPECT_TRUE(gk == nullptr);
}
  384. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}