/**
 * \file src/opr/test/tensor_manip.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/misc.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/utility.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megbrain/utils/arith_helper.h"

using namespace mgb;
using namespace opr;

TEST(TestTensorManip, GetVarShape) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 1}), host_y = gen({1, 2});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z0 = opr::GetVarShape::make({x, y, x.make_scalar(5)}),
         z1 = opr::GetVarShape::make({x, y}, 1);
    // ensure scalar is removed
    ASSERT_EQ(2u, z0.node()->owner_opr()->input().size());
    constexpr auto tdt = cg::OperatorNodeBase::NodeProp::DepType::SHAPE;
    auto&& dt = z0.node()->owner_opr()->node_prop().dep_map();
    ASSERT_EQ(2u, dt.size());
    ASSERT_EQ(tdt, dt.at(x.node()));
    ASSERT_EQ(tdt, dt.at(y.node()));
    auto as_shp = [](const HostTensorND& hv) {
        mgb_assert(hv.dtype() == dtype::Int32());
        mgb_assert(hv.shape().ndim == 1);
        TensorShape ret;
        ret.ndim = hv.shape()[0];
        auto p = hv.ptr<int>();
        for (size_t i = 0; i < ret.ndim; ++i)
            ret[i] = p[i];
        return ret;
    };
    HostTensorND host_z0, host_z1;
    auto func = graph->compile(
            {make_callback_copy(z0, host_z0), make_callback_copy(z1, host_z1)});
    func->execute();
    ASSERT_EQ(TensorShape({3, 2}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({2}), as_shp(host_z1));
    *host_x = *gen({5, 1, 6});
    *host_y = *gen({1, 8, 1});
    func->execute();
    ASSERT_EQ(TensorShape({5, 8, 6}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({8}), as_shp(host_z1));
}
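
// GetVarShape of Reshape(x, tshp) should be bypassed to tshp itself instead
// of creating a new shape operator.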
TEST(TestTensorManip, GetVarShapeBypass) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, gen({3, 2})),
         t = opr::Host2DeviceCopy::make(*graph, gen({2, 3})),
         tshp = opr::GetVarShape::make(t),
         y = opr::GetVarShape::make(opr::Reshape::make(x, tshp));
    ASSERT_EQ(tshp, y);
}

TEST(TestTensorManip, GetVarShapeNegativeAxis) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1, 3}), host_y = gen({2, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z0 = opr::GetVarShape::make({x, y}, -1),
         z1 = opr::GetVarShape::make({x, y}, -2);
    // check shape dep types
    ASSERT_EQ(2u, z0.node()->owner_opr()->input().size());
    constexpr auto tdt = cg::OperatorNodeBase::NodeProp::DepType::SHAPE;
    auto&& dt = z0.node()->owner_opr()->node_prop().dep_map();
    ASSERT_EQ(2u, dt.size());
    ASSERT_EQ(tdt, dt.at(x.node()));
    ASSERT_EQ(tdt, dt.at(y.node()));
    auto as_shp = [](const HostTensorND& hv) {
        mgb_assert(hv.dtype() == dtype::Int32());
        mgb_assert(hv.shape().ndim == 1);
        TensorShape ret;
        ret.ndim = hv.shape()[0];
        auto p = hv.ptr<int>();
        for (size_t i = 0; i < ret.ndim; ++i)
            ret[i] = p[i];
        return ret;
    };
    HostTensorND host_z0, host_z1;
    auto func = graph->compile(
            {make_callback_copy(z0, host_z0), make_callback_copy(z1, host_z1)});
    func->execute();
    ASSERT_EQ(TensorShape({3}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({2}), as_shp(host_z1));
    *host_x = *gen({5, 1, 6});
    *host_y = *gen({1, 8, 1});
    func->execute();
    ASSERT_EQ(TensorShape({6}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({8}), as_shp(host_z1));
}

TEST(TestTensorManip, Reshape) {
    constexpr size_t N = 123, C = 456;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({N * C}), host_opr1 = gen({N, C});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"}),
              opr1 = opr::Host2DeviceCopy::make(*graph, host_opr1, {"opr1"}),
              opr0_reshp = opr::Reshape::make(opr0, opr::GetVarShape::make(opr1)),
              sum = opr::add(opr0_reshp, opr1);
    {
        // check dep type
        auto op = opr0_reshp.node()->owner_opr();
        auto&& dep_map = opr0_reshp.node()->owner_opr()->node_prop().dep_map();
        using DT = cg::OperatorNodeBase::NodeProp::DepType;
        ASSERT_EQ(2u, dep_map.size());
        ASSERT_EQ(DT::DEV_VALUE | DT::VALUE_ALLOW_EMPTY, dep_map.at(op->input(0)));
        ASSERT_EQ(DT::HOST_VALUE, dep_map.at(op->input(1)));
    }
    HostTensorND host_sum;
    auto func = graph->compile({make_callback_copy(sum, host_sum)});
    func->execute();
    ASSERT_TRUE(cg::is_static_var_storage(opr0_reshp.node()));
    ASSERT_FALSE(host_sum.layout().eq_layout(host_opr0->layout()));
    ASSERT_TRUE(host_sum.layout().eq_layout(host_opr1->layout()));
    ASSERT_EQ(dev_ptr(opr0), dev_ptr(opr0_reshp));
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
        MGB_ASSERT_FLOAT_EQ(o0[i] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zd", o0[i], o1[i], i);
    }
}

TEST(TestTensorManip, ReshapeNoncontigValueInfer) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::ImmutableTensor::make(*graph, *host_x), y = x.broadcast({2, 2}),
         z = opr::Reshape::make(y, {1, 0}, 1);
    auto&& mgr = graph->static_infer_manager();
    ASSERT_EQ(cg::static_infer::InferType::CONST, mgr.get_infer_type(z.node()).value);
    auto zv = mgr.infer_value(z.node());
    auto xp = host_x->ptr<float>(), zp = zv.ptr<float>();
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j) {
            ASSERT_EQ(xp[i], zp[i * 2 + j]);
        }
    }
    ASSERT_THROW(opr::Reshape::make(y, {3, 0}, 1), TensorReshapeError);
    ASSERT_THROW(opr::Reshape::make(y, {3, 2}), TensorReshapeError);
}

TEST(TestTensorManip, ReshapeSameShapeBypass) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x), x1 = x.reshape({6}),
         x2 = x1.reshape({6}), x3 = x.reshape(opr::GetVarShape::make(x));
    ASSERT_EQ(x1.node(), x2.node());
    ASSERT_EQ(x.node(), x3.node());
    ASSERT_NE(x.node(), x1.node());
}
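
// Reshape of a contiguous var forwards device memory without a copy, so the
// following elemwise sum computes in place on the reshaped buffer.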
TEST(TestTensorManip, ReshapeAndInplace) {
    constexpr size_t C = 456;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({C}), host_opr1 = gen({C / 2, 2});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_opr0),
              opr1 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_opr1),
              reshape = opr::Reshape::make(opr0, TensorShape{C / 2, 2}),
              sum = reshape + opr1;
    opr1.node()->add_flag(cg::VarNode::Flag::NO_MEM_RECLAIM);
    HostTensorND host_sum(CompNode::load("xpu0"));
    auto func = graph->compile({make_callback_copy(sum, host_sum)});
    func->execute();
    ASSERT_EQ(dev_ptr(reshape), dev_ptr(sum));
    // assert contiguous layout
    ASSERT_EQ(host_opr1->layout(), host_sum.layout());
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.sync().ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; ++i) {
        MGB_ASSERT_FLOAT_EQ(o0[i] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zd", o0[i], o1[i], i);
    }
}
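
// Reshape to a dynamically computed target shape: the reshaped var loses its
// static shape, but the gradient w.r.t. x still has x's static shape.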
TEST(TestTensorManip, DynamicReshape) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 4}),
         host_tshp = std::make_shared<HostTensorND>(
                 host_x->comp_node(), dtype::Int32());
    host_tshp->resize({1}).ptr<int>()[0] = 12;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
         x_rshp_shp = opr::MarkDynamicVar::make(
                 opr::Host2DeviceCopy::make(*graph, host_tshp).rename("x_rshp_shp")),
         x_rshp = opr::Reshape::make(x, x_rshp_shp).rename("x_rshp"),
         x_flat = x_rshp.flatten(),
         gx = cg::grad(opr::Dot::make(x_flat, x_flat).rename("loss"), x).rename("gx");
    ASSERT_FALSE(cg::is_static_var_shape(x_rshp.node()));
    ASSERT_TRUE(cg::is_static_var_shape(gx.node()));
    ASSERT_EQ(host_x->shape(), gx.node()->shape());
    HostTensorND host_rshp, host_gx;
    auto func = graph->compile(
            {make_callback_copy(x_rshp, host_rshp), make_callback_copy(gx, host_gx)});
    auto check = [&](const TensorShape& ishp, const TensorShape& tshp) {
        host_x->copy_from(*gen(ishp));
        {
            DeviceTensorND tmp;
            cg::copy_shape_to_tensor_value(tmp, tshp);
            host_tshp->copy_from(tmp);
        }
        func->execute();
        ASSERT_EQ(tshp, host_rshp.shape());
        ASSERT_EQ(host_x->shape(), host_gx.shape());
        for (size_t i = 0, it = host_x->shape().total_nr_elems(); i < it; ++i)
            MGB_ASSERT_FLOAT_EQ(host_x->ptr<float>()[i] * 2, host_gx.ptr<float>()[i]);
    };
    check({3, 4}, {12});
    check({5, 3}, {15});
    check({3, 4, 35}, {21, 20});
}

TEST(TestTensorManip, ReshapeWithUnspec) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 8});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Reshape::make(x, {1, 8}, 0);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    for (size_t ishp : {1, 5, 6}) {
        host_x->copy_from(*gen({ishp * 8}));
        func->execute();
        TensorShape expect_shape({ishp, 8});
        ASSERT_EQ(expect_shape, host_y.shape());
        MGB_ASSERT_TENSOR_EQ(
                host_x->sub(SubTensorSpec::make_from_layout(
                        host_x->layout().reshape(expect_shape))),
                host_y);
    }
}
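
// Even when the input var itself is dynamic, the output shape is statically
// inferred as long as the target shape can be statically inferred.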
TEST(TestTensorManip, ReshapeInferShapeForDynamicInput) {
    constexpr size_t N0 = 2, C0 = 3;
    HostTensorGenerator<> gen;
    auto host_x = gen({N0, C0}), host_tshp = gen({1});
    auto graph = ComputingGraph::make();
    host_tshp->ptr<float>()[0] = N0 * C0;
    SymbolVar x = opr::Host2DeviceCopy::make(*graph, host_x),
              xd = opr::MarkDynamicVar::make(x),
              tshp = opr::Host2DeviceCopy::make(*graph, host_tshp),
              y0 = opr::Reshape::make(xd, tshp) + 1,
              y1 = opr::Reshape::make(xd, opr::GetVarShape::make(x)) + 2;
    ASSERT_EQ(y0.shape(), TensorShape({N0 * C0}));
    ASSERT_EQ(y1.shape(), TensorShape({N0, C0}));
    HostTensorND host_y0, host_y1;
    auto func = graph->compile(
            {make_callback_copy(y0, host_y0), make_callback_copy(y1, host_y1)});
    auto run = [&](const TensorShape& ishp) {
        auto tot = ishp.total_nr_elems();
        host_x->copy_from(*gen(ishp));
        host_tshp->ptr<float>()[0] = tot;
        func->execute();
        ASSERT_EQ(host_y0.shape(), TensorShape({tot}));
        ASSERT_EQ(host_y1.shape(), ishp);
        for (size_t i = 0; i < tot; ++i) {
            ASSERT_EQ(host_x->ptr<float>()[i] + 1, host_y0.ptr<float>()[i]);
            ASSERT_EQ(host_x->ptr<float>()[i] + 2, host_y1.ptr<float>()[i]);
        }
    };
    run({3, 2});
    run({23, 12, 5});
}
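
// Reshape an empty var (CondTake with an always-false predicate) to a shape
// with a zero-sized axis; concat along that axis then degenerates to v.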
TEST(TestTensorManip, ReshapeEmptyShape) {
    HostTensorGenerator<> gen;
    constexpr size_t x_length = 233;
    auto host_x = gen({x_length}), host_v = gen({2, 3, 3, 3});
    for (size_t i = 0; i < x_length; ++i) {
        host_x->ptr<float>()[i] = 1.f;
    }
    constexpr auto INVALID_AXIS = opr::Reshape::Param::INVALID_AXIS;
    for (auto unspec_axis : {INVALID_AXIS, 0, 1, 3}) {
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        TensorShape tshape{2, 3, 3, 3};
        auto zero_axis = unspec_axis;
        if (unspec_axis == INVALID_AXIS) {
            tshape[zero_axis = 2] = 0;
        }
        using CondTakeMode = opr::CondTake::Param::Mode;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             x_empty = opr::CondTake::make(x, x, {CondTakeMode::EQ, 0.f})[0],
             v = opr::Host2DeviceCopy::make(*graph, host_v),
             x_reshape = opr::Reshape::make(x_empty, tshape, {unspec_axis}),
             y = opr::Concat::make({x_reshape, v}, zero_axis);
        HostTensorND host_empty, host_y;
        auto func = graph->compile(
                {make_callback_copy(x_reshape, host_empty),
                 make_callback_copy(y, host_y)});
        func->execute().wait();
        ASSERT_TRUE(host_empty.layout().is_empty());
        MGB_ASSERT_TENSOR_EQ(*host_v, host_y);
    }
}

TEST(TestTensorManip, ReshapeWithNegativeUnspec) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 8});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Reshape::make(x, {1, 8}, -2);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    for (size_t ishp : {1, 5, 6}) {
        host_x->copy_from(*gen({ishp * 8}));
        func->execute();
        TensorShape expect_shape({ishp, 8});
        ASSERT_EQ(expect_shape, host_y.shape());
        MGB_ASSERT_TENSOR_EQ(
                host_x->sub(SubTensorSpec::make_from_layout(
                        host_x->layout().reshape(expect_shape))),
                host_y);
    }
}

TEST(TestTensorManip, Broadcast) {
    constexpr size_t N = 20, C = 30;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({1, 1}), host_opr1 = gen({N, C});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"}),
              opr1 = opr::Host2DeviceCopy::make(*graph, host_opr1, {"opr1"}),
              sum = opr::add(opr::Broadcast::make(opr0, host_opr1->shape()), opr1);
    HostTensorND host_sum(CompNode::load("xpu0"));
    auto func =
            graph->compile({{sum, [&](DeviceTensorND& s) { host_sum.copy_from(s); }}});
    func->execute();
    ASSERT_TRUE(host_sum.layout().eq_layout(host_opr1->layout()));
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.sync().ptr<float>();
    // opr0 is a broadcast scalar, so iterate over all elements of opr1
    for (size_t i = 0, it = host_opr1->layout().total_nr_elems(); i < it; i++) {
        MGB_ASSERT_FLOAT_EQ(o0[0] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zd", o0[0], o1[i], i);
    }
}

TEST(TestTensorManip, BroadcastEmptyShape) {
    HostTensorGenerator<> gen;
    for (auto&& arg :
         {std::make_pair(TensorShape{1}, TensorShape{0}),
          {{1, 2, 3}, {0, 2, 3}},
          {{2, 3}, {1, 0, 2, 3}},
          {{1, 0, 2, 3}, {4, 0, 2, 3}},
          {{0, 1, 2, 3}, {3, 0, 4, 2, 3}}}) {
        auto host_x = gen(arg.first);
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             y = opr::Broadcast::make(x, arg.second);
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_TRUE(host_y.shape().eq_shape(arg.second));
    }
}

TEST(TestTensorManip, Dimshuffle) {
    HostTensorGenerator<> gen;
    constexpr size_t S0 = 8, S1 = 3;
    auto host_x = gen({S0, S1}), host_prod = gen({S1, 1, S0, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
         prod = opr::Host2DeviceCopy::make(*graph, host_prod).rename("prod"),
         x_ds = opr::Dimshuffle::make(x, {1, -1, 0, -1}).rename("x_ds"),
         y = (x_ds * prod).reshape({S0 * S1}).rename("y"),
         loss = opr::Dot::make(y, y).rename("loss"),
         gx = cg::grad(loss, x).rename("gx");
    ASSERT_TRUE(cg::is_static_var_shape(gx.node()));
    ASSERT_EQ(host_x->shape(), gx.node()->shape());
    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();
    for (size_t i = 0; i < S0; i++)
        for (size_t j = 0; j < S1; j++) {
            float x = host_x->ptr<float>({i, j})[0],
                  prod = host_prod->ptr<float>({j, 0, i, 0})[0],
                  gx = host_gx.ptr<float>({i, j})[0];
            MGB_ASSERT_FLOAT_EQ(2 * prod * prod * x, gx) << ssprintf(
                    "failed at (%zd, %zd): x=%g prod=%g gx=%g", i, j, x, prod, gx);
        }
}

TEST(TestTensorManip, DimshuffleEmptyShape) {
    HostTensorGenerator<> gen;
    for (auto&& arg :
         {std::make_pair(TensorShape{3, 0}, std::vector<int>{1, -1, 0, -1}),
          {{3, 1, 0, 4}, {-1, 3, -1, 0, 2}},
          {{2, 0, 3, 0}, {1, 0, 2, 3}}}) {
        auto host_x = gen(arg.first);
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             y = opr::Dimshuffle::make(x, arg.second);
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        auto&& y_shape = host_y.shape();
        for (size_t idx = 0; idx < arg.second.size(); ++idx) {
            auto elem = arg.second[idx];
            if (elem == -1) {
                ASSERT_EQ(y_shape[idx], 1u);
            } else {
                ASSERT_EQ(arg.first[elem], y_shape[idx]);
            }
        }
    }
}
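
// Reshape -> Dimshuffle -> Reshape pipeline, verified against a hand-written
// reference and numerical gradients, in both static and dynamic shape modes.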
TEST(TestTensorManip, DimshuffleCombined) {
    using Checker = AutoOprChecker<1, 1>;
    constexpr int RED0 = 2, RED1 = 3;
    for (bool dyn : {false, true}) {
        auto make_graph =
                [dyn](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
            auto x = inputs[0];
            if (dyn)
                x = opr::MarkDynamicVar::make(x);
            auto cv = [&](int v) {
                auto rst = x.make_scalar(v);
                if (dyn)
                    rst = opr::MarkDynamicVar::make(rst);
                return rst;
            };
            auto xshp = opr::GetVarShape::make(x);
            auto sub = [&](int idx) {
                return opr::IndexAt::make(xshp, {{0, cv(idx)}});
            };
            auto tshp0 = opr::Concat::make(
                         {sub(0), sub(1) / (RED0 * RED1), cv(RED0), cv(RED1), sub(2),
                          sub(3)},
                         0),
                 tshp1 = opr::Concat::make(
                         {sub(0), sub(1) / (RED0 * RED1), sub(2) * RED0,
                          sub(3) * RED1},
                         0);
            auto y0 = opr::Reshape::make(x, tshp0),
                 y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 2, 4, 5}),
                 y2 = opr::Reshape::make(y1, tshp1);
            return {y2.node()};
        };
        auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
            auto&& iv = *inp.at(0);
            auto&& ov = dest.at(0);
            auto ishp = iv.shape();
            auto oshp = ishp;
            oshp.shape[1] /= RED0 * RED1;
            oshp.shape[2] *= RED0;
            oshp.shape[3] *= RED1;
            ov.comp_node(iv.comp_node()).resize(oshp);
            size_t tmpshp[6] = {oshp.shape[0], oshp.shape[1], RED1,
                                RED0,          ishp.shape[2], ishp.shape[3]},
                   tmpidx[6];
            for (size_t oidx = 0, oidxt = oshp.total_nr_elems(); oidx < oidxt;
                 ++oidx) {
                for (int i = 5, x = oidx; i >= 0; --i) {
                    tmpidx[i] = x % tmpshp[i];
                    x /= tmpshp[i];
                    mgb_assert(i || !x);
                }
                std::swap(tmpshp[2], tmpshp[3]);
                std::swap(tmpidx[2], tmpidx[3]);
                size_t iidx = 0;
                for (int i = 5, d = 1; i >= 0; --i) {
                    iidx += d * tmpidx[i];
                    d *= tmpshp[i];
                }
                std::swap(tmpshp[2], tmpshp[3]);
                ov.ptr<float>()[oidx] = iv.ptr<float>()[iidx];
            }
        };
        Checker::RunOptions opt;
        opt.numdiff_eps = 1;  // large eps because all linear
        constexpr size_t R = RED0 * RED1;
        Checker(make_graph, fwd)
                .run({{{1, R, 1, 1}}}, opt)
                .run({{{5, R * 2, 3, 2}}}, opt)
                .run({{{2, R * 3, 4, 3}}}, opt);
    }
}

TEST(TestTensorManip, Subtensor) {
    using Checker = AutoOprChecker<1, 1>;
    SymbolVar sub0, sub1, sub2, sub3, sub4;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        x = x.rename("x");
        auto cv = [&](int v, bool dyn = false) {
            auto rst = x.make_scalar(v);
            if (dyn)
                rst = opr::MarkDynamicVar::make(rst);
            return rst;
        };
        // sub0 = (0.9*x)[10:shp0:2]
        sub0 = opr::Subtensor::make(
                       x * 0.9f,
                       {AIdx::make_interval(
                               0, cv(10, true), opr::GetVarShape::make(x, 0), cv(2))})
                       .rename("sub0");
        // sub1 = x[:-10:2]
        sub1 = opr::Subtensor::make(
                       opr::MarkDynamicVar::make(x),
                       {AIdx::make_interval(0, None, cv(-10), cv(2))})
                       .rename("sub1");
        // sub2_raw = x[5:-5:2, 3]
        auto sub2_raw = opr::Subtensor::make(
                opr::IndexAt::make(x, {{1, cv(3)}}),
                {AIdx::make_interval(0, cv(5), cv(-5), cv(2))});
        {
            auto opr = sub2_raw.node()->owner_opr();
            auto&& inp = opr->input();
            auto&& dmap = opr->node_prop().dep_map();
            for (size_t i = 1; i < inp.size(); ++i) {
                mgb_assert(
                        dmap.at(inp[i]) &
                        cg::OperatorNodeBase::NodeProp::DepType::HOST_VALUE);
            }
        }
        sub2 = opr::AxisAddRemove::make(
                       sub2_raw, {opr::AxisAddRemove::AxisDesc::make_add(1)})
                       .rename("sub2");
        // sub3 = x[4:-6:2, -1:]
        sub3 = opr::Subtensor::make(
                x, {AIdx::make_interval(0, cv(4), cv(-6), cv(2)),
                    AIdx::make_interval(1, cv(-1), None, None)});
        // sub4 = (x + 0.1)[-3:7:-2, 1::-3] (negative stride)
        sub4 = opr::Subtensor::make(
                x + .1f, {AIdx::make_interval(0, cv(-3), cv(7), cv(-2)),
                          AIdx::make_interval(1, cv(1), None, cv(-3, true))});
        return {(sub0 + sub1 + sub2 + sub3 + sub4).rename("y")};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto iptr = inp[0]->ptr<float>();
        auto ishp = inp[0]->shape();
        auto oshp = ishp;
        auto s0 = ishp.shape[0], s1 = ishp.total_nr_elems() / s0,
             s2 = s1 / ishp.shape[1];
        auto os0 = (s0 - 10 + 1) / 2;
        oshp.shape[0] = os0;
        dest[0].comp_node(inp[0]->comp_node());
        dest[0].resize(oshp);
        auto optr = dest[0].ptr<float>();
        for (size_t i = 0; i < os0; ++i)
            for (size_t j = 0; j < s1; ++j) {
                optr[i * s1 + j] =
                        iptr[(i * 2 + 10) * s1 + j] * .9f + iptr[(i * 2) * s1 + j] +
                        iptr[(i * 2 + 5) * s1 + j % s2 + s2 * 3] +
                        iptr[(i * 2 + 4) * s1 + j % s2 + s2 * (ishp.shape[1] - 1)] +
                        iptr[(ishp.shape[0] - 3 - i * 2) * s1 + j % s2 + s2 * 1] + 0.1;
            }
    };
    Checker::RunOptions opt;
    opt.numdiff_eps = 1;  // large eps because all linear
    Checker checker(make_graph, fwd);
    checker.run({{{11, 5}}}, opt).run({{{20, 6}}}, opt).run({{{56, 6, 4}}}, opt);
    ASSERT_FALSE(cg::is_static_var_shape(sub0.node()));
    ASSERT_FALSE(cg::is_static_var_shape(sub1.node()));
    ASSERT_TRUE(cg::is_static_var_storage(sub2.node()));
    ASSERT_TRUE(cg::is_static_var_storage(sub3.node()));
    ASSERT_FALSE(cg::is_static_var_storage(sub4.node()));
}

TEST(TestTensorManip, SubtensorNegativeAxis) {
    using Checker = AutoOprChecker<1, 1>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        return {opr::Subtensor::make(x, {AIdx::make_index(-1, x.make_scalar(2))})};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto iptr = inp[0]->ptr<float>();
        auto ishp = inp[0]->shape();
        auto oshp = ishp;
        --oshp.ndim;
        auto stride = oshp.shape[oshp.ndim];
        if (!oshp.ndim)
            oshp = {1};
        auto optr = dest[0].resize(oshp).ptr<float>();
        for (size_t i = 0, it = oshp.total_nr_elems(); i < it; ++i) {
            optr[i] = iptr[i * stride + 2];
        }
    };
    Checker checker(make_graph, fwd);
    checker.run({TensorShape{5}})
            .run({TensorShape{2, 3}})
            .run({TensorShape{2, 3, 4}})
            .run({TensorShape{2, 3, 4, 5}});
}

TEST(TestTensorManip, SubtensorWithEmptyIndexDesc) {
    using Checker = AutoOprChecker<1, 1>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto x = inputs[0];
        return {opr::Subtensor::make(x, {})};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto iptr = inp[0]->ptr<float>();
        auto oshp = inp[0]->shape();
        auto optr = dest[0].resize(oshp).ptr<float>();
        for (size_t i = 0, it = oshp.total_nr_elems(); i < it; ++i) {
            optr[i] = iptr[i];
        }
    };
    Checker checker(make_graph, fwd);
    checker.run({TensorShape{5}})
            .run({TensorShape{2, 3}})
            .run({TensorShape{2, 3, 4}})
            .run({TensorShape{2, 3, 4, 5}});
}

TEST(TestTensorManip, SubtensorShapeInferForDynAxisIdx) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 6, 3});
    auto host_idx = gen({1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx = opr::MarkDynamicVar::make(
                 opr::Host2DeviceCopy::make(*graph, host_idx));
    auto cv = [&](int v) { return x.make_scalar(v); };
    using Ad = opr::Subtensor::AxisIndexer;
    // y = x[2, 1:-2:2]
    auto y = opr::Subtensor::make(
            x, {Ad::make_interval(1, cv(1), cv(-2), cv(2)), Ad::make_index(0, idx)});
    ASSERT_TRUE(cg::is_static_var_shape(y.node()));
    ASSERT_EQ(y.node()->shape(), TensorShape({2, 3}));
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    host_idx->ptr<float>()[0] = 2;
    func->execute();
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({2, 3});
    for (size_t i = 0; i < 2; ++i)
        for (size_t j = 0; j < 3; ++j) {
            expt.ptr<float>()[i * 3 + j] = host_x->ptr<float>({2, i * 2 + 1, j})[0];
        }
    MGB_ASSERT_TENSOR_EQ(expt, host_y);
}
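
// Subtensor should forward input storage (the result points 3 floats past the
// start of x) for const, static, and dynamic indices and dynamic input alike.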
TEST(TestTensorManip, SubtensorDynCaseMemFwd) {
    auto run = [](int dyn_type) {
        // dyn_type: 0->const idx, 1->static idx, 2->dynamic idx, 3->dynamic inp
        ASSERT_FALSE(HasFailure()) << "already failed before " << dyn_type;
        HostTensorGenerator<> gen;
        auto host_x = gen({2, 3});
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x);
        SymbolVar idx;
        if (dyn_type == 0 || dyn_type == 3) {
            idx = x.make_scalar(1);
            if (dyn_type == 3) {
                // force dynamic storage by reading on another comp node
                auto xrd = opr::Copy::make(x, host_x->comp_node().change_stream(1));
                graph->options().extra_vardeps[x.node()].push_back(xrd.node());
            }
        } else {
            auto host_idx = std::make_shared<HostTensorND>(
                    host_x->comp_node(), dtype::Int32{});
            host_idx->resize({1}).ptr<int>()[0] = 1;
            idx = opr::Host2DeviceCopy::make(*graph, host_idx);
            if (dyn_type == 2) {
                idx = opr::MarkDynamicVar::make(idx);
            }
        }
        auto y = opr::Subtensor::make(
                x, {opr::Subtensor::AxisIndexer::make_interval(0, idx, None, None)});
        if (dyn_type != 2) {
            ASSERT_EQ(TensorShape({1, 3}), y.shape());
        }
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        auto xsub = host_x->sub(SubTensorSpec::make_from_offset_elem(
                TensorLayout({1, 3}, dtype::Float32{}), 3));
        MGB_ASSERT_TENSOR_EQ(xsub, host_y);
        ASSERT_EQ(dyn_type == 0, cg::is_static_var_storage(y.node()));
        ASSERT_EQ(dyn_type != 2, cg::is_static_var_shape(y.node()));
        ASSERT_EQ(
                static_cast<const uint8_t*>(prev_dev_ptr(x)) + 3 * sizeof(float),
                prev_dev_ptr(y));
    };
    run(0);
    run(1);
    run(2);
    run(3);
}

TEST(TestTensorManip, SubtensorWithNoValInferInp) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 1}), host_idx = gen({1});
    auto graph = ComputingGraph::make();
    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx = opr::Host2DeviceCopy::make_no_value_infer(*graph, host_idx),
         y = opr::Subtensor::make(x, {Ad::make_index(0, idx)});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    host_idx->ptr<float>()[0] = 2;
    func->execute();
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({1}).ptr<float>()[0] = host_x->ptr<float>()[2];
    MGB_ASSERT_TENSOR_EQ(expt, host_y);
}
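
// Operator deduplication: an identical index descriptor on the same axis must
// reuse the existing node, so the 16 make() calls yield only 8 distinct vars.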
TEST(TestTensorManip, SubtensorDedup) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 5, 5, 5});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto cv = [&](int v) { return x.make_scalar(v); };
    using S = opr::Subtensor;
    using D = S::AxisIndexer;
    std::unordered_set<VarNode*> nodes;
    for (int i : {0, 1, 1, 0}) {
        nodes.insert(S::make(x, {D::make_index(i, cv(2))}).node());
        nodes.insert(S::make(x, {D::make_interval(i, cv(2), None, None)}).node());
        nodes.insert(S::make(x, {D::make_interval(i, None, cv(2), None)}).node());
        nodes.insert(S::make(x, {D::make_interval(i, None, None, cv(2))}).node());
    }
    ASSERT_EQ(8u, nodes.size());
}
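
// The index is consumed via static value inference on the host and never
// executed on device; a host-copy index tracks updates between executions,
// while an ImmutableTensor index stays frozen at its build-time value.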
TEST(TestTensorManip, SubtensorIdxChange) {
    auto run = [](bool dyn) {
        HostTensorGenerator<> gen;
        auto host_x = gen({10});
        auto host_idx =
                std::make_shared<HostTensorND>(host_x->comp_node(), dtype::Int32());
        host_idx->resize({1}).ptr<int>()[0] = 1;
        bool idx_exec = false, idx_infered = false;
        auto cb_set_idx_exec = [&](DeviceTensorND& dv) {
            if (dv.comp_node() == CompNode::default_cpu()) {
                idx_infered = true;
            } else {
                idx_exec = true;
            }
        };
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        SymbolVar idx_;
        if (dyn) {
            idx_ = opr::Host2DeviceCopy::make(*graph, host_idx);
        } else {
            idx_ = opr::ImmutableTensor::make(*graph, *host_idx);
        }
        auto idx = opr::CallbackInjector::make(idx_, {false, true, cb_set_idx_exec}),
             y = opr::Subtensor::make(
                     x, {opr::Subtensor::AxisIndexer::make_interval(
                                0, idx, idx + 1, None)});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        ASSERT_TRUE(cg::is_static_var_shape(y.node()));
        ASSERT_TRUE(cg::is_static_var_value(y.node()));
        ASSERT_EQ(!dyn, cg::is_static_var_storage(y.node()));
        ASSERT_EQ(TensorShape({1}), y.node()->shape());
        auto px = host_x->ptr<float>();
        func->execute();
        ASSERT_EQ(px[1], host_y.ptr<float>()[0]);
        host_idx->ptr<int>()[0] = 5;
        func->execute();
        if (dyn) {
            ASSERT_EQ(px[5], host_y.ptr<float>()[0]);
        } else {
            ASSERT_EQ(px[1], host_y.ptr<float>()[0]);
        }
        ASSERT_TRUE(idx_infered);
        ASSERT_FALSE(idx_exec);
    };
    run(true);
    run(false);
}

TEST(TestTensorManip, SubtensorEmptyIO) {
    using AIdx = opr::Subtensor::AxisIndexer;
    using IndexDesc = std::vector<AIdx>;
    using IndexDescCreater = thin_function<IndexDesc(SymbolVar)>;
    HostTensorGenerator<> gen;
    auto run = [&](const TensorShape& inp_shp, const TensorShape& out_shp,
                   const IndexDescCreater& c) {
        auto host_x = gen(inp_shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto y = opr::Subtensor::make(x, c(x));
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(host_y.shape(), out_shp);
        ASSERT_TRUE(host_y.empty());
    };
    // x.shape = {0}, x[:0]
    run({0}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, None, x.make_scalar(0), None)};
    });
    // x.shape = {100, 0}, x[0:-10:2]
    run({100, 0}, {45, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                0, x.make_scalar(0), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {100, 0}, x[10:-10:2, 0:0]
    run({100, 0}, {40, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                        0, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2)),
                AIdx::make_interval(1, x.make_scalar(0), x.make_scalar(0), None)};
    });
    // x.shape = {10, 0, 10}, x[5, 10:-10:2]
    run({10, 0, 10}, {0, 10}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_index(0, x.make_scalar(5)),
                AIdx::make_interval(
                        1, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {10}, x[100:]
    run({10}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, x.make_scalar(100), None, None)};
    });
}

TEST(TestTensorManip, SetSubtensorEmptyIO) {
    using AIdx = opr::SetSubtensor::AxisIndexer;
    using IndexDesc = std::vector<AIdx>;
    using IndexDescCreater = thin_function<IndexDesc(SymbolVar)>;
    HostTensorGenerator<> gen;
    auto run = [&](const TensorShape& inp_shp, const TensorShape& val_shp,
                   const IndexDescCreater& c) {
        auto host_x = gen(inp_shp), host_v = gen(val_shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             v = opr::Host2DeviceCopy::make(*graph, host_v);
        auto y = opr::SetSubtensor::make(x, v, c(x));
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(host_y.shape(), inp_shp);
    };
    // x.shape = {0}, v.shape = {0}, x[:0] = v
    run({0}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, None, x.make_scalar(0), None)};
    });
    // x.shape = {100, 0}, v.shape = {45, 0}, x[0:-10:2] = v
    run({100, 0}, {45, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                0, x.make_scalar(0), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {100, 0}, v.shape = {40, 0}, x[10:-10:2, 0:0] = v
    run({100, 0}, {40, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                        0, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2)),
                AIdx::make_interval(1, x.make_scalar(0), x.make_scalar(0), None)};
    });
    // x.shape = {10, 0, 10}, v.shape = {0, 10}, x[5, 10:-10:2] = v
    run({10, 0, 10}, {0, 10}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_index(0, x.make_scalar(5)),
                AIdx::make_interval(
                        1, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {10}, v.shape = {0}, x[100:] = v
    run({10}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, x.make_scalar(100), None, None)};
    });
}
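
// Helper for the SubtensorFwdOnly tests below: slice x[idx0:idx1] under all
// static/dynamic combinations of input and index, checking that the result
// points into x's own buffer whenever memory forwarding is possible.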
namespace {
void test_subtensor_fwdonly(bool dyn_inp, bool dyn_idx) {
    constexpr size_t SIZE = 25;
    auto mkhost = [](size_t size, DType dtype) {
        auto rst = std::make_shared<HostTensorND>(CompNode::load("xpu0"), dtype);
        rst->resize({size});
        return rst;
    };
    auto host_x = mkhost(SIZE, dtype::Float32()),
         host_idx0 = mkhost(1, dtype::Int32()), host_idx1 = mkhost(1, dtype::Int32());
    for (size_t i = 0; i < SIZE; ++i) {
        host_x->ptr<float>()[i] = i;
    }
    host_idx0->ptr<int>()[0] = 2;
    host_idx1->ptr<int>()[0] = 6;
    auto graph = ComputingGraph::make();
    using AIdx = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx0 = opr::Host2DeviceCopy::make(*graph, host_idx0),
         idx1 = opr::Host2DeviceCopy::make(*graph, host_idx1);
    float *x_ptr = nullptr, *x_ptr_end = nullptr, *xsub_ptr = nullptr;
    if (dyn_inp)
        x = opr::MarkDynamicVar::make(x);
    x = opr::CallbackInjector::make(x, [&](DeviceTensorND& v) {
        x_ptr = v.ptr<float>();
        x_ptr_end = v.ptr<float>() + v.layout().total_nr_elems();
    });
    if (dyn_idx)
        idx0 = opr::MarkDynamicVar::make(idx0);
    auto xsub = opr::Subtensor::make(x, {AIdx::make_interval(0, idx0, idx1, None)});
    xsub = opr::CallbackInjector::make(
            xsub, [&](DeviceTensorND& v) { xsub_ptr = v.ptr<float>(); });
    ASSERT_EQ(!dyn_inp && !dyn_idx, cg::is_static_var_shape(xsub.node()));
    HostTensorND host_sub;
    auto func = graph->compile({make_callback_copy(xsub, host_sub)});
    bool failed = false;
    auto run_and_check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        host_idx0->ptr<int>()[0] = begin;
        host_idx1->ptr<int>()[0] = end;
        func->execute();
        if (!(!dyn_inp && dyn_idx)) {
            ASSERT_GE(xsub_ptr, x_ptr);
            ASSERT_LE(xsub_ptr, x_ptr_end);
        }
        ASSERT_EQ(TensorShape({end - begin}), host_sub.shape());
        for (size_t i = 0; i < end - begin; ++i)
            ASSERT_EQ(host_x->ptr<float>()[i + begin], host_sub.ptr<float>()[i])
                    << ssprintf("failed [%zu, %zu): i=%zu", begin, end, i);
        failed = false;
    };
    run_and_check(0, 1);
    run_and_check(2, 3);
    run_and_check(0, 5);
    run_and_check(1, 6);
    run_and_check(3, 21);
    run_and_check(0, SIZE);
    run_and_check(1, SIZE);
    run_and_check(0, SIZE - 1);
}
}  // anonymous namespace

TEST(TestTensorManip, SubtensorFwdOnly00) {
    test_subtensor_fwdonly(false, false);
}

TEST(TestTensorManip, SubtensorFwdOnly01) {
    test_subtensor_fwdonly(false, true);
}

TEST(TestTensorManip, SubtensorFwdOnly10) {
    test_subtensor_fwdonly(true, false);
}

TEST(TestTensorManip, SubtensorFwdOnly11) {
    test_subtensor_fwdonly(true, true);
}
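
// SetSubtensor whose value overlaps its destination (y = xsub[:-10] :=
// xsub[10:]): static value inference and actual execution must both read the
// pre-update contents.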
TEST(TestTensorManip, OverlapSetSubtensor) {
    constexpr size_t SIZE = 2048, SIZE_SUB = (SIZE - 4) / 2;
    auto host_x =
            std::make_shared<HostTensorND>(CompNode::load("xpu0"), dtype::Float32());
    host_x->resize({SIZE});
    for (size_t i = 0; i < SIZE; ++i)
        host_x->ptr<float>()[i] = i;
    auto graph = ComputingGraph::make();
    graph->options().allocate_static_mem_after_graph_compile = true;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
    auto cv = [&](int v, bool dyn = false) {
        auto rst = x.make_scalar(v);
        if (dyn)
            rst = opr::MarkDynamicVar::make(rst);
        return rst;
    };
    using AIdx = opr::Subtensor::AxisIndexer;
    auto xsub = opr::Subtensor::make(x, {AIdx::make_interval(0, cv(2), cv(-2), cv(2))})
                        .rename("xsub"),
         // y = xsub[:-10] := xsub[10:]
         y = opr::SetSubtensor::make(
                     xsub,
                     opr::Subtensor::make(
                             xsub, {AIdx::make_interval(0, cv(10), None, None)})
                             .rename("xsub[10:]"),
                     {AIdx::make_interval(0, None, cv(-10), None)})
                     .rename("y");
    HostTensorND expected(host_x->comp_node(), dtype::Float32());
    expected.resize({SIZE_SUB});
    for (size_t i = 0; i < SIZE_SUB; ++i) {
        auto i0 = i;
        if (i0 < SIZE_SUB - 10)
            i0 += 10;
        expected.ptr<float>()[i] = i0 * 2 + 2;
    }
    ASSERT_TRUE(cg::is_static_var_value(y.node()));
    HostTensorND infer_result;
    infer_result.copy_from(graph->static_infer_manager().infer_value(y.node()));
    MGB_ASSERT_TENSOR_EQ(expected, infer_result);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->to_json()->writeto_fpath(output_file("OverlapSetSubtensor.json"));
    func->execute();
    MGB_ASSERT_TENSOR_EQ(expected, host_y);
}

TEST(TestTensorManip, OverlapSetSubtensor2) {
    constexpr size_t SIZE_X = 20, SIZE_Y = 23;
    auto run = [](bool should_overlap) {
        auto host_x = std::make_shared<HostTensorND>(
                CompNode::load("xpu0"), dtype::Float32());
        host_x->resize({SIZE_X, SIZE_Y});
        for (size_t i = 0; i < SIZE_X * SIZE_Y; ++i)
            host_x->ptr<float>()[i] = i;
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
        auto cv = [&](int v) { return x.make_scalar(v); };
        auto make_sub_desc = [&](int begin, int end) -> opr::Subtensor::IndexDesc {
            using AIdx = opr::Subtensor::AxisIndexer;
            return {AIdx::make_interval(0, cv(begin), cv(end), None)};
        };
        auto slice = [&](SymbolVar inp, int begin, int end) {
            return opr::Subtensor::make(inp, make_sub_desc(begin, end));
        };
        // y = x.copy()
        // y[2:7] = y[4:9].copy()
        // y[1:6] += y[3:8].copy()
        auto xsub = slice(x, 4, 9).rename("xsub"),
             y0 = opr::SetSubtensor::make(x, xsub, make_sub_desc(2, 7)).rename("y0"),
             y0sub = slice(y0, 3, 8).rename("y0sub"),
             ypar = should_overlap ? y0 : y0 + 1,
             y = opr::IncrSubtensor::make(ypar, y0sub, make_sub_desc(1, 6))
                         .rename("y1");
        HostTensorND expect;
        expect.copy_from(*host_x);
        auto ptr = expect.ptr<float>();
        memmove(ptr + 2 * SIZE_Y, ptr + 4 * SIZE_Y, 5 * SIZE_Y * sizeof(float));
        for (size_t i = 1; i < 6; ++i) {
            for (size_t j = 0; j < SIZE_Y; ++j) {
                ptr[i * SIZE_Y + j] += ptr[(i + 2) * SIZE_Y + j];
            }
        }
        if (!should_overlap) {
            for (size_t i = 0; i < SIZE_X * SIZE_Y; ++i) {
                ++ptr[i];
            }
        }
        ASSERT_TRUE(cg::is_static_var_value(y.node()));
        HostTensorND infer_result;
        infer_result.copy_from(graph->static_infer_manager().infer_value(y.node()));
        MGB_ASSERT_TENSOR_EQ(expect, infer_result);
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        MGB_ASSERT_TENSOR_EQ(expect, host_y);
        if (!should_overlap) {
            ASSERT_EQ(prev_dev_ptr(ypar), prev_dev_ptr(y));
        }
    };
    run(false);
    run(true);
}

TEST(TestTensorManip, SetSubtensor) {
    using Checker = AutoOprChecker<3, 1>;
    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0], v0 = inputs[1], v1 = inputs[2];
        x = x.rename("x");
        v0 = v0.rename("v0");
        v1 = v1.rename("v1");
        auto cv = [&](int v, bool dyn = false) {
            auto rst = x.make_scalar(v);
            if (dyn)
                rst = opr::MarkDynamicVar::make(rst);
            return rst;
        };
        auto
                // x0 = x[10::2] := v0
                x0 = opr::SetSubtensor::make(
                             x, v0, {AIdx::make_interval(0, cv(10), None, cv(2))})
                             .rename("x0"),
                // x1 = x[:-10:2] := v0[:, 3] := v1
                x1 = opr::SetSubtensor::make(
                             opr::MarkDynamicVar::make(x),
                             opr::SetSubtensor::make(
                                     v0, v1, {AIdx::make_index(1, cv(3))}),
                             {AIdx::make_interval(0, None, cv(-10), cv(2))})
                             .rename("x_sub1"),
                // x2 = (x[:5] := x[4:9])[3:-7:2, -1] := v1
                x2_t = opr::Subtensor::make(
                               x, {AIdx::make_interval(0, cv(4), cv(9), None)})
                               .rename("x2_t"),
                x2 = opr::SetSubtensor::make(
                             opr::SetSubtensor::make(
                                     x, x2_t,
                                     {AIdx::make_interval(0, None, cv(5), None)}),
                             v1,
                             {AIdx::make_interval(0, cv(3), cv(-7), cv(2)),
                              AIdx::make_index(1, cv(-1))})
                             .rename("x2"),
                y = (x0 + x1 + x2).rename("y");
        mgb_assert(cg::is_static_var_storage(x0.node()));
        mgb_assert(!cg::is_static_var_shape(x1.node()));
        mgb_assert(cg::is_static_var_storage(x2.node()));
        return {y};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto px = inp[0]->ptr<float>(), pv0 = inp[1]->ptr<float>(),
             pv1 = inp[2]->ptr<float>();
        auto ishp = inp[0]->shape();
        dest[0].comp_node(inp[0]->comp_node());
        dest[0].resize(ishp);
        auto optr = dest[0].ptr<float>();
        auto s0 = ishp.shape[0], s1 = ishp.total_nr_elems() / s0,
             s2 = s1 / ishp.shape[1];
        for (size_t i = 0; i < s0; ++i) {
            for (size_t j = 0; j < s1; ++j) {
                float x0, x1, x2;
                x0 = x1 = x2 = px[i * s1 + j];
                if (i >= 10 && (i - 10) % 2 == 0)
                    x0 = pv0[((i - 10) / 2) * s1 + j];
                if (i < s0 - 10 && i % 2 == 0) {
                    auto row = i / 2;
                    if (j / s2 == 3)
                        x1 = pv1[row * s2 + j % s2];
                    else
                        x1 = pv0[row * s1 + j];
                }
                if (i >= 3 && i < s0 - 7 && (i - 3) % 2 == 0 &&
                    j / s2 == ishp.shape[1] - 1)
                    x2 = pv1[((i - 3) / 2) * s2 + j % s2];
                else if (i < 5)
                    x2 = px[(i + 4) * s1 + j];
                optr[i * s1 + j] = x0 + x1 + x2;
            }
        }
    };
    auto mkshp = [](const TensorShape& shp0) -> Checker::ShapeInpArray {
        mgb_assert(shp0.shape[0] > 10 && shp0.ndim >= 2 && shp0.shape[1] >= 4);
        auto shp1 = shp0;
        shp1.shape[0] = (shp0.shape[0] - 10) / 2;
        auto shp2 = shp1;
        for (size_t i = 2; i < shp2.ndim; ++i)
            shp2.shape[i - 1] = shp2.shape[i];
        --shp2.ndim;
        return {shp0, shp1, shp2};
    };
    Checker::RunOptions opt;
    opt.numdiff_eps = 1;
    Checker(make_graph, fwd)
            .run(mkshp({16, 4, 2}), opt)
            .run(mkshp({14, 10}), opt)
            .run(mkshp({18, 5, 2, 3}), opt);
}
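
// An out-of-bounds index (13 into a length-12 vector) must be rejected at
// operator construction time through static shape/value inference.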
TEST(TestTensorManip, SetSubtensorCheckByShapeInfer) {
    HostTensorGenerator<> gen;
    HostTensorGenerator<dtype::Int32> gen_int;
    auto host_x = gen({12}), host_sub = gen({1}), host_idx = gen_int({1});
    host_idx->ptr<int>()[0] = 13;
    auto graph = ComputingGraph::make();
    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         sub = opr::Host2DeviceCopy::make(*graph, host_sub);
    auto idx1 = Ad::make_index(0, opr::ImmutableTensor::make(*graph, *host_idx)),
         idx2 = Ad::make_index(0, opr::Host2DeviceCopy::make(*graph, host_idx));
    MGB_MARK_USED_VAR(x);
    MGB_MARK_USED_VAR(sub);
    MGB_MARK_USED_VAR(idx1);
    MGB_MARK_USED_VAR(idx2);
    ASSERT_THROW(opr::SetSubtensor::make(x, sub, {idx1}), MegBrainError);
    ASSERT_THROW(opr::SetSubtensor::make(x, sub, {idx2}), MegBrainError);
}

TEST(TestTensorManip, SetSubtensorShapeInfer) {
    HostTensorGenerator<> gen;
    HostTensorGenerator<dtype::Int32> gen_int;
    auto host_x = gen({12}), host_sub = gen({1}), host_idx = gen_int({1});
    host_idx->ptr<int>()[0] = 13;
    auto graph = ComputingGraph::make();
    auto&& mgr = graph->static_infer_manager();
    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         sub = opr::Host2DeviceCopy::make(*graph, host_sub),
         index = opr::Host2DeviceCopy::make_no_value_infer(*graph, host_idx);
    auto rt_static_idx = Ad::make_index(0, index * 2);
    auto y = opr::SetSubtensor::make(x, sub, {rt_static_idx});
    ASSERT_TRUE(mgr.infer_shape_fallible(y.node()));
}
TEST(TestTensorManip, SetSubtensorDynIdx) {
    HostTensorGenerator<> gen;
    auto host_x = gen({12}), host_sub = gen({1}), host_idx = gen({1});
    host_idx->ptr<float>()[0] = 3;
    auto dev_idx = std::make_shared<DeviceTensorND>();
    dev_idx->copy_from(*host_idx);
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         sub = opr::Host2DeviceCopy::make(*graph, host_sub),
         idx = opr::SharedDeviceTensor::make(*graph, dev_idx),
         y = opr::SetSubtensor::make(
                 x, sub, {opr::SetSubtensor::AxisIndexer::make_index(0, idx)});
    ASSERT_TRUE(cg::is_static_var_storage(y.node()));
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    host_x->ptr<float>()[3] = host_sub->ptr<float>()[0];
    MGB_ASSERT_TENSOR_EQ(*host_x, host_y);
}
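
// An empty IndexDesc selects the whole tensor, so SetSubtensor degenerates
// to overwriting x with y entirely.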
TEST(TestTensorManip, SetSubtensorWithEmptyIndexDesc) {
    HostTensorGenerator<> gen;
    auto host_x = gen({12}), host_y = gen({12});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::SetSubtensor::make(x, y, {});
    ASSERT_TRUE(cg::is_static_var_storage(z.node()));
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_y, host_z);
}
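
// IncrSubtensor adds inp[1] onto the slice x[2:-2:2]; the reference
// implementation below mirrors that indexing arithmetic.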
TEST(TestTensorManip, IncrSubtensor) {
    using Checker = AutoOprChecker<2, 1>;
    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        return {opr::IncrSubtensor::make(
                x, inputs[1],
                {AIdx::make_interval(
                        0, x.make_scalar(2), x.make_scalar(-2), x.make_scalar(2))})};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto nr = inp[0]->shape(0);
        auto pv = inp[1]->ptr<float>(), pd = dest[0].copy_from(*inp[0]).ptr<float>();
        // x[2:-2:2] covers indices 2, 4, ..., i.e. (nr - 3) / 2 elements
        for (size_t i = 0; i < (nr - 3) / 2; ++i) {
            pd[i * 2 + 2] += pv[i];
        }
    };
    Checker{make_graph, fwd}
            .run({TensorShape{5}, {1}})
            .run({TensorShape{8}, {2}})
            .run({TensorShape{23}, {10}});
}
TEST(TestTensorManip, Concat) {
    auto cns = load_multiple_xpus(4);
    using Checker = AutoOprChecker<3, 1>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto sub0 = inputs[0], sub1 = opr::Copy::make(inputs[1], cns[1]),
             sub2 = opr::Copy::make(inputs[2], cns[2]),
             ret = opr::Concat::make({sub0, sub1, sub2}, 1, cns[3]);
        return {opr::Copy::make(ret, cns[0])};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        size_t n = inp[0]->shape(0), c0 = inp[0]->shape(1), c1 = inp[1]->shape(1),
               c2 = inp[2]->shape(1), c = c0 + c1 + c2;
        auto i0 = inp[0]->ptr<float>(), i1 = inp[1]->ptr<float>(),
             i2 = inp[2]->ptr<float>(), o = dest[0].resize({n, c}).ptr<float>();
        for (size_t i = 0; i < n; ++i) {
            for (size_t j = 0; j < c; ++j) {
                float cur;
                if (j < c0) {
                    cur = i0[i * c0 + j];
                } else if (j < c0 + c1) {
                    cur = i1[i * c1 + j - c0];
                } else {
                    cur = i2[i * c2 + j - c0 - c1];
                }
                o[i * c + j] = cur;
            }
        }
    };
    Checker checker{make_graph, fwd, cns[0]};
    checker.run({TensorShape{2, 3}, {2, 4}, {2, 5}})
            .run({TensorShape{2, 8}, {2, 3}, {2, 9}})
            .run({TensorShape{5, 10}, {5, 3}, {5, 4}});
}
TEST(TestTensorManip, ConcatWithNegativeAxis) {
    auto cns = load_multiple_xpus(4);
    using Checker = AutoOprChecker<3, 1>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto sub0 = inputs[0], sub1 = opr::Copy::make(inputs[1], cns[1]),
             sub2 = opr::Copy::make(inputs[2], cns[2]),
             ret = opr::Concat::make({sub0, sub1, sub2}, -1, cns[3]);
        return {opr::Copy::make(ret, cns[0])};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        size_t n = inp[0]->shape(0), c0 = inp[0]->shape(1), c1 = inp[1]->shape(1),
               c2 = inp[2]->shape(1), c = c0 + c1 + c2;
        auto i0 = inp[0]->ptr<float>(), i1 = inp[1]->ptr<float>(),
             i2 = inp[2]->ptr<float>(), o = dest[0].resize({n, c}).ptr<float>();
        for (size_t i = 0; i < n; ++i) {
            for (size_t j = 0; j < c; ++j) {
                float cur;
                if (j < c0) {
                    cur = i0[i * c0 + j];
                } else if (j < c0 + c1) {
                    cur = i1[i * c1 + j - c0];
                } else {
                    cur = i2[i * c2 + j - c0 - c1];
                }
                o[i * c + j] = cur;
            }
        }
    };
    Checker checker{make_graph, fwd, cns[0]};
    checker.run({TensorShape{2, 3}, {2, 4}, {2, 5}})
            .run({TensorShape{2, 8}, {2, 3}, {2, 9}})
            .run({TensorShape{5, 10}, {5, 3}, {5, 4}});
}
TEST(TestTensorManip, ConcatEmpty) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3, 5}), host_y = gen({2, 0, 5});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::Concat::make({x, y}, 1);
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_x, host_z);
    host_x->resize({2, 0, 5});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_y, host_z);
}
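
// Concat of two empty tensors: only the shape can be deduced, and the
// result is expected to be empty as well.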
TEST(TestTensorManip, ConcatEmpty2) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 0, 5}), host_y = gen({2, 0, 6});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::Concat::make({x, y}, 2);
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    ASSERT_EQ(TensorShape({2, 0, 11}), host_z.shape());
}
#if MGB_OPENCL
#include "megcore_opencl.h"

#define REQUIRE_OPENCL()                                                 \
    do {                                                                 \
        if (!CompNode::get_device_count(CompNode::DeviceType::OPENCL)) { \
            return;                                                      \
        }                                                                \
    } while (0)

TEST(TestTensorManip, ConcatCD4) {
    REQUIRE_OPENCL();
    auto cn = CompNode::load("openclx");
    HostTensorGenerator<> gen;
    auto host_x = gen({1, 4, 2, 2}, cn), host_y = gen({1, 4, 2, 2}, cn);
    auto graph0 = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph0, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph0, host_y);
    x = opr::RelayoutFormat::make(x, {opr::RelayoutFormat::Param::Mode::NCHW_NHWCD4I});
    y = opr::RelayoutFormat::make(y, {opr::RelayoutFormat::Param::Mode::NCHW_NHWCD4I});
    auto z = opr::Concat::make({x, y}, 2);
    HostTensorND host_z0;
    auto func = graph0->compile({make_callback_copy(z, host_z0)});
    func->execute();
    ASSERT_EQ(TensorShape({1, 2, 2, 2, 4}), host_z0.shape());
    auto graph1 = ComputingGraph::make();
    x = opr::Host2DeviceCopy::make(*graph1, host_x);
    y = opr::Host2DeviceCopy::make(*graph1, host_y);
    z = opr::RelayoutFormat::make(
            opr::Concat::make({x, y}, 1),
            {opr::RelayoutFormat::Param::Mode::NCHW_NHWCD4I});
    HostTensorND host_z1;
    func = graph1->compile({make_callback_copy(z, host_z1)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(host_z0, host_z1);
}
#endif
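
// AxisAddRemove with both static and dynamic input shapes, including the
// empty-tensor case; add(0) prepends a unit axis and remove(1) drops one.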
TEST(TestTensorManip, AxisAddRemove) {
    HostTensorGenerator<> gen;
    for (bool dyn_shape : {false, true}) {
        auto host_x = gen({2, 1, 5});
        using AD = opr::AxisAddRemove::AxisDesc;
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        if (dyn_shape) {
            x = opr::MarkDynamicVar::make(x);
        }
        auto y = opr::AxisAddRemove::make(x, {AD::make_add(0)}),
             z = opr::AxisAddRemove::make(x, {AD::make_remove(1)});
        HostTensorND host_y, host_z;
        auto func = graph->compile(
                {make_callback_copy(y, host_y), make_callback_copy(z, host_z)});
        func->execute();
        ASSERT_EQ(TensorShape({1, 2, 1, 5}), host_y.shape());
        ASSERT_EQ(TensorShape({2, 5}), host_z.shape());
        MGB_ASSERT_TENSOR_EQ(*host_x, host_y.resize(host_x->shape()));
        MGB_ASSERT_TENSOR_EQ(*host_x, host_z.resize(host_x->shape()));
        // test empty tensor
        host_x->resize({2, 1, 0});
        func->execute();
        ASSERT_EQ(TensorShape({1, 2, 1, 0}), host_y.shape());
        ASSERT_EQ(TensorShape({2, 0}), host_z.shape());
    }
}
TEST(TestTensorManip, Split) {
    auto cns = load_multiple_xpus(3);
    constexpr size_t C1 = 20, C2 = 30;
    constexpr size_t N = 2, C = C1 + C2;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({N, C}, cns[0]);
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"});
    auto spl = opr::Split::make(
            opr0, Split::Options::make_partition(opr0, 1, {C1, C2}),
            OperatorNodeConfig("split").comp_node_arr({cns[1], cns[2]}));
    auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()),
         cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()),
         cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)),
         cost = opr::Copy::make(
                 cost0 + cost1, OperatorNodeConfig().follow_comp_node(opr0)),
         grad = cg::grad(cost, opr0);
    HostTensorND host_spl0, host_spl1, host_grad;
    auto func = graph->compile(
            {{spl[0], [&](DeviceTensorND& s) { host_spl0.copy_from(s); }},
             {spl[1], [&](DeviceTensorND& s) { host_spl1.copy_from(s); }},
             {grad, [&](DeviceTensorND& s) { host_grad.copy_from(s); }}});
    func->execute();
    auto o0 = host_spl0.sync().ptr<float>(), o1 = host_spl1.sync().ptr<float>(),
         c = host_opr0->ptr<float>(), g = host_grad.sync().ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
        auto ch = i % C;
        auto n = i / C;
        if (ch < C1) {
            MGB_ASSERT_FLOAT_EQ(o0[n * C1 + ch], c[i]) << ssprintf("failed at %zd", i);
        } else {
            MGB_ASSERT_FLOAT_EQ(o1[n * C2 + ch - C1], c[i])
                    << ssprintf("failed at %zd", i);
        }
        MGB_ASSERT_FLOAT_EQ(c[i] * 2, g[i]) << ssprintf("grad failed at %zd", i);
    }
}
TEST(TestTensorManip, SplitWithNegativeAxis) {
    auto cns = load_multiple_xpus(3);
    constexpr size_t C1 = 20, C2 = 30;
    constexpr size_t N = 2, C = C1 + C2;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({N, C}, cns[0]);
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"});
    auto spl = opr::Split::make(
            opr0, Split::Options::make_partition(opr0, -1, {C1, C2}),
            OperatorNodeConfig("split").comp_node_arr({cns[1], cns[2]}));
    auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()),
         cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()),
         cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)),
         cost = opr::Copy::make(
                 cost0 + cost1, OperatorNodeConfig().follow_comp_node(opr0)),
         grad = cg::grad(cost, opr0);
    HostTensorND host_spl0, host_spl1, host_grad;
    auto func = graph->compile(
            {{spl[0], [&](DeviceTensorND& s) { host_spl0.copy_from(s); }},
             {spl[1], [&](DeviceTensorND& s) { host_spl1.copy_from(s); }},
             {grad, [&](DeviceTensorND& s) { host_grad.copy_from(s); }}});
    func->execute();
    auto o0 = host_spl0.sync().ptr<float>(), o1 = host_spl1.sync().ptr<float>(),
         c = host_opr0->ptr<float>(), g = host_grad.sync().ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
        auto ch = i % C;
        auto n = i / C;
        if (ch < C1) {
            MGB_ASSERT_FLOAT_EQ(o0[n * C1 + ch], c[i]) << ssprintf("failed at %zd", i);
        } else {
            MGB_ASSERT_FLOAT_EQ(o1[n * C2 + ch - C1], c[i])
                    << ssprintf("failed at %zd", i);
        }
        MGB_ASSERT_FLOAT_EQ(c[i] * 2, g[i]) << ssprintf("grad failed at %zd", i);
    }
}
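
// The second partition size goes through MarkDynamicVar, so the output
// shapes can only be determined at runtime.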
TEST(TestTensorManip, SplitToDynOutShape) {
    using Checker = AutoOprChecker<1, 2>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto x = inputs[0];
        auto y = opr::Split::make(
                x, opr::Split::Options::make_partition(
                           0, {x.make_scalar(3),
                               opr::MarkDynamicVar::make(
                                       opr::GetVarShape::make(x, 0) -
                                       x.make_scalar(3))}));
        return {y[0], y[1]};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto sub = [&](size_t begin, Maybe<ptrdiff_t> end) {
            auto&& iv = inp[0];
            return iv->sub(Slice(begin, end, None).apply(iv->layout(), 0));
        };
        dest[0].copy_from(sub(0, 3));
        dest[1].copy_from(sub(3, None));
    };
    Checker{make_graph, fwd}
            .run({TensorShape{5}})
            .run({TensorShape{8}})
            .run({TensorShape{9, 3}});
}
TEST(TestTensorManip, SplitToDynOutStorage) {
    using Checker = AutoOprChecker<1, 2>;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto x = inputs[0];
        auto y = opr::Split::make(
                x, opr::Split::Options::make_partition(
                           0, {x.make_scalar(3),
                               opr::GetVarShape::make(x, 0) - x.make_scalar(3)}));
        auto y0 = opr::Copy::make(y[0], x.node()->comp_node().change_stream(1));
        y0 = opr::Copy::make(y0, x.node()->comp_node());
        return {y0, y[1]};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto sub = [&](size_t begin, Maybe<ptrdiff_t> end) {
            auto&& iv = inp[0];
            return iv->sub(Slice(begin, end, None).apply(iv->layout(), 0));
        };
        dest[0].copy_from(sub(0, 3));
        dest[1].copy_from(sub(3, None));
    };
    Checker{make_graph, fwd}
            .run({TensorShape{5}})
            .run({TensorShape{8}})
            .run({TensorShape{9, 3}});
}
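
// Shared driver for the DynamicSplit tests below: splits a runtime-sized
// subtensor in half via a size callback, optionally placing the outputs on
// multiple comp nodes and/or forcing the input to be dynamic, then checks
// memory forwarding, output values and gradients over several slice ranges.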
namespace {
void do_test_dynamic_split(bool multiple_cn, bool force_dynamic) {
    auto cns = load_multiple_xpus(3);
    constexpr size_t N = 2, C = 51;
    HostTensorGenerator<> gen;
    auto host_x = gen({N, C}, cns[0]), host_sub_begin = gen({1}, cns[0]),
         host_sub_end = gen({1}, cns[0]);
    host_sub_begin->ptr<float>()[0] = 0;
    host_sub_end->ptr<float>()[0] = 2;
    auto graph = ComputingGraph::make();
    SymbolVar x = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x, {"x"}),
              sub_begin = opr::Host2DeviceCopy::make_no_fwd(
                      *graph, host_sub_begin, {"sub_begin"}),
              sub_end = opr::Host2DeviceCopy::make_no_fwd(
                      *graph, host_sub_end, {"sub_end"}),
              xsub = opr::Subtensor::make(
                             x, {opr::Subtensor::AxisIndexer::make_interval(
                                        1, sub_begin, sub_end, None)})
                             .rename("xsub");
    OperatorNodeConfig split_config("split");
    if (multiple_cn) {
        split_config.comp_node_arr({cns[1], cns[2]});
    }
    if (force_dynamic)
        xsub = opr::MarkDynamicVar::make(xsub);
    auto spl = opr::Split::make(
            xsub,
            Split::Options::make_callback(
                    1, 2,
                    [](size_t s) {
                        return std::vector<size_t>{s / 2, s - s / 2};
                    }),
            split_config);
    if (multiple_cn) {
        spl[0] = opr::Sleep::make(spl[0], 0.1);
        spl[1] = opr::Sleep::make(spl[1], 0.2);
    }
    auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()),
         cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()),
         cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)),
         cost = opr::Copy::make(
                        cost0 + cost1, OperatorNodeConfig().follow_comp_node(x)) *
                0.5f,
         grad = cg::grad(cost, x);
    HostTensorND host_spl0, host_spl1, host_grad;
    auto func = graph->compile(
            {make_callback_copy(spl[0], host_spl0),
             make_callback_copy(spl[1], host_spl1),
             make_callback_copy(grad, host_grad)});
    if (force_dynamic)
        ASSERT_TRUE(!cg::is_static_var_shape(spl[0].node()));
    else {
        auto cb = [](cg::OperatorNodeBase* op) {
            for (auto i : op->output()) {
                mgb_assert(
                        cg::is_static_var_shape(i), "dynamic var: %s",
                        cg::dump_var_info({i}).c_str());
            }
            return true;
        };
        func->iter_opr_seq(cb);
    }
    bool failed = false, fwd_checked = false;
    auto run_and_check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        host_sub_begin->ptr<float>()[0] = begin;
        host_sub_end->ptr<float>()[0] = end;
        func->execute();
        auto mid = begin + (end - begin) / 2;
        auto inp = host_x->ptr<float>(), grad = host_grad.ptr<float>();
        ASSERT_EQ(host_spl0.shape(), TensorShape({N, mid - begin}));
        ASSERT_EQ(host_spl1.shape(), TensorShape({N, end - mid}));
        if (!force_dynamic && !multiple_cn && !begin && mid - begin == 1) {
            // check mem fwd for spl[0]
            // do not check for spl[1] since flatten() causes copy
            ASSERT_EQ(
                    prev_dev_ptr(spl[0]),
                    static_cast<const dt_float32*>(prev_dev_ptr(x)));
            fwd_checked = true;
        }
        for (size_t i = 0, it = host_x->layout().total_nr_elems(); i < it; ++i) {
            auto ch = i % C;
            auto n = i / C;
            float expect_grad;
            if (ch >= begin && ch < mid) {
                MGB_ASSERT_FLOAT_EQ(inp[i], *host_spl0.ptr<float>({n, ch - begin}))
                        << ssprintf(
                                   "failed at (%zu, %zu), sub=x[:, %zu:%zu]", i, ch,
                                   begin, end);
                expect_grad = inp[i];
            } else if (ch >= mid && ch < end) {
                MGB_ASSERT_FLOAT_EQ(inp[i], *host_spl1.ptr<float>({n, ch - mid}))
                        << ssprintf(
                                   "failed at (%zu, %zu), sub=x[:, %zu:%zu]", i, ch,
                                   begin, end);
                expect_grad = inp[i];
            } else {
                expect_grad = 0;
            }
            MGB_ASSERT_FLOAT_EQ(expect_grad, grad[i]) << ssprintf(
                    "grad failed at (%zu, %zu), sub=x[:, %zu:%zu]", n, ch, begin, end);
        }
        failed = false;
    };
    run_and_check(0, 3);
    run_and_check(2, 8);
    run_and_check(5, 12);
    run_and_check(1, C - 1);
    run_and_check(0, C);
    run_and_check(C - 2, C);
    run_and_check(0, 2);
    if (!multiple_cn && !force_dynamic) {
        ASSERT_TRUE(fwd_checked);
    }
}
} // namespace
TEST(TestTensorManip, DynamicSplit00) {
    do_test_dynamic_split(false, false);
}

TEST(TestTensorManip, DynamicSplit01) {
    do_test_dynamic_split(false, true);
}

TEST(TestTensorManip, DynamicSplit10) {
    do_test_dynamic_split(true, false);
}

TEST(TestTensorManip, DynamicSplit11) {
    do_test_dynamic_split(true, true);
}
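
// Reshape of a dynamic-storage var yields a statically shaped (but
// dynamically stored) var; Split should still work on it.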
TEST(TestTensorManip, SplitFromDynStorage) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_x))
                     .reshape({4});
    ASSERT_TRUE(cg::is_static_var_shape(x.node()));
    auto y = opr::Split::make(x, opr::Split::Options::make_partition(x, 0, {1, 3}));
    HostTensorND y0, y1;
    auto func = graph->compile(
            {make_callback_copy(y[0], y0), make_callback_copy(y[1], y1)});
    func->execute();
    ASSERT_FALSE(cg::is_static_var_storage(x.node()));
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({1}).ptr<float>()[0] = host_x->ptr<float>()[0];
    MGB_ASSERT_TENSOR_EQ(expt, y0);
    expt.resize({3});
    for (int i = 0; i < 3; ++i)
        expt.ptr<float>()[i] = host_x->ptr<float>()[i + 1];
    MGB_ASSERT_TENSOR_EQ(expt, y1);
}
TEST(TestTensorManip, SplitPreAllocatedMultiCN) {
    auto cns = load_multiple_xpus(3);
    HostTensorGenerator<> gen;
    auto host_x = gen({3}, cns[0]);
    auto dev_x = std::make_shared<DeviceTensorND>();
    dev_x->copy_from(*host_x).sync();
    auto graph = cg::ComputingGraph::make();
    auto x = opr::SharedDeviceTensor::make(*graph, dev_x);
    auto ys = opr::Split::make(
            x, opr::Split::Options::make_average(0, 3),
            OperatorNodeConfig{}.comp_node_arr({cns.begin(), cns.end()}));
    ASSERT_EQ(3u, ys.size());
    HostTensorND y0, y1, y2;
    auto func = graph->compile(
            {make_callback_copy(ys[0], y0),
             make_callback_copy(opr::Copy::make(ys[1], {cns[0]}), y1),
             make_callback_copy(ys[2], y2)});
    func->execute();
    ASSERT_TRUE(cg::is_static_var_storage(ys[0].node()));
    ASSERT_FALSE(cg::is_static_var_storage(ys[1].node()));
    ASSERT_EQ(x.node()->prev_dev_ptr(), ys[0].node()->prev_dev_ptr());
    ASSERT_EQ(host_x->ptr<float>()[0], y0.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[1], y1.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[2], y2.ptr<float>()[0]);
}
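
// Splitting a subtensor that starts at offset 1 of the original buffer:
// the outputs presumably cannot use fully static storage, but the last
// chunk should still be memory-forwarded from the (offset) input buffer.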
TEST(TestTensorManip, SplitMemfwdMultipleTimesWithOffset) {
    auto cns = load_multiple_xpus(2);
    HostTensorGenerator<> gen;
    auto host_x = gen({4}, cns[0]);
    auto graph = cg::ComputingGraph::make();
    auto x0 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x),
         x = opr::Subtensor::make(
                 x0, {opr::Subtensor::AxisIndexer::make_interval(
                             0, x0.make_scalar(1), None, None)});
    auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
    ASSERT_EQ(3u, ys.size());
    HostTensorND y0, y1, y2;
    auto func = graph->compile(
            {make_callback_copy(ys[0], y0),
             make_callback_copy(opr::Copy::make(ys[1], {cns[1]}), y1),
             make_callback_copy(ys[2], y2)});
    func->execute();
    ASSERT_FALSE(cg::is_static_var_storage(ys[0].node()));
    ASSERT_TRUE(cg::is_static_var_shape(ys[0].node()));
    ASSERT_FALSE(cg::is_static_var_storage(ys[1].node()));
    ASSERT_EQ(host_x->ptr<float>()[1], y0.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[2], y1.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[3], y2.ptr<float>()[0]);
    ASSERT_EQ(static_cast<const float*>(prev_dev_ptr(x0)) + 3, prev_dev_ptr(ys[2]));
}
TEST(TestTensorManip, SplitValueInfer) {
    auto cns = load_multiple_xpus(3);
    HostTensorGenerator<> gen;
    auto host_x = gen({3});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::ImmutableTensor::make(*graph, *host_x);
    auto ys = opr::Split::make(
            x, opr::Split::Options::make_average(0, 3),
            OperatorNodeConfig{}.comp_node_arr({cns.begin(), cns.end()}));
    for (size_t i = 0; i < 3; ++i) {
        // split itself does not replace imm vars; use +0 to trigger optimizer
        auto var = (ys[i] + 0).node();
        ASSERT_TRUE(var->owner_opr()->same_type<opr::ImmutableTensor>());
        ASSERT_EQ(cns[i], var->comp_node());
        HostTensorND hv;
        hv.copy_from(var->owner_graph()->static_infer_manager().infer_value(var));
        ASSERT_EQ(TensorShape{1}, hv.shape());
        ASSERT_EQ(host_x->ptr<float>()[i], hv.ptr<float>()[0]);
    }
}
TEST(TestTensorManip, SplitZeroGrad) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 2});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
    auto loss = opr::reduce_sum(ys[2] * ys[2], x.make_scalar(1)),
         gx = cg::grad(loss, x);
    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();
    auto px = host_x->ptr<float>(), pgx = host_gx.ptr<float>();
    for (int i = 0; i < 2; ++i) {
        MGB_ASSERT_FLOAT_EQ(0.f, pgx[i]);
        MGB_ASSERT_FLOAT_EQ(0.f, pgx[2 + i]);
        MGB_ASSERT_FLOAT_EQ(px[4 + i] * 2, pgx[4 + i]);
    }
}
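
// fill_retain_dtype on a dynamic var: the output shape follows the input
// at runtime and every element equals the fill value.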
TEST(TestTensorManip, DynamicFill) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_x)),
         y = x.fill_retain_dtype(23);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    bool failed = false;
    auto check = [&](const TensorShape& ishp) {
        ASSERT_FALSE(failed);
        failed = true;
        host_x->resize(ishp);
        func->execute();
        ASSERT_EQ(ishp, host_y.shape());
        auto ptr = host_y.ptr<float>();
        for (size_t i = 0, it = host_y.shape().total_nr_elems(); i < it; ++i)
            ASSERT_EQ(23, ptr[i]);
        failed = false;
    };
    check({4, 2});
    check({2, 4});
    check({23});
}
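
// Builds PH x PW sum-pooling out of Subtensor/SetSubtensor: each of the
// PH*PW phase-shifted strided views of x is written into one slice of a
// temporary, which is then reduced over axis 0. Exercised both with fully
// static vars and with a mix of dynamically marked ones.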
TEST(TestTensorManip, Pooling2DBySetSub) {
    constexpr int PH = 4, PW = 3;
    using Checker = AutoOprChecker<1, 1>;
    bool run_dyn = false;
    auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto x = inputs.at(0);
        if (run_dyn)
            x = opr::MarkDynamicVar::make(x);
        x.rename("x");
        auto cv = [&](int v, bool dyn = false) {
            auto rst = x.make_scalar(v);
            if (dyn)
                rst = opr::MarkDynamicVar::make(rst);
            return rst;
        };
        auto oh = (opr::GetVarShape::make(x, 0) / PH).rename("oh"),
             ow = (opr::GetVarShape::make(x, 1) / PW).rename("ow"),
             y_tmp_shape = opr::Concat::make({cv(PH * PW), oh, ow}, 0),
             y_tmp = opr::Alloc::make(y_tmp_shape, dtype::Float32());
        if (!run_dyn)
            mgb_assert(cg::is_static_var_storage(y_tmp.node()));
        using Ad = opr::Subtensor::AxisIndexer;
        for (size_t i = 0, num = 0; i < (size_t)PH; ++i) {
            for (size_t j = 0; j < (size_t)PW; ++j) {
                bool dyn = run_dyn && num % 2;
                auto xsub = opr::Subtensor::make(
                                    x, {Ad::make_interval(0, cv(i, dyn), None, cv(PH)),
                                        Ad::make_interval(1, cv(j), None, cv(PW))})
                                    .rename(ssprintf("sub(%zu, %zu)", i, j));
                y_tmp = opr::SetSubtensor::make(
                                y_tmp, xsub, {Ad::make_index(0, cv(num, dyn))})
                                .rename(ssprintf("y(%zu, %zu)", i, j));
                if (!run_dyn) {
                    mgb_assert(cg::is_static_var_storage(xsub.node()));
                    mgb_assert(cg::is_static_var_storage(y_tmp.node()));
                } else if (dyn)
                    y_tmp = opr::MarkDynamicVar::make(y_tmp);
                ++num;
            }
        }
        auto y = opr::Reduce::make(y_tmp, {opr::Reduce::Mode::SUM, 0});
        y = opr::AxisAddRemove::make(y, {opr::AxisAddRemove::AxisDesc::make_remove(0)});
        if (!run_dyn)
            mgb_assert(cg::is_static_var_storage(y.node()));
        return {y};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto&& ishp = inp.at(0)->shape();
        auto oshp = ishp;
        mgb_assert(oshp.shape[0] % PH == 0);
        mgb_assert(oshp.shape[1] % PW == 0);
        oshp.shape[0] /= PH;
        oshp.shape[1] /= PW;
        auto optr = dest.at(0).comp_node(inp[0]->comp_node()).resize(oshp).ptr<float>();
        auto&& iv = *inp.at(0);
        for (size_t i = 0; i < oshp.shape[0]; ++i)
            for (size_t j = 0; j < oshp.shape[1]; ++j) {
                auto ii = i * PH, ij = j * PW;
                float sum = 0;
                for (size_t di = 0; di < PH; ++di)
                    for (size_t dj = 0; dj < PW; ++dj) {
                        sum += *iv.ptr<float>({ii + di, ij + dj});
                    }
                *(optr++) = sum;
            }
    };
    auto run = [&](bool dyn) {
        run_dyn = dyn;
        Checker(make_graph, fwd)
                .run({TensorShape{PH * 1, PW * 2}})
                .run({TensorShape{PH * 4, PW * 3}})
                .run({TensorShape{PH * 2, PW * 2}});
    };
    run(false);
    run(true);
}
TEST(TestTensorManip, Flatten) {
    HostTensorGenerator<> gen;
    auto host_x = gen({20});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"), y = x.flatten();
    y = y + x.reshape(y.symshape());
    ASSERT_EQ(TensorShape{20}, y.node()->shape());
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    for (auto&& ishp : {TensorShape{2, 5}, TensorShape{6, 8, 1}, TensorShape{3}}) {
        *host_x = *gen(ishp);
        func->execute();
        auto expected = host_x->sub(SubTensorSpec::make_from_layout(
                {{ishp.total_nr_elems()}, host_x->dtype()}));
        auto ptr = expected.ptr<float>();
        for (size_t i = 0; i < expected.shape()[0]; ++i)
            ptr[i] *= 2;
        MGB_ASSERT_TENSOR_EQ(expected, host_y);
    }
}
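
// fill_retain_dtype results should be deduplicated by value: equal fill
// constants (whether given as int or float) must map to the same var node.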
TEST(TestTensorManip, FillWithDtypeDedup) {
    HostTensorGenerator<> gen;
    auto host_x = gen({20});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    SymbolVar vals[] = {
            x.fill_retain_dtype(0),   x.fill_retain_dtype(1),
            x.fill_retain_dtype(0),   x.fill_retain_dtype(1),
            x.fill_retain_dtype(0.f), x.fill_retain_dtype(1.f),
            x.fill_retain_dtype(0.f), x.fill_retain_dtype(1.f),
    };
    for (int i : {0, 1})
        for (int j = 2; j < 8; j += 2)
            ASSERT_EQ(vals[i].node(), vals[i + j].node()) << i << ' ' << i + j;
    ASSERT_NE(vals[0].node(), vals[1].node());
}
TEST(TestTensorManip, StrongContig) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Dimshuffle::make(x, {1, 0});
    auto cb = [](DeviceTensorND& dv) {
        TensorLayout expect{{1, 5}, dv.dtype()};
        ASSERT_EQ(expect, dv.layout());
    };
    auto func = graph->compile({{y, cb}});
    func->execute();
}
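
// Helpers shared by the TestParamPack cases below: they rebuild reference
// offsets with what appears to be the same alignment rule used by
// megdnn::ParamPackConcat::gen_offsets, i.e. each packed tensor starts at
// the next aligned element offset.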
namespace {
void test_param_pack_concat(const TensorShapeArray& shapes, DType type) {
    auto cn = CompNode::load("xpu0");
    auto graph = ComputingGraph::make();
    auto align = cn.get_mem_addr_alignment() / type.size();
    size_t size = 0;
    std::vector<size_t> begins;
    for (auto&& shape : shapes) {
        // each tensor starts at the next aligned element offset
        size = get_aligned_power2(size, align);
        begins.push_back(size);
        size += shape.total_nr_elems();
    }
    SmallVector<SymbolVar> srcs;
    for (size_t i = 0; i < shapes.size(); i++) {
        auto data = std::make_shared<HostTensorND>();
        data->comp_node(cn).dtype(dtype::Int32()).resize(shapes[i]);
        auto ptr = data->ptr<dt_int32>();
        for (size_t j = 0; j < shapes[i].total_nr_elems(); j++) {
            ptr[j] = j;
        }
        auto nd = opr::Host2DeviceCopy::make(*graph, data);
        srcs.push_back(nd);
    }
    auto host_offsets_gen = megdnn::ParamPackConcat::gen_offsets(
            shapes, cn.get_mem_addr_alignment(), 4);
    ASSERT_EQ(host_offsets_gen.back(), size);
    auto host_offsets = std::make_shared<HostTensorND>();
    host_offsets->comp_node(cn).dtype(dtype::Int32{}).resize({srcs.size() * 2});
    // two int32 offsets per source tensor, i.e. 8 bytes each
    memcpy(host_offsets->raw_ptr(), host_offsets_gen.data(), srcs.size() * 8);
    auto offsets = opr::Host2DeviceCopy::make(*graph, host_offsets);
    auto z = opr::ParamPackConcat::make(srcs, offsets, host_offsets_gen);
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    HostTensorND expected;
    expected.comp_node(cn).dtype(dtype::Int32()).resize({size});
    {
        auto ptr = expected.ptr<dt_int32>();
        memset(ptr, 0, sizeof(int32_t) * size);
        for (size_t i = 0; i < begins.size(); i++) {
            auto begin = begins[i];
            auto shape = shapes[i];
            for (size_t j = 0; j < shape.total_nr_elems(); j++) {
                ptr[begin + j] = j;
            }
        }
    }
    MGB_ASSERT_TENSOR_EQ(expected, host_z);
}
template <size_t nr_out>
void test_param_pack_split(const TensorShapeArray& shapes) {
    auto cn = CompNode::load("xpu0");
    auto align = std::max<size_t>(cn.get_mem_addr_alignment() / 4, 1);
    size_t concat_size = 0;
    mgb_assert(shapes.size() == nr_out);
    for (auto&& i : shapes) {
        concat_size = get_aligned_power2(concat_size, align) + i.total_nr_elems();
    }
    using Checker = AutoOprChecker<1, nr_out>;
    auto make_graph = [&](const typename Checker::SymInpArray& inputs) ->
            typename Checker::SymOutArray {
        auto offsets_val = megdnn::ParamPackConcat::gen_offsets(
                shapes, cn.get_mem_addr_alignment(), 4);
        HostTensorND offsets;
        std::copy_n(
                offsets_val.data(), offsets_val.size(),
                offsets.dtype(dtype::Int32{})
                        .comp_node(cn)
                        .resize({offsets_val.size()})
                        .ptr<dt_int32>());
        auto out = opr::ParamPackSplit::make(inputs[0], offsets_val, shapes);
        mgb_assert(out.size() == nr_out);
        typename Checker::SymOutArray ret;
        for (size_t i = 0; i < nr_out; ++i) {
            ret[i] = out[i];
        }
        return ret;
    };
    auto fwd = [&](typename Checker::NumOutArray& dest,
                   typename Checker::NumInpArray inp) {
        size_t offset = 0;
        auto ptr = inp[0]->template ptr<float>();
        for (size_t i = 0; i < nr_out; ++i) {
            dest[i].resize(shapes[i]);
            offset = get_aligned_power2(offset, align);
            auto nr_elem = shapes[i].total_nr_elems();
            memcpy(dest[i].template ptr<float>(), ptr + offset, nr_elem * 4);
            offset += nr_elem;
        }
    };
    Checker{make_graph, fwd}
            .run({TensorShape{concat_size}})
            .run({TensorShape{concat_size}})
            .run({TensorShape{concat_size}});
}
} // anonymous namespace
TEST(TestParamPack, Concat) {
    TensorShapeArray array = {{129}, {21}};
    test_param_pack_concat(array, dtype::Int32());
    array = {{23}, {32}, {75}, {45}};
    test_param_pack_concat(array, dtype::Int32());
    array = {{129}, {512}, {513}, {27}};
    test_param_pack_concat(array, dtype::Int32());
}

TEST(TestParamPack, Split) {
    test_param_pack_split<2>({{2, 3}, {4, 5, 6}});
    test_param_pack_split<3>({{2, 9}, {123}, {5, 3}});
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}