/**
 * \file src/opr/test/tensor_manip.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/misc.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/utility.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megbrain/utils/arith_helper.h"

using namespace mgb;
using namespace opr;

TEST(TestTensorManip, GetVarShape) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 1}), host_y = gen({1, 2});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z0 = opr::GetVarShape::make({x, y, x.make_scalar(5)}),
         z1 = opr::GetVarShape::make({x, y}, 1);

    // ensure scalar is removed
    ASSERT_EQ(2u, z0.node()->owner_opr()->input().size());
    constexpr auto tdt = cg::OperatorNodeBase::NodeProp::DepType::SHAPE;
    auto&& dt = z0.node()->owner_opr()->node_prop().dep_map();
    ASSERT_EQ(2u, dt.size());
    ASSERT_EQ(tdt, dt.at(x.node()));
    ASSERT_EQ(tdt, dt.at(y.node()));

    auto as_shp = [](const HostTensorND& hv) {
        mgb_assert(hv.dtype() == dtype::Int32());
        mgb_assert(hv.shape().ndim == 1);
        TensorShape ret;
        ret.ndim = hv.shape()[0];
        auto p = hv.ptr<int>();
        for (size_t i = 0; i < ret.ndim; ++i)
            ret[i] = p[i];
        return ret;
    };

    HostTensorND host_z0, host_z1;
    auto func = graph->compile(
            {make_callback_copy(z0, host_z0), make_callback_copy(z1, host_z1)});
    func->execute();
    ASSERT_EQ(TensorShape({3, 2}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({2}), as_shp(host_z1));

    *host_x = *gen({5, 1, 6});
    *host_y = *gen({1, 8, 1});
    func->execute();
    ASSERT_EQ(TensorShape({5, 8, 6}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({8}), as_shp(host_z1));
}

TEST(TestTensorManip, GetVarShapeBypass) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, gen({3, 2})),
         t = opr::Host2DeviceCopy::make(*graph, gen({2, 3})),
         tshp = opr::GetVarShape::make(t),
         y = opr::GetVarShape::make(opr::Reshape::make(x, tshp));
    ASSERT_EQ(tshp, y);
}

TEST(TestTensorManip, GetVarShapeNegativeAxis) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1, 3}), host_y = gen({2, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z0 = opr::GetVarShape::make({x, y}, -1),
         z1 = opr::GetVarShape::make({x, y}, -2);

    // ensure scalar is removed
    ASSERT_EQ(2u, z0.node()->owner_opr()->input().size());
    constexpr auto tdt = cg::OperatorNodeBase::NodeProp::DepType::SHAPE;
    auto&& dt = z0.node()->owner_opr()->node_prop().dep_map();
    ASSERT_EQ(2u, dt.size());
    ASSERT_EQ(tdt, dt.at(x.node()));
    ASSERT_EQ(tdt, dt.at(y.node()));

    auto as_shp = [](const HostTensorND& hv) {
        mgb_assert(hv.dtype() == dtype::Int32());
        mgb_assert(hv.shape().ndim == 1);
        TensorShape ret;
        ret.ndim = hv.shape()[0];
        auto p = hv.ptr<int>();
        for (size_t i = 0; i < ret.ndim; ++i)
            ret[i] = p[i];
        return ret;
    };

    HostTensorND host_z0, host_z1;
    auto func = graph->compile(
            {make_callback_copy(z0, host_z0), make_callback_copy(z1, host_z1)});
    func->execute();
    ASSERT_EQ(TensorShape({3}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({2}), as_shp(host_z1));

    *host_x = *gen({5, 1, 6});
    *host_y = *gen({1, 8, 1});
    func->execute();
    ASSERT_EQ(TensorShape({6}), as_shp(host_z0));
    ASSERT_EQ(TensorShape({8}), as_shp(host_z1));
}

TEST(TestTensorManip, Reshape) {
    constexpr size_t N = 123, C = 456;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({N * C}), host_opr1 = gen({N, C});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"}),
              opr1 = opr::Host2DeviceCopy::make(*graph, host_opr1, {"opr1"}),
              opr0_reshp = opr::Reshape::make(opr0, opr::GetVarShape::make(opr1)),
              sum = opr::add(opr0_reshp, opr1);

    {
        // check dep type
        auto op = opr0_reshp.node()->owner_opr();
        auto&& dep_map = opr0_reshp.node()->owner_opr()->node_prop().dep_map();
        using DT = cg::OperatorNodeBase::NodeProp::DepType;
        ASSERT_EQ(2u, dep_map.size());
        ASSERT_EQ(DT::DEV_VALUE | DT::VALUE_ALLOW_EMPTY, dep_map.at(op->input(0)));
        ASSERT_EQ(DT::HOST_VALUE, dep_map.at(op->input(1)));
    }

    HostTensorND host_sum;
    auto func = graph->compile({make_callback_copy(sum, host_sum)});
    func->execute();
    ASSERT_TRUE(cg::is_static_var_storage(opr0_reshp.node()));
    ASSERT_FALSE(host_sum.layout().eq_layout(host_opr0->layout()));
    ASSERT_TRUE(host_sum.layout().eq_layout(host_opr1->layout()));
    ASSERT_EQ(dev_ptr(opr0), dev_ptr(opr0_reshp));
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) {
        MGB_ASSERT_FLOAT_EQ(o0[i] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zd", o0[i], o1[i], i);
    }
}

TEST(TestTensorManip, ReshapeNoncontigValueInfer) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::ImmutableTensor::make(*graph, *host_x), y = x.broadcast({2, 2}),
         z = opr::Reshape::make(y, {1, 0}, 1);
    auto&& mgr = graph->static_infer_manager();
    ASSERT_EQ(cg::static_infer::InferType::CONST, mgr.get_infer_type(z.node()).value);
    auto zv = mgr.infer_value(z.node());
    auto xp = host_x->ptr<float>(), zp = zv.ptr<float>();
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j) {
            ASSERT_EQ(xp[i], zp[i * 2 + j]);
        }
    }
    ASSERT_THROW(opr::Reshape::make(y, {3, 0}, 1), TensorReshapeError);
    ASSERT_THROW(opr::Reshape::make(y, {3, 2}), TensorReshapeError);
}

TEST(TestTensorManip, ReshapeSameShapeBypass) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x), x1 = x.reshape({6}),
         x2 = x1.reshape({6}), x3 = x.reshape(opr::GetVarShape::make(x));
    ASSERT_EQ(x1.node(), x2.node());
    ASSERT_EQ(x.node(), x3.node());
    ASSERT_NE(x.node(), x1.node());
}

TEST(TestTensorManip, ReshapeAndInplace) {
    constexpr size_t C = 456;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({C}), host_opr1 = gen({C / 2, 2});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_opr0),
              opr1 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_opr1),
              reshape = opr::Reshape::make(opr0, TensorShape{C / 2, 2}),
              sum = reshape + opr1;
    opr1.node()->add_flag(cg::VarNode::Flag::NO_MEM_RECLAIM);
    HostTensorND host_sum(CompNode::load("xpu0"));
    auto func = graph->compile({make_callback_copy(sum, host_sum)});
    func->execute();
    ASSERT_EQ(dev_ptr(reshape), dev_ptr(sum));
    // assert contiguous layout
    ASSERT_EQ(host_opr1->layout(), host_sum.layout());
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.sync().ptr<float>();
    for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; ++i) {
        MGB_ASSERT_FLOAT_EQ(o0[i] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zd", o0[i], o1[i], i);
    }
}

TEST(TestTensorManip, DynamicReshape) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 4}),
         host_tshp =
                 std::make_shared<HostTensorND>(host_x->comp_node(), dtype::Int32());
    host_tshp->resize({1}).ptr<int>()[0] = 12;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
         x_rshp_shp = opr::MarkDynamicVar::make(
                 opr::Host2DeviceCopy::make(*graph, host_tshp).rename("x_rshp_shp")),
         x_rshp = opr::Reshape::make(x, x_rshp_shp).rename("x_rshp"),
         x_flat = x_rshp.flatten(),
         gx = cg::grad(opr::Dot::make(x_flat, x_flat).rename("loss"), x).rename("gx");
    ASSERT_FALSE(cg::is_static_var_shape(x_rshp.node()));
    ASSERT_TRUE(cg::is_static_var_shape(gx.node()));
    ASSERT_EQ(host_x->shape(), gx.node()->shape());
    HostTensorND host_rshp, host_gx;
    auto func = graph->compile(
            {make_callback_copy(x_rshp, host_rshp), make_callback_copy(gx, host_gx)});

    auto check = [&](const TensorShape& ishp, const TensorShape& tshp) {
        host_x->copy_from(*gen(ishp));
        {
            DeviceTensorND tmp;
            cg::copy_shape_to_tensor_value(tmp, tshp);
            host_tshp->copy_from(tmp);
        }
        func->execute();
        ASSERT_EQ(tshp, host_rshp.shape());
        ASSERT_EQ(host_x->shape(), host_gx.shape());
        for (size_t i = 0, it = host_x->shape().total_nr_elems(); i < it; ++i)
            MGB_ASSERT_FLOAT_EQ(host_x->ptr<float>()[i] * 2, host_gx.ptr<float>()[i]);
    };

    check({3, 4}, {12});
    check({5, 3}, {15});
    check({3, 4, 35}, {21, 20});
}

TEST(TestTensorManip, ReshapeWithUnspec) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 8});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Reshape::make(x, {1, 8}, 0);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    for (size_t ishp : {1, 5, 6}) {
        host_x->copy_from(*gen({ishp * 8}));
        func->execute();
        TensorShape expect_shape({ishp, 8});
        ASSERT_EQ(expect_shape, host_y.shape());
        MGB_ASSERT_TENSOR_EQ(
                host_x->sub(SubTensorSpec::make_from_layout(
                        host_x->layout().reshape(expect_shape))),
                host_y);
    }
}

TEST(TestTensorManip, ReshapeInferShapeForDynamicInput) {
    constexpr size_t N0 = 2, C0 = 3;
    HostTensorGenerator<> gen;
    auto host_x = gen({N0, C0}), host_tshp = gen({1});
    auto graph = ComputingGraph::make();
    host_tshp->ptr<float>()[0] = N0 * C0;
    SymbolVar x = opr::Host2DeviceCopy::make(*graph, host_x),
              xd = opr::MarkDynamicVar::make(x),
              tshp = opr::Host2DeviceCopy::make(*graph, host_tshp),
              y0 = opr::Reshape::make(xd, tshp) + 1,
              y1 = opr::Reshape::make(xd, opr::GetVarShape::make(x)) + 2;
    ASSERT_EQ(y0.shape(), TensorShape({N0 * C0}));
    ASSERT_EQ(y1.shape(), TensorShape({N0, C0}));
    HostTensorND host_y0, host_y1;
    auto func = graph->compile(
            {make_callback_copy(y0, host_y0), make_callback_copy(y1, host_y1)});
    auto run = [&](const TensorShape& ishp) {
        auto tot = ishp.total_nr_elems();
        host_x->copy_from(*gen(ishp));
        host_tshp->ptr<float>()[0] = tot;
        func->execute();
        ASSERT_EQ(host_y0.shape(), TensorShape({tot}));
        ASSERT_EQ(host_y1.shape(), ishp);
        for (size_t i = 0; i < tot; ++i) {
            ASSERT_EQ(host_x->ptr<float>()[i] + 1, host_y0.ptr<float>()[i]);
            ASSERT_EQ(host_x->ptr<float>()[i] + 2, host_y1.ptr<float>()[i]);
        }
    };
    run({3, 2});
    run({23, 12, 5});
}

TEST(TestTensorManip, ReshapeEmptyShape) {
    HostTensorGenerator<> gen;
    constexpr size_t x_length = 233;
    auto host_x = gen({x_length}), host_v = gen({2, 3, 3, 3});
    for (size_t i = 0; i < x_length; ++i) {
        host_x->ptr<float>()[i] = 1.f;
    }
    constexpr auto INVALID_AXIS = opr::Reshape::Param::INVALID_AXIS;
    for (auto unspec_axis : {INVALID_AXIS, 0, 1, 3}) {
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        TensorShape tshape{2, 3, 3, 3};
        auto zero_axis = unspec_axis;
        if (unspec_axis == INVALID_AXIS) {
            tshape[zero_axis = 2] = 0;
        }
        using CondTakeMode = opr::CondTake::Param::Mode;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             x_empty = opr::CondTake::make(x, x, {CondTakeMode::EQ, 0.f})[0],
             v = opr::Host2DeviceCopy::make(*graph, host_v),
             x_reshape = opr::Reshape::make(x_empty, tshape, {unspec_axis}),
             y = opr::Concat::make({x_reshape, v}, zero_axis);
        HostTensorND host_empty, host_y;
        auto func = graph->compile(
                {make_callback_copy(x_reshape, host_empty),
                 make_callback_copy(y, host_y)});
        func->execute().wait();
        ASSERT_TRUE(host_empty.layout().is_empty());
        MGB_ASSERT_TENSOR_EQ(*host_v, host_y);
    }
}

TEST(TestTensorManip, ReshapeWithNegativeUnspec) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 8});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Reshape::make(x, {1, 8}, -2);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    for (size_t ishp : {1, 5, 6}) {
        host_x->copy_from(*gen({ishp * 8}));
        func->execute();
        TensorShape expect_shape({ishp, 8});
        ASSERT_EQ(expect_shape, host_y.shape());
        MGB_ASSERT_TENSOR_EQ(
                host_x->sub(SubTensorSpec::make_from_layout(
                        host_x->layout().reshape(expect_shape))),
                host_y);
    }
}

TEST(TestTensorManip, Broadcast) {
    constexpr size_t N = 20, C = 30;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({1, 1}), host_opr1 = gen({N, C});
    auto graph = ComputingGraph::make();
    SymbolVar opr0 = opr::Host2DeviceCopy::make(*graph, host_opr0, {"opr0"}),
              opr1 = opr::Host2DeviceCopy::make(*graph, host_opr1, {"opr1"}),
              sum = opr::add(opr::Broadcast::make(opr0, host_opr1->shape()), opr1);
    HostTensorND host_sum(CompNode::load("xpu0"));
    auto func =
            graph->compile({{sum, [&](DeviceTensorND& s) { host_sum.copy_from(s); }}});
    func->execute();
    ASSERT_TRUE(host_sum.layout().eq_layout(host_opr1->layout()));
    auto o0 = host_opr0->ptr<float>(), o1 = host_opr1->ptr<float>(),
         s = host_sum.sync().ptr<float>();
    for (size_t i = 0, it = host_opr1->layout().total_nr_elems(); i < it; i++) {
        MGB_ASSERT_FLOAT_EQ(o0[0] + o1[i], s[i])
                << ssprintf("failed opr0(%.5f)+opr1(%.5f) at %zd", o0[0], o1[i], i);
    }
}

TEST(TestTensorManip, BroadcastEmptyShape) {
    HostTensorGenerator<> gen;
    for (auto&& arg :
         {std::make_pair(TensorShape{1}, TensorShape{0}), {{1, 2, 3}, {0, 2, 3}},
          {{2, 3}, {1, 0, 2, 3}}, {{1, 0, 2, 3}, {4, 0, 2, 3}},
          {{0, 1, 2, 3}, {3, 0, 4, 2, 3}}}) {
        auto host_x = gen(arg.first);
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             y = opr::Broadcast::make(x, arg.second);
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_TRUE(host_y.shape().eq_shape(arg.second));
    }
}

TEST(TestTensorManip, Dimshuffle) {
    HostTensorGenerator<> gen;
    constexpr size_t S0 = 8, S1 = 3;
    auto host_x = gen({S0, S1}), host_prod = gen({S1, 1, S0, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
         prod = opr::Host2DeviceCopy::make(*graph, host_prod).rename("prod"),
         x_ds = opr::Dimshuffle::make(x, {1, -1, 0, -1}).rename("x_ds"),
         y = (x_ds * prod).reshape({S0 * S1}).rename("y"),
         loss = opr::Dot::make(y, y).rename("loss"),
         gx = cg::grad(loss, x).rename("gx");
    ASSERT_TRUE(cg::is_static_var_shape(gx.node()));
    ASSERT_EQ(host_x->shape(), gx.node()->shape());
    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();
    for (size_t i = 0; i < S0; i++)
        for (size_t j = 0; j < S1; j++) {
            float x = host_x->ptr<float>({i, j})[0],
                  prod = host_prod->ptr<float>({j, 0, i, 0})[0],
                  gx = host_gx.ptr<float>({i, j})[0];
            MGB_ASSERT_FLOAT_EQ(2 * prod * prod * x, gx) << ssprintf(
                    "failed at (%zd, %zd): x=%g prod=%g gx=%g", i, j, x, prod, gx);
        }
}

TEST(TestTensorManip, DimshuffleEmptyShape) {
    HostTensorGenerator<> gen;
    for (auto&& arg :
         {std::make_pair(TensorShape{3, 0}, std::vector<int>{1, -1, 0, -1}),
          {{3, 1, 0, 4}, {-1, 3, -1, 0, 2}}, {{2, 0, 3, 0}, {1, 0, 2, 3}}}) {
        auto host_x = gen(arg.first);
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             y = opr::Dimshuffle::make(x, arg.second);
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        auto&& y_shape = host_y.shape();
        for (size_t idx = 0; idx < arg.second.size(); ++idx) {
            auto elem = arg.second[idx];
            if (elem == -1) {
                ASSERT_EQ(y_shape[idx], 1u);
            } else {
                ASSERT_EQ(arg.first[elem], y_shape[idx]);
            }
        }
    }
}

TEST(TestTensorManip, DimshuffleCombined) {
    using Checker = AutoOprChecker<1, 1>;
    constexpr int RED0 = 2, RED1 = 3;
    for (bool dyn : {false, true}) {
        auto make_graph =
                [dyn](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
            auto x = inputs[0];
            if (dyn)
                x = opr::MarkDynamicVar::make(x);
            auto cv = [&](int v) {
                auto rst = x.make_scalar(v);
                if (dyn)
                    rst = opr::MarkDynamicVar::make(rst);
                return rst;
            };
            auto xshp = opr::GetVarShape::make(x);
            auto sub = [&](int idx) {
                return opr::IndexAt::make(xshp, {{0, cv(idx)}});
            };
            auto tshp0 = opr::Concat::make(
                         {sub(0), sub(1) / (RED0 * RED1), cv(RED0), cv(RED1),
                          sub(2), sub(3)},
                         0),
                 tshp1 = opr::Concat::make(
                         {sub(0), sub(1) / (RED0 * RED1), sub(2) * RED0,
                          sub(3) * RED1},
                         0);
            auto y0 = opr::Reshape::make(x, tshp0),
                 y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 2, 4, 5}),
                 y2 = opr::Reshape::make(y1, tshp1);
            return {y2.node()};
        };
        auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
            auto&& iv = *inp.at(0);
            auto&& ov = dest.at(0);
            auto ishp = iv.shape();
            auto oshp = ishp;
            oshp.shape[1] /= RED0 * RED1;
            oshp.shape[2] *= RED0;
            oshp.shape[3] *= RED1;
            ov.comp_node(iv.comp_node()).resize(oshp);
            size_t tmpshp[6] = {oshp.shape[0], oshp.shape[1], RED1,
                                RED0,          ishp.shape[2], ishp.shape[3]},
                   tmpidx[6];
            // decompose each output index in the dimshuffled 6-d shape, swap the
            // two reduced axes back, and recompose to get the input index
            for (size_t oidx = 0, oidxt = oshp.total_nr_elems(); oidx < oidxt;
                 ++oidx) {
                for (int i = 5, x = oidx; i >= 0; --i) {
                    tmpidx[i] = x % tmpshp[i];
                    x /= tmpshp[i];
                    mgb_assert(i || !x);
                }
                std::swap(tmpshp[2], tmpshp[3]);
                std::swap(tmpidx[2], tmpidx[3]);
                size_t iidx = 0;
                for (int i = 5, d = 1; i >= 0; --i) {
                    iidx += d * tmpidx[i];
                    d *= tmpshp[i];
                }
                std::swap(tmpshp[2], tmpshp[3]);
                ov.ptr<float>()[oidx] = iv.ptr<float>()[iidx];
            }
        };
        Checker::RunOptions opt;
        opt.numdiff_eps = 1;  // large eps because all linear
        constexpr size_t R = RED0 * RED1;
        Checker(make_graph, fwd)
                .run({{{1, R, 1, 1}}}, opt)
                .run({{{5, R * 2, 3, 2}}}, opt)
                .run({{{2, R * 3, 4, 3}}}, opt);
    }
}

TEST(TestTensorManip, Subtensor) {
    using Checker = AutoOprChecker<1, 1>;
    SymbolVar sub0, sub1, sub2, sub3, sub4;
    auto make_graph =
            [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        x = x.rename("x");
        auto cv = [&](int v, bool dyn = false) {
            auto rst = x.make_scalar(v);
            if (dyn)
                rst = opr::MarkDynamicVar::make(rst);
            return rst;
        };
        // sub0 = (0.9*x)[10:shp0:2]
        sub0 = opr::Subtensor::make(
                       x * 0.9f,
                       {AIdx::make_interval(
                               0, cv(10, true), opr::GetVarShape::make(x, 0), cv(2))})
.rename("sub0"); // sub1 = x[:-10:2] sub1 = opr::Subtensor::make( opr::MarkDynamicVar::make(x), {AIdx::make_interval(0, None, cv(-10), cv(2))}) .rename("sub1"); // sub2_raw = x[5:-5:2, 3] auto sub2_raw = opr::Subtensor::make( opr::IndexAt::make(x, {{1, cv(3)}}), {AIdx::make_interval(0, cv(5), cv(-5), cv(2))}); { auto opr = sub2_raw.node()->owner_opr(); auto&& inp = opr->input(); auto&& dmap = opr->node_prop().dep_map(); for (size_t i = 1; i < inp.size(); ++i) { mgb_assert( dmap.at(inp[i]) & cg::OperatorNodeBase::NodeProp::DepType::HOST_VALUE); } } sub2 = opr::AxisAddRemove::make( sub2_raw, {opr::AxisAddRemove::AxisDesc::make_add(1)}) .rename("sub2"); // sub3 = x[4:-6:2, -1:] sub3 = opr::Subtensor::make( x, {AIdx::make_interval(0, cv(4), cv(-6), cv(2)), AIdx::make_interval(1, cv(-1), None, None)}); // sub4 = (x + 0.1)[-3:7:-2, 1::-3] (negative stride) sub4 = opr::Subtensor::make( x + .1f, {AIdx::make_interval(0, cv(-3), cv(7), cv(-2)), AIdx::make_interval(1, cv(1), None, cv(-3, true))}); return {(sub0 + sub1 + sub2 + sub3 + sub4).rename("y")}; }; auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) { auto iptr = inp[0]->ptr(); auto ishp = inp[0]->shape(); auto oshp = ishp; auto s0 = ishp.shape[0], s1 = ishp.total_nr_elems() / s0, s2 = s1 / ishp.shape[1]; auto os0 = (s0 - 10 + 1) / 2; oshp.shape[0] = os0; dest[0].comp_node(inp[0]->comp_node()); dest[0].resize(oshp); auto optr = dest[0].ptr(); for (size_t i = 0; i < os0; ++i) for (size_t j = 0; j < s1; ++j) { optr[i * s1 + j] = iptr[(i * 2 + 10) * s1 + j] * .9f + iptr[(i * 2) * s1 + j] + iptr[(i * 2 + 5) * s1 + j % s2 + s2 * 3] + iptr[(i * 2 + 4) * s1 + j % s2 + s2 * (ishp.shape[1] - 1)] + iptr[(ishp.shape[0] - 3 - i * 2) * s1 + j % s2 + s2 * 1] + 0.1; } }; Checker::RunOptions opt; opt.numdiff_eps = 1; // large eps because all linear Checker checker(make_graph, fwd); checker.run({{{11, 5}}}, opt).run({{{20, 6}}}, opt).run({{{56, 6, 4}}}, opt); ASSERT_FALSE(cg::is_static_var_shape(sub0.node())); ASSERT_FALSE(cg::is_static_var_shape(sub1.node())); ASSERT_TRUE(cg::is_static_var_storage(sub2.node())); ASSERT_TRUE(cg::is_static_var_storage(sub3.node())); ASSERT_FALSE(cg::is_static_var_storage(sub4.node())); } TEST(TestTensorManip, SubtensorNegativeAxis) { using Checker = AutoOprChecker<1, 1>; auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { using AIdx = opr::Subtensor::AxisIndexer; auto x = inputs[0]; return {opr::Subtensor::make(x, {AIdx::make_index(-1, x.make_scalar(2))})}; }; auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) { auto iptr = inp[0]->ptr(); auto ishp = inp[0]->shape(); auto oshp = ishp; --oshp.ndim; auto stride = oshp.shape[oshp.ndim]; if (!oshp.ndim) oshp = {1}; auto optr = dest[0].resize(oshp).ptr(); for (size_t i = 0, it = oshp.total_nr_elems(); i < it; ++i) { optr[i] = iptr[i * stride + 2]; } }; Checker checker(make_graph, fwd); checker.run({TensorShape{5}}) .run({TensorShape{2, 3}}) .run({TensorShape{2, 3, 4}}) .run({TensorShape{2, 3, 4, 5}}); } TEST(TestTensorManip, SubtensorWithEmptyIndexDesc) { using Checker = AutoOprChecker<1, 1>; auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { auto x = inputs[0]; return {opr::Subtensor::make(x, {})}; }; auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) { auto iptr = inp[0]->ptr(); auto oshp = inp[0]->shape(); auto optr = dest[0].resize(oshp).ptr(); for (size_t i = 0, it = oshp.total_nr_elems(); i < it; ++i) { optr[i] = iptr[i]; } }; Checker checker(make_graph, fwd); 
    checker.run({TensorShape{5}})
            .run({TensorShape{2, 3}})
            .run({TensorShape{2, 3, 4}})
            .run({TensorShape{2, 3, 4, 5}});
}

TEST(TestTensorManip, SubtensorShapeInferForDynAxisIdx) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 6, 3});
    auto host_idx = gen({1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_idx));
    auto cv = [&](int v) { return x.make_scalar(v); };
    using Ad = opr::Subtensor::AxisIndexer;
    // y = x[2, 1:-2:2]
    auto y = opr::Subtensor::make(
            x, {Ad::make_interval(1, cv(1), cv(-2), cv(2)), Ad::make_index(0, idx)});
    ASSERT_TRUE(cg::is_static_var_shape(y.node()));
    ASSERT_EQ(y.node()->shape(), TensorShape({2, 3}));
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    host_idx->ptr<float>()[0] = 2;
    func->execute();
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({2, 3});
    for (size_t i = 0; i < 2; ++i)
        for (size_t j = 0; j < 3; ++j) {
            expt.ptr<float>()[i * 3 + j] = host_x->ptr<float>({2, i * 2 + 1, j})[0];
        }
    MGB_ASSERT_TENSOR_EQ(expt, host_y);
}

TEST(TestTensorManip, SubtensorDynCaseMemFwd) {
    auto run = [](int dyn_type) {
        // dyn_type: 0->const idx, 1->static idx, 2->dynamic idx, 3->dynamic inp
        ASSERT_FALSE(HasFailure()) << "already failed before " << dyn_type;
        HostTensorGenerator<> gen;
        auto host_x = gen({2, 3});
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x);
        SymbolVar idx;
        if (dyn_type == 0 || dyn_type == 3) {
            idx = x.make_scalar(1);
            if (dyn_type == 3) {
                // force dynamic storage by reading on another comp node
                auto xrd = opr::Copy::make(x, host_x->comp_node().change_stream(1));
                graph->options().extra_vardeps[x.node()].push_back(xrd.node());
            }
        } else {
            auto host_idx = std::make_shared<HostTensorND>(
                    host_x->comp_node(), dtype::Int32{});
            host_idx->resize({1}).ptr<int>()[0] = 1;
            idx = opr::Host2DeviceCopy::make(*graph, host_idx);
            if (dyn_type == 2) {
                idx = opr::MarkDynamicVar::make(idx);
            }
        }
        auto y = opr::Subtensor::make(
                x, {opr::Subtensor::AxisIndexer::make_interval(0, idx, None, None)});
        if (dyn_type != 2) {
            ASSERT_EQ(TensorShape({1, 3}), y.shape());
        }
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        auto xsub = host_x->sub(SubTensorSpec::make_from_offset_elem(
                TensorLayout({1, 3}, dtype::Float32{}), 3));
        MGB_ASSERT_TENSOR_EQ(xsub, host_y);
        ASSERT_EQ(dyn_type == 0, cg::is_static_var_storage(y.node()));
        ASSERT_EQ(dyn_type != 2, cg::is_static_var_shape(y.node()));
        ASSERT_EQ(
                static_cast<const dt_byte*>(prev_dev_ptr(x)) + 3 * sizeof(float),
                prev_dev_ptr(y));
    };
    run(0);
    run(1);
    run(2);
    run(3);
}

TEST(TestTensorManip, SubtensorWithNoValInferInp) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 1}), host_idx = gen({1});
    auto graph = ComputingGraph::make();
    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx = opr::Host2DeviceCopy::make_no_value_infer(*graph, host_idx),
         y = opr::Subtensor::make(x, {Ad::make_index(0, idx)});
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    host_idx->ptr<float>()[0] = 2;
    func->execute();
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({1}).ptr<float>()[0] = host_x->ptr<float>()[2];
    MGB_ASSERT_TENSOR_EQ(expt, host_y);
}

TEST(TestTensorManip, SubtensorDedup) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 5, 5, 5});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto cv = [&](int v) { return x.make_scalar(v); };
    using S = opr::Subtensor;
    using D = S::AxisIndexer;
    std::unordered_set<VarNode*> nodes;
    for (int i : {0, 1, 1, 0}) {
        nodes.insert(S::make(x, {D::make_index(i, cv(2))}).node());
        nodes.insert(S::make(x, {D::make_interval(i, cv(2), None, None)}).node());
        nodes.insert(S::make(x, {D::make_interval(i, None, cv(2), None)}).node());
        nodes.insert(S::make(x, {D::make_interval(i, None, None, cv(2))}).node());
    }
    ASSERT_EQ(8u, nodes.size());
}

TEST(TestTensorManip, SubtensorIdxChange) {
    auto run = [](bool dyn) {
        HostTensorGenerator<> gen;
        auto host_x = gen({10});
        auto host_idx =
                std::make_shared<HostTensorND>(host_x->comp_node(), dtype::Int32());
        host_idx->resize({1}).ptr<int>()[0] = 1;
        bool idx_exec = false, idx_infered = false;
        auto cb_set_idx_exec = [&](DeviceTensorND& dv) {
            if (dv.comp_node() == CompNode::default_cpu()) {
                idx_infered = true;
            } else {
                idx_exec = true;
            }
        };
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        SymbolVar idx_;
        if (dyn) {
            idx_ = opr::Host2DeviceCopy::make(*graph, host_idx);
        } else {
            idx_ = opr::ImmutableTensor::make(*graph, *host_idx);
        }
        auto idx = opr::CallbackInjector::make(idx_, {false, true, cb_set_idx_exec}),
             y = opr::Subtensor::make(
                     x, {opr::Subtensor::AxisIndexer::make_interval(
                                0, idx, idx + 1, None)});
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        ASSERT_TRUE(cg::is_static_var_shape(y.node()));
        ASSERT_TRUE(cg::is_static_var_value(y.node()));
        ASSERT_EQ(!dyn, cg::is_static_var_storage(y.node()));
        ASSERT_EQ(TensorShape({1}), y.node()->shape());
        auto px = host_x->ptr<float>();
        func->execute();
        ASSERT_EQ(px[1], host_y.ptr<float>()[0]);
        host_idx->ptr<int>()[0] = 5;
        func->execute();
        if (dyn) {
            ASSERT_EQ(px[5], host_y.ptr<float>()[0]);
        } else {
            ASSERT_EQ(px[1], host_y.ptr<float>()[0]);
        }
        ASSERT_TRUE(idx_infered);
        ASSERT_FALSE(idx_exec);
    };
    run(true);
    run(false);
}

TEST(TestTensorManip, SubtensorEmptyIO) {
    using AIdx = opr::Subtensor::AxisIndexer;
    using IndexDesc = std::vector<AIdx>;
    using IndexDescCreater = thin_function<IndexDesc(SymbolVar)>;
    HostTensorGenerator<> gen;
    auto run = [&](const TensorShape& inp_shp, const TensorShape& out_shp,
                   const IndexDescCreater& c) {
        auto host_x = gen(inp_shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        auto y = opr::Subtensor::make(x, c(x));
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(host_y.shape(), out_shp);
        ASSERT_TRUE(host_y.empty());
    };
    // x.shape = {0}, x[:0]
    run({0}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, None, x.make_scalar(0), None)};
    });
    // x.shape = {100, 0}, x[0:-10:2]
    run({100, 0}, {45, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                0, x.make_scalar(0), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {100, 0}, x[10:-10:2, 0:0]
    run({100, 0}, {40, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                        0, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2)),
                AIdx::make_interval(1, x.make_scalar(0), x.make_scalar(0), None)};
    });
    // x.shape = {10, 0, 10}, x[5, 10:-10:-2]
    run({10, 0, 10}, {0, 10}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_index(0, x.make_scalar(5)),
                AIdx::make_interval(
                        1, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {10}, x[100:]
    run({10}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, x.make_scalar(100), None, None)};
    });
}

TEST(TestTensorManip, SetSubtensorEmptyIO) {
    using AIdx = opr::SetSubtensor::AxisIndexer;
    using IndexDesc = std::vector<AIdx>;
    using IndexDescCreater = thin_function<IndexDesc(SymbolVar)>;
    HostTensorGenerator<> gen;
    auto run = [&](const TensorShape& inp_shp, const TensorShape& val_shp,
                   const IndexDescCreater& c) {
        auto host_x = gen(inp_shp), host_v = gen(val_shp);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             v = opr::Host2DeviceCopy::make(*graph, host_v);
        auto y = opr::SetSubtensor::make(x, v, c(x));
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        ASSERT_EQ(host_y.shape(), inp_shp);
    };
    // x.shape = {0}, v.shape = {0}, x[:0] = v
    run({0}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, None, x.make_scalar(0), None)};
    });
    // x.shape = {100, 0}, v.shape = {45, 0}, x[0:-10:2] = v
    run({100, 0}, {45, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                0, x.make_scalar(0), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {100, 0}, v.shape = {40, 0}, x[10:-10:2, 0:0] = v
    run({100, 0}, {40, 0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(
                        0, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2)),
                AIdx::make_interval(1, x.make_scalar(0), x.make_scalar(0), None)};
    });
    // x.shape = {10, 0, 10}, v.shape = {0, 10}, x[5, 10:-10:-2] = v
    run({10, 0, 10}, {0, 10}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_index(0, x.make_scalar(5)),
                AIdx::make_interval(
                        1, x.make_scalar(10), x.make_scalar(-10), x.make_scalar(2))};
    });
    // x.shape = {10}, v.shape = {0}, x[100:] = v
    run({10}, {0}, [&](SymbolVar x) -> IndexDesc {
        return {AIdx::make_interval(0, x.make_scalar(100), None, None)};
    });
}

namespace {
void test_subtensor_fwdonly(bool dyn_inp, bool dyn_idx) {
    constexpr size_t SIZE = 25;
    auto mkhost = [](size_t size, DType dtype) {
        auto rst = std::make_shared<HostTensorND>(CompNode::load("xpu0"), dtype);
        rst->resize({size});
        return rst;
    };
    auto host_x = mkhost(SIZE, dtype::Float32()),
         host_idx0 = mkhost(1, dtype::Int32()), host_idx1 = mkhost(1, dtype::Int32());
    for (size_t i = 0; i < SIZE; ++i) {
        host_x->ptr<float>()[i] = i;
    }
    host_idx0->ptr<int>()[0] = 2;
    host_idx1->ptr<int>()[0] = 6;
    auto graph = ComputingGraph::make();
    using AIdx = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         idx0 = opr::Host2DeviceCopy::make(*graph, host_idx0),
         idx1 = opr::Host2DeviceCopy::make(*graph, host_idx1);
    float *x_ptr = nullptr, *x_ptr_end = nullptr, *xsub_ptr = nullptr;
    if (dyn_inp)
        x = opr::MarkDynamicVar::make(x);
    x = opr::CallbackInjector::make(x, [&](DeviceTensorND& v) {
        x_ptr = v.ptr<float>();
        x_ptr_end = v.ptr<float>() + v.layout().total_nr_elems();
    });
    if (dyn_idx)
        idx0 = opr::MarkDynamicVar::make(idx0);
    auto xsub = opr::Subtensor::make(x, {AIdx::make_interval(0, idx0, idx1, None)});
    xsub = opr::CallbackInjector::make(
            xsub, [&](DeviceTensorND& v) { xsub_ptr = v.ptr<float>(); });
    ASSERT_EQ(!dyn_inp && !dyn_idx, cg::is_static_var_shape(xsub.node()));
    HostTensorND host_sub;
    auto func = graph->compile({make_callback_copy(xsub, host_sub)});
    bool failed = false;
    auto run_and_check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        host_idx0->ptr<int>()[0] = begin;
        host_idx1->ptr<int>()[0] = end;
        func->execute();
        if (!(!dyn_inp && dyn_idx)) {
            ASSERT_GE(xsub_ptr, x_ptr);
            ASSERT_LE(xsub_ptr, x_ptr_end);
        }
        ASSERT_EQ(TensorShape({end - begin}), host_sub.shape());
        for (size_t i = 0; i < end - begin; ++i)
            ASSERT_EQ(host_x->ptr<float>()[i + begin], host_sub.ptr<float>()[i])
                    << ssprintf("failed [%zu, %zu): i=%zu", begin, end, i);
        failed = false;
    };
    run_and_check(0, 1);
    run_and_check(2, 3);
    run_and_check(0, 5);
    run_and_check(1, 6);
    run_and_check(3, 21);
    run_and_check(0, SIZE);
    run_and_check(1, SIZE);
    run_and_check(0, SIZE - 1);
}
}  // anonymous namespace

TEST(TestTensorManip, SubtensorFwdOnly00) { test_subtensor_fwdonly(false, false); }

TEST(TestTensorManip, SubtensorFwdOnly01) { test_subtensor_fwdonly(false, true); }

TEST(TestTensorManip, SubtensorFwdOnly10) { test_subtensor_fwdonly(true, false); }

TEST(TestTensorManip, SubtensorFwdOnly11) { test_subtensor_fwdonly(true, true); }

TEST(TestTensorManip, OverlapSetSubtensor) {
    constexpr size_t SIZE = 2048, SIZE_SUB = (SIZE - 4) / 2;
    auto host_x =
            std::make_shared<HostTensorND>(CompNode::load("xpu0"), dtype::Float32());
    host_x->resize({SIZE});
    for (size_t i = 0; i < SIZE; ++i)
        host_x->ptr<float>()[i] = i;
    auto graph = ComputingGraph::make();
    graph->options().allocate_static_mem_after_graph_compile = true;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
    auto cv = [&](int v, bool dyn = false) {
        auto rst = x.make_scalar(v);
        if (dyn)
            rst = opr::MarkDynamicVar::make(rst);
        return rst;
    };
    using AIdx = opr::Subtensor::AxisIndexer;
    auto xsub = opr::Subtensor::make(x, {AIdx::make_interval(0, cv(2), cv(-2), cv(2))})
                        .rename("xsub"),
         // y = xsub[:-10] := xsub[10:]
            y = opr::SetSubtensor::make(
                        xsub,
                        opr::Subtensor::make(
                                xsub, {AIdx::make_interval(0, cv(10), None, None)})
                                .rename("xsub[10:]"),
                        {AIdx::make_interval(0, None, cv(-10), None)})
                        .rename("y");
    HostTensorND expected(host_x->comp_node(), dtype::Float32());
    expected.resize({SIZE_SUB});
    for (size_t i = 0; i < SIZE_SUB; ++i) {
        auto i0 = i;
        if (i0 < SIZE_SUB - 10)
            i0 += 10;
        expected.ptr<float>()[i] = i0 * 2 + 2;
    }
    ASSERT_TRUE(cg::is_static_var_value(y.node()));
    HostTensorND infer_result;
    infer_result.copy_from(graph->static_infer_manager().infer_value(y.node()));
    MGB_ASSERT_TENSOR_EQ(expected, infer_result);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->to_json()->writeto_fpath(output_file("OverlapSetSubtensor.json"));
    func->execute();
    MGB_ASSERT_TENSOR_EQ(expected, host_y);
}

TEST(TestTensorManip, OverlapSetSubtensor2) {
    constexpr size_t SIZE_X = 20, SIZE_Y = 23;
    auto run = [](bool should_overlap) {
        auto host_x = std::make_shared<HostTensorND>(
                CompNode::load("xpu0"), dtype::Float32());
        host_x->resize({SIZE_X, SIZE_Y});
        for (size_t i = 0; i < SIZE_X * SIZE_Y; ++i)
            host_x->ptr<float>()[i] = i;
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x");
        auto cv = [&](int v) { return x.make_scalar(v); };
        auto make_sub_desc = [&](int begin, int end) -> opr::Subtensor::IndexDesc {
            using AIdx = opr::Subtensor::AxisIndexer;
            return {AIdx::make_interval(0, cv(begin), cv(end), None)};
        };
        auto slice = [&](SymbolVar inp, int begin, int end) {
            return opr::Subtensor::make(inp, make_sub_desc(begin, end));
        };
        // y = x.copy()
        // y[2:7] = y[4:9].copy()
        // y[1:6] += y[3:8].copy()
        auto xsub = slice(x, 4, 9).rename("xsub"),
             y0 = opr::SetSubtensor::make(x, xsub, make_sub_desc(2, 7)).rename("y0"),
             y0sub = slice(y0, 3, 8).rename("y0sub"),
             ypar = should_overlap ? y0 : y0 + 1,
             y = opr::IncrSubtensor::make(ypar, y0sub, make_sub_desc(1, 6))
                         .rename("y1");
        HostTensorND expect;
        expect.copy_from(*host_x);
        auto ptr = expect.ptr<float>();
        memmove(ptr + 2 * SIZE_Y, ptr + 4 * SIZE_Y, 5 * SIZE_Y * sizeof(float));
        for (size_t i = 1; i < 6; ++i) {
            for (size_t j = 0; j < SIZE_Y; ++j) {
                ptr[i * SIZE_Y + j] += ptr[(i + 2) * SIZE_Y + j];
            }
        }
        if (!should_overlap) {
            for (size_t i = 0; i < SIZE_X * SIZE_Y; ++i) {
                ++ptr[i];
            }
        }
        ASSERT_TRUE(cg::is_static_var_value(y.node()));
        HostTensorND infer_result;
        infer_result.copy_from(graph->static_infer_manager().infer_value(y.node()));
        MGB_ASSERT_TENSOR_EQ(expect, infer_result);
        HostTensorND host_y;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        func->execute();
        MGB_ASSERT_TENSOR_EQ(expect, host_y);
        if (!should_overlap) {
            ASSERT_EQ(prev_dev_ptr(ypar), prev_dev_ptr(y));
        }
    };
    run(false);
    run(true);
}

TEST(TestTensorManip, SetSubtensor) {
    using Checker = AutoOprChecker<3, 1>;
    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0], v0 = inputs[1], v1 = inputs[2];
        x = x.rename("x");
        v0 = v0.rename("v0");
        v1 = v1.rename("v1");
        auto cv = [&](int v, bool dyn = false) {
            auto rst = x.make_scalar(v);
            if (dyn)
                rst = opr::MarkDynamicVar::make(rst);
            return rst;
        };
        auto  // x0 = x[10::2] := v0
                x0 = opr::SetSubtensor::make(
                             x, v0, {AIdx::make_interval(0, cv(10), None, cv(2))})
                             .rename("x0"),
                // x1 = x[:-10:2] := v0[:, 3] := v1
                x1 = opr::SetSubtensor::make(
                             opr::MarkDynamicVar::make(x),
                             opr::SetSubtensor::make(
                                     v0, v1, {AIdx::make_index(1, cv(3))}),
                             {AIdx::make_interval(0, None, cv(-10), cv(2))})
                             .rename("x_sub1"),
                // x2 = (x[:5] := x[4:9])[3:-7:2, -1] := v1
                x2_t = opr::Subtensor::make(
                               x, {AIdx::make_interval(0, cv(4), cv(9), None)})
                               .rename("x2_t"),
                x2 = opr::SetSubtensor::make(
                             opr::SetSubtensor::make(
                                     x, x2_t,
                                     {AIdx::make_interval(0, None, cv(5), None)}),
                             v1,
                             {AIdx::make_interval(0, cv(3), cv(-7), cv(2)),
                              AIdx::make_index(1, cv(-1))})
                             .rename("x2"),
                y = (x0 + x1 + x2).rename("y");
        mgb_assert(cg::is_static_var_storage(x0.node()));
        mgb_assert(!cg::is_static_var_shape(x1.node()));
        mgb_assert(cg::is_static_var_storage(x2.node()));
        return {y};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto px = inp[0]->ptr<float>(), pv0 = inp[1]->ptr<float>(),
             pv1 = inp[2]->ptr<float>();
        auto ishp = inp[0]->shape();
        dest[0].comp_node(inp[0]->comp_node());
        dest[0].resize(ishp);
        auto optr = dest[0].ptr<float>();
        auto s0 = ishp.shape[0], s1 = ishp.total_nr_elems() / s0,
             s2 = s1 / ishp.shape[1];
        for (size_t i = 0; i < s0; ++i) {
            for (size_t j = 0; j < s1; ++j) {
                float x0, x1, x2;
                x0 = x1 = x2 = px[i * s1 + j];
                if (i >= 10 && (i - 10) % 2 == 0)
                    x0 = pv0[((i - 10) / 2) * s1 + j];
                if (i < s0 - 10 && i % 2 == 0) {
                    auto row = i / 2;
                    if (j / s2 == 3)
                        x1 = pv1[row * s2 + j % s2];
                    else
                        x1 = pv0[row * s1 + j];
                }
                if (i >= 3 && i < s0 - 7 && (i - 3) % 2 == 0 &&
                    j / s2 == ishp.shape[1] - 1)
                    x2 = pv1[((i - 3) / 2) * s2 + j % s2];
                else if (i < 5)
                    x2 = px[(i + 4) * s1 + j];
                optr[i * s1 + j] = x0 + x1 + x2;
            }
        }
    };
    auto mkshp = [](const TensorShape& shp0) -> Checker::ShapeInpArray {
        mgb_assert(shp0.shape[0] > 10 && shp0.ndim >= 2 && shp0.shape[1] >= 4);
        auto shp1 = shp0;
        shp1.shape[0] = (shp0.shape[0] - 10) / 2;
        auto shp2 = shp1;
        for (size_t i = 2; i < shp2.ndim; ++i)
            shp2.shape[i - 1] = shp2.shape[i];
        --shp2.ndim;
        return {shp0, shp1, shp2};
    };
    Checker::RunOptions opt;
    opt.numdiff_eps = 1;
    Checker(make_graph, fwd)
            .run(mkshp({16, 4, 2}), opt)
            .run(mkshp({14, 10}), opt)
            .run(mkshp({18, 5, 2, 3}), opt);
}

TEST(TestTensorManip, SetSubtensorCheckByShapeInfer) {
    HostTensorGenerator<> gen;
    HostTensorGenerator<dtype::Int32> gen_int;
    auto host_x = gen({12}), host_sub = gen({1}), host_idx = gen_int({1});
    host_idx->ptr<int>()[0] = 13;
    auto graph = ComputingGraph::make();
    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         sub = opr::Host2DeviceCopy::make(*graph, host_sub);
    auto idx1 = Ad::make_index(0, opr::ImmutableTensor::make(*graph, *host_idx)),
         idx2 = Ad::make_index(0, opr::Host2DeviceCopy::make(*graph, host_idx));
    MGB_MARK_USED_VAR(x);
    MGB_MARK_USED_VAR(sub);
    MGB_MARK_USED_VAR(idx1);
    MGB_MARK_USED_VAR(idx2);
    ASSERT_THROW(opr::SetSubtensor::make(x, sub, {idx1}), MegBrainError);
    ASSERT_THROW(opr::SetSubtensor::make(x, sub, {idx2}), MegBrainError);
}

TEST(TestTensorManip, SetSubtensorShapeInfer) {
    HostTensorGenerator<> gen;
    HostTensorGenerator<dtype::Int32> gen_int;
    auto host_x = gen({12}), host_sub = gen({1}), host_idx = gen_int({1});
    host_idx->ptr<int>()[0] = 13;
    auto graph = ComputingGraph::make();
    auto&& mgr = graph->static_infer_manager();
    using Ad = opr::Subtensor::AxisIndexer;
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         sub = opr::Host2DeviceCopy::make(*graph, host_sub),
         index = opr::Host2DeviceCopy::make_no_value_infer(*graph, host_idx);
    auto rt_static_idx = Ad::make_index(0, index * 2);
    auto y = opr::SetSubtensor::make(x, sub, {rt_static_idx});
    ASSERT_TRUE(mgr.infer_shape_fallible(y.node()));
}

TEST(TestTensorManip, SetSubtensorDynIdx) {
    HostTensorGenerator<> gen;
    auto host_x = gen({12}), host_sub = gen({1}), host_idx = gen({1});
    host_idx->ptr<float>()[0] = 3;
    auto dev_idx = std::make_shared<DeviceTensorND>();
    dev_idx->copy_from(*host_idx);
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         sub = opr::Host2DeviceCopy::make(*graph, host_sub),
         idx = opr::SharedDeviceTensor::make(*graph, dev_idx),
         y = opr::SetSubtensor::make(
                 x, sub, {opr::SetSubtensor::AxisIndexer::make_index(0, idx)});
    ASSERT_TRUE(cg::is_static_var_storage(y.node()));
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    host_x->ptr<float>()[3] = host_sub->ptr<float>()[0];
    MGB_ASSERT_TENSOR_EQ(*host_x, host_y);
}

TEST(TestTensorManip, SetSubtensorWithEmptyIndexDesc) {
    HostTensorGenerator<> gen;
    auto host_x = gen({12}), host_y = gen({12});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::SetSubtensor::make(x, y, {});
    ASSERT_TRUE(cg::is_static_var_storage(z.node()));
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_y, host_z);
}

TEST(TestTensorManip, IncrSubtensor) {
    using Checker = AutoOprChecker<2, 1>;
    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        using AIdx = opr::Subtensor::AxisIndexer;
        auto x = inputs[0];
        return {opr::IncrSubtensor::make(
                x, inputs[1],
                {AIdx::make_interval(
                        0, x.make_scalar(2), x.make_scalar(-2), x.make_scalar(2))})};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto nr = inp[0]->shape(0);
        auto pv = inp[1]->ptr<float>(), pd = dest[0].copy_from(*inp[0]).ptr<float>();
        for (size_t i = 0; i < (nr - 3) / 2; ++i) {
            pd[i * 2 + 2] += pv[i];
        }
    };
    Checker{make_graph, fwd}
            .run({TensorShape{5}, {1}})
            .run({TensorShape{8}, {2}})
            .run({TensorShape{23}, {10}});
}

TEST(TestTensorManip, Concat) {
    auto cns = load_multiple_xpus(4);
    using Checker = AutoOprChecker<3, 1>;
    auto make_graph =
            [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto sub0 = inputs[0], sub1 = opr::Copy::make(inputs[1], cns[1]),
             sub2 = opr::Copy::make(inputs[2], cns[2]),
             ret = opr::Concat::make({sub0, sub1, sub2}, 1, cns[3]);
        return {opr::Copy::make(ret, cns[0])};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        size_t n = inp[0]->shape(0), c0 = inp[0]->shape(1), c1 = inp[1]->shape(1),
               c2 = inp[2]->shape(1), c = c0 + c1 + c2;
        auto i0 = inp[0]->ptr<float>(), i1 = inp[1]->ptr<float>(),
             i2 = inp[2]->ptr<float>(), o = dest[0].resize({n, c}).ptr<float>();
        for (size_t i = 0; i < n; ++i) {
            for (size_t j = 0; j < c; ++j) {
                float cur;
                if (j < c0) {
                    cur = i0[i * c0 + j];
                } else if (j < c0 + c1) {
                    cur = i1[i * c1 + j - c0];
                } else {
                    cur = i2[i * c2 + j - c0 - c1];
                }
                o[i * c + j] = cur;
            }
        }
    };
    Checker checker{make_graph, fwd, cns[0]};
    checker.run({TensorShape{2, 3}, {2, 4}, {2, 5}})
            .run({TensorShape{2, 8}, {2, 3}, {2, 9}})
            .run({TensorShape{5, 10}, {5, 3}, {5, 4}});
}

TEST(TestTensorManip, ConcatWithNegativeAxis) {
    auto cns = load_multiple_xpus(4);
    using Checker = AutoOprChecker<3, 1>;
    auto make_graph =
            [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto sub0 = inputs[0], sub1 = opr::Copy::make(inputs[1], cns[1]),
             sub2 = opr::Copy::make(inputs[2], cns[2]),
             ret = opr::Concat::make({sub0, sub1, sub2}, -1, cns[3]);
        return {opr::Copy::make(ret, cns[0])};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        size_t n = inp[0]->shape(0), c0 = inp[0]->shape(1), c1 = inp[1]->shape(1),
               c2 = inp[2]->shape(1), c = c0 + c1 + c2;
        auto i0 = inp[0]->ptr<float>(), i1 = inp[1]->ptr<float>(),
             i2 = inp[2]->ptr<float>(), o = dest[0].resize({n, c}).ptr<float>();
        for (size_t i = 0; i < n; ++i) {
            for (size_t j = 0; j < c; ++j) {
                float cur;
                if (j < c0) {
                    cur = i0[i * c0 + j];
                } else if (j < c0 + c1) {
                    cur = i1[i * c1 + j - c0];
                } else {
                    cur = i2[i * c2 + j - c0 - c1];
                }
                o[i * c + j] = cur;
            }
        }
    };
    Checker checker{make_graph, fwd, cns[0]};
    checker.run({TensorShape{2, 3}, {2, 4}, {2, 5}})
            .run({TensorShape{2, 8}, {2, 3}, {2, 9}})
            .run({TensorShape{5, 10}, {5, 3}, {5, 4}});
}

TEST(TestTensorManip, ConcatEmpty) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3, 5}), host_y = gen({2, 0, 5});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::Concat::make({x, y}, 1);
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_x, host_z);
    host_x->resize({2, 0, 5});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(*host_y, host_z);
}

TEST(TestTensorManip, ConcatEmpty2) {
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 0, 5}), host_y = gen({2, 0, 6});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::Concat::make({x, y}, 2);
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    ASSERT_EQ(TensorShape({2, 0, 11}), host_z.shape());
}

#if MGB_OPENCL
#include "megcore_opencl.h"

#define REQUIRE_OPENCL()                                                 \
    do {                                                                 \
        if (!CompNode::get_device_count(CompNode::DeviceType::OPENCL)) { \
            return;                                                      \
        }                                                                \
    } while (0)

TEST(TestTensorManip, ConcatCD4) {
    REQUIRE_OPENCL();
    auto cn = CompNode::load("openclx");
    HostTensorGenerator<> gen;
    auto host_x = gen({1, 4, 2, 2}, cn), host_y = gen({1, 4, 2, 2}, cn);
    auto graph0 = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph0, host_x);
    auto y = opr::Host2DeviceCopy::make(*graph0, host_y);
    x = opr::RelayoutFormat::make(x, {opr::RelayoutFormat::Param::Mode::NCHW_NHWCD4I});
    y = opr::RelayoutFormat::make(y, {opr::RelayoutFormat::Param::Mode::NCHW_NHWCD4I});
    auto z = opr::Concat::make({x, y}, 2);
    HostTensorND host_z0;
    auto func = graph0->compile({make_callback_copy(z, host_z0)});
    func->execute();
    ASSERT_EQ(TensorShape({1, 2, 2, 2, 4}), host_z0.shape());

    auto graph1 = ComputingGraph::make();
    x = opr::Host2DeviceCopy::make(*graph1, host_x);
    y = opr::Host2DeviceCopy::make(*graph1, host_y);
    z = opr::RelayoutFormat::make(
            opr::Concat::make({x, y}, 1),
            {opr::RelayoutFormat::Param::Mode::NCHW_NHWCD4I});
    HostTensorND host_z1;
    func = graph1->compile({make_callback_copy(z, host_z1)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(host_z0, host_z1);
}
#endif

TEST(TestTensorManip, AxisAddRemove) {
    HostTensorGenerator<> gen;
    for (bool dyn_shape : {false, true}) {
        auto host_x = gen({2, 1, 5});
        using AD = opr::AxisAddRemove::AxisDesc;
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x);
        if (dyn_shape) {
            x = opr::MarkDynamicVar::make(x);
        }
        auto y = opr::AxisAddRemove::make(x, {AD::make_add(0)}),
             z = opr::AxisAddRemove::make(x, {AD::make_remove(1)});
        HostTensorND host_y, host_z;
        auto func = graph->compile(
                {make_callback_copy(y, host_y), make_callback_copy(z, host_z)});
        func->execute();
        ASSERT_EQ(TensorShape({1, 2, 1, 5}), host_y.shape());
        ASSERT_EQ(TensorShape({2, 5}), host_z.shape());
        MGB_ASSERT_TENSOR_EQ(*host_x, host_y.resize(host_x->shape()));
        MGB_ASSERT_TENSOR_EQ(*host_x, host_z.resize(host_x->shape()));

        // test empty tensor
        host_x->resize({2, 1, 0});
        func->execute();
        ASSERT_EQ(TensorShape({1, 2, 1, 0}), host_y.shape());
        ASSERT_EQ(TensorShape({2, 0}), host_z.shape());
    }
}

TEST(TestTensorManip, Split) {
    auto cns = load_multiple_xpus(3);
    constexpr size_t C1 = 20, C2 = 30;
    constexpr size_t N = 2, C = C1 + C2;
    HostTensorGenerator<> gen;
    auto host_opr0 = gen({N, C}, cns[0]);
    auto graph = ComputingGraph::make();
host_opr0, {"opr0"}); auto spl = opr::Split::make( opr0, Split::Options::make_partition(opr0, -1, {C1, C2}), OperatorNodeConfig("split").comp_node_arr({cns[1], cns[2]})); auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()), cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()), cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)), cost = opr::Copy::make( cost0 + cost1, OperatorNodeConfig().follow_comp_node(opr0)), grad = cg::grad(cost, opr0); HostTensorND host_spl0, host_spl1, host_grad; auto func = graph->compile( {{spl[0], [&](DeviceTensorND& s) { host_spl0.copy_from(s); }}, {spl[1], [&](DeviceTensorND& s) { host_spl1.copy_from(s); }}, {grad, [&](DeviceTensorND& s) { host_grad.copy_from(s); }}}); func->execute(); auto o0 = host_spl0.sync().ptr(), o1 = host_spl1.sync().ptr(), c = host_opr0->ptr(), g = host_grad.sync().ptr(); for (size_t i = 0, it = host_opr0->layout().total_nr_elems(); i < it; i++) { auto ch = i % C; auto n = i / C; if (ch < C1) { MGB_ASSERT_FLOAT_EQ(o0[n * C1 + ch], c[i]) << ssprintf("failed at %zd", i); } else { MGB_ASSERT_FLOAT_EQ(o1[n * C2 + ch - C1], c[i]) << ssprintf("failed at %zd", i); } MGB_ASSERT_FLOAT_EQ(c[i] * 2, g[i]) << ssprintf("grad failed at %zd", i); } } TEST(TestTensorManip, SplitToDynOutShape) { using Checker = AutoOprChecker<1, 2>; auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { auto x = inputs[0]; auto y = opr::Split::make( x, opr::Split::Options::make_partition( 0, {x.make_scalar(3), opr::MarkDynamicVar::make( opr::GetVarShape::make(x, 0) - x.make_scalar(3))})); return {y[0], y[1]}; }; auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) { auto sub = [&](size_t begin, Maybe end) { auto&& iv = inp[0]; return iv->sub(Slice(begin, end, None).apply(iv->layout(), 0)); }; dest[0].copy_from(sub(0, 3)); dest[1].copy_from(sub(3, None)); }; Checker{make_graph, fwd} .run({TensorShape{5}}) .run({TensorShape{8}}) .run({TensorShape{9, 3}}); } TEST(TestTensorManip, SplitToDynOutStorage) { using Checker = AutoOprChecker<1, 2>; auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { auto x = inputs[0]; auto y = opr::Split::make( x, opr::Split::Options::make_partition( 0, {x.make_scalar(3), opr::GetVarShape::make(x, 0) - x.make_scalar(3)})); auto y0 = opr::Copy::make(y[0], x.node()->comp_node().change_stream(1)); y0 = opr::Copy::make(y0, x.node()->comp_node()); return {y0, y[1]}; }; auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) { auto sub = [&](size_t begin, Maybe end) { auto&& iv = inp[0]; return iv->sub(Slice(begin, end, None).apply(iv->layout(), 0)); }; dest[0].copy_from(sub(0, 3)); dest[1].copy_from(sub(3, None)); }; Checker{make_graph, fwd} .run({TensorShape{5}}) .run({TensorShape{8}}) .run({TensorShape{9, 3}}); } namespace { void do_test_dynamic_split(bool multiple_cn, bool force_dynamic) { auto cns = load_multiple_xpus(3); constexpr size_t N = 2, C = 51; HostTensorGenerator<> gen; auto host_x = gen({N, C}, cns[0]), host_sub_begin = gen({1}, cns[0]), host_sub_end = gen({1}, cns[0]); host_sub_begin->ptr()[0] = 0; host_sub_end->ptr()[0] = 2; auto graph = ComputingGraph::make(); SymbolVar x = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x, {"x"}), sub_begin = opr::Host2DeviceCopy::make_no_fwd( *graph, host_sub_begin, {"sub_begin"}), sub_end = opr::Host2DeviceCopy::make_no_fwd( *graph, host_sub_end, {"sub_end"}), xsub = opr::Subtensor::make( x, {opr::Subtensor::AxisIndexer::make_interval( 1, sub_begin, sub_end, None)}) 
.rename("xsub"); OperatorNodeConfig split_config("split"); if (multiple_cn) { split_config.comp_node_arr({cns[1], cns[2]}); } if (force_dynamic) xsub = opr::MarkDynamicVar::make(xsub); auto spl = opr::Split::make( xsub, Split::Options::make_callback( 1, 2, [](size_t s) { return std::vector{s / 2, s - s / 2}; }), split_config); if (multiple_cn) { spl[0] = opr::Sleep::make(spl[0], 0.1); spl[1] = opr::Sleep::make(spl[1], 0.2); } auto cost0 = opr::Dot::make(spl[0].flatten(), spl[0].flatten()), cost1_ = opr::Dot::make(spl[1].flatten(), spl[1].flatten()), cost1 = opr::Copy::make(cost1_, OperatorNodeConfig().follow_comp_node(cost0)), cost = opr::Copy::make( cost0 + cost1, OperatorNodeConfig().follow_comp_node(x)) * 0.5f, grad = cg::grad(cost, x); HostTensorND host_spl0, host_spl1, host_grad; auto func = graph->compile( {make_callback_copy(spl[0], host_spl0), make_callback_copy(spl[1], host_spl1), make_callback_copy(grad, host_grad)}); if (force_dynamic) ASSERT_TRUE(!cg::is_static_var_shape(spl[0].node())); else { auto cb = [](cg::OperatorNodeBase* op) { for (auto i : op->output()) { mgb_assert( cg::is_static_var_shape(i), "dynamic var: %s", cg::dump_var_info({i}).c_str()); } return true; }; func->iter_opr_seq(cb); } bool failed = false, fwd_checked = false; auto run_and_check = [&](size_t begin, size_t end) { ASSERT_FALSE(failed); failed = true; host_sub_begin->ptr()[0] = begin; host_sub_end->ptr()[0] = end; func->execute(); auto mid = begin + (end - begin) / 2; auto inp = host_x->ptr(), grad = host_grad.ptr(); ASSERT_EQ(host_spl0.shape(), TensorShape({N, mid - begin})); ASSERT_EQ(host_spl1.shape(), TensorShape({N, end - mid})); if (!force_dynamic && !multiple_cn && !begin && mid - begin == 1) { // check mem fwd for spl[0] // do not check for spl[1] since flatten() causes copy ASSERT_EQ( prev_dev_ptr(spl[0]), static_cast(prev_dev_ptr(x))); fwd_checked = true; } for (size_t i = 0, it = host_x->layout().total_nr_elems(); i < it; ++i) { auto ch = i % C; auto n = i / C; float expect_grad; if (ch >= begin && ch < mid) { MGB_ASSERT_FLOAT_EQ(inp[i], *host_spl0.ptr({n, ch - begin})) << ssprintf( "failed at (%zu, %zu),sub=[: ,%zu:%zu]", i, ch, begin, end); expect_grad = inp[i]; } else if (ch >= mid && ch < end) { MGB_ASSERT_FLOAT_EQ(inp[i], *host_spl1.ptr({n, ch - mid})) << ssprintf( "failed at (%zu, %zu),sub=[: ,%zu:%zu]", i, ch, begin, end); expect_grad = inp[i]; } else { expect_grad = 0; } MGB_ASSERT_FLOAT_EQ(expect_grad, grad[i]) << ssprintf( "grad failed at (%zu, %zu), sub=x[:, %zu:%zu]", n, ch, begin, end); } failed = false; }; run_and_check(0, 3); run_and_check(2, 8); run_and_check(5, 12); run_and_check(1, C - 1); run_and_check(0, C); run_and_check(C - 2, C); run_and_check(0, 2); if (!multiple_cn && !force_dynamic) { ASSERT_TRUE(fwd_checked); } } } // namespace TEST(TestTensorManip, DynamicSplit00) { do_test_dynamic_split(false, false); } TEST(TestTensorManip, DynamicSplit01) { do_test_dynamic_split(false, true); } TEST(TestTensorManip, DynamicSplit10) { do_test_dynamic_split(true, false); } TEST(TestTensorManip, DynamicSplit11) { do_test_dynamic_split(true, true); } TEST(TestTensorManip, SplitFromDynStorage) { HostTensorGenerator<> gen; auto host_x = gen({4}); auto graph = cg::ComputingGraph::make(); auto x = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_x)) .reshape({4}); ASSERT_TRUE(cg::is_static_var_shape(x.node())); auto y = opr::Split::make(x, opr::Split::Options::make_partition(x, 0, {1, 3})); HostTensorND y0, y1; auto func = graph->compile( {make_callback_copy(y[0], y0), 
    auto func = graph->compile(
            {make_callback_copy(y[0], y0), make_callback_copy(y[1], y1)});
    func->execute();
    ASSERT_FALSE(cg::is_static_var_storage(x.node()));
    HostTensorND expt{host_x->comp_node(), host_x->dtype()};
    expt.resize({1}).ptr<float>()[0] = host_x->ptr<float>()[0];
    MGB_ASSERT_TENSOR_EQ(expt, y0);
    expt.resize({3});
    for (int i = 0; i < 3; ++i)
        expt.ptr<float>()[i] = host_x->ptr<float>()[i + 1];
    MGB_ASSERT_TENSOR_EQ(expt, y1);
}

TEST(TestTensorManip, SplitPreAllocatedMultiCN) {
    auto cns = load_multiple_xpus(3);
    HostTensorGenerator<> gen;
    auto host_x = gen({3}, cns[0]);
    auto dev_x = std::make_shared<DeviceTensorND>();
    dev_x->copy_from(*host_x).sync();
    auto graph = cg::ComputingGraph::make();
    auto x = opr::SharedDeviceTensor::make(*graph, dev_x);
    auto ys = opr::Split::make(
            x, opr::Split::Options::make_average(0, 3),
            OperatorNodeConfig{}.comp_node_arr({cns.begin(), cns.end()}));
    ASSERT_EQ(3u, ys.size());
    HostTensorND y0, y1, y2;
    auto func = graph->compile(
            {make_callback_copy(ys[0], y0),
             make_callback_copy(opr::Copy::make(ys[1], {cns[0]}), y1),
             make_callback_copy(ys[2], y2)});
    func->execute();
    ASSERT_TRUE(cg::is_static_var_storage(ys[0].node()));
    ASSERT_FALSE(cg::is_static_var_storage(ys[1].node()));
    ASSERT_EQ(x.node()->prev_dev_ptr(), ys[0].node()->prev_dev_ptr());
    ASSERT_EQ(host_x->ptr<float>()[0], y0.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[1], y1.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[2], y2.ptr<float>()[0]);
}

TEST(TestTensorManip, SplitMemfwdMultipleTimesWithOffset) {
    auto cns = load_multiple_xpus(2);
    HostTensorGenerator<> gen;
    auto host_x = gen({4}, cns[0]);
    auto graph = cg::ComputingGraph::make();
    auto x0 = opr::Host2DeviceCopy::make_no_fwd(*graph, host_x),
         x = opr::Subtensor::make(
                 x0, {opr::Subtensor::AxisIndexer::make_interval(
                             0, x0.make_scalar(1), None, None)});
    auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
    ASSERT_EQ(3u, ys.size());
    HostTensorND y0, y1, y2;
    auto func = graph->compile(
            {make_callback_copy(ys[0], y0),
             make_callback_copy(opr::Copy::make(ys[1], {cns[1]}), y1),
             make_callback_copy(ys[2], y2)});
    func->execute();
    ASSERT_FALSE(cg::is_static_var_storage(ys[0].node()));
    ASSERT_TRUE(cg::is_static_var_shape(ys[0].node()));
    ASSERT_FALSE(cg::is_static_var_storage(ys[1].node()));
    ASSERT_EQ(host_x->ptr<float>()[1], y0.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[2], y1.ptr<float>()[0]);
    ASSERT_EQ(host_x->ptr<float>()[3], y2.ptr<float>()[0]);
    ASSERT_EQ(static_cast<const float*>(prev_dev_ptr(x0)) + 3, prev_dev_ptr(ys[2]));
}

TEST(TestTensorManip, SplitValueInfer) {
    auto cns = load_multiple_xpus(3);
    HostTensorGenerator<> gen;
    auto host_x = gen({3});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::ImmutableTensor::make(*graph, *host_x);
    auto ys = opr::Split::make(
            x, opr::Split::Options::make_average(0, 3),
            OperatorNodeConfig{}.comp_node_arr({cns.begin(), cns.end()}));
    for (size_t i = 0; i < 3; ++i) {
        // split itself does not replace imm vars; use +0 to trigger optimizer
        auto var = (ys[i] + 0).node();
        ASSERT_TRUE(var->owner_opr()->same_type<opr::ImmutableTensor>());
        ASSERT_EQ(cns[i], var->comp_node());
        HostTensorND hv;
        hv.copy_from(var->owner_graph()->static_infer_manager().infer_value(var));
        ASSERT_EQ(TensorShape{1}, hv.shape());
        ASSERT_EQ(host_x->ptr<float>()[i], hv.ptr<float>()[0]);
    }
}

TEST(TestTensorManip, SplitZeroGrad) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 2});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
    auto loss = opr::reduce_sum(ys[2] * ys[2], x.make_scalar(1)),
         gx = cg::grad(loss, x);
    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();
TEST(TestTensorManip, SplitZeroGrad) {
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 2});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    auto ys = opr::Split::make(x, opr::Split::Options::make_average(0, 3));
    auto loss = opr::reduce_sum(ys[2] * ys[2], x.make_scalar(1)),
         gx = cg::grad(loss, x);
    HostTensorND host_gx;
    auto func = graph->compile({make_callback_copy(gx, host_gx)});
    func->execute();
    auto px = host_x->ptr<float>(), pgx = host_gx.ptr<float>();
    for (int i = 0; i < 2; ++i) {
        MGB_ASSERT_FLOAT_EQ(0.f, pgx[i]);
        MGB_ASSERT_FLOAT_EQ(0.f, pgx[2 + i]);
        MGB_ASSERT_FLOAT_EQ(px[4 + i] * 2, pgx[4 + i]);
    }
}

TEST(TestTensorManip, DynamicFill) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1});
    auto graph = cg::ComputingGraph::make();
    auto x = opr::MarkDynamicVar::make(opr::Host2DeviceCopy::make(*graph, host_x)),
         y = x.fill_retain_dtype(23);
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    bool failed = false;
    auto check = [&](const TensorShape& ishp) {
        ASSERT_FALSE(failed);
        failed = true;
        host_x->resize(ishp);
        func->execute();
        ASSERT_EQ(ishp, host_y.shape());
        auto ptr = host_y.ptr<float>();
        for (size_t i = 0, it = host_y.shape().total_nr_elems(); i < it; ++i)
            ASSERT_EQ(23, ptr[i]);
        failed = false;
    };
    check({4, 2});
    check({2, 4});
    check({23});
}

TEST(TestTensorManip, Pooling2DBySetSub) {
    constexpr int PH = 4, PW = 3;
    using Checker = AutoOprChecker<1, 1>;
    bool run_dyn = false;
    auto make_graph =
            [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        auto x = inputs.at(0);
        if (run_dyn)
            x = opr::MarkDynamicVar::make(x);
        x.rename("x");
        auto cv = [&](int v, bool dyn = false) {
            auto rst = x.make_scalar(v);
            if (dyn)
                rst = opr::MarkDynamicVar::make(rst);
            return rst;
        };
        auto oh = (opr::GetVarShape::make(x, 0) / PH).rename("oh"),
             ow = (opr::GetVarShape::make(x, 1) / PW).rename("ow"),
             y_tmp_shape = opr::Concat::make({cv(PH * PW), oh, ow}, 0),
             y_tmp = opr::Alloc::make(y_tmp_shape, dtype::Float32());
        if (!run_dyn)
            mgb_assert(cg::is_static_var_storage(y_tmp.node()));
        using Ad = opr::Subtensor::AxisIndexer;
        for (size_t i = 0, num = 0; i < (size_t)PH; ++i) {
            for (size_t j = 0; j < (size_t)PW; ++j) {
                bool dyn = run_dyn && num % 2;
                // xsub: the (i, j)-th element of every window, shape (oh, ow)
                auto xsub =
                        opr::Subtensor::make(
                                x, {Ad::make_interval(0, cv(i, dyn), None, cv(PH)),
                                    Ad::make_interval(1, cv(j), None, cv(PW))})
                                .rename(ssprintf("sub(%zu, %zu)", i, j));
                y_tmp = opr::SetSubtensor::make(
                                y_tmp, xsub, {Ad::make_index(0, cv(num, dyn))})
                                .rename(ssprintf("y(%zu, %zu)", i, j));
                if (!run_dyn) {
                    mgb_assert(cg::is_static_var_storage(xsub.node()));
                    mgb_assert(cg::is_static_var_storage(y_tmp.node()));
                } else if (dyn)
                    y_tmp = opr::MarkDynamicVar::make(y_tmp);
                ++num;
            }
        }
        auto y = opr::Reduce::make(y_tmp, {opr::Reduce::Mode::SUM, 0});
        y = opr::AxisAddRemove::make(
                y, {opr::AxisAddRemove::AxisDesc::make_remove(0)});
        if (!run_dyn)
            mgb_assert(cg::is_static_var_storage(y.node()));
        return {y};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        auto&& ishp = inp.at(0)->shape();
        auto oshp = ishp;
        mgb_assert(oshp.shape[0] % PH == 0);
        mgb_assert(oshp.shape[1] % PW == 0);
        oshp.shape[0] /= PH;
        oshp.shape[1] /= PW;
        auto optr = dest.at(0)
                            .comp_node(inp[0]->comp_node())
                            .resize(oshp)
                            .ptr<float>();
        auto&& iv = *inp.at(0);
        // reference impl: sum pooling with a PH x PW window and matching stride
        for (size_t i = 0; i < oshp.shape[0]; ++i)
            for (size_t j = 0; j < oshp.shape[1]; ++j) {
                auto ii = i * PH, ij = j * PW;
                float sum = 0;
                for (size_t di = 0; di < PH; ++di)
                    for (size_t dj = 0; dj < PW; ++dj) {
                        sum += *iv.ptr<float>({ii + di, ij + dj});
                    }
                *(optr++) = sum;
            }
    };
    auto run = [&](bool dyn) {
        run_dyn = dyn;
        Checker(make_graph, fwd)
                .run({TensorShape{PH * 1, PW * 2}})
                .run({TensorShape{PH * 4, PW * 3}})
                .run({TensorShape{PH * 2, PW * 2}});
    };
    run(false);
    run(true);
}
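// Illustrative sketch (hypothetical test name, not in the original suite):
// SymbolVar::symshape() returns a var's shape as a symbolic 1-D tensor, so
// reshape(other.symshape()) follows the other var's runtime shape. The
// Flatten test below combines this with flatten() to double every element;
// this sketch only checks the round trip through a flattened view.
TEST(TestTensorManip, SymshapeReshapeSketch) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 5});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = x.flatten().reshape(x.symshape());
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    // round-tripping through flatten() must restore the original shape/values
    ASSERT_EQ(TensorShape({4, 5}), host_y.shape());
    MGB_ASSERT_TENSOR_EQ(*host_x, host_y);
}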
TEST(TestTensorManip, Flatten) {
    HostTensorGenerator<> gen;
    auto host_x = gen({20});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
         y = x.flatten();
    y = y + x.reshape(y.symshape());
    ASSERT_EQ(TensorShape{20}, y.node()->shape());
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    for (auto&& ishp : {TensorShape{2, 5}, TensorShape{6, 8, 1}, TensorShape{3}}) {
        *host_x = *gen(ishp);
        func->execute();
        auto expected = host_x->sub(SubTensorSpec::make_from_layout(
                {{ishp.total_nr_elems()}, host_x->dtype()}));
        auto ptr = expected.ptr<float>();
        for (size_t i = 0; i < expected.shape()[0]; ++i)
            ptr[i] *= 2;
        MGB_ASSERT_TENSOR_EQ(expected, host_y);
    }
}

TEST(TestTensorManip, FillWithDtypeDedup) {
    HostTensorGenerator<> gen;
    auto host_x = gen({20});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    SymbolVar vals[] = {
            x.fill_retain_dtype(0),   x.fill_retain_dtype(1),
            x.fill_retain_dtype(0),   x.fill_retain_dtype(1),
            x.fill_retain_dtype(0.f), x.fill_retain_dtype(1.f),
            x.fill_retain_dtype(0.f), x.fill_retain_dtype(1.f),
    };
    for (int i : {0, 1})
        for (int j = 2; j < 8; j += 2)
            ASSERT_EQ(vals[i].node(), vals[i + j].node()) << i << ' ' << i + j;
    ASSERT_NE(vals[0].node(), vals[1].node());
}

TEST(TestTensorManip, StrongContig) {
    HostTensorGenerator<> gen;
    auto host_x = gen({5, 1});
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Dimshuffle::make(x, {1, 0});
    auto cb = [](DeviceTensorND& dv) {
        TensorLayout expect{{1, 5}, dv.dtype()};
        ASSERT_EQ(expect, dv.layout());
    };
    auto func = graph->compile({{y, cb}});
    func->execute();
}

namespace {

void test_param_pack_concat(const TensorShapeArray& shapes, DType type) {
    auto cn = CompNode::load("xpu0");
    auto graph = ComputingGraph::make();
    auto align = cn.get_mem_addr_alignment() / type.size();
    size_t size = 0;
    std::vector<size_t> begins;
    for (auto&& shape : shapes) {
        size = get_aligned_power2(size, align);
        begins.push_back(size);
        size += shape.total_nr_elems();
    }
    SmallVector<SymbolVar> srcs;
    for (size_t i = 0; i < shapes.size(); i++) {
        auto data = std::make_shared<HostTensorND>();
        data->comp_node(cn).dtype(dtype::Int32()).resize(shapes[i]);
        auto ptr = data->ptr<int>();
        for (size_t j = 0; j < shapes[i].total_nr_elems(); j++) {
            ptr[j] = j;
        }
        auto nd = opr::Host2DeviceCopy::make(*graph, data);
        srcs.push_back(nd);
    }
    auto host_offsets_gen = megdnn::ParamPackConcat::gen_offsets(
            shapes, cn.get_mem_addr_alignment(), 4);
    ASSERT_EQ(host_offsets_gen.back(), size);
    auto host_offsets = std::make_shared<HostTensorND>();
    host_offsets->comp_node(cn).dtype(dtype::Int32{}).resize({srcs.size() * 2});
    // two int32 entries per tensor, hence srcs.size() * 8 bytes
    memcpy(host_offsets->raw_ptr(), host_offsets_gen.data(), srcs.size() * 8);
    auto offsets = opr::Host2DeviceCopy::make(*graph, host_offsets);
    auto z = opr::ParamPackConcat::make(srcs, offsets, host_offsets_gen);
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    func->execute();
    HostTensorND expected;
    expected.comp_node(cn).dtype(dtype::Int32()).resize({size});
    {
        auto ptr = expected.ptr<int>();
        memset(ptr, 0, sizeof(int32_t) * size);
        for (size_t i = 0; i < begins.size(); i++) {
            auto begin = begins[i];
            auto shape = shapes[i];
            for (size_t j = 0; j < shape.total_nr_elems(); j++) {
                ptr[begin + j] = j;
            }
        }
    }
    MGB_ASSERT_TENSOR_EQ(expected, host_z);
}
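// Offset arithmetic shared by both param-pack helpers: each tensor's start is
// rounded up to `align` elements, where align is the device address alignment
// divided by the 4-byte dtype size. For illustration, assuming a 64-byte
// alignment, align is 16 elements, so for shapes {129}, {21} the second
// tensor begins at get_aligned_power2(129, 16) == 144 and the packed buffer
// holds 144 + 21 = 165 elements. gen_offsets() emits two int32 entries per
// tensor (its begin and end within the packed buffer), so back() gives the
// total packed size, which test_param_pack_concat asserts above.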
template <size_t nr_out>
void test_param_pack_split(const TensorShapeArray& shapes) {
    auto cn = CompNode::load("xpu0");
    auto align = std::max<size_t>(cn.get_mem_addr_alignment() / 4, 1);
    size_t concat_size = 0;
    mgb_assert(shapes.size() == nr_out);
    for (auto&& i : shapes) {
        concat_size = get_aligned_power2(concat_size, align) + i.total_nr_elems();
    }
    using Checker = AutoOprChecker<1, nr_out>;
    auto make_graph = [&](const typename Checker::SymInpArray& inputs) ->
            typename Checker::SymOutArray {
        auto offsets_val = megdnn::ParamPackConcat::gen_offsets(
                shapes, cn.get_mem_addr_alignment(), 4);
        HostTensorND offsets;
        std::copy_n(
                offsets_val.data(), offsets_val.size(),
                offsets.dtype(dtype::Int32{})
                        .comp_node(cn)
                        .resize({offsets_val.size()})
                        .ptr<int>());
        auto out = opr::ParamPackSplit::make(inputs[0], offsets_val, shapes);
        mgb_assert(out.size() == nr_out);
        typename Checker::SymOutArray ret;
        for (size_t i = 0; i < nr_out; ++i) {
            ret[i] = out[i];
        }
        return ret;
    };
    auto fwd = [&](typename Checker::NumOutArray& dest,
                   typename Checker::NumInpArray inp) {
        size_t offset = 0;
        auto ptr = inp[0]->template ptr<float>();
        for (size_t i = 0; i < nr_out; ++i) {
            dest[i].resize(shapes[i]);
            offset = get_aligned_power2(offset, align);
            auto nr_elem = shapes[i].total_nr_elems();
            memcpy(dest[i].template ptr<float>(), ptr + offset, nr_elem * 4);
            offset += nr_elem;
        }
    };
    Checker{make_graph, fwd}
            .run({TensorShape{concat_size}})
            .run({TensorShape{concat_size}})
            .run({TensorShape{concat_size}});
}

}  // anonymous namespace

TEST(TestParamPack, Concat) {
    TensorShapeArray array = {{129}, {21}};
    test_param_pack_concat(array, dtype::Int32());

    array = {{23}, {32}, {75}, {45}};
    test_param_pack_concat(array, dtype::Int32());

    array = {{129}, {512}, {513}, {27}};
    test_param_pack_concat(array, dtype::Int32());
}

TEST(TestParamPack, Split) {
    test_param_pack_split<2>({{2, 3}, {4, 5, 6}});
    test_param_pack_split<3>({{2, 9}, {123}, {5, 3}});
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}