GitOrigin-RevId: 4e49d8eae8
release-1.6
@@ -711,6 +711,35 @@ def test_copy_d2d(is_varnode):
     copy_test("gpu0:0", "gpu0:1", network=network)
 
 
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize(
+    "shape, device_src, device_dst",
+    [
+        ((0,), "cpu0", "cpu0"),
+        ((10, 0), "cpu0", "cpu1"),
+        ((2, 0, 3), "cpu0", "gpu0"),
+        ((1, 0, 1, 0), "gpu0", "cpu0"),
+        ((2, 3, 4, 5, 0), "gpu0", "gpu1"),
+    ],
+)
+@pytest.mark.parametrize("is_symbolic", [None, True, False])
+def test_copy_empty(shape, device_src, device_dst, is_symbolic):
+    inp = tensor(np.random.randn(*shape).astype("float32"), device=device_src)
+
+    def func(inp):
+        return F.copy(inp, device_dst)
+
+    if is_symbolic is not None:
+        func = trace(symbolic=is_symbolic)(func)
+
+    for _ in range(3):
+        out = func(inp)
+        assert out.numpy().shape == shape
+        assert out.device == device_dst
+        if is_symbolic is None:
+            break
+
+
 @pytest.mark.parametrize(
     "shape, repeats, axis",
     [
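The Python hunk above pins down the user-visible contract: F.copy on a zero-element tensor must succeed for every device pair and preserve shape, in both eager and traced execution. A minimal standalone sketch of that contract, assuming a MegEngine build; the imports mirror what the test module presumably already has at top of file:

    # Eager and traced copies of an empty tensor keep shape and dtype.
    import numpy as np
    import megengine.functional as F
    from megengine import tensor
    from megengine.jit import trace

    inp = tensor(np.random.randn(2, 0, 3).astype("float32"), device="cpu0")

    out = F.copy(inp, "cpu1")               # eager path
    assert out.numpy().shape == (2, 0, 3)

    traced = trace(symbolic=True)(lambda x: F.copy(x, "cpu1"))
    for _ in range(3):                      # re-run, as the test does
        assert traced(inp).numpy().shape == (2, 0, 3)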
@@ -170,6 +170,7 @@ void OutputCallback::init_output_static_infer_desc() {}
 cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const {
     NodeProp* prop = Super::do_make_node_prop();
     prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP);
+    prop->add_flag(NodeProp::Flag::CROSS_COMP_NODE_MEMORY);
     SmallVector<NodeProp::DepType> dep_types(input().size(),
                                              NodeProp::DepType::DEV_COMP_ORDER);
     using IT = cg::static_infer::InferType;
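CROSS_COMP_NODE_MEMORY declares that an operator may legally access memory living on a comp node other than the one it executes on; OutputCallback needs it once the value it reads back can come from a cross-device Copy. A hedged user-level sketch of the memory pattern involved (whether readback routes through OutputCallback in exactly this form is an internal detail; the snippet is illustrative only):

    # Host readback of a d2h copy touches memory across comp nodes.
    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.empty((2, 0, 3), dtype="float32"), device="gpu0")
    host = F.copy(x, "cpu0").numpy()   # gpu0 -> cpu0, then host readback
    assert host.shape == (2, 0, 3)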
@@ -708,7 +708,7 @@ Copy::Copy(VarNode *inp, const OperatorNodeConfig &config):
         Super{inp->owner_graph(), config, "copy", {inp}}
 {
     add_input({inp});
-    add_output(None);
+    add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
 }
 
 SymbolVar Copy::make(SymbolVar inp, const OperatorNodeConfig &config) {
@@ -767,6 +767,8 @@ Copy::NodeProp* Copy::do_make_node_prop() const {
     using F = NodeProp::Flag;
     rst->add_flag(F::CROSS_COMP_NODE_MEMORY);
     rst->add_flag(F::NO_AUTOMATIC_DUP);
+    rst->add_dep_type_existing_var(input(0),
+                                   NodeProp::DepType::VALUE_ALLOW_EMPTY);
     return rst;
 }
 
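The two additions to Copy work as a pair: ALLOW_EMPTY_SHAPE on the output var permits a zero-element result, and VALUE_ALLOW_EMPTY on the input dependency tells the executor that an empty input value is acceptable rather than an error. Empty tensors reach Copy through ordinary data-dependent ops; a sketch, assuming a MegEngine build and that boolean-mask indexing behaves as in NumPy:

    # An all-False mask selects nothing, so the result has shape (0,);
    # moving it across devices requires Copy to accept empty values.
    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.arange(6, dtype="float32"), device="cpu0")
    mask = tensor(np.zeros(6, dtype="bool"), device="cpu0")
    moved = F.copy(x[mask], "cpu1")
    assert moved.numpy().shape == (0,)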
@@ -423,7 +423,7 @@ Identity::Identity(VarNode* input, const OperatorNodeConfig &config):
         Super(input->owner_graph(), config, "identity", {input})
 {
     add_input({input});
-    add_output(None);
+    add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
     set_ignore_side_effect();
 }
 
@@ -437,6 +437,13 @@ SymbolVar Identity::make(
     return input.insert_single_output_opr<Identity>(input.node(), config);
 }
 
+Identity::NodeProp* Identity::do_make_node_prop() const {
+    auto ret = Super::do_make_node_prop();
+    ret->add_dep_type_existing_var(input(0),
+                                   NodeProp::DepType::VALUE_ALLOW_EMPTY);
+    return ret;
+}
+
 #if MGB_ENABLE_GRAD
 MGB_IMPL_OPR_GRAD(Identity) {
     return out_grad.at(0);
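Identity forwards its input to its output unchanged, so it gets the same treatment as Copy: an output var that may be empty, plus an input value dependency that tolerates emptiness. A hedged sketch of a user-level passthrough on an empty tensor (whether this particular trace actually lowers to Identity is an implementation detail, so treat it as illustrative):

    # Tracing a passthrough of an empty tensor should now execute cleanly.
    import numpy as np
    from megengine import tensor
    from megengine.jit import trace

    @trace(symbolic=True)
    def passthrough(x):
        return x

    out = passthrough(tensor(np.empty((0, 4), dtype="float32")))
    assert out.numpy().shape == (0, 4)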
@@ -277,6 +277,7 @@ MGB_DEFINE_OPR_CLASS(MarkNoBroadcastElemwise, intl::ForwardInputToOutput) // {
  * its gradient can be correctly computed.
  */
 MGB_DEFINE_OPR_CLASS(Identity, intl::ForwardInputToOutput) // {
+    NodeProp* do_make_node_prop() const override;
     public:
         using Param = megdnn::param::Empty;
         Identity(VarNode* input, const OperatorNodeConfig &config);
@@ -406,6 +406,20 @@ TEST(TestOprIO, D2DNonContig) {
     MGB_ASSERT_TENSOR_EQ(host_y, except_y);
 }
 
+TEST(TestOprIO, D2DCopyEmpty) {
+    auto cns = load_multiple_xpus(2);
+    HostTensorGenerator<> gen;
+    auto host_x = gen({2,0,3,0,4}, cns[0]);
+    auto graph = ComputingGraph::make();
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
+         y = (opr::Copy::make(x, {cns[1]})).rename("y");
+    HostTensorND host_y;
+    auto func = graph->compile({make_callback_copy(y, host_y)});
+    func->execute();
+    ASSERT_TRUE(host_y.layout().is_empty());
+    ASSERT_EQ(host_y.layout(), host_x->layout());
+}
+
 TEST(TestOprIO, MultipleDeviceTensorHolder) {
     auto cns = load_multiple_xpus(2);
     HostTensorGenerator<> gen0;
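For clarity, the two ASSERTs in D2DCopyEmpty restated with NumPy standing in for HostTensorND (illustration only): an empty tensor has zero elements yet a fully specified layout, and a correct copy reproduces that layout exactly.

    import numpy as np

    x = np.random.randn(2, 0, 3, 0, 4).astype("float32")
    y = x.copy()
    assert y.size == 0                                  # layout().is_empty()
    assert y.shape == x.shape and y.dtype == x.dtype    # layouts match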