fix(opr/basic_arith): make Elemwise really support empty IO

... and add tests, should've done this the first time... GitOrigin-RevId: 5d73fc4c7b
5 years ago · 6fe6df28e6
--- a/dnn/src/common/elemwise/opr_impl.cpp
+++ b/dnn/src/common/elemwise/opr_impl.cpp
@@ -174,11 +174,12 @@ void ElemwiseForward::deduce_shape(const TensorShapeArray& src,
                if (cur_idx >= 0 && dst_idx >= 0) {
                    size_t v0 = dst.shape[dst_idx], v1 = cur.shape[cur_idx];
                    if (v0 != v1) {
                        if (v0 != 1 && v1 != 1)
                        if (v0 > 1 && v1 > 1)
                            err();
                    }
                    int final_idx = std::max(cur_idx, dst_idx);
                    dst.shape[final_idx] = std::max(v0, v1);
                    dst.shape[final_idx] =
                            (v0 != 0 && v1 != 0) ? std::max(v0, v1) : 0;
                } else {
                    if (dst_idx < 0) {
                        dst.shape[cur_idx] = cur.shape[cur_idx];
--- a/src/opr/impl/basic_arith.cpp
+++ b/src/opr/impl/basic_arith.cpp
@@ -132,6 +132,7 @@ Elemwise::Elemwise(
    Super{inputs.at(0)->owner_graph(), config, mode_trait.name, inputs}
 {
    init_megdnn_opr(*this, param);
    output(0)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
    if (mode_trait.commutable) {
        mgb_assert(inputs.size() == 2);
        add_input({inputs[0], inputs[1]}, AddInputSortType::CUR_ADDED);
@@ -371,8 +372,14 @@ void Elemwise::scn_do_execute() {
    mgb_assert(megdnn_inp.capacity() >= inp.size(),
            "heap allocation in elemwise exec");
    megdnn_inp.resize(inp.size());
    for (size_t i = 0; i < inp.size(); ++ i)
    for (size_t i = 0; i < inp.size(); ++ i) {
        if (inp[i]->dev_tensor().empty()) {
            mgb_assert(output(0)->dev_tensor().empty());
            return;
        }
        megdnn_inp[i] = (inp[i]->dev_tensor().as_megdnn());
    }
    mgb_assert(!output(0)->dev_tensor().empty());
    megdnn_opr()->param() = param();
    call_megdnn_opr_exec(
@@ -747,6 +754,15 @@ void Elemwise::record_execute_deps(ExecDependencyArray& deps) {
    record_megdnn_opr(deps);
 }
 Elemwise::NodeProp* Elemwise::do_make_node_prop() const {
    auto ret = Super::do_make_node_prop();
    for (auto& inp : input()) {
        ret->add_dep_type_existing_var(inp,
                                       NodeProp::DepType::VALUE_ALLOW_EMPTY);
    }
    return ret;
 }
 /* =========================== TypeCvt =========================== */
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(TypeCvt);
--- a/src/opr/include/megbrain/opr/basic_arith.h
+++ b/src/opr/include/megbrain/opr/basic_arith.h
@@ -163,6 +163,7 @@ MGB_DEFINE_OPR_CLASS(Elemwise, intl::ElemwiseBase) // {
        void record_execute_deps(ExecDependencyArray& deps) override;
        void add_input_layout_constraint() override;
        NodeProp* do_make_node_prop() const override;
 };
 namespace intl {
--- a/src/opr/test/basic_arith/elemwise.cpp
+++ b/src/opr/test/basic_arith/elemwise.cpp
@@ -649,6 +649,17 @@ namespace {
        > TernaryTraitTypes;
    TYPED_TEST_CASE(TestOprBasicArithTernaryElemwise, TernaryTraitTypes);
    ::testing::AssertionResult assert_shape_equal(const TensorShape& v0,
                                                  const TensorShape& v1) {
        if (v0.eq_shape(v1))
            return ::testing::AssertionSuccess()
                   << v0.to_string() << " == " << v1.to_string();
        else
            return ::testing::AssertionFailure()
                   << v0.to_string() << " != " << v1.to_string();
    }
 #define ASSERT_SHAPE_EQ(v0, v1) ASSERT_TRUE(assert_shape_equal(v0, v1))
 } // anonymous namespace
 template<typename Trait, typename dtype>
@@ -950,4 +961,58 @@ TEST(TestLayoutUtil, CollectiveCollapse) {
    check(cc_res5, std_res5);
 }
 TEST(TestOprBasicArithElemwise, EmptyInputOutputUnary) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto host_x = gen({3, 0, 1, 3});
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Elemwise::make(
                 {x}, opr::Elemwise::Param(opr::Elemwise::Param::Mode::RELU));
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    ASSERT_NO_THROW(func->execute().wait());
    ASSERT_TRUE(host_y.empty());
    ASSERT_TRUE(host_y.shape().is_empty());
    ASSERT_SHAPE_EQ(host_y.shape(), TensorShape({3, 0, 1, 3}));
 }
 TEST(TestOprBasicArithElemwise, EmptyInputOutputBinary) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto host_x = gen({0, 8, 1, 7}), host_y = gen({0, 8, 1, 7});
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = x + y;
    HostTensorND host_z;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    // Invalid broadcast
    host_y->resize({0, 9, 1, 7});
    ASSERT_ANY_THROW(func->execute().wait());
    // Broadcast to 0
    host_y->resize({1, 8, 0, 7});
    ASSERT_NO_THROW(func->execute().wait());
    ASSERT_TRUE(host_z.empty());
    ASSERT_TRUE(host_z.shape().is_empty());
    ASSERT_SHAPE_EQ(host_z.shape(), TensorShape({0, 8, 0, 7}));
    // Broadcast to 0 (2)
    host_y->resize({2, 8, 1, 7});
    ASSERT_NO_THROW(func->execute().wait());
    ASSERT_TRUE(host_z.empty());
    ASSERT_TRUE(host_z.shape().is_empty());
    ASSERT_SHAPE_EQ(host_z.shape(), TensorShape({0, 8, 1, 7}));
    // Scalar broadcast
    z = x + x.make_scalar(1.f);
    func = graph->compile({make_callback_copy(z, host_z)});
    ASSERT_NO_THROW(func->execute().wait());
    ASSERT_TRUE(host_z.empty());
    ASSERT_TRUE(host_z.shape().is_empty());
    ASSERT_SHAPE_EQ(host_z.shape(), TensorShape({0, 8, 1, 7}));
 }
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}