From 4afa4b72c3447df6ef1b1d9099fe4ad2251997e2 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 15 Oct 2020 17:43:41 +0800 Subject: [PATCH] fix(mgb/oppass): fix fuse conv bias pass when bias is full bias and bias is const provider GitOrigin-RevId: 3285022f3376505cb1541b6bb5d427bc36933c66 --- src/gopt/impl/inference.cpp | 8 ++++++- src/gopt/test/inference.cpp | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp index 8115f61c..2db1f1da 100644 --- a/src/gopt/impl/inference.cpp +++ b/src/gopt/impl/inference.cpp @@ -1720,9 +1720,15 @@ void FuseConvBiasNonlinPass::apply(OptState& state) const { auto dst_shape = conv->output(0)->shape(); auto filter_shape = conv->input(1)->shape(); auto bias_shape = bias->shape(); - if (dst_shape.eq_shape(bias_shape)) { + + //! pay attention: the bias node must not be a const shape provider; when + //! batch > 1, its fixed shape triggers a shape assertion in ConvBias, + //! since resizing the input cannot update a const bias shape accordingly, + //! 
so do not fuse conv bias in this situation + if (dst_shape.eq_shape(bias_shape) && !cg::is_const_var_shape(bias)) { return valid_bias_shape; } + size_t OC = filter_shape[0]; if (conv->param().sparse == Sparse::GROUP) { OC *= filter_shape[1]; diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp index cb2cd183..f2a42035 100644 --- a/src/gopt/test/inference.cpp +++ b/src/gopt/test/inference.cpp @@ -1482,6 +1482,61 @@ TEST(TestGoptInference, ConvBiasNonlinearityFusePass) { MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-4); } +TEST(TestGoptInference, ConvBiasNonlinearityFusePass_FullBias) { + NaiveMegDNNHandleScope naive_megdnn_handle; + + for (int i = 0; i < 2; i++) { + auto graph = ComputingGraph::make(); + auto cn = CompNode::load("cpu0"); + HostTensorGenerator<> gen; + auto mkImvar = [&](const char* name, const TensorShape& shp) { + return opr::ImmutableTensor::make(*graph, *gen(shp, cn)) + .rename(name); + }; + + graph->options().graph_opt_level = 0; + auto mkcvar = [&](const char* name, const TensorShape& shp) { + return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)) + .rename(name); + }; + opr::Convolution::Param param; + auto host_x = gen({1, 8, 16, 24}, cn); + auto x = opr::Host2DeviceCopy::make(*graph, host_x), + w1 = mkcvar("w1", {4, 8, 1, 1}), w2 = mkcvar("w2", {4, 8, 3, 3}), + w3 = mkcvar("w3", {4, 4, 1, 1}), + b = i == 0 ? 
mkcvar("b", {1, 4, 16, 24}) + : mkImvar("bias", {1, 4, 16, 24}), + y_cut0 = opr::Convolution::make(x, w1, param); + param.pad_w = param.pad_h = 1; + auto y_cut1 = opr::Convolution::make(x, w2, param); + auto y1 = opr::Elemwise::make({y_cut0 + y_cut1}, + opr::Elemwise::Param::Mode::RELU); + param.pad_w = param.pad_h = 0; + auto y2 = opr::Convolution::make(y1, w3, param); + auto y = + opr::Elemwise::make({y2 + b}, opr::Elemwise::Param::Mode::RELU); + SymbolVar y_opt; + auto options = gopt::OptimizeForInferenceOptions{}; + options.enable_fuse_conv_bias_nonlinearity(); + unpack_vector(gopt::optimize_for_inference({y}, options), y_opt); + ASSERT_EQ(3u, find_opr(y_opt).input().size()); + graph->compile({{y_opt, {}}}) + ->to_json() + ->writeto_fpath( + output_file("TestGoptInference.FuseConvBiasNonlinPass_" + "FulBias.json")); + HostTensorND host_y, host_y_opt; + auto func = graph->compile({make_callback_copy(y, host_y), + make_callback_copy(y_opt, host_y_opt)}); + func->execute(); + MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-4); + *host_x = *gen({4, 8, 16, 24}, cn); + func->execute(); + MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-4); + } +} + + TEST(TestGoptInference, ParamMerge) { auto cns = load_multiple_xpus(2); HostTensorGenerator<> gen;