From 4348960c40420a32790b8af2d6be0d808311d7ba Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Wed, 29 Jul 2020 17:36:27 +0800
Subject: [PATCH] fix(mge/gopt): fix fp16 compute mode

GitOrigin-RevId: 350625d1aaa9e714da2c27708a33d5f4d74beb11
---
 src/gopt/impl/inference.cpp | 41 ++++++++++++++++++++++++++++-
 src/gopt/test/inference.cpp | 58 +++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp
index 0a05aae1..fa9c33b5 100644
--- a/src/gopt/impl/inference.cpp
+++ b/src/gopt/impl/inference.cpp
@@ -754,6 +754,42 @@ std::unique_ptr<ConvertF32ToF16Pass> ConvertF32ToF16Pass::make(
         return new_conv_opr.node()->owner_opr();
     };
 
+    auto replace_convbias_opr = [use_f32_comp](OperatorNodeBase* opr,
+                                               const VarNodeArray& new_inp) {
+        auto& convbias_opr = opr->cast_final_safe<opr::ConvBias>();
+        auto new_param = convbias_opr.param();
+        if (use_f32_comp) {
+            new_param.compute_mode =
+                    megdnn::param::ConvBias::ComputeMode::FLOAT32;
+        }
+        mgb_assert(new_inp[0]->dtype() == dtype::Float16(),
+                   "inp %s:%s, owner_opr:%s", new_inp[0]->dtype().name(),
+                   new_inp[0]->name().c_str(),
+                   new_inp[0]->owner_opr()->name().c_str());
+        mgb_assert(new_inp[1]->dtype() == dtype::Float16(),
+                   "inp %s:%s, owner_opr:%s", new_inp[1]->dtype().name(),
+                   new_inp[1]->name().c_str(),
+                   new_inp[1]->owner_opr()->name().c_str());
+        if (opr->input().size() == 2) {
+            auto new_conv_opr = opr::ConvBias::make(
+                    new_inp[0], new_inp[1], new_param,
+                    convbias_opr.execution_policy(), convbias_opr.config());
+            return new_conv_opr.node()->owner_opr();
+        } else if (opr->input().size() == 3) {
+            auto new_conv_opr = opr::ConvBias::make(
+                    new_inp[0], new_inp[1], new_inp[2], new_param,
+                    convbias_opr.execution_policy(), convbias_opr.config());
+            return new_conv_opr.node()->owner_opr();
+        } else {
+            mgb_assert(opr->input().size() == 4, "invalid input size %zu",
+                       opr->input().size());
+            auto new_conv_opr = opr::ConvBias::make(
+                    new_inp[0], new_inp[1], new_inp[2], new_inp[3], new_param,
+                    convbias_opr.execution_policy(), convbias_opr.config());
+            return new_conv_opr.node()->owner_opr();
+        }
+    };
+
     auto replace_matmul_opr = [use_f32_comp](OperatorNodeBase* opr,
                                              const VarNodeArray& new_inp) {
         mgb_assert(opr->input().size() == new_inp.size());
@@ -888,6 +924,7 @@ std::unique_ptr<ConvertF32ToF16Pass> ConvertF32ToF16Pass::make(
     replace_func[opr::Host2DeviceCopy::typeinfo()] = replace_h2d_opr;
     replace_func[opr::SharedDeviceTensor::typeinfo()] = replace_sdt_opr;
     replace_func[opr::Convolution::typeinfo()] = replace_conv_opr;
+    replace_func[opr::ConvBias::typeinfo()] = replace_convbias_opr;
     replace_func[opr::MatrixMul::typeinfo()] = replace_matmul_opr;
     replace_func[opr::Reduce::typeinfo()] = replace_reduce_opr;
     replace_func[opr::ImmutableTensor::typeinfo()] = replace_imt_opr;
@@ -1622,7 +1659,9 @@ void FuseConvBiasNonlinPass::apply(OptState& state) const {
                 param.stride_h,
                 param.stride_w,
                 param.dilate_h,
-                param.dilate_w};
+                param.dilate_w,
+                0,
+                param.compute_mode};
     };
 
     auto check_bias_shape = [&](opr::Convolution* conv, VarNode* bias) -> bool {
diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp
index f276a188..54ffd19e 100644
--- a/src/gopt/test/inference.cpp
+++ b/src/gopt/test/inference.cpp
@@ -880,6 +880,64 @@ TEST(TestGoptInference, Float32TOFloat16) {
     MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
 }
 
+TEST(TestGoptInference, Float32TOFloat16C32) {
+    CompNode cn = CompNode::load("cpu0");
+    HostTensorGenerator<> gen(0, 1, 0);
+    auto host_x0 = gen({1, 4, 1, 1}, cn),
+         host_x1 = gen({2, 3, 16, 8}, cn),
+         host_x2 = gen({4, 3, 1, 1}, cn);
+    auto graph = ComputingGraph::make();
+
+    auto make_f32_to_f16_graph = [&]() {
+        graph->options().graph_opt_level = 0;
+
+        auto d0 = opr::Host2DeviceCopy::make(*graph, host_x0),
+             d1 = opr::Host2DeviceCopy::make(*graph, host_x1),
+             d2 = opr::SharedDeviceTensor::make(*graph, *host_x2);
+
+        auto y = opr::ConvBias::make(d1, d2, d0);
+        y = opr::Reduce::make(y, {}, y.make_scalar(1));
+
+        SymbolVar y_opt;
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_f16_io_f32_comp();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
+        return y_opt;
+    };
+
+    auto make_f16_graph = [&]() {
+        auto d0 = opr::TypeCvt::make(
+                     opr::TypeCvt::make(opr::Host2DeviceCopy::make(*graph, host_x0),
+                                        dtype::Float16{}), dtype::Float32{}),
+             d1 = opr::TypeCvt::make(
+                     opr::TypeCvt::make(opr::Host2DeviceCopy::make(*graph, host_x1),
+                                        dtype::Float16{}), dtype::Float32{}),
+             d2 = opr::TypeCvt::make(
+                     opr::TypeCvt::make(opr::SharedDeviceTensor::make(*graph, *host_x2),
+                                        dtype::Float16{}), dtype::Float32{});
+
+        auto y = opr::ConvBias::make(d1, d2, d0);
+        y = opr::Reduce::make(y, {}, y.make_scalar(1));
+        y = opr::TypeCvt::make(opr::TypeCvt::make(y, dtype::Float16{}),
+                               dtype::Float32{});
+
+        return y;
+    };
+
+    auto y_opt = make_f32_to_f16_graph();
+    auto y = make_f16_graph();
+    ASSERT_EQ(find_opr<opr::ConvBias>(y_opt).param().compute_mode,
+              opr::ConvBias::Param::ComputeMode::FLOAT32);
+    ASSERT_EQ(y_opt.dtype(), dtype::Float32{});
+    ASSERT_EQ(y.dtype(), dtype::Float32{});
+
+    HostTensorND host_y_opt, host_y;
+    auto func = graph->compile({make_callback_copy(y, host_y),
+                                make_callback_copy(y_opt, host_y_opt)});
+    func->execute();
+    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
+}
+
 TEST(TestGoptInference, Float32TOFloat16EndpointElemwise) {
     CompNode cn = CompNode::load("cpu0");
     HostTensorGenerator<> gen(0, 1, 0);
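
Note: the conversion fixed by this patch is driven entirely through
gopt::OptimizeForInferenceOptions, as the new test shows. Below is a minimal
caller-side sketch of requesting f16 storage with f32 accumulation; the helper
name and the include paths are assumptions based on the usual MegBrain source
layout, not something this patch defines.

// Sketch only: the include paths and the helper name are assumed, not taken
// from the patch above.
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/dnn/convolution.h"

using namespace mgb;

// Build a ConvBias endpoint in f32, then ask gopt for f16 I/O with f32
// accumulation. With this patch applied, the pass rebuilds the ConvBias
// operator with compute_mode = FLOAT32 as well.
SymbolVar optimize_conv_bias_f16_io_f32_comp(SymbolVar src, SymbolVar filter,
                                             SymbolVar bias) {
    auto y = opr::ConvBias::make(src, filter, bias);

    auto options = gopt::OptimizeForInferenceOptions{};
    options.enable_f16_io_f32_comp();  // f16 storage, f32 compute
    // optimize_for_inference maps each endpoint to its optimized counterpart;
    // a single endpoint goes in, so a single optimized endpoint comes out.
    return gopt::optimize_for_inference({y}, options)[0];
}

Because the pass rewrites input dtypes to Float16 before replace_convbias_opr
runs, the new lambda can assert that its first two inputs are already Float16
and only has to propagate compute_mode into the rebuilt operator.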