From 4b08e79da84db055989857035d19ce807c809d88 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 4 Aug 2021 19:31:43 +0800 Subject: [PATCH] fix(mgb): fix fastrun no_profiling_on_shape_change GitOrigin-RevId: 27355364248c464089e6fcbbde52dcaf9a26b314 --- src/opr/impl/search_policy/algo_chooser.cpp | 16 ++++--- .../include/megbrain/opr/search_policy/profiler.h | 7 ---- src/opr/test/algo_chooser.cpp | 49 ++++++++++++++++++++++ src/opr/test/blas.cpp | 5 ++- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/opr/impl/search_policy/algo_chooser.cpp b/src/opr/impl/search_policy/algo_chooser.cpp index cb593028..71c4b2e7 100644 --- a/src/opr/impl/search_policy/algo_chooser.cpp +++ b/src/opr/impl/search_policy/algo_chooser.cpp @@ -523,6 +523,15 @@ AlgoChooser::AlgoChooserHelper::AlgoChooserHelper( fastrun_batch_size); } + if (owner_graph()->options().no_profiling_on_shape_change) { + for (size_t i = 0; i < m_incache_layouts.size(); i++) { + for (size_t j = 0; j < m_incache_layouts.at(i).ndim; j++) { + m_incache_layouts.at(i)[j] = 0; + m_incache_layouts.at(i).stride[j] = 0; + } + } + } + mgb_assert(m_fastrun_layouts.size() == layouts.size()); static_assert( @@ -582,12 +591,6 @@ AlgoChooser::AlgoChooserHelper::choose_by_profile( if (policy.algo.valid()) { return policy; } - if (is_matmul()) { - mgb_log_warn( - "choose algo by heuristic, which may cause performance " - "regression."); - return choose_by_heuristic(selected_strategy); - } } typename AlgoChooser::ImplExecutionPolicy tmp_policy; @@ -1027,6 +1030,7 @@ AlgoChooser::AlgoChooserHelper::extract_algo_attribute( } //! from graph option + // FIXME: no_profiling_on_shape_change extract USABLE_DEPEND_ON_SHAPE attribute when fixed usable if (owner_graph()->options().fast_run_config.shared_batch_size) { ret.second |= AlgoAttribute::USABLE_DEPEND_ON_SHAPE; } diff --git a/src/opr/include/megbrain/opr/search_policy/profiler.h b/src/opr/include/megbrain/opr/search_policy/profiler.h index d1bf34aa..ac8879ec 100644 --- a/src/opr/include/megbrain/opr/search_policy/profiler.h +++ b/src/opr/include/megbrain/opr/search_policy/profiler.h @@ -58,13 +58,6 @@ constexpr bool opr_contain_bias() { return std::is_same::value; } -//! matmul and batchedMatrixMul -template -constexpr bool is_matmul() { - return std::is_same::value || - std::is_same::value; -} - template struct PreprocessFilterImpl { using T = union {}; diff --git a/src/opr/test/algo_chooser.cpp b/src/opr/test/algo_chooser.cpp index 52200dc8..967d631d 100644 --- a/src/opr/test/algo_chooser.cpp +++ b/src/opr/test/algo_chooser.cpp @@ -296,6 +296,55 @@ TEST(TestOprDNN, FastrunIgnoreBatchSizeBatchedMatrixMul) { {TensorShape{4, 6, 8}, TensorShape{4, 8, 4}}); } +template +void test_no_profiling_on_shape_change(const TensorShapeArray& inps0, + const TensorShapeArray& inps1) { + using Policy = typename MgbOpr::ExecutionPolicy; + + int nr_set = 0; + auto on_get = [](const std::string&, const void*, size_t, const void*, + size_t) {}; + auto on_set = [&nr_set](const std::string&, const void*, size_t, + const void*, size_t) { nr_set++; }; + PersistentCacheHook cache_hook{on_get, on_set}; + + auto cn = CompNode::load("xpu0"); + auto run = [&cn](const TensorShapeArray& shapes) { + auto graph = ComputingGraph::make(); + graph->options().no_profiling_on_shape_change = true; + + HostTensorGenerator<> gen; + auto host_a = gen(shapes[0], cn); + auto host_b = gen(shapes[1], cn); + HostTensorND host_out; + auto a = opr::Host2DeviceCopy::make(*graph, host_a), + b = opr::Host2DeviceCopy::make(*graph, host_b); + + Policy policy; + policy.strategy = Policy::Strategy::PROFILE; + auto out = MgbOpr::make(a, b, {}, policy, {}); + + std::unique_ptr func = graph->compile({{out, {}}}); + func->execute(); + }; + + run(inps0); + int nr = nr_set; + ASSERT_GT(nr, 0); + run(inps1); + ASSERT_EQ(nr, nr_set); +} + +TEST(TestOprDNN, FastrunNoProfilingOnShapeChange) { + REQUIRE_GPU(1); + + test_no_profiling_on_shape_change( + {{12, 3, 36, 36}, {4, 3, 3, 3}}, {{32, 3, 28, 28}, {4, 3, 3, 3}}); + + test_no_profiling_on_shape_change({{20, 30}, {30, 40}}, + {{30, 40}, {40, 60}}); +} + #endif // MGB_ENABLE_FASTRUN #endif // MGB_CUDA diff --git a/src/opr/test/blas.cpp b/src/opr/test/blas.cpp index c0198042..d271828e 100644 --- a/src/opr/test/blas.cpp +++ b/src/opr/test/blas.cpp @@ -916,11 +916,12 @@ TEST(TestOprBlas, MatrixMulExePolicy) { graph->options().no_profiling_on_shape_change = true; auto func = graph->compile({make_callback_copy(matmul, host_y)}); func->execute(); - ASSERT_EQ(nr_get, 0); + ASSERT_GT(nr_get, 0); + int nr = nr_get; graph->options().no_profiling_on_shape_change = false; func = graph->compile({make_callback_copy(matmul, host_y)}); func->execute(); - ASSERT_GT(nr_get, 0); + ASSERT_GT(nr_get, nr); } #endif