This reverts commit release-1.72735536424.
GitOrigin-RevId: 6a641808f0
@@ -508,15 +508,6 @@ AlgoChooser<Opr>::AlgoChooserHelper::AlgoChooserHelper( | |||||
m_fastrun_layouts, m_dnn_opr->param(), fastrun_batch_size); | m_fastrun_layouts, m_dnn_opr->param(), fastrun_batch_size); | ||||
} | } | ||||
if (owner_graph()->options().no_profiling_on_shape_change) { | |||||
for (size_t i = 0; i < m_incache_layouts.size(); i++) { | |||||
for (size_t j = 0; j < m_incache_layouts.at(i).ndim; j++) { | |||||
m_incache_layouts.at(i)[j] = 0; | |||||
m_incache_layouts.at(i).stride[j] = 0; | |||||
} | |||||
} | |||||
} | |||||
mgb_assert(m_fastrun_layouts.size() == layouts.size()); | mgb_assert(m_fastrun_layouts.size() == layouts.size()); | ||||
static_assert( | static_assert( | ||||
@@ -571,6 +562,12 @@ typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::AlgoChooserHelp | |||||
if (policy.algo.valid()) { | if (policy.algo.valid()) { | ||||
return policy; | return policy; | ||||
} | } | ||||
if (is_matmul<Opr>()) { | |||||
mgb_log_warn( | |||||
"choose algo by heuristic, which may cause performance " | |||||
"regression."); | |||||
return choose_by_heuristic(selected_strategy); | |||||
} | |||||
} | } | ||||
typename AlgoChooser<Opr>::ImplExecutionPolicy tmp_policy; | typename AlgoChooser<Opr>::ImplExecutionPolicy tmp_policy; | ||||
@@ -1016,8 +1013,6 @@ std::pair<AlgoAttribute, AlgoAttribute> AlgoChooser<Opr>::AlgoChooserHelper:: | |||||
} | } | ||||
//! from graph option | //! from graph option | ||||
// FIXME: no_profiling_on_shape_change extract USABLE_DEPEND_ON_SHAPE attribute when | |||||
// fixed usable | |||||
if (owner_graph()->options().fast_run_config.shared_batch_size) { | if (owner_graph()->options().fast_run_config.shared_batch_size) { | ||||
ret.second |= AlgoAttribute::USABLE_DEPEND_ON_SHAPE; | ret.second |= AlgoAttribute::USABLE_DEPEND_ON_SHAPE; | ||||
} | } | ||||
@@ -58,6 +58,13 @@ constexpr bool opr_contain_bias() { | |||||
return std::is_same<Opr, megdnn::ConvBias>::value; | return std::is_same<Opr, megdnn::ConvBias>::value; | ||||
} | } | ||||
//! matmul and batchedMatrixMul | |||||
//! Compile-time predicate on the operator type: evaluates to true only when
//! Opr is exactly megdnn::MatrixMul or megdnn::BatchedMatrixMul, and to false
//! for every other type. Takes no runtime arguments.
template <typename Opr> | |||||
constexpr bool is_matmul() { | |||||
return std::is_same<Opr, megdnn::MatrixMul>::value || | |||||
std::is_same<Opr, megdnn::BatchedMatrixMul>::value; | |||||
} | |||||
template <typename Opr, bool has_prep> | template <typename Opr, bool has_prep> | ||||
struct PreprocessFilterImpl { | struct PreprocessFilterImpl { | ||||
using T = union {}; | using T = union {}; | ||||
@@ -292,56 +292,6 @@ TEST(TestOprDNN, FastrunIgnoreBatchSizeBatchedMatrixMul) { | |||||
{TensorShape{4, 6, 8}, TensorShape{4, 8, 4}}); | {TensorShape{4, 6, 8}, TensorShape{4, 8, 4}}); | ||||
} | } | ||||
//! Test helper: executes MgbOpr twice, each time in a freshly created graph
//! with no_profiling_on_shape_change enabled and the PROFILE strategy, and
//! checks how often the persistent cache is written.
//!
//! \tparam MgbOpr operator type under test; must expose ExecutionPolicy and a
//!         make(a, b, {}, policy, {}) factory.
//! \param inps0 shapes of the two inputs for the first run (indices 0 and 1).
//! \param inps1 shapes of the two inputs for the second run (indices 0 and 1).
//!
//! Expectation: the first run triggers at least one cache "set" callback, and
//! the second run (different shapes) triggers none.
template <typename MgbOpr> | |||||
void test_no_profiling_on_shape_change( | |||||
const TensorShapeArray& inps0, const TensorShapeArray& inps1) { | |||||
using Policy = typename MgbOpr::ExecutionPolicy; | |||||
// Counts invocations of the cache "set" hook across both runs.
int nr_set = 0; | |||||
// Cache reads are not inspected by this test, so the "get" hook is a no-op.
auto on_get = [](const std::string&, const void*, size_t, const void*, size_t) {}; | |||||
auto on_set = [&nr_set]( | |||||
const std::string&, const void*, size_t, const void*, | |||||
size_t) { nr_set++; }; | |||||
// Installs the two hooks for the lifetime of this object.
// NOTE(review): presumably the hooks are removed again when cache_hook goes
// out of scope -- confirm against PersistentCacheHook.
PersistentCacheHook cache_hook{on_get, on_set}; | |||||
auto cn = CompNode::load("xpu0"); | |||||
// Builds a new graph for the given pair of input shapes, compiles it and
// executes it once.
auto run = [&cn](const TensorShapeArray& shapes) { | |||||
auto graph = ComputingGraph::make(); | |||||
graph->options().no_profiling_on_shape_change = true; | |||||
HostTensorGenerator<> gen; | |||||
auto host_a = gen(shapes[0], cn); | |||||
auto host_b = gen(shapes[1], cn); | |||||
HostTensorND host_out; | |||||
auto a = opr::Host2DeviceCopy::make(*graph, host_a), | |||||
b = opr::Host2DeviceCopy::make(*graph, host_b); | |||||
Policy policy; | |||||
policy.strategy = Policy::Strategy::PROFILE; | |||||
auto out = MgbOpr::make(a, b, {}, policy, {}); | |||||
std::unique_ptr<cg::AsyncExecutable> func = graph->compile({{out, {}}}); | |||||
func->execute(); | |||||
}; | |||||
// First run: at least one cache write must have happened.
run(inps0); | |||||
int nr = nr_set; | |||||
ASSERT_GT(nr, 0); | |||||
// Second run with different shapes: the write counter must not have moved.
run(inps1); | |||||
ASSERT_EQ(nr, nr_set); | |||||
} | |||||
//! Runs test_no_profiling_on_shape_change for opr::Convolution and
//! opr::MatrixMul, each with two different sets of input shapes.
TEST(TestOprDNN, FastrunNoProfilingOnShapeChange) { | |||||
// NOTE(review): presumably skips the test when fewer than one GPU is
// available -- confirm against the REQUIRE_GPU macro.
REQUIRE_GPU(1); | |||||
// Empties the heuristic cache singleton before the checks below run.
megdnn::HeuristicCache::instance().clear(); | |||||
test_no_profiling_on_shape_change<opr::Convolution>( | |||||
{{12, 3, 36, 36}, {4, 3, 3, 3}}, {{32, 3, 28, 28}, {4, 3, 3, 3}}); | |||||
test_no_profiling_on_shape_change<opr::MatrixMul>( | |||||
{{20, 30}, {30, 40}}, {{30, 40}, {40, 60}}); | |||||
} | |||||
#endif // MGB_ENABLE_FASTRUN | #endif // MGB_ENABLE_FASTRUN | ||||
#endif // MGB_CUDA | #endif // MGB_CUDA | ||||
@@ -899,12 +899,11 @@ TEST(TestOprBlas, MatrixMulExePolicy) { | |||||
graph->options().no_profiling_on_shape_change = true; | graph->options().no_profiling_on_shape_change = true; | ||||
auto func = graph->compile({make_callback_copy(matmul, host_y)}); | auto func = graph->compile({make_callback_copy(matmul, host_y)}); | ||||
func->execute(); | func->execute(); | ||||
ASSERT_GT(nr_get, 0); | |||||
int nr = nr_get; | |||||
ASSERT_EQ(nr_get, 0); | |||||
graph->options().no_profiling_on_shape_change = false; | graph->options().no_profiling_on_shape_change = false; | ||||
func = graph->compile({make_callback_copy(matmul, host_y)}); | func = graph->compile({make_callback_copy(matmul, host_y)}); | ||||
func->execute(); | func->execute(); | ||||
ASSERT_GT(nr_get, nr); | |||||
ASSERT_GT(nr_get, 0); | |||||
} | } | ||||
#endif | #endif | ||||