From 12dc36a6abeb100182c914ef32aa1d90707a639b Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 10 Jun 2020 23:21:04 +0800 Subject: [PATCH] feat(mgb/gopt): add interface to reproducible GitOrigin-RevId: f341bea40b6e52f4598640b81b477184d8473421 --- sdk/load-and-run/src/mgblar.cpp | 28 +++++++++++- src/gopt/impl/inference.cpp | 73 ++++++++++++++---------------- src/gopt/include/megbrain/gopt/inference.h | 17 ++++++- 3 files changed, 74 insertions(+), 44 deletions(-) diff --git a/sdk/load-and-run/src/mgblar.cpp b/sdk/load-and-run/src/mgblar.cpp index b2f2d7db..a30e21b1 100644 --- a/sdk/load-and-run/src/mgblar.cpp +++ b/sdk/load-and-run/src/mgblar.cpp @@ -14,6 +14,7 @@ #include "./json_loader.h" #include "./npy.h" +#include "megbrain/opr/dnn/convolution.h" #include "megbrain/utils/debug.h" #include "megbrain/serialization/serializer.h" #include "megbrain/serialization/extern_c_opr.h" @@ -144,6 +145,10 @@ R"__usage__( R"__usage__( --fast-run-algo-policy It will read the cache file before profile, and save new fastrun in cache file. + --reproducible + Enable choosing algorithms that are reproducible. It is mainly used for cuDNN algorithms. + See https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#reproducibility + for more details. --wait-gdb Print PID and wait for a line from stdin before starting execution. Useful for waiting for gdb attach. 
@@ -467,6 +472,7 @@ struct Args { #if MGB_ENABLE_FASTRUN bool use_fast_run = false; #endif + bool reproducible = false; std::string fast_run_cache_path; bool copy_to_host = false; int nr_run = 10; @@ -647,10 +653,24 @@ void run_test_st(Args &env) { } mgb::gopt::set_opr_algo_workspace_limit_inplace(vars, env.workspace_limit); + using S = opr::mixin::Convolution::ExecutionPolicy::Strategy; + S strategy = S::HEURISTIC; #if MGB_ENABLE_FASTRUN - if (env.use_fast_run) - mgb::gopt::enable_opr_algo_profiling_inplace(vars); + if (env.use_fast_run) { + if (env.reproducible) { + strategy = S::PROFILE_REPRODUCIBLE; + } else { + strategy = S::PROFILE; + } + } else if (env.reproducible) { + strategy = S::HEURISTIC_REPRODUCIBLE; + } +#else + if (env.reproducible) { + strategy = S::HEURISTIC_REPRODUCIBLE; + } #endif + mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy); if (!env.fast_run_cache_path.empty()) { #if MGB_ENABLE_FASTRUN if (!access(env.fast_run_cache_path.c_str(), F_OK)) { @@ -1149,6 +1169,10 @@ Args Args::from_argv(int argc, char **argv) { ret.fast_run_cache_path = argv[i]; continue; } + if (!strcmp(argv[i], "--reproducible")) { + ret.reproducible = true; + continue; + } if (!strcmp(argv[i], "--const-shape")) { ret.load_config.const_var_shape = true; continue; diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp index c60f876b..c43dd0d6 100644 --- a/src/gopt/impl/inference.cpp +++ b/src/gopt/impl/inference.cpp @@ -104,25 +104,21 @@ SymbolVarArray gopt::optimize_for_inference( } namespace { -void modify_conv_policy(opr::mixin::Convolution& conv, - megdnn::param::ExecutionPolicy::Strategy strategy) { +void modify_conv_strategy( + opr::mixin::Convolution& conv, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) { auto policy = conv.execution_policy_transient(); policy.strategy = strategy; conv.set_execution_policy(policy); } template -void inplace_conv_opr_profile_modifier(OperatorNodeBase& opr) { - modify_conv_policy( +void 
inplace_conv_opr_modifier( + OperatorNodeBase& opr, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) { + modify_conv_strategy( opr.cast_final_safe(), - opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE); -} - -template -void inplace_conv_opr_profile_cache_modifier(OperatorNodeBase& opr) { - modify_conv_policy(opr.cast_final_safe(), - opr::mixin::Convolution::ExecutionPolicy::Strategy:: - PROFILE_HEURISTIC); + strategy); } void modify_conv_policy_workspace_limit(opr::mixin::Convolution& conv, @@ -150,12 +146,20 @@ void inplace_conv_opr_workspace_limit_modifier(OperatorNodeBase& opr, cb(DeformableConvBackwardFilter), cb(DeformableConvBackwardData), \ cb(BatchConvBiasForward), -void gopt::enable_opr_algo_profiling_inplace( - const VarNodeArrayView& dest_vars) { -#if MGB_ENABLE_FASTRUN - static const ThinHashMap modifiers = - { -#define CONV(t) {opr::t::typeinfo(), &inplace_conv_opr_profile_modifier} +void gopt::modify_opr_algo_strategy_inplace( + const VarNodeArrayView& dest_vars, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) { +#if !MGB_ENABLE_FASTRUN + using S = opr::mixin::Convolution::ExecutionPolicy::Strategy; + if (strategy == S::PROFILE || strategy == S::PROFILE_REPRODUCIBLE) { + mgb_throw(MegBrainError, "fastrun is disabled at compile time"); + } +#endif + const ThinHashMap> + modifiers = { +#define CONV(t) \ + {opr::t::typeinfo(), std::bind(inplace_conv_opr_modifier, \ + std::placeholders::_1, strategy)} MGB_FOREACH_FASTRUN_OPR(CONV) #undef CONV }; @@ -171,34 +175,23 @@ void gopt::enable_opr_algo_profiling_inplace( for (auto i : dest_vars) { dep_iter.add(i); } -#else - mgb_throw(MegBrainError, "fastrun is disabled at compile time"); -#endif } -void gopt::enable_opr_use_profiling_cache_inplace( +void gopt::enable_opr_algo_profiling_inplace( const VarNodeArrayView& dest_vars) { - static const ThinHashMap modifiers = - { -#define CONV(t) \ - {opr::t::typeinfo(), &inplace_conv_opr_profile_cache_modifier} - 
MGB_FOREACH_FASTRUN_OPR(CONV) -#undef CONV - }; - - auto on_opr = [&](OperatorNodeBase* opr) { - auto iter = modifiers.find(opr->dyn_typeinfo()); - if (iter != modifiers.end()) { - iter->second(*opr); - } - }; + modify_opr_algo_strategy_inplace(dest_vars, + opr::mixin::Convolution::ExecutionPolicy:: + Strategy::PROFILE); +} - cg::DepOprIter dep_iter{on_opr}; - for (auto i : dest_vars) { - dep_iter.add(i); - } +void gopt::enable_opr_use_profiling_cache_inplace( + const VarNodeArrayView& dest_vars) { + modify_opr_algo_strategy_inplace(dest_vars, + opr::mixin::Convolution::ExecutionPolicy:: + Strategy::PROFILE_HEURISTIC); } + void gopt::set_opr_algo_workspace_limit_inplace( const VarNodeArrayView& dest_vars, size_t workspace_limit) { static const ThinHashMap diff --git a/src/gopt/include/megbrain/gopt/inference.h b/src/gopt/include/megbrain/gopt/inference.h index d2556e15..34892f22 100644 --- a/src/gopt/include/megbrain/gopt/inference.h +++ b/src/gopt/include/megbrain/gopt/inference.h @@ -13,6 +13,7 @@ #include "megbrain/gopt/framework.h" #include "megbrain/graph/cg.h" +#include "megbrain/opr/dnn/convolution.h" namespace mgb { namespace gopt { @@ -303,6 +304,17 @@ namespace gopt { const OptimizeForInferenceOptions& opt = {}); /*! + * \brief modify execution strategy for oprs with multiple + * algorithms + * + * This would modify the operators inplace. It can be used to implement + * the fast-run mode. + */ + void modify_opr_algo_strategy_inplace( + const VarNodeArrayView& dest_vars, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy); + + /*! + * \brief enable PROFILE execution strategy for oprs with multiple + * algorithms + * + * This would modify the operators inplace. 
It is usually used to enable @@ -324,7 +336,8 @@ namespace gopt { * You may want to implement TimedFuncInvoker::ForkExecImpl and/or * PersistentCache for better performance in an SDK. */ - void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars); + void enable_opr_use_profiling_cache_inplace( + const VarNodeArrayView& dest_vars); /*! * \brief set workspace_limit for execution strategy for oprs with multiple