From 12dc36a6abeb100182c914ef32aa1d90707a639b Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 10 Jun 2020 23:21:04 +0800 Subject: [PATCH] feat(mgb/gopt): add interface to reproducible GitOrigin-RevId: f341bea40b6e52f4598640b81b477184d8473421 --- sdk/load-and-run/src/mgblar.cpp | 28 +++++++++++- src/gopt/impl/inference.cpp | 73 ++++++++++++++---------------- src/gopt/include/megbrain/gopt/inference.h | 17 ++++++- 3 files changed, 74 insertions(+), 44 deletions(-) diff --git a/sdk/load-and-run/src/mgblar.cpp b/sdk/load-and-run/src/mgblar.cpp index b2f2d7db..a30e21b1 100644 --- a/sdk/load-and-run/src/mgblar.cpp +++ b/sdk/load-and-run/src/mgblar.cpp @@ -14,6 +14,7 @@ #include "./json_loader.h" #include "./npy.h" +#include "megbrain/opr/dnn/convolution.h" #include "megbrain/utils/debug.h" #include "megbrain/serialization/serializer.h" #include "megbrain/serialization/extern_c_opr.h" @@ -144,6 +145,10 @@ R"__usage__( R"__usage__( --fast-run-algo-policy It will read the cache file before profile, and save new fastrun in cache file. + --reproducible + Enable choosing algorithms that are reproducible. It is mainly used for cuDNN algorithms. + See https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#reproducibility + for more details. --wait-gdb Print PID and wait for a line from stdin before starting execution. Useful for waiting for gdb attach. 
@@ -467,6 +472,7 @@ struct Args { #if MGB_ENABLE_FASTRUN bool use_fast_run = false; #endif + bool reproducible = false; std::string fast_run_cache_path; bool copy_to_host = false; int nr_run = 10; @@ -647,10 +653,24 @@ void run_test_st(Args &env) { } mgb::gopt::set_opr_algo_workspace_limit_inplace(vars, env.workspace_limit); + using S = opr::mixin::Convolution::ExecutionPolicy::Strategy; + S strategy = S::HEURISTIC; #if MGB_ENABLE_FASTRUN - if (env.use_fast_run) - mgb::gopt::enable_opr_algo_profiling_inplace(vars); + if (env.use_fast_run) { + if (env.reproducible) { + strategy = S::PROFILE_REPRODUCIBLE; + } else { + strategy = S::PROFILE; + } + } else if (env.reproducible) { + strategy = S::HEURISTIC_REPRODUCIBLE; + } +#else + if (env.reproducible) { + strategy = S::HEURISTIC_REPRODUCIBLE; + } #endif + mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy); if (!env.fast_run_cache_path.empty()) { #if MGB_ENABLE_FASTRUN if (!access(env.fast_run_cache_path.c_str(), F_OK)) { @@ -1149,6 +1169,10 @@ Args Args::from_argv(int argc, char **argv) { ret.fast_run_cache_path = argv[i]; continue; } + if (!strcmp(argv[i], "--reproducible")) { + ret.reproducible = true; + continue; + } if (!strcmp(argv[i], "--const-shape")) { ret.load_config.const_var_shape = true; continue; diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp index c60f876b..c43dd0d6 100644 --- a/src/gopt/impl/inference.cpp +++ b/src/gopt/impl/inference.cpp @@ -104,25 +104,21 @@ SymbolVarArray gopt::optimize_for_inference( } namespace { -void modify_conv_policy(opr::mixin::Convolution& conv, - megdnn::param::ExecutionPolicy::Strategy strategy) { +void modify_conv_strategy( + opr::mixin::Convolution& conv, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) { auto policy = conv.execution_policy_transient(); policy.strategy = strategy; conv.set_execution_policy(policy); } template -void inplace_conv_opr_profile_modifier(OperatorNodeBase& opr) { - modify_conv_policy( +void 
inplace_conv_opr_modifier( + OperatorNodeBase& opr, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) { + modify_conv_strategy( opr.cast_final_safe(), - opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE); -} - -template -void inplace_conv_opr_profile_cache_modifier(OperatorNodeBase& opr) { - modify_conv_policy(opr.cast_final_safe(), - opr::mixin::Convolution::ExecutionPolicy::Strategy:: - PROFILE_HEURISTIC); + strategy); } void modify_conv_policy_workspace_limit(opr::mixin::Convolution& conv, @@ -150,12 +146,20 @@ void inplace_conv_opr_workspace_limit_modifier(OperatorNodeBase& opr, cb(DeformableConvBackwardFilter), cb(DeformableConvBackwardData), \ cb(BatchConvBiasForward), -void gopt::enable_opr_algo_profiling_inplace( - const VarNodeArrayView& dest_vars) { -#if MGB_ENABLE_FASTRUN - static const ThinHashMap modifiers = - { -#define CONV(t) {opr::t::typeinfo(), &inplace_conv_opr_profile_modifier} +void gopt::modify_opr_algo_strategy_inplace( + const VarNodeArrayView& dest_vars, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) { +#if !MGB_ENABLE_FASTRUN + using S = opr::mixin::Convolution::ExecutionPolicy::Strategy; + if (strategy == S::PROFILE || strategy == S::PROFILE_REPRODUCIBLE) { + mgb_throw(MegBrainError, "fastrun is disabled at compile time"); + } +#endif + const ThinHashMap> + modifiers = { +#define CONV(t) \ + {opr::t::typeinfo(), std::bind(inplace_conv_opr_modifier, \ + std::placeholders::_1, strategy)} MGB_FOREACH_FASTRUN_OPR(CONV) #undef CONV }; @@ -171,34 +175,23 @@ void gopt::enable_opr_algo_profiling_inplace( for (auto i : dest_vars) { dep_iter.add(i); } -#else - mgb_throw(MegBrainError, "fastrun is disabled at compile time"); -#endif } -void gopt::enable_opr_use_profiling_cache_inplace( +void gopt::enable_opr_algo_profiling_inplace( const VarNodeArrayView& dest_vars) { - static const ThinHashMap modifiers = - { -#define CONV(t) \ - {opr::t::typeinfo(), &inplace_conv_opr_profile_cache_modifier} - 
MGB_FOREACH_FASTRUN_OPR(CONV) -#undef CONV - }; - - auto on_opr = [&](OperatorNodeBase* opr) { - auto iter = modifiers.find(opr->dyn_typeinfo()); - if (iter != modifiers.end()) { - iter->second(*opr); - } - }; + modify_opr_algo_strategy_inplace(dest_vars, + opr::mixin::Convolution::ExecutionPolicy:: + Strategy::PROFILE); +} - cg::DepOprIter dep_iter{on_opr}; - for (auto i : dest_vars) { - dep_iter.add(i); - } +void gopt::enable_opr_use_profiling_cache_inplace( + const VarNodeArrayView& dest_vars) { + modify_opr_algo_strategy_inplace(dest_vars, + opr::mixin::Convolution::ExecutionPolicy:: + Strategy::PROFILE_HEURISTIC); } + void gopt::set_opr_algo_workspace_limit_inplace( const VarNodeArrayView& dest_vars, size_t workspace_limit) { static const ThinHashMap diff --git a/src/gopt/include/megbrain/gopt/inference.h b/src/gopt/include/megbrain/gopt/inference.h index d2556e15..34892f22 100644 --- a/src/gopt/include/megbrain/gopt/inference.h +++ b/src/gopt/include/megbrain/gopt/inference.h @@ -13,6 +13,7 @@ #include "megbrain/gopt/framework.h" #include "megbrain/graph/cg.h" +#include "megbrain/opr/dnn/convolution.h" namespace mgb { namespace gopt { @@ -303,6 +304,17 @@ namespace gopt { const OptimizeForInferenceOptions& opt = {}); /*! + * \brief modify execution strategy for oprs with multiple + * algorithms + * + * This would modify the operators inplace. It can be used to implement + * the fast-run mode. + */ + void modify_opr_algo_strategy_inplace( + const VarNodeArrayView& dest_vars, + opr::mixin::Convolution::ExecutionPolicy::Strategy strategy); + + /*! + * \brief enable PROFILE execution strategy for oprs with multiple + * algorithms + * + * This would modify the operators inplace. 
It is usually used to enable @@ -324,7 +336,8 @@ namespace gopt { * You may want to implement TimedFuncInvoker::ForkExecImpl and/or * PersistentCache for better performance in an SDK. */ - void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars); + void enable_opr_use_profiling_cache_inplace( + const VarNodeArrayView& dest_vars); /*! * \brief set workspace_limit for execution strategy for oprs with multiple