Browse Source

feat(mgb/gopt): add interface to reproducible

GitOrigin-RevId: f341bea40b
tags/v0.5.0
Megvii Engine Team Xu Xinran 5 years ago
parent
commit
12dc36a6ab
3 changed files with 74 additions and 44 deletions
  1. +26
    -2
      sdk/load-and-run/src/mgblar.cpp
  2. +33
    -40
      src/gopt/impl/inference.cpp
  3. +15
    -2
      src/gopt/include/megbrain/gopt/inference.h

+ 26
- 2
sdk/load-and-run/src/mgblar.cpp View File

@@ -14,6 +14,7 @@
#include "./json_loader.h"
#include "./npy.h"

#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/utils/debug.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/serialization/extern_c_opr.h"
@@ -144,6 +145,10 @@ R"__usage__(
R"__usage__(
--fast-run-algo-policy <path>
It will read the cache file before profile, and save new fastrun in cache file.
--reproducible
Enable choose algo which is reproducible. It mainly used for cudnn algos.
See https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#reproducibility
for more details.
--wait-gdb
Print PID and wait for a line from stdin before starting execution. Useful
for waiting for gdb attach.
@@ -467,6 +472,7 @@ struct Args {
#if MGB_ENABLE_FASTRUN
bool use_fast_run = false;
#endif
bool reproducible = false;
std::string fast_run_cache_path;
bool copy_to_host = false;
int nr_run = 10;
@@ -647,10 +653,24 @@ void run_test_st(Args &env) {
}

mgb::gopt::set_opr_algo_workspace_limit_inplace(vars, env.workspace_limit);
using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
S strategy = S::HEURISTIC;
#if MGB_ENABLE_FASTRUN
if (env.use_fast_run)
mgb::gopt::enable_opr_algo_profiling_inplace(vars);
if (env.use_fast_run) {
if (env.reproducible) {
strategy = S::PROFILE_REPRODUCIBLE;
} else {
strategy = S::PROFILE;
}
} else if (env.reproducible) {
strategy = S::HEURISTIC_REPRODUCIBLE;
}
#else
if (env.reproducible) {
strategy = S::HEURISTIC_REPRODUCIBLE;
}
#endif
mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
if (!env.fast_run_cache_path.empty()) {
#if MGB_ENABLE_FASTRUN
if (!access(env.fast_run_cache_path.c_str(), F_OK)) {
@@ -1149,6 +1169,10 @@ Args Args::from_argv(int argc, char **argv) {
ret.fast_run_cache_path = argv[i];
continue;
}
if (!strcmp(argv[i], "--reproducible")) {
ret.reproducible = true;
continue;
}
if (!strcmp(argv[i], "--const-shape")) {
ret.load_config.const_var_shape = true;
continue;


+ 33
- 40
src/gopt/impl/inference.cpp View File

@@ -104,25 +104,21 @@ SymbolVarArray gopt::optimize_for_inference(
}

namespace {
void modify_conv_policy(opr::mixin::Convolution& conv,
megdnn::param::ExecutionPolicy::Strategy strategy) {
void modify_conv_strategy(
opr::mixin::Convolution& conv,
opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
auto policy = conv.execution_policy_transient();
policy.strategy = strategy;
conv.set_execution_policy(policy);
}

template <typename Opr>
void inplace_conv_opr_profile_modifier(OperatorNodeBase& opr) {
modify_conv_policy(
void inplace_conv_opr_modifier(
OperatorNodeBase& opr,
opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
modify_conv_strategy(
opr.cast_final_safe<Opr>(),
opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE);
}

template <typename Opr>
void inplace_conv_opr_profile_cache_modifier(OperatorNodeBase& opr) {
modify_conv_policy(opr.cast_final_safe<Opr>(),
opr::mixin::Convolution::ExecutionPolicy::Strategy::
PROFILE_HEURISTIC);
strategy);
}

void modify_conv_policy_workspace_limit(opr::mixin::Convolution& conv,
@@ -150,12 +146,20 @@ void inplace_conv_opr_workspace_limit_modifier(OperatorNodeBase& opr,
cb(DeformableConvBackwardFilter), cb(DeformableConvBackwardData), \
cb(BatchConvBiasForward),

void gopt::enable_opr_algo_profiling_inplace(
const VarNodeArrayView& dest_vars) {
#if MGB_ENABLE_FASTRUN
static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&)> modifiers =
{
#define CONV(t) {opr::t::typeinfo(), &inplace_conv_opr_profile_modifier<opr::t>}
void gopt::modify_opr_algo_strategy_inplace(
const VarNodeArrayView& dest_vars,
opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
#if !MGB_ENABLE_FASTRUN
using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
if (strategy == S::PROFILE || strategy == S::PROFILE_REPRODUCIBLE) {
mgb_throw(MegBrainError, "fastrun is disabled at compile time");
}
#endif
const ThinHashMap<Typeinfo*, std::function<void(OperatorNodeBase&)>>
modifiers = {
#define CONV(t) \
{opr::t::typeinfo(), std::bind(inplace_conv_opr_modifier<opr::t>, \
std::placeholders::_1, strategy)}
MGB_FOREACH_FASTRUN_OPR(CONV)
#undef CONV
};
@@ -171,34 +175,23 @@ void gopt::enable_opr_algo_profiling_inplace(
for (auto i : dest_vars) {
dep_iter.add(i);
}
#else
mgb_throw(MegBrainError, "fastrun is disabled at compile time");
#endif
}

void gopt::enable_opr_use_profiling_cache_inplace(
void gopt::enable_opr_algo_profiling_inplace(
const VarNodeArrayView& dest_vars) {
static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&)> modifiers =
{
#define CONV(t) \
{opr::t::typeinfo(), &inplace_conv_opr_profile_cache_modifier<opr::t>}
MGB_FOREACH_FASTRUN_OPR(CONV)
#undef CONV
};

auto on_opr = [&](OperatorNodeBase* opr) {
auto iter = modifiers.find(opr->dyn_typeinfo());
if (iter != modifiers.end()) {
iter->second(*opr);
}
};
modify_opr_algo_strategy_inplace(dest_vars,
opr::mixin::Convolution::ExecutionPolicy::
Strategy::PROFILE);
}

cg::DepOprIter dep_iter{on_opr};
for (auto i : dest_vars) {
dep_iter.add(i);
}
void gopt::enable_opr_use_profiling_cache_inplace(
const VarNodeArrayView& dest_vars) {
modify_opr_algo_strategy_inplace(dest_vars,
opr::mixin::Convolution::ExecutionPolicy::
Strategy::PROFILE_HEURISTIC);
}

void gopt::set_opr_algo_workspace_limit_inplace(
const VarNodeArrayView& dest_vars, size_t workspace_limit) {
static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)>


+ 15
- 2
src/gopt/include/megbrain/gopt/inference.h View File

@@ -13,6 +13,7 @@


#include "megbrain/gopt/framework.h"
#include "megbrain/graph/cg.h"
#include "megbrain/opr/dnn/convolution.h"

namespace mgb {
namespace gopt {
@@ -303,6 +304,17 @@ namespace gopt {
const OptimizeForInferenceOptions& opt = {});

/*!
* \brief modify execution strategy for oprs with multiple
* algorithms
*
* This would modify the operators inplace. It can be used for implement
* the fast-run mode.
*/
void modify_opr_algo_strategy_inplace(
const VarNodeArrayView& dest_vars,
opr::mixin::Convolution::ExecutionPolicy::Strategy strategy);

/*!
* \brief enable PROFILE execution strategy for oprs with multiple
* algorithms
*
@@ -315,7 +327,7 @@ namespace gopt {
void enable_opr_algo_profiling_inplace(const VarNodeArrayView& dest_vars);

/*!
* \brief enable opr try profiling cache first, if failed, then try
* \brief enable opr try profiling cache first, if failed, fallback to
* heuristic
*
* This would modify the operators inplace. It is usually used to enable
@@ -324,7 +336,8 @@ namespace gopt {
* You may want to implement TimedFuncInvoker::ForkExecImpl and/or
* PersistentCache for better performance in an SDK.
*/
void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars);
void enable_opr_use_profiling_cache_inplace(
const VarNodeArrayView& dest_vars);

/*!
* \brief set workspace_limit for execution strategy for oprs with multiple


Loading…
Cancel
Save