GitOrigin-RevId: 843dc3a790
tags/v1.3.0
@@ -506,10 +506,66 @@ struct DynOutMallocPolicyCall { | |||
} | |||
}; | |||
//! Holds a value of enum class \p T as its underlying integer type so that
//! bitwise operators can be applied to it.
//! Implicitly constructible from \p T and implicitly convertible back to
//! \p T; the conversion to bool is explicit.
template <typename T> | |||
class EnumClassBit { | |||
std::underlying_type_t<T> m_val; | |||
//! private: construct directly from a raw integer value; the operators
//! below return raw integer results
constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {} | |||
public: | |||
//! wrap an enum value by casting it to the underlying type
constexpr EnumClassBit(T v) | |||
: m_val(static_cast<std::underlying_type_t<T>>(v)) {} | |||
//! cast the stored integer back to the enum type
constexpr operator T() const { return static_cast<T>(m_val); } | |||
//! the stored integer converted to bool
constexpr explicit operator bool() const { return m_val; } | |||
//! DEF_OPR(op) defines the member operator `op`, which applies `op` to the
//! stored values of both operands
#define DEF_OPR(op) \ | |||
constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \ | |||
return m_val op rhs.m_val; \ | |||
} | |||
DEF_OPR(&) | |||
DEF_OPR(|) | |||
DEF_OPR (^) | |||
//! bitwise complement of the stored value
constexpr EnumClassBit operator~() const { return ~m_val; } | |||
#undef DEF_OPR | |||
}; | |||
#endif // MEGDNN_CC_HOST | |||
} // namespace megdnn | |||
//! Defines the free binary operator \p op for enum class \p cls with two
//! overloads: (cls, cls) and (EnumClassBit<cls>, cls). Both wrap the enum
//! operands in ::megdnn::EnumClassBit<cls> and return EnumClassBit<cls>.
#define _MEGDNN_DECBO_SINGLE_OPR(cls, op) \ | |||
inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \ | |||
return ::megdnn::EnumClassBit<cls>(x) \ | |||
op ::megdnn::EnumClassBit<cls>(y); \ | |||
} \ | |||
inline constexpr ::megdnn::EnumClassBit<cls> operator op( \ | |||
::megdnn::EnumClassBit<cls> x, cls y) { \ | |||
return x op ::megdnn::EnumClassBit<cls>(y); \ | |||
} | |||
//! Defines the compound assignment operator `op=` for enum class \p cls:
//! assigns `x op y` back to \p x and returns a reference to \p x.
#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op) \ | |||
inline constexpr cls& operator op##=(cls& x, cls y) { \ | |||
x = x op ::megdnn::EnumClassBit<cls>(y); \ | |||
return x; \ | |||
} | |||
//! Defines the full set of bitwise operators for enum class \p cls:
//! binary &, |, ^, their compound assignment forms &=, |=, ^=, and unary ~
//! (which returns ::megdnn::EnumClassBit<cls>).
#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) \ | |||
_MEGDNN_DECBO_SINGLE_OPR(cls, &) \ | |||
_MEGDNN_DECBO_SINGLE_OPR(cls, |) \ | |||
_MEGDNN_DECBO_SINGLE_OPR(cls, ^) \ | |||
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &) \ | |||
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |) \ | |||
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \ | |||
inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \ | |||
return ~::megdnn::EnumClassBit<cls>(x); \ | |||
} | |||
#include "megdnn/internal/visibility_epilogue.h" | |||
// vim: syntax=cpp.doxygen |
@@ -251,6 +251,8 @@ protected: | |||
Handle::HandleType m_handle_type = Handle::HandleType::NAIVE; | |||
}; | |||
MEGDNN_DEF_ENUM_CLASS_BIT_OPR(Algorithm::Attribute) | |||
//! policy for executing the operator | |||
struct ExecutionPolicy { | |||
//! INVALID_ALGO_TYPE algo_type means using heuristic | |||
@@ -53,9 +53,13 @@ class FlatBuffersWriter(IndentWriterBase): | |||
e = self._enums[(p, e)] | |||
self._write_doc(e.name) | |||
self._write("enum %s%s : uint {", p, e.name, indent=1) | |||
for member in e.members: | |||
for idx, member in enumerate(e.members): | |||
self._write_doc(member) | |||
self._write("%s,", scramble_enum_member_name(str(member))) | |||
if e.combined: | |||
self._write("%s=%d,", scramble_enum_member_name(str(member)), | |||
1<<idx) | |||
else: | |||
self._write("%s,", scramble_enum_member_name(str(member))) | |||
self._write("}\n", indent=-1) | |||
def _write_doc(self, doc): | |||
@@ -80,13 +80,13 @@ class member_defs: | |||
:attr member_alias: list of (member, alias) pairs | |||
""" | |||
__slots__ = ['name', 'name_field', 'members', 'default', | |||
'member_alias'] | |||
'member_alias', 'combined'] | |||
all_enums = {} | |||
"""(param_name, name) => enum""" | |||
def __init__(self, param_name, name, name_field, members, default, | |||
member_alias): | |||
member_alias, combined = False): | |||
name = member_defs.Doc.make(name) | |||
assert name.id[0].isupper() | |||
members = tuple(map(member_defs.Doc.make, members)) | |||
@@ -97,6 +97,7 @@ class member_defs: | |||
default = name_field.index(default) | |||
assert isinstance(default, int) | |||
self.name = name | |||
self.combined = combined | |||
self.name_field = self.get_name_field(name.id, name_field) | |||
self.members = members | |||
self.default = default | |||
@@ -197,6 +198,12 @@ class ParamDef: | |||
self.name.id, name, name_field, members, default, member_alias)) | |||
return self | |||
# Append a member_defs.Enum built from the given name and members to
# self.members and return self.
# The trailing positional True is passed as the last constructor argument;
# presumably this is the `combined` flag, for which the writers in this change
# emit 1 << idx instead of idx -- confirm against member_defs.Enum.__init__.
# NOTE(review): member_alias=[] is a mutable default; it is only forwarded
# here -- confirm that member_defs.Enum does not mutate it.
def add_bit_combination_enum(self, name, *members, default=0, | |||
name_field=None, member_alias=[]): | |||
self.members.append(member_defs.Enum( | |||
self.name.id, name, name_field, members, default, member_alias, True)) | |||
return self | |||
def add_enum_alias(self, name, src_class, src_name=None, name_field=None, | |||
default=None): | |||
self.members.append(member_defs.EnumAlias( | |||
@@ -463,8 +470,12 @@ class SerializedDType(_ParamDefBase): | |||
for idx, emem in enumerate(e.members): | |||
self._write('%s = "%s"', emem, emem) | |||
self._write_doc(emem) | |||
self._enum_member2num.append('id({}.{}):{}'.format( | |||
qualname, emem, idx)) | |||
if e.combined: | |||
self._enum_member2num.append('id({}.{}):{}'.format( | |||
qualname, emem, 1<<idx)) | |||
else: | |||
self._enum_member2num.append('id({}.{}):{}'.format( | |||
qualname, emem, idx)) | |||
for emem, emem_alis in e.member_alias: | |||
self._write('%s = %s', emem_alis, emem) | |||
@@ -622,6 +633,8 @@ class CPPWriter(IndentWriterBase): | |||
for idx, i in enumerate(e.members): | |||
self._write_doc(i) | |||
v = '{} = {}'.format(i, idx) | |||
if e.combined: | |||
v = '{} = 1 << {}'.format(i, idx) | |||
if i is not e.members[-1] or e.member_alias: | |||
v += ',' | |||
self._write(v) | |||
@@ -672,7 +685,6 @@ class CPPEnumValueWriter(CPPWriter): | |||
self._write('static const uint32_t %s = %s;', alias, mem) | |||
self._write('};', indent=-1) | |||
def _on_member_enum_alias(self, e): | |||
s = e.src_enum | |||
self._write('typedef %s::%s %s;', e.src_class, e.src_name, e.name) | |||
@@ -91,12 +91,17 @@ class ConverterWriter(IndentWriterBase): | |||
def format(v): | |||
return '\"{}\"'.format(str(v)) | |||
enum_def += ','.join(format(i) for i in e.members) | |||
enum_def += "]" | |||
if e.combined: | |||
enum_def += "], 1" | |||
else: | |||
enum_def += "], 0" | |||
if ENUM_TO_STRING_SPECIAL_RULES.count((p.name, e.name)): | |||
enum_def += ", 1" # whether generate ToStringTrait | |||
enum_def += ">" | |||
self._write("def {} : {};".format(td_class, enum_def)) | |||
self._write("def {} : {};".format(td_class, enum_def)) | |||
if self._skip_current_param: | |||
return | |||
@@ -21,8 +21,6 @@ | |||
namespace megdnn { | |||
MEGDNN_DEF_ENUM_CLASS_BIT_OPR(AlgoAttribute) | |||
#define MEGDNN_DECL_ALGO_TYPE(_type) \ | |||
uint32_t type() const override { \ | |||
return static_cast<std::underlying_type<AlgoType>::type>( \ | |||
@@ -692,61 +692,6 @@ inline void* get_origin_ptr(const TensorND* tensor, void* ptr) { | |||
tensor->layout.span().low_byte); | |||
} | |||
//! Holds a value of enum class \p T as its underlying integer type so that
//! bitwise operators can be applied to it.
//! Implicitly constructible from \p T and implicitly convertible back to
//! \p T; the conversion to bool is explicit.
template <typename T> | |||
class EnumClassBit { | |||
std::underlying_type_t<T> m_val; | |||
//! private: construct directly from a raw integer value
constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {} | |||
public: | |||
constexpr EnumClassBit(T v) | |||
: m_val(static_cast<std::underlying_type_t<T>>(v)) {} | |||
constexpr operator T() const { return static_cast<T>(m_val); } | |||
constexpr explicit operator bool() const { return m_val; } | |||
//! DEF_OPR(op) defines the member operator `op` on the stored values
#define DEF_OPR(op) \ | |||
constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \ | |||
return m_val op rhs.m_val; \ | |||
} | |||
DEF_OPR(&) | |||
DEF_OPR(|) | |||
DEF_OPR (^) | |||
constexpr EnumClassBit operator~() const { return ~m_val; } | |||
#undef DEF_OPR | |||
}; | |||
//! Defines the free binary operator \p op for enum class \p cls, with
//! (cls, cls) and (EnumClassBit<cls>, cls) overloads returning
//! ::megdnn::EnumClassBit<cls>.
#define _MEGDNN_DECBO_SINGLE_OPR(cls, op) \ | |||
inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \ | |||
return ::megdnn::EnumClassBit<cls>(x) \ | |||
op ::megdnn::EnumClassBit<cls>(y); \ | |||
} \ | |||
inline constexpr ::megdnn::EnumClassBit<cls> operator op( \ | |||
::megdnn::EnumClassBit<cls> x, cls y) { \ | |||
return x op ::megdnn::EnumClassBit<cls>(y); \ | |||
} | |||
//! Defines the compound assignment `op=` for \p cls; assigns `x op y` to
//! \p x and returns a reference to \p x.
#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op) \ | |||
inline constexpr cls& operator op##=(cls& x, cls y) { \ | |||
x = x op ::megdnn::EnumClassBit<cls>(y); \ | |||
return x; \ | |||
} | |||
//! Defines &, |, ^, their compound assignment forms, and unary ~ for
//! enum class \p cls.
#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) \ | |||
_MEGDNN_DECBO_SINGLE_OPR(cls, &) \ | |||
_MEGDNN_DECBO_SINGLE_OPR(cls, |) \ | |||
_MEGDNN_DECBO_SINGLE_OPR(cls, ^) \ | |||
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &) \ | |||
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |) \ | |||
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \ | |||
inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \ | |||
return ~::megdnn::EnumClassBit<cls>(x); \ | |||
} | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -218,4 +218,3 @@ public: | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen | |||
@@ -8,9 +8,12 @@ | |||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
import os | |||
from ..core.ops import builtin | |||
from ..logger import get_logger | |||
from ..utils.deprecation import deprecated | |||
Strategy = builtin.ops.Convolution.Strategy | |||
_execution_strategy = os.getenv("MEGENGINE_EXECUTION_STRATEGY", "HEURISTIC") | |||
if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None: | |||
@@ -19,7 +22,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None: | |||
) | |||
def get_execution_strategy() -> str: | |||
def get_execution_strategy() -> Strategy: | |||
""" | |||
Returns the execution strategy of :class:`~.Conv2d` and :func:'~.matmul' | |||
@@ -28,12 +31,22 @@ def get_execution_strategy() -> str: | |||
return _execution_strategy | |||
def set_execution_strategy(option: str): | |||
def set_execution_strategy(option): | |||
""" | |||
Sets the execution strategy of :class:`~.Conv2d` and :func:'~.matmul' | |||
:param option: Decides how :class:`~.Conv2d` and :func:'~.matmul' algorithms are chosen. | |||
Available values: | |||
:param option: Decides how :class:`~.Conv2d` and :func:`~.matmul` algorithms are chosen. | |||
Available Strategy values: | |||
* HEURISTIC uses heuristic to choose the fastest algorithm. | |||
* PROFILE runs possible algorithms on real device to find the best one. | |||
* REPRODUCIBLE uses the algorithms that are reproducible. | |||
* OPTMIZED uses the algorithms that are optimized. | |||
The default strategy is HEURISTIC. These options can be combined to | |||
form a combination option, e.g. PROFILE | REPRODUCIBLE | |||
selects the fastest algorithm in the profiling result that is also reproducible. | |||
Available values string: | |||
* 'HEURISTIC' uses heuristic to choose the fastest algorithm. | |||
* 'PROFILE' runs possible algorithms on real device to find the best one. | |||
@@ -45,18 +58,29 @@ def set_execution_strategy(option: str): | |||
It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'. | |||
""" | |||
valid_option = ( | |||
"HEURISTIC", | |||
"PROFILE", | |||
"PROFILE_HEURISTIC", | |||
"PROFILE_REPRODUCIBLE", | |||
"HEURISTIC_REPRODUCIBLE", | |||
) | |||
if not option in valid_option: | |||
raise ValueError("Valid option can only be one of {}".format(valid_option)) | |||
valid_string_option = { | |||
"REPRODUCIBLE": Strategy.REPRODUCIBLE, | |||
"HEURISTIC": Strategy.HEURISTIC, | |||
"PROFILE": Strategy.PROFILE, | |||
} | |||
global _execution_strategy # pylint: disable=global-statement | |||
_execution_strategy = option | |||
if isinstance(option, Strategy): | |||
_execution_strategy = option | |||
return | |||
assert isinstance(option, str) | |||
strategy_tmp = Strategy(0) | |||
for opt in option.split("_"): | |||
if not opt in valid_string_option: | |||
raise ValueError( | |||
"Valid option can only be one of {}, or combine them with '_'.".format( | |||
valid_string_option.keys() | |||
) | |||
) | |||
strategy_tmp = strategy_tmp | valid_string_option[opt] | |||
_execution_strategy = strategy_tmp | |||
@deprecated(version="1.3", reason="use get_execution_strategy() instead") | |||
@@ -19,6 +19,7 @@ import megengine.autodiff as ad | |||
import megengine.functional as F | |||
from megengine import jit | |||
from megengine.core._trace_option import set_symbolic_shape | |||
from megengine.core.ops import builtin | |||
from megengine.core.tensor.utils import make_shape_tuple | |||
from megengine.functional.debug_param import set_execution_strategy | |||
from megengine.jit import SublinearMemoryConfig | |||
@@ -33,6 +34,8 @@ from megengine.module import ( | |||
from megengine.optimizer import SGD | |||
from megengine.tensor import Tensor | |||
Strategy = builtin.ops.Convolution.Strategy | |||
def get_gpu_name(): | |||
try: | |||
@@ -242,7 +245,7 @@ def test_correctness(): | |||
else: | |||
model_name = "mnist_model_with_test_cpu.mge" | |||
model_path = os.path.join(os.path.dirname(__file__), model_name) | |||
set_execution_strategy("HEURISTIC_REPRODUCIBLE") | |||
set_execution_strategy(Strategy.HEURISTIC | Strategy.REPRODUCIBLE) | |||
run_train(model_path, False, False, max_err=1e-5) | |||
run_train(model_path, True, False, max_err=1e-5) | |||
@@ -337,6 +337,20 @@ static void gen_op_def_pybind11_single(raw_ostream &os, MgbOp& op, EnumContext& | |||
className, attr->getEnumName(), i | |||
)); | |||
} | |||
if (attr->getEnumCombinedFlag()) { | |||
//! define operator | | |||
os << formatv( | |||
"\n .def(\"__or__\", []({0}::{1} s0, {0}::{1} s1) {{ " | |||
"\n return static_cast<{0}::{1}>(uint32_t(s0) | uint32_t(s1));" | |||
"\n })", | |||
className, attr->getEnumName()); | |||
//! define operator & | |||
os << formatv( | |||
"\n .def(\"__and__\", []({0}::{1} s0, {0}::{1} s1) {{" | |||
"\n return static_cast<{0}::{1}>(uint32_t(s0) & uint32_t(s1));" | |||
"\n })", | |||
className, attr->getEnumName()); | |||
} | |||
os << formatv( | |||
"\n .def(py::init([](const std::string& in) {" | |||
"\n auto&& str = normalize_enum(in);" | |||
@@ -77,6 +77,9 @@ struct MgbEnumAttrMixin : public MgbAttrWrapperBase { | |||
bool supportToString() const { | |||
return getBaseRecord()->getValueAsBit("supportToString"); | |||
} | |||
//! Returns the value of the `enumCombined` bit field of the base record.
bool getEnumCombinedFlag() const { | |||
return getBaseRecord()->getValueAsBit("enumCombined"); | |||
} | |||
}; | |||
struct MgbHashableAttrMixin : public MgbAttrWrapperBase { | |||
@@ -142,8 +142,16 @@ R"__usage__( | |||
#if MGB_ENABLE_FASTRUN | |||
R"__usage__( | |||
--fast-run | |||
Enable fast-run mode. Operators with multiple algorithms would be profiled | |||
on the real device with actual input shapes. | |||
This param will be deperated later, please replace with param --full-profile. | |||
--full-profile | |||
Enable full-profile mode. Operators with multiple algorithms would be profiled | |||
on the real device with actual input shapes, all algorithms will be profiled | |||
include naive algorithms. | |||
See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details. | |||
--fast-profile | |||
Enable fast-profile mode. Operators with multiple algorithms would be profiled | |||
on the real device with actual input shapes, this mode will only profile the | |||
well optimized algorithms to get the profile result fast. | |||
See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details. | |||
)__usage__" | |||
#endif | |||
@@ -511,7 +519,8 @@ struct Args { | |||
bool disable_assert_throw = false; | |||
bool share_param_mem = false; | |||
#if MGB_ENABLE_FASTRUN | |||
bool use_fast_run = false; | |||
bool use_full_profile = false; | |||
bool use_fast_profile = false; | |||
#endif | |||
bool reproducible = false; | |||
std::string fast_run_cache_path; | |||
@@ -695,18 +704,20 @@ void run_test_st(Args &env) { | |||
using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy; | |||
S strategy = S::HEURISTIC; | |||
#if MGB_ENABLE_FASTRUN | |||
if (env.use_fast_run) { | |||
if (env.use_full_profile) { | |||
if (env.reproducible) { | |||
strategy = S::PROFILE_REPRODUCIBLE; | |||
strategy = S::PROFILE | S::REPRODUCIBLE; | |||
} else { | |||
strategy = S::PROFILE; | |||
} | |||
} else if (env.use_fast_profile) { | |||
strategy = S::PROFILE | S::OPTMIZED; | |||
} else if (env.reproducible) { | |||
strategy = S::HEURISTIC_REPRODUCIBLE; | |||
strategy = S::HEURISTIC | S::REPRODUCIBLE; | |||
} | |||
#else | |||
if (env.reproducible) { | |||
strategy = S::HEURISTIC_REPRODUCIBLE; | |||
strategy = S::HEURISTIC | S::REPRODUCIBLE; | |||
} | |||
#endif | |||
mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy); | |||
@@ -729,11 +740,12 @@ void run_test_st(Args &env) { | |||
std::make_shared<InFilePersistentCache>(buf.get(), flen)); | |||
#if MGB_ENABLE_FASTRUN | |||
} else { | |||
mgb_assert(env.use_fast_run, "fast-run should be enabled"); | |||
mgb_assert(env.use_full_profile || env.use_fast_profile, | |||
"fast-run or fast-profile should be enabled"); | |||
PersistentCache::set_impl( | |||
std::make_shared<InFilePersistentCache>()); | |||
} | |||
if (!env.use_fast_run) | |||
if (!env.use_full_profile && !env.use_fast_profile) | |||
#endif | |||
mgb::gopt::enable_opr_use_profiling_cache_inplace(vars); | |||
} | |||
@@ -1314,7 +1326,18 @@ Args Args::from_argv(int argc, char **argv) { | |||
} | |||
#if MGB_ENABLE_FASTRUN | |||
if (!strcmp(argv[i], "--fast-run")) { | |||
ret.use_fast_run = true; | |||
mgb_log_warn( | |||
"--fast-run param will be deperated later, please replace " | |||
"with --full-profile or --fast-profile."); | |||
ret.use_full_profile = true; | |||
continue; | |||
} | |||
if (!strcmp(argv[i], "--full-profile")) { | |||
ret.use_full_profile = true; | |||
continue; | |||
} | |||
if (!strcmp(argv[i], "--fast-profile")) { | |||
ret.use_fast_profile = true; | |||
continue; | |||
} | |||
#endif | |||
@@ -188,7 +188,7 @@ AlgoChooserProfileCache::get(const Key &key) { | |||
auto entry_len = read_uint32(); | |||
mgb_assert(buf + entry_len <= buf_end); | |||
auto nr = sscanf(reinterpret_cast<const char*>(buf), ENTRY_FMT, | |||
&i.reproducible, &i.time, &i.workspace); | |||
&i.attribute, &i.time, &i.workspace); | |||
mgb_assert(nr == 3); | |||
buf += entry_len; | |||
} | |||
@@ -210,10 +210,10 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) { | |||
auto &&cur = result[i]; | |||
if (prev.workspace <= cur.workspace && | |||
prev.reproducible == cur.reproducible) { | |||
prev.attribute == cur.attribute) { | |||
result.erase(result.begin() + i); | |||
} else { | |||
++ i; | |||
++i; | |||
} | |||
} | |||
@@ -235,8 +235,8 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) { | |||
write_uint32(0); | |||
pos = val.size(); | |||
val.resize(pos + SPR_SIZE); | |||
uint32_t nr = snprintf(&val[pos], SPR_SIZE, | |||
ENTRY_FMT, i.reproducible, i.time, i.workspace); | |||
uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT, i.attribute, | |||
i.time, i.workspace); | |||
//! for memory boundary failed, snprintf ret do not contain \0 | |||
nr += 1; | |||
mgb_assert(nr < SPR_SIZE); | |||
@@ -12,6 +12,8 @@ | |||
#pragma once | |||
#include "megbrain_build_config.h" | |||
#include "megbrain/opr/param_defs.h" | |||
#include "megdnn/basic_types.h" | |||
#include <memory> | |||
#include <string> | |||
@@ -242,6 +244,16 @@ inline constexpr std::size_t operator"" _z(unsigned long long n) { | |||
return n; | |||
} | |||
#endif | |||
//! Alias that forwards to MEGDNN_DEF_ENUM_CLASS_BIT_OPR for \p cls.
#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \ | |||
MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) | |||
} // namespace mgb | |||
namespace megdnn { | |||
namespace param { | |||
//! define the bitwise operators for ExecutionPolicy::Strategy inside
//! namespace megdnn::param
MGB_DEF_ENUM_CLASS_BIT_OPR(ExecutionPolicy::Strategy) | |||
} | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -12,7 +12,6 @@ | |||
#pragma once | |||
#include "megbrain/utils/hash.h" | |||
#include "megbrain/utils/enum_class_bit.h" | |||
#include "megbrain/utils/metahelper.h" | |||
#include "megbrain/utils/thin/hash_table.h" | |||
#include "megbrain/utils/thread.h" | |||
@@ -16,7 +16,6 @@ | |||
#include "megbrain/graph/symbol_var.h" | |||
#include "megbrain/utils/hashable.h" | |||
#include "megbrain/utils/enum_class_bit.h" | |||
#include "megbrain/utils/thin/hash_table.h" | |||
#include "megbrain/utils/small_vector.h" | |||
@@ -12,7 +12,6 @@ | |||
#pragma once | |||
#include "megbrain/graph/bases.h" | |||
#include "megbrain/utils/enum_class_bit.h" | |||
#include "megbrain/utils/comp_node_sync_manager.h" | |||
#include "megbrain/utils/small_vector.h" | |||
#include "megbrain/utils/mempool.h" | |||
@@ -33,10 +33,11 @@ class MgbHashableAttrMixin { | |||
string reprFunction = "std::to_string($0)"; | |||
} | |||
class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit toString> { | |||
class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit combined, bit toString> { | |||
string parentNamespace = namespace; | |||
string enumName = name; | |||
list<string> enumMembers = members; | |||
bit enumCombined = combined; | |||
bit supportToString = toString; | |||
} | |||
@@ -166,8 +167,8 @@ class MgbTupleAttr<list<MgbAttrWrapper> args>: | |||
} | |||
// -- enum types | |||
class MgbEnumAttr<string namespace, string enumName, list<string> members, bit toString=0>: | |||
HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, toString> { | |||
class MgbEnumAttr<string namespace, string enumName, list<string> members, bit combined, bit toString=0>: | |||
HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, combined, toString> { | |||
let storageType = "::mlir::IntegerAttr"; | |||
let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())"; | |||
let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast<int32_t>($0))"; | |||
@@ -176,7 +177,7 @@ class MgbEnumAttr<string namespace, string enumName, list<string> members, bit t | |||
} | |||
class MgbEnumAliasAttr<string namespace, string enumName, MgbEnumAttr base>: | |||
MgbEnumAttr<namespace, enumName, base.enumMembers>, MgbAliasAttrMixin<base>; | |||
MgbEnumAttr<namespace, enumName, base.enumMembers, 0>, MgbAliasAttrMixin<base>; | |||
// -- other types | |||
def MgbDTypeAttr: HashableAttr<"::megdnn::DType"> { | |||
@@ -1,89 +0,0 @@ | |||
/** | |||
* \file src/core/include/megbrain/utils/enum_class_bit.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#pragma once | |||
#include <type_traits> | |||
namespace mgb { | |||
//! Holds a value of enum class \p T as its underlying integer type so that
//! bitwise operators can be applied to it.
//! Implicitly constructible from \p T and implicitly convertible back to
//! \p T; the conversion to bool is explicit.
template<typename T> | |||
class EnumClassBit { | |||
std::underlying_type_t<T> m_val; | |||
//! private: construct directly from a raw integer value
constexpr EnumClassBit(std::underlying_type_t<T> v): | |||
m_val(v) | |||
{ | |||
} | |||
public: | |||
//! wrap an enum value by casting it to the underlying type
constexpr EnumClassBit(T v): | |||
m_val(static_cast<std::underlying_type_t<T>>(v)) | |||
{ | |||
} | |||
//! cast the stored integer back to the enum type
constexpr operator T() const { | |||
return static_cast<T>(m_val); | |||
} | |||
//! the stored integer converted to bool
constexpr explicit operator bool() const { | |||
return m_val; | |||
} | |||
//! DEF_OPR(op) defines the member operator `op` on the stored values
#define DEF_OPR(op) \ | |||
constexpr EnumClassBit operator op (\ | |||
const EnumClassBit &rhs) const { \ | |||
return m_val op rhs.m_val; \ | |||
} | |||
DEF_OPR(&) | |||
DEF_OPR(|) | |||
DEF_OPR(^) | |||
//! bitwise complement of the stored value
constexpr EnumClassBit operator ~() const { | |||
return ~m_val; | |||
} | |||
#undef DEF_OPR | |||
}; | |||
} | |||
//! Defines the free binary operator \p op for enum class \p cls, with
//! (cls, cls) and (EnumClassBit<cls>, cls) overloads returning
//! ::mgb::EnumClassBit<cls>.
#define _MGB_DECBO_SINGLE_OPR(cls, op) \ | |||
inline constexpr ::mgb::EnumClassBit<cls> operator op (cls x, cls y) { \ | |||
return ::mgb::EnumClassBit<cls>(x) op ::mgb::EnumClassBit<cls>(y); \ | |||
} \ | |||
inline constexpr ::mgb::EnumClassBit<cls> operator op ( \ | |||
::mgb::EnumClassBit<cls> x, cls y) { \ | |||
return x op ::mgb::EnumClassBit<cls>(y); \ | |||
} | |||
//! Defines the compound assignment `op=` for \p cls; assigns `x op y` to
//! \p x and returns a reference to \p x.
#define _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, op) \ | |||
inline constexpr cls& operator op##= (cls& x, cls y) { \ | |||
x = x op ::mgb::EnumClassBit<cls>(y); \ | |||
return x; \ | |||
} | |||
//! Defines &, |, ^, their compound assignment forms, and unary ~ for
//! enum class \p cls.
#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \ | |||
_MGB_DECBO_SINGLE_OPR(cls, &) \ | |||
_MGB_DECBO_SINGLE_OPR(cls, |) \ | |||
_MGB_DECBO_SINGLE_OPR(cls, ^) \ | |||
_MGB_DECBO_SINGLE_OPR_ASSIGN(cls, &) \ | |||
_MGB_DECBO_SINGLE_OPR_ASSIGN(cls, |) \ | |||
_MGB_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \ | |||
inline constexpr ::mgb::EnumClassBit<cls> operator ~ (cls x) { \ | |||
return ~::mgb::EnumClassBit<cls>(x); \ | |||
} \ | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | |||
@@ -100,8 +100,7 @@ namespace mgb { | |||
struct ResultEntry { | |||
std::string algo; //! identifier of the algorithm | |||
//! sscanf will up bool as int | |||
int reproducible; //! whether algorithm is reproducible | |||
uint32_t attribute; //! algo attribute, e.g. reproducible | |||
double time; //! execution time in seconds | |||
size_t workspace; //! workspace in bytes | |||
}; | |||
@@ -54,7 +54,6 @@ using namespace gopt; | |||
namespace { | |||
template <typename SharedDeviceTensor, typename MultipleDeviceTensorHolder> | |||
void param_merge(OptState& opt_state) { | |||
auto rewriter = opt_state.graph().make_rewriter(); | |||
@@ -102,7 +101,7 @@ void param_merge(OptState& opt_state) { | |||
rewriter.apply_inplace(); | |||
} | |||
} | |||
} // namespace | |||
/* ================ global functions ================ */ | |||
@@ -190,12 +189,10 @@ void gopt::enable_opr_algo_profiling_inplace( | |||
void gopt::enable_opr_use_profiling_cache_inplace( | |||
const VarNodeArrayView& dest_vars) { | |||
modify_opr_algo_strategy_inplace( | |||
dest_vars, opr::mixin::AlgoChooserHelper::ExecutionPolicy:: | |||
Strategy::PROFILE_HEURISTIC); | |||
using S = megdnn::param::ExecutionPolicy::Strategy; | |||
modify_opr_algo_strategy_inplace(dest_vars, S::PROFILE | S::HEURISTIC); | |||
} | |||
void gopt::set_opr_algo_workspace_limit_inplace( | |||
const VarNodeArrayView& dest_vars, size_t workspace_limit) { | |||
static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)> | |||
@@ -1693,7 +1693,22 @@ TEST(TestGoptInference, ProfileCache) { | |||
using S = opr::Convolution::ExecutionPolicy::Strategy; | |||
ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy); | |||
gopt::enable_opr_use_profiling_cache_inplace({z + 2.3f}); | |||
ASSERT_EQ(S::PROFILE_HEURISTIC, conv.execution_policy().strategy); | |||
ASSERT_EQ(S::PROFILE | S::HEURISTIC, conv.execution_policy().strategy); | |||
} | |||
//! Checks that modify_opr_algo_strategy_inplace applies a combined strategy
//! (PROFILE | OPTMIZED) to a Convolution operator whose strategy is
//! initially HEURISTIC.
TEST(TestGoptInference, FastProfileCache) { | |||
HostTensorGenerator<> gen; | |||
auto graph = ComputingGraph::make(); | |||
auto host_x = gen({4, 3, 8, 9}), host_y = gen({2, 3, 3, 3}); | |||
auto x = opr::Host2DeviceCopy::make(*graph, host_x), | |||
y = opr::Host2DeviceCopy::make(*graph, host_y), | |||
z = opr::Convolution::make(x, y); | |||
auto&& conv = z.node()->owner_opr()->cast_final_safe<opr::Convolution>(); | |||
using S = opr::Convolution::ExecutionPolicy::Strategy; | |||
//! strategy before any modification
ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy); | |||
//! apply the combined strategy through an expression that uses z
gopt::modify_opr_algo_strategy_inplace({z + 2.3f}, | |||
S::PROFILE | S::OPTMIZED); | |||
ASSERT_EQ(S::PROFILE | S::OPTMIZED, conv.execution_policy().strategy); | |||
} | |||
TEST(TestGoptInference, AlgoWorkspaceLimit) { | |||
@@ -20,7 +20,6 @@ | |||
#include "megbrain/opr/dnn/lrn.h" | |||
#include "megbrain/opr/dnn/fake_quant.h" | |||
#include "megbrain/opr/dnn/tqt.h" | |||
#include "megbrain/serialization/sereg.h" | |||
#include "megdnn/opr_param_defs.h" | |||
#include "megdnn/oprs/nn.h" | |||
@@ -284,8 +284,9 @@ namespace mgb { | |||
namespace opr { | |||
template <typename Opr> | |||
void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) { | |||
if (ctx.get_profile_result_from_cache(require_reproducible).valid()) | |||
void AlgoChooser<Opr>::profile(ExeContext& ctx, | |||
ExecutionStrategy select_strategy) { | |||
if (ctx.get_profile_result_from_cache(select_strategy).valid()) | |||
return; | |||
AlgoChooserProfileCache::Result prof_rst; | |||
@@ -305,7 +306,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) { | |||
algo.name.c_str(), str_on_inp_shape.c_str()); | |||
ImplExecutionPolicy policy; | |||
policy.algo = algo.desc; | |||
ctx.construct_execution_policy(require_reproducible, policy); | |||
ctx.construct_execution_policy(select_strategy, policy); | |||
if (ctx.get_workspace_size_bytes(policy) >= workspace_limit) | |||
continue; | |||
@@ -354,7 +355,8 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) { | |||
template <typename Opr> | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy | |||
AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible, | |||
AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, | |||
ExecutionStrategy select_strategy, | |||
bool enable_update) { | |||
MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("AlgoChooser::choose_by_profile"))) | |||
if (ctx.owner_graph()->options().no_profiling_on_shape_change) { | |||
@@ -376,11 +378,11 @@ AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible, | |||
to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(), | |||
_item.param, ctx.mgb_opr(), ctx.comp_node(), | |||
ctx.execution_policy(), ctx.allow_weight_preprocess()); | |||
AlgoChooser<_Opr>::profile(sub_ctx, require_reproducible); | |||
AlgoChooser<_Opr>::profile(sub_ctx, select_strategy); | |||
}); | |||
} | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy policy; | |||
ctx.construct_execution_policy(require_reproducible, policy); | |||
ctx.construct_execution_policy(select_strategy, policy); | |||
return policy; | |||
MIDOUT_E | |||
} | |||
@@ -402,11 +404,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | |||
ImplExecutionPolicy policy; | |||
if (auto algo_choose_hook = mgb_opr->algo_chooser()) { | |||
policy = algo_choose_hook(mgb_opr); | |||
ctx.construct_execution_policy( | |||
mgb_opr->execution_policy().strategy == | |||
mixin::AlgoChooserHelper::ExecutionPolicy::Strategy:: | |||
HEURISTIC_REPRODUCIBLE, | |||
policy, false); | |||
ctx.construct_execution_policy((ExecutionStrategy::HEURISTIC | | |||
ExecutionStrategy::REPRODUCIBLE), | |||
policy, false); | |||
} | |||
if (!policy.algo.valid()) { | |||
policy = get_policy(ctx); | |||
@@ -419,10 +419,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | |||
Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo); | |||
mgb_assert(palgo, "Unknown algo description"); | |||
ret.append("): algo=" + std::string(palgo->name())); | |||
ret.append(ssprintf(" workspace=%.2fMiB reproducible=%d", | |||
ret.append(ssprintf(" workspace=%.2fMiB attirbute=%d", | |||
workspace / (1024 * 1024.0), | |||
palgo->contain_attribute( | |||
megdnn::AlgoAttribute::REPRODUCIBLE))); | |||
static_cast<uint32_t>(palgo->attribute()))); | |||
mgb_log_debug("%s", ret.c_str()); | |||
megdnn_opr->execution_policy() = policy; | |||
@@ -432,41 +431,39 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts, | |||
template <typename Opr> | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy( | |||
ExeContext& ctx) { | |||
using S = mixin::AlgoChooserHelper::ExecutionPolicy::Strategy; | |||
MGB_MARK_USED_VAR(TIMEOUT_TOLERANCE); | |||
switch (ctx.execution_policy().strategy) { | |||
case S::HEURISTIC: | |||
return ctx.choose_by_heuristic(); | |||
case S::HEURISTIC_REPRODUCIBLE: | |||
return ctx.choose_by_heuristic(true); | |||
case S::PROFILE_HEURISTIC: { | |||
ImplExecutionPolicy policy = choose_by_profile(ctx, false, false); | |||
if (!policy.algo.valid()) | |||
policy = ctx.choose_by_heuristic(); | |||
return policy; | |||
} | |||
auto opr_strategy = ctx.execution_policy().strategy; | |||
if ((opr_strategy & ExecutionStrategy::HEURISTIC) && | |||
(opr_strategy & ExecutionStrategy::PROFILE)) { | |||
ImplExecutionPolicy policy = | |||
choose_by_profile(ctx, opr_strategy, false); | |||
if (!policy.algo.valid()) | |||
policy = ctx.choose_by_heuristic(opr_strategy); | |||
return policy; | |||
} else if ((opr_strategy & ExecutionStrategy::HEURISTIC)) { | |||
return ctx.choose_by_heuristic(opr_strategy); | |||
} | |||
#if MGB_ENABLE_FASTRUN | |||
case S::PROFILE: | |||
return choose_by_profile(ctx, false); | |||
case S::PROFILE_REPRODUCIBLE: | |||
return choose_by_profile(ctx, true); | |||
else if (opr_strategy & ExecutionStrategy::PROFILE) { | |||
return choose_by_profile(ctx, opr_strategy); | |||
} | |||
#endif | |||
default: | |||
mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy"); | |||
else { | |||
mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy"); | |||
} | |||
} | |||
#define INST(Opr) \ | |||
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \ | |||
AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx); \ | |||
template void AlgoChooser<megdnn::Opr>::profile( \ | |||
ExeContext& ctx, bool require_reproducible); \ | |||
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \ | |||
AlgoChooser<megdnn::Opr>::choose_by_profile( \ | |||
ExeContext& ctx, bool require_reproducible, bool enable_update); \ | |||
template size_t AlgoChooser<megdnn::Opr>::setup_algo( \ | |||
const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \ | |||
const MGBOpr* mgb_opr, bool allow_weight_preprocess); \ | |||
#define INST(Opr) \ | |||
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \ | |||
AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx); \ | |||
template void AlgoChooser<megdnn::Opr>::profile(ExeContext& ctx, \ | |||
ExecutionStrategy); \ | |||
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \ | |||
AlgoChooser<megdnn::Opr>::choose_by_profile( \ | |||
ExeContext& ctx, ExecutionStrategy, bool enable_update); \ | |||
template size_t AlgoChooser<megdnn::Opr>::setup_algo( \ | |||
const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \ | |||
const MGBOpr* mgb_opr, bool allow_weight_preprocess); | |||
MGB_FOREACH_FASTRUN_OPR(INST) | |||
@@ -498,7 +495,7 @@ AlgoChooser<Opr>::ExeContext::ExeContext( | |||
template <typename Opr> | |||
typename AlgoChooser<Opr>::ImplAlgo | |||
AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache( | |||
bool require_reproducible) const { | |||
ExecutionStrategy select_strategy) const { | |||
MIDOUT_B(Opr, | |||
midout_iv(MGB_HASH_STR( | |||
"AlgoChooser::ExeContext::get_profile_result_from_cache"))) | |||
@@ -522,7 +519,9 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache( | |||
if (prof.empty()) | |||
return {}; | |||
for (auto&& i : prof) { | |||
if ((!require_reproducible || i.reproducible)) { | |||
if (!(select_strategy & ExecutionStrategy::REPRODUCIBLE) || | |||
static_cast<AlgoAttribute>(i.attribute) & | |||
AlgoAttribute::REPRODUCIBLE) { | |||
auto iter = algo_map.find(i.algo); | |||
mgb_assert(iter != algo_map.end(), | |||
"algorithm %s exists in " | |||
@@ -550,7 +549,8 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache( | |||
template <typename Opr> | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy | |||
AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const { | |||
AlgoChooser<Opr>::ExeContext::choose_by_heuristic( | |||
ExecutionStrategy select_strategy) const { | |||
if (m_execution_policy.workspace_limit != | |||
std::numeric_limits<decltype( | |||
m_execution_policy.workspace_limit)>::max()) { | |||
@@ -558,6 +558,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const { | |||
"workspace_limit should not be setted if choose algo by " | |||
"heuristic"); | |||
} | |||
bool reproducible = static_cast<bool>(select_strategy & | |||
ExecutionStrategy::REPRODUCIBLE); | |||
auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit( | |||
owner_graph(), m_cn, m_execution_policy.workspace_limit); | |||
ImplExecutionPolicy policy; | |||
@@ -579,7 +581,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const { | |||
to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(), | |||
_item.param, m_base_mgb_opr, m_cn, m_execution_policy, | |||
m_allow_weight_preprocess); | |||
policy.sub_policy.push_back(sub_ctx.choose_by_heuristic(reproducible)); | |||
policy.sub_policy.push_back( | |||
sub_ctx.choose_by_heuristic(select_strategy)); | |||
}); | |||
return policy; | |||
@@ -588,9 +591,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const { | |||
template <typename Opr> | |||
std::vector<typename AlgoChooser<Opr>::ImplAlgo> | |||
AlgoChooser<Opr>::ExeContext::get_all_candidates() const { | |||
auto heu = choose_by_heuristic(); | |||
auto&& ret = | |||
APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts); | |||
auto heu = choose_by_heuristic(ExecutionStrategy::HEURISTIC); | |||
auto&& ret = APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts); | |||
bool found = false; | |||
for (size_t i = 0; i < ret.size(); ++i) { | |||
if (ret[i].desc == heu.algo) { | |||
@@ -611,19 +613,21 @@ AlgoChooser<Opr>::ExeContext::get_all_candidates() const { | |||
template <typename Opr> | |||
void AlgoChooser<Opr>::ExeContext::construct_execution_policy( | |||
bool require_reproducible, | |||
ExecutionStrategy select_strategy, | |||
typename AlgoChooser<Opr>::ImplExecutionPolicy& policy, | |||
bool retrive_from_cache) const { | |||
bool reproducible = static_cast<bool>(select_strategy & | |||
ExecutionStrategy::REPRODUCIBLE); | |||
if (!policy.algo.valid()) { | |||
if (retrive_from_cache) { | |||
policy.algo = | |||
get_profile_result_from_cache(require_reproducible).desc; | |||
get_profile_result_from_cache(select_strategy).desc; | |||
} else { | |||
auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit( | |||
owner_graph(), m_cn, m_execution_policy.workspace_limit); | |||
policy.algo = APPLY(m_megdnn_opr->get_algorithm_info_heuristic( | |||
args..., workspace_limit, | |||
require_reproducible), | |||
reproducible), | |||
m_layouts) | |||
.desc; | |||
} | |||
@@ -647,7 +651,7 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy( | |||
_item.param, m_base_mgb_opr, m_cn, m_execution_policy, | |||
m_allow_weight_preprocess); | |||
policy.sub_policy.push_back({}); | |||
sub_ctx.construct_execution_policy(require_reproducible, | |||
sub_ctx.construct_execution_policy(select_strategy, | |||
policy.sub_policy.back(), | |||
retrive_from_cache); | |||
}); | |||
@@ -718,8 +722,7 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo( | |||
return None; | |||
return AlgoChooserProfileCache::ResultEntry{ | |||
palgo->name(), | |||
palgo->contain_attribute( | |||
megdnn::AlgoAttribute::REPRODUCIBLE), | |||
static_cast<uint32_t>(palgo->attribute()), | |||
rst.val().time, param.workspace}; | |||
} | |||
@@ -768,10 +771,10 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const { | |||
bool allow_weight_preprocess); \ | |||
template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \ | |||
AlgoChooser<megdnn::Opr>::ExeContext::choose_by_heuristic( \ | |||
bool reproducible) const; \ | |||
ExecutionStrategy select_strategy) const; \ | |||
template typename AlgoChooser<megdnn::Opr>::ImplAlgo \ | |||
AlgoChooser<megdnn::Opr>::ExeContext::get_profile_result_from_cache( \ | |||
bool require_reproducible) const; \ | |||
ExecutionStrategy select_strategy) const; \ | |||
template std::vector<typename AlgoChooser<megdnn::Opr>::ImplAlgo> \ | |||
AlgoChooser<megdnn::Opr>::ExeContext::get_all_candidates() const; \ | |||
template size_t \ | |||
@@ -780,7 +783,7 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const { | |||
policy) const; \ | |||
template void \ | |||
AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy( \ | |||
bool require_reproducible, \ | |||
ExecutionStrategy select_strategy, \ | |||
typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy, \ | |||
bool retrive_from_cache) const; \ | |||
template Maybe<AlgoChooserProfileCache::ResultEntry> \ | |||
@@ -35,6 +35,13 @@ MGB_FOREACH_FASTRUN_OPR(cb) | |||
#undef cb | |||
namespace mgb { | |||
//! short aliases for the bit-flag enums megdnn::param::ExecutionPolicy::Strategy | |||
//! and megdnn::AlgoAttribute, whose values are combined with bitwise operators | |||
using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy; | |||
using AlgoAttribute = megdnn::AlgoAttribute; | |||
namespace opr { | |||
/* =================== AlgoChooser =================== */ | |||
@@ -103,7 +110,7 @@ public: | |||
const FixedTensorLayouts& layouts() const { return m_layouts; } | |||
ImplExecutionPolicy choose_by_heuristic( | |||
bool reproducible = false) const; | |||
ExecutionStrategy select_strategy) const; | |||
//! get all candidate algos; the one returned by choose_by_heuristic() is | |||
//! put first | |||
@@ -126,19 +133,20 @@ public: | |||
const ImplExecutionPolicy& policy, double& timeout) const; | |||
//! get the profiled algorithm from cache, return invalid if it does not exist | |||
ImplAlgo get_profile_result_from_cache(bool require_reproducible) const; | |||
ImplAlgo get_profile_result_from_cache( | |||
ExecutionStrategy select_strategy) const; | |||
/** | |||
* \brief construct execution policy from cache or heuristic. | |||
* | |||
* \param require_reproducible select algo which is reproducible | |||
* \param select_strategy select an algo that matches this strategy | |||
* \param policy execution policy | |||
* \param retrive_from_cache retrieve algo from cache if true, otherwise get | |||
* it from heuristic. | |||
*/ | |||
void construct_execution_policy( | |||
bool require_reproducible, ImplExecutionPolicy& policy, | |||
bool retrive_from_cache = true) const; | |||
void construct_execution_policy(ExecutionStrategy select_strategy, | |||
ImplExecutionPolicy& policy, | |||
bool retrive_from_cache = true) const; | |||
private: | |||
Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter() const; | |||
@@ -153,11 +161,11 @@ private: | |||
//! profile and save to cache | |||
static void profile(ExeContext& ctx, bool require_reproducible); | |||
static void profile(ExeContext& ctx, ExecutionStrategy select_strategy); | |||
static ImplExecutionPolicy choose_by_profile(ExeContext& ctx, | |||
bool require_reproducible, | |||
bool enable_update = true); | |||
static ImplExecutionPolicy choose_by_profile( | |||
ExeContext& ctx, ExecutionStrategy select_strategy, | |||
bool enable_update = true); | |||
public: | |||
/*! | |||
@@ -13,7 +13,6 @@ | |||
#pragma once | |||
#include "megbrain/graph/operator_node.h" | |||
#include "megbrain/opr/param_defs.h" | |||
#include "megdnn/oprs/base.h" | |||
#include "megdnn/oprs/nn.h" | |||
@@ -73,7 +72,6 @@ protected: | |||
}; | |||
} // namespace mixin | |||
} // namespace opr | |||
} // namespace mgb | |||
@@ -429,10 +429,11 @@ TEST(TestOprDNN, MatrixMulExePolicy) { | |||
auto cn = CompNode::load("cpux"); | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, | |||
S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC}) { | |||
#else | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
auto graph = ComputingGraph::make(); | |||
@@ -355,11 +355,13 @@ TEST(TestOprDNN, ConvBiasExePolicy) { | |||
auto cn = CompNode::load("cpux"); | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||
#else | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
auto graph = ComputingGraph::make(); | |||
HostTensorGenerator<> gen; | |||
@@ -397,7 +399,8 @@ TEST(TestOprDNN, ConvBiasExePolicy_Quantized8Asym) { | |||
auto cn = CompNode::load("cpux"); | |||
for (auto strategy: {S::PROFILE, S::PROFILE_REPRODUCIBLE}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::PROFILE | S::REPRODUCIBLE}) { | |||
auto graph = ComputingGraph::make(); | |||
HostTensorGenerator<> gen; | |||
@@ -439,10 +442,12 @@ TEST(TestOprDNN, ConvolutionExePolicy) { | |||
PersistentCacheHook cache_hook{on_get}; | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, | |||
S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||
#else | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
using Checker = AutoOprChecker<2, 1>; | |||
@@ -522,10 +527,11 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) { | |||
PersistentCacheHook cache_hook{on_get}; | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, | |||
S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
{S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE), | |||
S(S::PROFILE | S::HEURISTIC)}) { | |||
#else | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy: {S:HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) { | |||
#endif | |||
using Checker = AutoOprChecker<2, 1>; | |||
@@ -1183,9 +1189,12 @@ TEST(TestOprDNN, Convolution3DExePolicy) { | |||
using S = Policy::Strategy; | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC}) { | |||
#else | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
using Checker = AutoOprChecker<2, 1>; | |||
@@ -1660,10 +1669,12 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) { | |||
PersistentCacheHook cache_hook{on_get}; | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, | |||
S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||
#else | |||
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
auto make_graph = [&](const Checker::SymInpArray& inputs) | |||
-> Checker::SymOutArray { | |||
@@ -1769,10 +1780,12 @@ TEST(TestOprDNN, DeformableConvForward) { | |||
Param param; | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, | |||
S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||
#else | |||
for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
auto make_graph = [&](const Checker::SymInpArray& inputs) | |||
-> Checker::SymOutArray { | |||
@@ -1936,10 +1949,12 @@ TEST(TestOprDNN, BatchConvBiasForward) { | |||
param.sparse = Param::Sparse::DENSE; | |||
#if MGB_ENABLE_FASTRUN | |||
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, | |||
S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, | |||
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { | |||
#else | |||
for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) { | |||
for (auto strategy : | |||
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { | |||
#endif | |||
auto make_quantized = [&](SymbolVar x, const DType& dtype) { | |||
@@ -2080,7 +2095,8 @@ TEST(TestOprDNN, HeuristicReproducible) { | |||
constexpr size_t PH = 1, PW = 1, SH = 1, SW = 1; | |||
for (auto strategy : {S::HEURISTIC, S::HEURISTIC_REPRODUCIBLE}) { | |||
for (auto strategy : | |||
SmallVector<S>{S::HEURISTIC, S::HEURISTIC | S::REPRODUCIBLE}) { | |||
VarNode* bwd_flt; | |||
auto make_graph = [&](const Checker::SymInpArray& inputs) | |||
-> Checker::SymOutArray { | |||
@@ -2126,7 +2142,7 @@ TEST(TestOprDNN, HeuristicReproducible) { | |||
megdnn::Algorithm* palgo = | |||
megdnn_opr->get_algorithm_from_desc(algo); | |||
mgb_assert(palgo, "Unknown algo description"); | |||
if (strategy == S::HEURISTIC_REPRODUCIBLE) { | |||
if (strategy == S(S::HEURISTIC | S::REPRODUCIBLE)) { | |||
EXPECT_TRUE(palgo->contain_attribute( | |||
megdnn::AlgoAttribute::REPRODUCIBLE)); | |||
} | |||
@@ -43,6 +43,7 @@ namespace megdnn { | |||
std::ostream &ostr, const DType &dt) { | |||
return ostr << dt.name(); | |||
} | |||
} // namespace megdnn | |||
namespace mgb { | |||
@@ -18,7 +18,7 @@ pdef('PersistentOutputStorage').add_fields( | |||
add_const('int32', 'INVALID_AXIS', 'MAX_NDIM'). | |||
add_fields('int32', 'axis', 'INVALID_AXIS')) | |||
(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator'). | |||
(pdef('ExecutionPolicy', version=0, is_legacy=True). | |||
add_enum('Strategy', | |||
Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'), | |||
Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, ' | |||
@@ -33,6 +33,20 @@ pdef('PersistentOutputStorage').add_fields( | |||
Doc('workspace_limit', 'workspace limit in bytes'), | |||
str(2**64-1)+'ull')) | |||
# Version 1 of ExecutionPolicy: Strategy is declared as a bit-combination
# enum, so its members can be OR-ed together (e.g. PROFILE | REPRODUCIBLE).
# NOTE: the member name 'OPTMIZED' is kept as-is because callers refer to it
# by that spelling; only the description text is corrected.
(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator', version=1).
 add_bit_combination_enum('Strategy',
     Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
     Doc('PROFILE',
         'run possible algorithms on real device to find the best'),
     Doc('REPRODUCIBLE',
         # trailing space is required: adjacent literals are concatenated
         'when profile or heuristic algo selection it require the algos '
         'must be reproducible'),
     Doc('OPTMIZED',
         'profile require algos are optimized to achieve fast-profile')).
 add_fields('uint64',
            Doc('workspace_limit', 'workspace limit in bytes'),
            str(2**64-1)+'ull'))
(pdef('AssertEqual'). | |||
add_fields('float32', | |||
Doc('maxerr', 'max allowed error; error is defined as the minimal ' | |||