diff --git a/imperative/python/megengine/jit/__init__.py b/imperative/python/megengine/jit/__init__.py index 13fb0cc0..c2f2bf77 100644 --- a/imperative/python/megengine/jit/__init__.py +++ b/imperative/python/megengine/jit/__init__.py @@ -1 +1,2 @@ +from .sublinear_memory_config import SublinearMemoryConfig from .tracing import exclude_from_trace, trace diff --git a/imperative/python/megengine/jit/sublinear_memory_config.py b/imperative/python/megengine/jit/sublinear_memory_config.py new file mode 100644 index 00000000..7f6a8d27 --- /dev/null +++ b/imperative/python/megengine/jit/sublinear_memory_config.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2020 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +from ..device import get_device_count + + +class SublinearMemoryConfig: + r""" + Configuration for sublinear memory optimization. + + :param thresh_nr_try: number of samples both for searching in linear space + and around current thresh in sublinear memory optimization. Default: 10. + It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'. + :param genetic_nr_iter: number of iterations to find the best checkpoints in genetic algorithm. + Default: 0. + It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'. + :param genetic_pool_size: number of samples for the crossover random selection + during genetic optimization. Default: 20. + It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. + :param lb_memory: memory lower bound of bottleneck size in MB for sublinear memory optimization. + It can be used to perform manual tradeoff between memory and speed. 
Default: 0. + It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. + :param num_worker: number of thread workers to search the optimum checkpoints + in sublinear memory optimization. Default: half the number of CPUs in the system. + Note: the value must be greater than or equal to one. + It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_WORKERS'. + + Note that the environmental variable MGB_COMP_GRAPH_OPT must be set to 'enable_sublinear_memory_opt=1' + in order for the above environmental variables to be effective. + """ + + def __init__( + self, + thresh_nr_try: int = 10, + genetic_nr_iter: int = 0, + genetic_pool_size: int = 20, + lb_memory: int = 0, + num_worker: int = max(1, get_device_count("cpu") // 2), + ): + assert thresh_nr_try >= 0, "thresh_nr_try must be greater or equal to zero" + self.thresh_nr_try = thresh_nr_try + assert genetic_nr_iter >= 0, "genetic_nr_iter must be greater or equal to zero" + self.genetic_nr_iter = genetic_nr_iter + assert ( + genetic_pool_size >= 0 + ), "genetic_pool_size must be greater or equal to zero" + self.genetic_pool_size = genetic_pool_size + self.lb_memory = lb_memory + assert num_worker > 0, "num_worker must be greater or equal to one" + self.num_worker = num_worker diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py index dd789fdf..aef6f386 100644 --- a/imperative/python/megengine/jit/tracing.py +++ b/imperative/python/megengine/jit/tracing.py @@ -7,6 +7,7 @@ from ..core.ops.special import Const from ..core.tensor import megbrain_graph as G from ..core.tensor.core import OpBase, apply from ..core.tensor.raw_tensor import OpDef, RawTensor, as_raw_tensor +from .sublinear_memory_config import SublinearMemoryConfig class TraceMismatchError(RuntimeError): @@ -72,11 +73,18 @@ class trace: self.__init__(*args, **kwargs) return self - def __init__(self, function, symbolic=False, capture_as_const=False): + def __init__( 
self, + function, + symbolic=False, + capture_as_const=False, + sublinear_memory_config: SublinearMemoryConfig = None, + ): self.__wrapped__ = function self._symbolic = symbolic self._capture_as_const = capture_as_const self._capture_static_shape = False + self._sublinear_memory_config = sublinear_memory_config self._untraced = True self._tinfo = [] # handle -> TensorInfo @@ -227,6 +235,7 @@ class trace: G.OutputNode(x._LazyEvalTensor__varnode).outputs[0] for x in lazy_eval_tensors ] + self._apply_graph_options(self._lazy_eval_graph) self._lazy_eval_graph.compile(*readers) self._lazy_eval_graph() for r, x in zip(readers, lazy_eval_tensors): @@ -259,9 +268,26 @@ class trace: info.exported = True info.data_read = True + def _apply_graph_options(self, graph): + + # sublinear + if self._sublinear_memory_config is not None: + graph.options.enable_sublinear_memory_opt = True + sublinear_config = graph.options.sublinear_mem_config + sublinear_config.lb_memory = self._sublinear_memory_config.lb_memory + sublinear_config.genetic_nr_iter = ( + self._sublinear_memory_config.genetic_nr_iter + ) + sublinear_config.genetic_pool_size = ( + self._sublinear_memory_config.genetic_pool_size + ) + sublinear_config.thresh_nr_try = self._sublinear_memory_config.thresh_nr_try + sublinear_config.num_worker = self._sublinear_memory_config.num_worker + def _compile(self): graph = self._graph = G.Graph() graph.options.no_force_inplace = True + self._apply_graph_options(graph) # graph.options.graph_opt_level = 0 need_reset_nodes = self._need_reset_nodes = [] # links enforce ordering of I/O nodes diff --git a/imperative/python/src/graph_rt.cpp b/imperative/python/src/graph_rt.cpp index 67de3508..27899e02 100644 --- a/imperative/python/src/graph_rt.cpp +++ b/imperative/python/src/graph_rt.cpp @@ -119,6 +119,7 @@ void init_graph_rt(py::module m) { DEF_READWRITE(enable_memory_swap) DEF_READWRITE(comp_node_seq_record_level) DEF_READWRITE(no_force_inplace) + DEF_READWRITE(sublinear_mem_config) // 
DEF_READWRITE(eager_evaluation) // DEF_READWRITE(imperative_proxy_graph) // DEF_READWRITE(extra_vardeps) @@ -142,6 +143,16 @@ void init_graph_rt(py::module m) { #undef CURRENT_CLASS +#define CURRENT_CLASS cg::ComputingGraph::Options::SublinearMemConfig + + py::class_(PyComputingGraphOptions, "SublinearMemConfig") + DEF_READWRITE(thresh_nr_try) + DEF_READWRITE(genetic_nr_iter) + DEF_READWRITE(genetic_pool_size) + DEF_READWRITE(lb_memory) + DEF_READWRITE(num_worker); + +#undef CURRENT_CLASS auto common = rel_import("common", m, 1); common.def("invoke_op", [](const OpDef& def, const std::vector inputs, cg::ComputingGraph* graph) { diff --git a/imperative/python/test/integration/test_correctness.py b/imperative/python/test/integration/test_correctness.py index 7519c06a..31cfecbf 100644 --- a/imperative/python/test/integration/test_correctness.py +++ b/imperative/python/test/integration/test_correctness.py @@ -19,6 +19,7 @@ import megengine.functional as F from megengine import jit from megengine.core._trace_option import set_tensor_shape from megengine.functional.debug_param import set_conv_execution_strategy +from megengine.jit import SublinearMemoryConfig from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module from megengine.optimizer import SGD from megengine.tensor import Tensor @@ -217,14 +218,14 @@ def test_correctness(): set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE") run_train(model_path, False, False, max_err=1e-5) - # run_test(model_path, True, False) - # run_test(model_path, True, True) + run_train(model_path, True, False, max_err=1e-5) + run_train(model_path, True, True, max_err=1e-5) # sublinear - # config = SublinearMemoryConfig(genetic_nr_iter=10) - # run_test( - # model_path, True, True, sublinear_memory_config=config, max_err=1e-5, - # ) + config = SublinearMemoryConfig(genetic_nr_iter=10) + run_train( + model_path, True, True, sublinear_memory_config=config, max_err=1e-5, + ) run_eval(model_path, False, max_err=1e-7) - # 
run_eval(model_path, True, max_err=1e-7) # XXX: fix me + run_eval(model_path, True, max_err=1e-7) diff --git a/python_module/megengine/jit/__init__.py b/python_module/megengine/jit/__init__.py index b3aabdbf..d610388b 100644 --- a/python_module/megengine/jit/__init__.py +++ b/python_module/megengine/jit/__init__.py @@ -298,23 +298,23 @@ class trace: if self._sublinear_memory_config is not None: cg.set_option("enable_sublinear_memory_opt", True) cg.set_option( - "sublinear_mem_cofig.lb_memory", + "sublinear_mem_config.lb_memory", self._sublinear_memory_config.lb_memory, ) cg.set_option( - "sublinear_mem_cofig.genetic_nr_iter", + "sublinear_mem_config.genetic_nr_iter", self._sublinear_memory_config.genetic_nr_iter, ) cg.set_option( - "sublinear_mem_cofig.genetic_pool_size", + "sublinear_mem_config.genetic_pool_size", self._sublinear_memory_config.genetic_pool_size, ) cg.set_option( - "sublinear_mem_cofig.thresh_nr_try", + "sublinear_mem_config.thresh_nr_try", self._sublinear_memory_config.thresh_nr_try, ) cg.set_option( - "sublinear_mem_cofig.num_worker", + "sublinear_mem_config.num_worker", self._sublinear_memory_config.num_worker, ) # pack allreduce diff --git a/python_module/src/cpp/megbrain_config.cpp b/python_module/src/cpp/megbrain_config.cpp index 84f19888..fa31950e 100644 --- a/python_module/src/cpp/megbrain_config.cpp +++ b/python_module/src/cpp/megbrain_config.cpp @@ -116,11 +116,11 @@ bool _config::set_comp_graph_option( SET_CG_OPTION(allocate_static_mem_after_graph_compile); SET_CG_OPTION(log_level); SET_CG_OPTION(enable_sublinear_memory_opt); - SET_CG_OPTION(sublinear_mem_cofig.lb_memory); - SET_CG_OPTION(sublinear_mem_cofig.genetic_nr_iter); - SET_CG_OPTION(sublinear_mem_cofig.genetic_pool_size); - SET_CG_OPTION(sublinear_mem_cofig.thresh_nr_try); - SET_CG_OPTION(sublinear_mem_cofig.num_worker); + SET_CG_OPTION(sublinear_mem_config.lb_memory); + SET_CG_OPTION(sublinear_mem_config.genetic_nr_iter); + SET_CG_OPTION(sublinear_mem_config.genetic_pool_size); 
+ SET_CG_OPTION(sublinear_mem_config.thresh_nr_try); + SET_CG_OPTION(sublinear_mem_config.num_worker); SET_CG_OPTION(enable_var_mem_defragment); SET_CG_OPTION(eager_evaluation); SET_CG_OPTION(enable_memory_swap); diff --git a/src/core/impl/graph/cg_impl.cpp b/src/core/impl/graph/cg_impl.cpp index 4641562a..3341c3a4 100644 --- a/src/core/impl/graph/cg_impl.cpp +++ b/src/core/impl/graph/cg_impl.cpp @@ -219,7 +219,7 @@ ComputingGraphImpl::Components::Components(ComputingGraphImpl* owner) grad_manager{owner}, #if MGB_ENABLE_SUBLINEAR seq_modifier_for_sublinear_memory{owner, - &(owner->options().sublinear_mem_cofig)}, + &(owner->options().sublinear_mem_config)}, #endif #if MGB_ENABLE_MEMORY_SWAP memory_swap_support{owner}, diff --git a/src/core/include/megbrain/graph/cg.h b/src/core/include/megbrain/graph/cg.h index 41c702ed..9b83e459 100644 --- a/src/core/include/megbrain/graph/cg.h +++ b/src/core/include/megbrain/graph/cg.h @@ -409,7 +409,7 @@ class ComputingGraph : public std::enable_shared_from_this, int genetic_pool_size = 20; int lb_memory = 0; int num_worker = sys::get_cpu_count() / 2; - } sublinear_mem_cofig; + } sublinear_mem_config; //! do not re-profile to select best impl algo when input shape //! changes (use previous algo) diff --git a/src/core/test/sublinear_memory.cpp b/src/core/test/sublinear_memory.cpp index 2e16bee3..6e42f12e 100644 --- a/src/core/test/sublinear_memory.cpp +++ b/src/core/test/sublinear_memory.cpp @@ -522,7 +522,7 @@ TEST(TestSublinearMemory, BadOpr) { set_priority(z, 3); graph->options().graph_opt_level = 0; graph->options().enable_sublinear_memory_opt = 1; - graph->options().sublinear_mem_cofig.genetic_nr_iter = 50; + graph->options().sublinear_mem_config.genetic_nr_iter = 50; auto func = graph->compile({{y, {}}, {z, {}}}); auto&& results = static_cast(graph.get()) ->seq_modifier_for_sublinear_memory().prev_min_bottleneck();