GitOrigin-RevId: fd51023c01
tags/v1.6.0-rc1
@@ -22,7 +22,7 @@ class SublinearMemoryConfig: | |||||
:param genetic_pool_size: number of samples for the crossover random selection | :param genetic_pool_size: number of samples for the crossover random selection | ||||
during genetic optimization. Default: 20. | during genetic optimization. Default: 20. | ||||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. | It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'. | ||||
:param lb_memory: memory lower bound of bottleneck size in MB for sublinear memory optimization. | |||||
:param lb_memory_mb: memory lower bound of bottleneck size in MB for sublinear memory optimization. | |||||
It can be used to perform manual tradeoff between memory and speed. Default: 0. | It can be used to perform manual tradeoff between memory and speed. Default: 0. | ||||
It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. | It can also be set through the environmental variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'. | ||||
:param num_worker: number of thread workers to search the optimum checkpoints | :param num_worker: number of thread workers to search the optimum checkpoints | ||||
@@ -39,7 +39,7 @@ class SublinearMemoryConfig: | |||||
thresh_nr_try: int = 10, | thresh_nr_try: int = 10, | ||||
genetic_nr_iter: int = 0, | genetic_nr_iter: int = 0, | ||||
genetic_pool_size: int = 20, | genetic_pool_size: int = 20, | ||||
lb_memory: int = 0, | |||||
lb_memory_mb: int = 0, | |||||
num_worker: int = max(1, get_device_count("cpu") // 2), | num_worker: int = max(1, get_device_count("cpu") // 2), | ||||
): | ): | ||||
assert thresh_nr_try >= 0, "thresh_nr_try must be greater or equal to zero" | assert thresh_nr_try >= 0, "thresh_nr_try must be greater or equal to zero" | ||||
@@ -50,6 +50,6 @@ class SublinearMemoryConfig: | |||||
genetic_pool_size >= 0 | genetic_pool_size >= 0 | ||||
), "genetic_pool_size must be greater or equal to zero" | ), "genetic_pool_size must be greater or equal to zero" | ||||
self.genetic_pool_size = genetic_pool_size | self.genetic_pool_size = genetic_pool_size | ||||
self.lb_memory = lb_memory | |||||
self.lb_memory_mb = lb_memory_mb | |||||
assert num_worker > 0, "num_worker must be greater or equal to one" | assert num_worker > 0, "num_worker must be greater or equal to one" | ||||
self.num_worker = num_worker | self.num_worker = num_worker |
@@ -532,7 +532,7 @@ class trace: | |||||
if self._sublinear_memory_config is not None: | if self._sublinear_memory_config is not None: | ||||
graph.options.enable_sublinear_memory_opt = True | graph.options.enable_sublinear_memory_opt = True | ||||
sublinear_config = graph.options.sublinear_mem_config | sublinear_config = graph.options.sublinear_mem_config | ||||
sublinear_config.lb_memory = self._sublinear_memory_config.lb_memory | |||||
sublinear_config.lb_memory_mb = self._sublinear_memory_config.lb_memory_mb | |||||
sublinear_config.genetic_nr_iter = ( | sublinear_config.genetic_nr_iter = ( | ||||
self._sublinear_memory_config.genetic_nr_iter | self._sublinear_memory_config.genetic_nr_iter | ||||
) | ) | ||||
@@ -476,7 +476,7 @@ void init_graph_rt(py::module m) { | |||||
DEF_READWRITE(thresh_nr_try) | DEF_READWRITE(thresh_nr_try) | ||||
DEF_READWRITE(genetic_nr_iter) | DEF_READWRITE(genetic_nr_iter) | ||||
DEF_READWRITE(genetic_pool_size) | DEF_READWRITE(genetic_pool_size) | ||||
DEF_READWRITE(lb_memory) | |||||
DEF_READWRITE(lb_memory_mb) | |||||
DEF_READWRITE(num_worker); | DEF_READWRITE(num_worker); | ||||
#undef CURRENT_CLASS | #undef CURRENT_CLASS | ||||
@@ -562,7 +562,7 @@ public: | |||||
m_config->genetic_pool_size = psize; | m_config->genetic_pool_size = psize; | ||||
} | } | ||||
if (auto env = MGB_GETENV("MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB")) { | if (auto env = MGB_GETENV("MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB")) { | ||||
m_config->lb_memory = std::stoi(env) * 1024 * 1024; | |||||
m_config->lb_memory_mb = std::stoi(env); | |||||
} | } | ||||
} | } | ||||
@@ -781,7 +781,7 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() { | |||||
} | } | ||||
void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_refine() { | void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_refine() { | ||||
size_t lower_bound = m_par_modifier->m_config->lb_memory; | |||||
size_t lower_bound = static_cast<size_t>(m_par_modifier->m_config->lb_memory_mb) << 20; | |||||
if (m_min_bottleneck >= lower_bound) | if (m_min_bottleneck >= lower_bound) | ||||
return; | return; | ||||
OprFootprint footprint; | OprFootprint footprint; | ||||
@@ -443,7 +443,7 @@ class ComputingGraph : public std::enable_shared_from_this<ComputingGraph>, | |||||
int thresh_nr_try = 10; | int thresh_nr_try = 10; | ||||
int genetic_nr_iter = 0; | int genetic_nr_iter = 0; | ||||
int genetic_pool_size = 20; | int genetic_pool_size = 20; | ||||
int lb_memory = 0; | |||||
int lb_memory_mb = 0; | |||||
int num_worker = sys::get_cpu_count() / 2; | int num_worker = sys::get_cpu_count() / 2; | ||||
} sublinear_mem_config; | } sublinear_mem_config; | ||||