Browse Source

refactor(mge/dtr): update dtr api

GitOrigin-RevId: dc366c65be
release-1.4
Megvii Engine Team 4 years ago
parent
commit
d4dd134d1a
6 changed files with 147 additions and 58 deletions
  1. +1
    -0
      imperative/python/megengine/__init__.py
  2. +131
    -0
      imperative/python/megengine/dtr.py
  3. +0
    -44
      imperative/python/megengine/utils/dtr.py
  4. +1
    -0
      imperative/python/requires.txt
  5. +11
    -11
      imperative/src/impl/interpreter/interpreter_impl.cpp
  6. +3
    -3
      imperative/src/impl/interpreter/option_manager.h

+ 1
- 0
imperative/python/megengine/__init__.py View File

@@ -76,6 +76,7 @@ from .core._imperative_rt.core2 import full_sync as _full_sync
from .core._imperative_rt.core2 import sync as _sync
from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func
from .device import *
from .dtr import *
from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
from .serialization import load, save
from .tensor import Parameter, Tensor, tensor


+ 131
- 0
imperative/python/megengine/dtr.py View File

@@ -0,0 +1,131 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import re
from typing import Union

from mprop import mproperty

from .core._imperative_rt.core2 import set_option
from .core._imperative_rt.utils import _set_defrag

_eviction_threshold = 0
_evictee_minimum_size = 1024 ** 2


def str2bytes(text: str) -> int:
regex = re.compile(r"(\d+(?:\.\d+)?)\s*([kmg]?b)", re.IGNORECASE)
order = ["b", "kb", "mb", "gb"]
result = regex.findall(text)
if len(result) != 1:
raise ValueError(
"Formatting of `value` only supports bytes(B), kilobyte(KB), megabyte(MB) and gigabyte(GB) units"
)
return int(float(result[0][0]) * 1024 ** order.index(result[0][1].lower()))


@mproperty
def eviction_threshold(mod):
r"""
Returns the eviction threshold in bytes.

.. note::

When GPU memory usage exceeds this value, DTR will heuristically select
and evict resident tensors until the amount of used memory falls below
this threshold.

"""
return mod._eviction_threshold


@eviction_threshold.setter
def eviction_threshold(mod, value: Union[int, str]):
r"""
Change the eviction threshold. If `value` is an int, it represents the
number of bytes. If `value` is a string, its formatting supports bytes(B),
kilobyte(KB), megabyte(MB) and gigabyte(GB) units.

Examples:

.. code-block::

import megengine as mge
mge.dtr.eviction_threshold = 2 * 1024 ** 3
mge.dtr.eviction_threshold = "2GB"
mge.dtr.eviction_threshold = "2048MB"

"""
if isinstance(value, str):
mod._eviction_threshold = mod.str2bytes(value)
elif isinstance(value, int):
mod._eviction_threshold = value
else:
raise TypeError("`value` should be a str or an int")
set_option("dtr_eviction_threshold", mod._eviction_threshold)


@mproperty
def evictee_minimum_size(mod):
r"""
Returns the memory threshold of tensors in bytes.

.. note::

Only tensors whose size exceeds this threshold will be added to the
candidate set. A tensor that is not added to the candidate set will
never be evicted during its lifetime.

"""
return mod._evictee_minimum_size


@evictee_minimum_size.setter
def evictee_minimum_size(mod, value: Union[int, str]):
r"""
Change the memory threshold of tensors. If `value` is an int, it represents
the number of bytes. If `value` is a string, its formatting supports bytes(B),
kilobyte(KB), megabyte(MB) and gigabyte(GB) units.

Examples:

.. code-block::

import megengine as mge
mge.dtr.evictee_minimum_size = 2 * 1024 ** 2
mge.dtr.evictee_minimum_size = "2MB"
mge.dtr.evictee_minimum_size = "2048KB"

"""
if isinstance(value, str):
mod._evictee_minimum_size = mod.str2bytes(value)
elif isinstance(value, int):
mod._evictee_minimum_size = value
else:
raise TypeError("`value` should be a str or an int")
set_option("dtr_evictee_minimum_size", mod._evictee_minimum_size)


def enable():
r"""
Enable to record computing path of tensors and to perform DTR policy.
"""
_set_defrag(True)
set_option("enable_dtr_auto_drop", 1)
set_option("enable_drop", 1)
set_option("buffer_length", 0)
set_option("record_computing_path", 1)


def disable():
r"""
Stop recording computing path of tensors and performing DTR policy.
"""
set_option("enable_dtr_auto_drop", 0)
set_option("enable_drop", 0)
set_option("record_computing_path", 0)

+ 0
- 44
imperative/python/megengine/utils/dtr.py View File

@@ -1,44 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

from ..core._imperative_rt.core2 import set_option
from ..core._imperative_rt.utils import _set_defrag


class DTR:
r"""
DTR implements `Dynamic Tensor Rematerialization <https://arxiv.org/abs/2006.09616>`_ in MegEngine.

It is basically an online algorithm for checkpointing driven by certain eviction policies.

.. code-block::
from megengine.utils.dtr import DTR

ds = DTR(memory_budget=5*1024**3)

# your training code

"""

def __init__(self, memory_budget=0, tensor_lowerbound=1048576):
r"""
:param memory_budget: int. The threshold of memory usage. When memory
usage exceeds this value, auto evict will be triggered.
:param tensor_lowerbound: int. The minimum memory limit of the tensor
that can be evicted. Default: 1MB.
"""
if memory_budget > 0:
set_option("enable_auto_drop", 1)
set_option("enable_drop", 1)
set_option("buffer_length", 0)
set_option("memory_budget", memory_budget)
set_option("tensor_lowerbound", tensor_lowerbound)
set_option("record_computing_path", 1)
_set_defrag(True)

+ 1
- 0
imperative/python/requires.txt View File

@@ -6,3 +6,4 @@ tabulate
tqdm
redispy
deprecated
mprop

+ 11
- 11
imperative/src/impl/interpreter/interpreter_impl.cpp View File

@@ -422,7 +422,7 @@ void ChannelImpl::do_drop(TensorInfo* ptr, bool user=false) {
}

void ChannelImpl::free(TensorInfo* ptr) {
if (m_worker_state.options.enable_auto_drop) {
if (m_worker_state.options.enable_dtr_auto_drop) {
// Evicting a tensor, rather than freeing it, can avoid pinning
// potentially exploding amounts of memory and allow us to save
// more memory.
@@ -459,7 +459,7 @@ void ChannelImpl::real_free(TensorInfo* ptr) {
if (m_channel_state.profiler->is_profiling()) {
m_channel_state.profiler->record_host<TensorEraseEvent>(ptr->id);
}
if (ptr->size_exceeds_thd(m_worker_state.options.tensor_lowerbound)) {
if (ptr->size_exceeds_thd(m_worker_state.options.dtr_evictee_minimum_size)) {
m_dtr.erase_candidate(ptr);
}
detach_users(ptr);
@@ -487,7 +487,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr, bool notice=tr
dest->memory = ptr->blob()->size();
dest->ptr = std::move(ptr);
dest->evict_type = EvictType::NONE;
if (notice && dest->size_exceeds_thd(m_worker_state.options.tensor_lowerbound)) {
if (notice && dest->size_exceeds_thd(m_worker_state.options.dtr_evictee_minimum_size)) {
m_dtr.insert_candidate(dest);
}
if (notice && m_waitee == dest) {
@@ -519,7 +519,7 @@ void ChannelImpl::recompute(TensorInfo::ComputePath* path) {
inputs.push_back(i->ptr);
m_dtr.update_used_time(i);
}
if (m_worker_state.options.enable_auto_drop && m_worker_state.options.memory_budget > 0) {
if (m_worker_state.options.enable_dtr_auto_drop && m_worker_state.options.dtr_eviction_threshold > 0) {
auto_evict();
}
auto outputs = OpDef::apply_on_physical_tensor(*path->op, inputs);
@@ -531,7 +531,7 @@ void ChannelImpl::recompute(TensorInfo::ComputePath* path) {
o->recompute_times ++;
if (!o->ptr) {
produce_tensor(o, std::move(outputs[i]), false);
if (m_worker_state.options.enable_auto_drop) {
if (m_worker_state.options.enable_dtr_auto_drop) {
m_dtr.update_dsu_after_recompute(o);
}
}
@@ -544,7 +544,7 @@ void ChannelImpl::auto_evict() {
return;
}
size_t current_memory = m_dtr.comp_node.get_used_memory();
while (current_memory > m_worker_state.options.memory_budget) {
while (current_memory > m_worker_state.options.dtr_eviction_threshold) {
auto best = m_dtr.find_best_tensor();
if (!best) {
if (!m_dtr.warn_printed) {
@@ -642,7 +642,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
uint64_t apply_id = ++m_last_id;
SmallVector<TensorPtr> tensor_inputs;
SmallVector<CompNode> devices;
if (m_worker_state.options.enable_auto_drop) {
if (m_worker_state.options.enable_dtr_auto_drop) {
m_dtr.pin(cmd.inputs);
}
for (auto i : cmd.inputs) {
@@ -696,7 +696,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
m_worker_state.profiler->record_device<DeviceOpExecuteEvent>(device, event_data);
}
}
if (m_worker_state.options.enable_auto_drop && m_worker_state.options.memory_budget > 0) {
if (m_worker_state.options.enable_dtr_auto_drop && m_worker_state.options.dtr_eviction_threshold > 0) {
auto_evict();
}
// Apply op
@@ -712,7 +712,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
}
// End profiling operator
double estimate_compute_time = 0;
if (m_worker_state.options.enable_auto_drop) {
if (m_worker_state.options.enable_dtr_auto_drop) {
for (auto i : cmd.inputs) {
estimate_compute_time += i->memory;
}
@@ -735,7 +735,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
continue;
}
produce_tensor(cmd.outputs[i], std::move(tensor_outputs[i]));
if (m_worker_state.options.enable_auto_drop) {
if (m_worker_state.options.enable_dtr_auto_drop) {
cmd.outputs[i]->dsu_ptr = std::make_shared<DsuNode>(estimate_compute_time);
}
}
@@ -774,7 +774,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
TensorInfo::ComputePath::make(cmd.op, cmd.inputs, cmd.outputs);
size_t detach_cnt = 0;
for (auto output : cmd.outputs) {
if (!output->size_exceeds_thd(m_worker_state.options.tensor_lowerbound)) {
if (!output->size_exceeds_thd(m_worker_state.options.dtr_evictee_minimum_size)) {
output->detach_producer();
detach_cnt ++;
}


+ 3
- 3
imperative/src/impl/interpreter/option_manager.h View File

@@ -39,10 +39,10 @@ public:
"set command buffer length.");
DEF_OPTION(enable_host_compute, "MEGENGINE_HOST_COMPUTE", 1,
"enable host compute, thus computation may be done in host event if it's device is gpu.");
DEF_OPTION(enable_auto_drop, "MEGENGINE_AUTO_DROP", 0, "");
DEF_OPTION(memory_budget, "MEGENGINE_MEMORY_BUDGET", 0,
DEF_OPTION(enable_dtr_auto_drop, "MEGENGINE_DTR_AUTO_DROP", 0, "");
DEF_OPTION(dtr_eviction_threshold, "MEGENGINE_DTR_EVICTION_THRESHOLD", 0,
"auto drop will start whenever gpu memory usage exceeds this value.");
DEF_OPTION(tensor_lowerbound, "MEGENGINE_TENSOR_LOWERBOUND", 1048576,
DEF_OPTION(dtr_evictee_minimum_size, "MEGENGINE_DTR_EVICTEE_MINIMUM_SIZE", 1048576,
"the minimum memory value of a tensor added to the candidate set");
DEF_OPTION(record_computing_path, "MEGENGINE_RECORD_COMPUTING_PATH", 0, "");



Loading…
Cancel
Save