GitOrigin-RevId: e725e7efdd
tags/v1.0.0-rc1
@@ -9,6 +9,7 @@ | |||||
import os | import os | ||||
from .core._imperative_rt.common import CompNode, DeviceType | from .core._imperative_rt.common import CompNode, DeviceType | ||||
from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config | |||||
__all__ = [ | __all__ = [ | ||||
"is_cuda_available", | "is_cuda_available", | ||||
@@ -16,6 +17,7 @@ __all__ = [ | |||||
"get_default_device", | "get_default_device", | ||||
"set_default_device", | "set_default_device", | ||||
"set_prealloc_config", | "set_prealloc_config", | ||||
"DeviceType", | |||||
] | ] | ||||
@@ -94,15 +96,15 @@ def set_prealloc_config( | |||||
alignment: int = 1, | alignment: int = 1, | ||||
min_req: int = 32 * 1024 * 1024, | min_req: int = 32 * 1024 * 1024, | ||||
max_overhead: int = 0, | max_overhead: int = 0, | ||||
growth_factor: float = 2.0, | |||||
device_type: str = "gpu", | |||||
growth_factor=2.0, | |||||
device_type=DeviceType.CUDA, | |||||
): | ): | ||||
"""specifies how to pre-allocate from raw device allocator | |||||
"""specifies how to pre-allocate from raw dev allocator | |||||
:param alignment: specifies the alignment in byte | |||||
:param min_req: min request size in byte | |||||
:param max_overhead: max overhead above required size in byte | |||||
:growth_factor: request size = growth_factor * current allocated size | |||||
:param alignment: specifies the alignment in bytes. | |||||
:param min_req: min request size in bytes. | |||||
:param max_overhead: max overhead above required size in bytes. | |||||
:growth_factor: request size / cur allocated | |||||
:device_type: the device type | :device_type: the device type | ||||
""" | """ | ||||
@@ -110,5 +112,4 @@ def set_prealloc_config( | |||||
assert min_req > 0 | assert min_req > 0 | ||||
assert max_overhead >= 0 | assert max_overhead >= 0 | ||||
assert growth_factor >= 1 | assert growth_factor >= 1 | ||||
t = _str2device_type(device_type) | |||||
_set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t) | |||||
_set_prealloc_config(alignment, min_req, max_overhead, growth_factor, device_type) |
@@ -165,6 +165,9 @@ void init_common(py::module m) { | |||||
.value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD) | .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD) | ||||
.value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID); | .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID); | ||||
m.def("set_prealloc_config", &CompNode::set_prealloc_config, | |||||
"specifies how to pre-allocate from raw dev allocator"); | |||||
init_npy_num_bfloat16(m); | init_npy_num_bfloat16(m); | ||||
init_npy_num_intbx(m); | init_npy_num_intbx(m); | ||||
} | } |
@@ -12,6 +12,8 @@ | |||||
#include "megbrain/comp_node.h" | #include "megbrain/comp_node.h" | ||||
#include "megbrain/comp_node_env.h" | #include "megbrain/comp_node_env.h" | ||||
#include "megbrain/graph/exc_extra_info.h" | #include "megbrain/graph/exc_extra_info.h" | ||||
#include "megbrain/common.h" | |||||
#include "megbrain/comp_node/alloc.h" | |||||
#include "./cuda/comp_node.h" | #include "./cuda/comp_node.h" | ||||
#include "./cpu/comp_node.h" | #include "./cpu/comp_node.h" | ||||
@@ -420,6 +422,21 @@ void CompNode::activate() const { | |||||
static_cast<Impl*>(m_impl)->env().activate(); | static_cast<Impl*>(m_impl)->env().activate(); | ||||
} | } | ||||
void CompNode::set_prealloc_config( | |||||
size_t alignment, | |||||
size_t min_req, | |||||
size_t max_overhead, | |||||
double growth_factor, | |||||
DeviceType device_type) { | |||||
switch (device_type) { | |||||
case DeviceType::CUDA: | |||||
CudaCompNode::set_prealloc_config(alignment, min_req, max_overhead, growth_factor); | |||||
break; | |||||
default: | |||||
mgb_log_warn("unsupported device type for set_prealloc_config"); | |||||
}; | |||||
} | |||||
void* CompNode::alloc_device(size_t size) const { | void* CompNode::alloc_device(size_t size) const { | ||||
auto ret = m_impl->alloc_device(size); | auto ret = m_impl->alloc_device(size); | ||||
static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret); | static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret); | ||||
@@ -825,15 +825,16 @@ void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req, | |||||
using T = CudaCompNodeImpl::StaticData; | using T = CudaCompNodeImpl::StaticData; | ||||
static std::aligned_storage_t<sizeof(T), alignof(T)> storage; | static std::aligned_storage_t<sizeof(T), alignof(T)> storage; | ||||
sdptr = new(&storage)T; | sdptr = new(&storage)T; | ||||
MGB_LOCK_GUARD(sdptr->mtx); | |||||
sdptr->prealloc_config.alignment = alignment; | sdptr->prealloc_config.alignment = alignment; | ||||
sdptr->prealloc_config.min_req = min_req; | sdptr->prealloc_config.min_req = min_req; | ||||
sdptr->prealloc_config.growth_factor = growth_factor; | sdptr->prealloc_config.growth_factor = growth_factor; | ||||
sdptr->prealloc_config.max_overhead = max_overhead; | sdptr->prealloc_config.max_overhead = max_overhead; | ||||
} else { | } else { | ||||
mgb_log_warn( | mgb_log_warn( | ||||
"failed to invoke set_prealloc_config; fallback to default configuration; " | |||||
"prealloc_config should be specified before any invocation of load_cuda"); | |||||
"invalid call to set_prealloc_config, will fallback to " | |||||
"default config; " | |||||
"prealloc_config should be specified before any CUDA " | |||||
"memory allocation"); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -858,6 +859,10 @@ CudaCompNode::Impl* CudaCompNode::load_cuda(const Locator&, const Locator&) { | |||||
void CudaCompNode::sync_all() { | void CudaCompNode::sync_all() { | ||||
} | } | ||||
void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req, | |||||
size_t max_overhead, | |||||
double growth_factor) {} | |||||
#undef err | #undef err | ||||
#endif // MGB_CUDA | #endif // MGB_CUDA | ||||
@@ -32,9 +32,10 @@ namespace mgb { | |||||
static Impl* load_cuda( | static Impl* load_cuda( | ||||
const Locator &locator, const Locator &locator_logical); | const Locator &locator, const Locator &locator_logical); | ||||
static void sync_all(); | static void sync_all(); | ||||
static void set_prealloc_config(size_t alignment, size_t min_req, | |||||
size_t max_overhead, double growth_factor); | |||||
}; | }; | ||||
} | } | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
@@ -308,6 +308,14 @@ class CompNode { | |||||
*/ | */ | ||||
static void try_coalesce_all_free_memory(); | static void try_coalesce_all_free_memory(); | ||||
/* | |||||
* \brief specifies how to pre-allocate from raw dev allocator | |||||
* | |||||
*/ | |||||
static void set_prealloc_config(size_t alignment, size_t min_req, | |||||
size_t max_overhead, double growth_factor, | |||||
DeviceType device_type); | |||||
/* =================== synchronization ======================== */ | /* =================== synchronization ======================== */ | ||||
class Event; | class Event; | ||||