GitOrigin-RevId: ae0bf4b479
tags/v1.0.0-rc1
@@ -15,6 +15,7 @@ __all__ = [ | |||||
"get_device_count", | "get_device_count", | ||||
"get_default_device", | "get_default_device", | ||||
"set_default_device", | "set_default_device", | ||||
"set_prealloc_config", | |||||
] | ] | ||||
@@ -33,7 +34,7 @@ def _str2device_type(type_str: str, allow_unspec: bool = True): | |||||
elif type_str == "GPU" or type_str == "CUDA": | elif type_str == "GPU" or type_str == "CUDA": | ||||
return DeviceType.CUDA | return DeviceType.CUDA | ||||
else: | else: | ||||
assert allow_unspec and str == "XPU", "bad device type" | |||||
assert allow_unspec and str == "XPU", "device type can only be cpu, gpu or xpu" | |||||
return DeviceType.UNSPEC | return DeviceType.UNSPEC | ||||
@@ -87,3 +88,27 @@ def get_default_device() -> str: | |||||
set_default_device(os.getenv("MGE_DEFAULT_DEVICE", "xpux")) | set_default_device(os.getenv("MGE_DEFAULT_DEVICE", "xpux")) | ||||
def set_prealloc_config(
    alignment: int = 1,
    min_req: int = 32 * 1024 * 1024,
    max_overhead: int = 0,
    growth_factor: float = 2.0,
    device_type: str = "gpu",
):
    r"""Specifies how memory is pre-allocated from the raw device allocator.

    :param alignment: allocation alignment in bytes; must be positive.
    :param min_req: minimum request size in bytes; must be positive.
    :param max_overhead: maximum overhead above the required size, in bytes;
        must be non-negative.
    :param growth_factor: ``request size = growth_factor * current allocated
        size``; must be at least 1.
    :param device_type: device type string accepted by
        ``_str2device_type`` (e.g. ``"gpu"``).
    """
    # Validate eagerly in Python so bad values fail with a clear message
    # instead of reaching the native layer.
    assert alignment > 0, "alignment must be positive"
    assert min_req > 0, "min_req must be positive"
    assert max_overhead >= 0, "max_overhead must be non-negative"
    assert growth_factor >= 1, "growth_factor must be at least 1"
    t = _str2device_type(device_type)
    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t)
@@ -815,6 +815,29 @@ size_t CudaCompNode::get_device_count(bool warn) { | |||||
return cnt; | return cnt; | ||||
} | } | ||||
void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req, | |||||
size_t max_overhead, | |||||
double growth_factor) { | |||||
auto &&sdptr = CudaCompNodeImpl::sd; | |||||
{ | |||||
MGB_LOCK_GUARD(CudaCompNodeImpl::sd_mtx); | |||||
if (!sdptr) { | |||||
using T = CudaCompNodeImpl::StaticData; | |||||
static std::aligned_storage_t<sizeof(T), alignof(T)> storage; | |||||
sdptr = new(&storage)T; | |||||
MGB_LOCK_GUARD(sdptr->mtx); | |||||
sdptr->prealloc_config.alignment = alignment; | |||||
sdptr->prealloc_config.min_req = min_req; | |||||
sdptr->prealloc_config.growth_factor = growth_factor; | |||||
sdptr->prealloc_config.max_overhead = max_overhead; | |||||
} else { | |||||
mgb_log_warn( | |||||
"failed to invoke set_prealloc_config; fallback to default configuration; " | |||||
"prealloc_config should be specified before any invocation of load_cuda"); | |||||
} | |||||
} | |||||
} | |||||
#else | #else | ||||
bool CudaCompNode::available() { | bool CudaCompNode::available() { | ||||
@@ -290,6 +290,12 @@ TEST(TestCompNodeCuda, Uid) { | |||||
ASSERT_NE(cn00.get_uid(), cn1.get_uid()); | ASSERT_NE(cn00.get_uid(), cn1.get_uid()); | ||||
} | } | ||||
// Smoke test: configuring pre-allocation (1KiB alignment, 1KiB min request,
// 256MiB max overhead, growth factor 4) before any CUDA comp node is loaded
// must succeed without crashing.
TEST(TestCompNodeCuda, set_prealloc_config) {
    CompNode::set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4,
                                  CompNode::DeviceType::CUDA);
}
#if MGB_CAMBRICON | #if MGB_CAMBRICON | ||||
TEST(TestCompNodeCambricon, MemNode) { | TEST(TestCompNodeCambricon, MemNode) { | ||||