feat(mgb/comp_node): add set_prealloc_config

GitOrigin-RevId: e725e7efdd
4 years ago · a7b9ece4a7
--- a/imperative/python/megengine/device.py
+++ b/imperative/python/megengine/device.py
@@ -9,6 +9,7 @@
 import os

 from .core._imperative_rt.common import CompNode, DeviceType
 from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config

 __all__ = [
    "is_cuda_available",
@@ -16,6 +17,7 @@ __all__ = [
    "get_default_device",
    "set_default_device",
    "set_prealloc_config",
    "DeviceType",
 ]


@@ -94,15 +96,15 @@ def set_prealloc_config(
    alignment: int = 1,
    min_req: int = 32 * 1024 * 1024,
    max_overhead: int = 0,
    growth_factor: float = 2.0,
    device_type: str = "gpu",
    growth_factor=2.0,
    device_type=DeviceType.CUDA,
 ):
    """specifies how to pre-allocate from raw device allocator
    """specifies how to pre-allocate from raw dev allocator

    :param alignment: specifies the alignment in byte
    :param min_req: min request size in byte
    :param max_overhead: max overhead above required size in byte
    :growth_factor: request size = growth_factor * current allocated size
    :param alignment: specifies the alignment in bytes.
    :param min_req: min request size in bytes.
    :param max_overhead: max overhead above required size in bytes.
    :param growth_factor: ratio of the next request size to the currently allocated size.
    :param device_type: the device type.

    """
@@ -110,5 +112,4 @@ def set_prealloc_config(
    assert min_req > 0
    assert max_overhead >= 0
    assert growth_factor >= 1
    t = _str2device_type(device_type)
    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t)
    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, device_type)
--- a/imperative/python/src/common.cpp
+++ b/imperative/python/src/common.cpp
@@ -165,6 +165,9 @@ void init_common(py::module m) {
            .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
            .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);

    m.def("set_prealloc_config", &CompNode::set_prealloc_config, 
        "specifies how to pre-allocate from raw dev allocator");

    init_npy_num_bfloat16(m);
    init_npy_num_intbx(m);
 }
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -12,6 +12,8 @@
 #include "megbrain/comp_node.h"
 #include "megbrain/comp_node_env.h"
 #include "megbrain/graph/exc_extra_info.h"
 #include "megbrain/common.h"
 #include "megbrain/comp_node/alloc.h"

 #include "./cuda/comp_node.h"
 #include "./cpu/comp_node.h"
@@ -420,6 +422,21 @@ void CompNode::activate() const {
    static_cast<Impl*>(m_impl)->env().activate();
 }

 // Forward the pre-allocation settings to the backend selected by
 // \p device_type. Only DeviceType::CUDA is forwarded here; any other value
 // changes nothing and is reported through a warning log.
 void CompNode::set_prealloc_config(
    size_t alignment, size_t min_req, size_t max_overhead,
    double growth_factor, DeviceType device_type) {
    if (device_type == DeviceType::CUDA) {
        CudaCompNode::set_prealloc_config(
                alignment, min_req, max_overhead, growth_factor);
        return;
    }
    mgb_log_warn("unsupported device type for set_prealloc_config");
 }

 void* CompNode::alloc_device(size_t size) const {
    auto ret = m_impl->alloc_device(size);
    static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret);
--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -825,15 +825,16 @@ void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
            using T = CudaCompNodeImpl::StaticData;
            static std::aligned_storage_t<sizeof(T), alignof(T)> storage;
            sdptr = new(&storage)T;
            MGB_LOCK_GUARD(sdptr->mtx);
            sdptr->prealloc_config.alignment = alignment;
            sdptr->prealloc_config.min_req = min_req;
            sdptr->prealloc_config.growth_factor = growth_factor;
            sdptr->prealloc_config.max_overhead = max_overhead;
        } else {
            mgb_log_warn(
                "failed to invoke set_prealloc_config; fallback to default configuration; "
                "prealloc_config should be specified before any invocation of load_cuda");
                "invalid call to set_prealloc_config, will fallback to "
                "default config; "
                "prealloc_config should be specified before any CUDA "
                "memory allocation");
        }
    }
 }
@@ -858,6 +859,10 @@ CudaCompNode::Impl* CudaCompNode::load_cuda(const Locator&, const Locator&) {
 void CudaCompNode::sync_all() {
 }

 void CudaCompNode::set_prealloc_config(size_t /* alignment */,
                                        size_t /* min_req */,
                                        size_t /* max_overhead */,
                                        double /* growth_factor */) {
    // Intentionally a no-op: every argument is ignored.
    // NOTE(review): presumably the stub compiled when MGB_CUDA is disabled
    // (an `#endif // MGB_CUDA` follows shortly after) -- confirm.
 }

 #undef err

 #endif // MGB_CUDA
--- a/src/core/impl/comp_node/cuda/comp_node.h
+++ b/src/core/impl/comp_node/cuda/comp_node.h
@@ -32,9 +32,10 @@ namespace mgb {
            static Impl* load_cuda(
                    const Locator &locator, const Locator &locator_logical);
            static void sync_all();

            // Set the pre-allocation parameters (alignment, minimum request
            // size, maximum overhead and growth factor) for CUDA memory.
            // Per the warning emitted by the implementation, this should be
            // called before any CUDA memory allocation; otherwise the default
            // configuration is used.
            static void set_prealloc_config(size_t alignment, size_t min_req,
                                            size_t max_overhead, double growth_factor);
    };
 }

 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}


--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -308,6 +308,14 @@ class CompNode {
         */
        static void try_coalesce_all_free_memory();

        /*!
         * \brief specifies how to pre-allocate from raw dev allocator
         *
         * \param alignment alignment of the pre-allocated memory, in bytes
         * \param min_req minimum request size, in bytes
         * \param max_overhead maximum overhead above the required size, in
         *      bytes
         * \param growth_factor request size = growth_factor * current
         *      allocated size
         * \param device_type device type whose allocator is configured; only
         *      DeviceType::CUDA is handled, any other value just logs a
         *      warning
         */
        static void set_prealloc_config(size_t alignment, size_t min_req,
                                        size_t max_overhead, double growth_factor,
                                        DeviceType device_type);

        /* =================== synchronization ======================== */

        class Event;