GitOrigin-RevId: 61874faa6d
tags/v1.0.0-rc1
@@ -267,45 +267,59 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { | |||||
} | } | ||||
size_t DevMemAllocImpl::gather_stream_free_blk_and_release_full() { | size_t DevMemAllocImpl::gather_stream_free_blk_and_release_full() { | ||||
size_t gathered_size = 0; | |||||
MGB_LOCK_GUARD(m_mutex); | |||||
for (auto &&pair: m_stream_alloc) { | |||||
auto ch = pair.second.get(); | |||||
auto &&chmtx = ch->m_mutex; | |||||
MGB_LOCK_GUARD(chmtx); | |||||
for (auto &&i: ch->m_free_blk_size) { | |||||
merge_free_unsafe(i.first); | |||||
gathered_size += i.first.size; | |||||
} | |||||
ch->m_free_blk_addr.clear(); | |||||
ch->m_free_blk_size.clear(); | |||||
} | |||||
mgb_assert(gathered_size <= m_used_size.load()); | |||||
m_used_size -= gathered_size; | |||||
size_t free_size = 0; | size_t free_size = 0; | ||||
using Iter = decltype(m_free_blk_size.begin()); | |||||
std::vector<void*> to_free_by_raw; | std::vector<void*> to_free_by_raw; | ||||
for (Iter i = m_free_blk_size.begin(), inext; i != m_free_blk_size.end(); | |||||
i = inext) { | |||||
inext = i; | |||||
++ inext; | |||||
auto &&blk = i->first; | |||||
if (blk.addr.is_head) { | |||||
auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); | |||||
mgb_assert(riter != m_alloc_from_raw.end() && | |||||
blk.size <= riter->second); | |||||
if (blk.size == riter->second) { | |||||
to_free_by_raw.push_back(blk.addr.addr_ptr()); | |||||
free_size += blk.size; | |||||
auto j = i->second.aiter; | |||||
m_free_blk_size.erase(i); | |||||
m_free_blk_addr.erase(j); | |||||
m_alloc_from_raw.erase(riter); | |||||
MGB_LOCK_GUARD(m_mutex); | |||||
auto return_full_free_blk_unsafe = [&](MemAllocImplHelper* alloc) { | |||||
auto&& free_blk_size = alloc->m_free_blk_size; | |||||
auto&& free_blk_addr = alloc->m_free_blk_addr; | |||||
using Iter = decltype(m_free_blk_size.begin()); | |||||
for (Iter i = free_blk_size.begin(), inext; i != free_blk_size.end(); | |||||
i = inext) { | |||||
inext = i; | |||||
++ inext; | |||||
auto &&blk = i->first; | |||||
if (blk.addr.is_head) { | |||||
auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); | |||||
mgb_assert(riter != m_alloc_from_raw.end() && | |||||
blk.size <= riter->second); | |||||
if (blk.size == riter->second) { | |||||
to_free_by_raw.push_back(blk.addr.addr_ptr()); | |||||
free_size += blk.size; | |||||
auto j = i->second.aiter; | |||||
free_blk_size.erase(i); | |||||
free_blk_addr.erase(j); | |||||
m_alloc_from_raw.erase(riter); | |||||
} | |||||
} | |||||
} | |||||
}; | |||||
if (auto child = get_single_child_stream_unsafe()) { | |||||
MGB_LOCK_GUARD(child->m_mutex); | |||||
return_full_free_blk_unsafe(child); | |||||
mgb_assert(free_size <= m_used_size.load()); | |||||
m_used_size -= free_size; | |||||
} else { | |||||
size_t gathered_size = 0; | |||||
for (auto &&pair: m_stream_alloc) { | |||||
auto ch = pair.second.get(); | |||||
auto &&chmtx = ch->m_mutex; | |||||
MGB_LOCK_GUARD(chmtx); | |||||
for (auto &&i: ch->m_free_blk_size) { | |||||
merge_free_unsafe(i.first); | |||||
gathered_size += i.first.size; | |||||
} | } | ||||
ch->m_free_blk_addr.clear(); | |||||
ch->m_free_blk_size.clear(); | |||||
} | } | ||||
mgb_assert(gathered_size <= m_used_size.load()); | |||||
m_used_size -= gathered_size; | |||||
} | } | ||||
return_full_free_blk_unsafe(this); | |||||
m_tot_allocated_from_raw -= free_size; | m_tot_allocated_from_raw -= free_size; | ||||
// we have to sync to ensure no kernel on the child stream still uses | // we have to sync to ensure no kernel on the child stream still uses | ||||
@@ -359,6 +373,25 @@ FreeMemStat DevMemAllocImpl::get_free_memory_dev() { | |||||
return ret; | return ret; | ||||
} | } | ||||
void DevMemAllocImpl::insert_free_unsafe(const FreeBlock &block) { | |||||
if (auto child = get_single_child_stream_unsafe()) { | |||||
{ | |||||
MGB_LOCK_GUARD(child->m_mutex); | |||||
child->insert_free_unsafe(block); | |||||
} | |||||
m_used_size += block.size; | |||||
} else { | |||||
MemAllocImplHelper::insert_free_unsafe(block); | |||||
} | |||||
} | |||||
/*!
 * \brief return the stream allocator stored in m_stream_alloc when it holds
 *      exactly one entry; return nullptr for any other entry count
 *
 * NOTE(review): no lock is taken here; presumably the caller must already
 * hold m_mutex -- confirm against the callers.
 */
StreamMemAllocImpl* DevMemAllocImpl::get_single_child_stream_unsafe() {
    return m_stream_alloc.size() == 1 ? m_stream_alloc.begin()->second.get()
                                      : nullptr;
}
DevMemAllocImpl::~DevMemAllocImpl() { | DevMemAllocImpl::~DevMemAllocImpl() { | ||||
for (auto &&i: m_alloc_from_raw) | for (auto &&i: m_alloc_from_raw) | ||||
m_raw_allocator->free(i.first); | m_raw_allocator->free(i.first); | ||||
@@ -94,7 +94,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { | |||||
* \brief directly insert a free block into m_free_blk_size and | * \brief directly insert a free block into m_free_blk_size and | ||||
* m_free_blk_addr, without merging | * m_free_blk_addr, without merging | ||||
*/ | */ | ||||
inline void insert_free_unsafe(const FreeBlock &block); | |||||
virtual void insert_free_unsafe(const FreeBlock &block); | |||||
/*! | /*! | ||||
* \brief allocate from parent allocator; this method must either return | * \brief allocate from parent allocator; this method must either return | ||||
@@ -153,6 +153,12 @@ class StreamMemAllocImpl final: public StreamMemAlloc, | |||||
{} | {} | ||||
}; | }; | ||||
/*! | |||||
* \note DevMemAlloc has a two-level structure, but when only one stream is | |||||
* registered into the DevMemAlloc, the DevMemAlloc behaves like a | |||||
* single-level allocator (i.e. only the FreeBlock pool in its child stream | |||||
* allocator is used) for better performance | |||||
*/ | |||||
class DevMemAllocImpl final: public DevMemAlloc, | class DevMemAllocImpl final: public DevMemAlloc, | ||||
public MemAllocImplHelper { | public MemAllocImplHelper { | ||||
friend class StreamMemAllocImpl; | friend class StreamMemAllocImpl; | ||||
@@ -193,6 +199,14 @@ class DevMemAllocImpl final: public DevMemAlloc, | |||||
size_t get_used_memory() override { return m_used_size.load(); } | size_t get_used_memory() override { return m_used_size.load(); } | ||||
void insert_free_unsafe(const FreeBlock &block) override; | |||||
/*! | |||||
* \brief return stream allocator if DevMemAlloc has single child, | |||||
* otherwise return nullptr | |||||
*/ | |||||
StreamMemAllocImpl* get_single_child_stream_unsafe(); | |||||
public: | public: | ||||
DevMemAllocImpl( | DevMemAllocImpl( | ||||
int device, size_t reserve_size, | int device, size_t reserve_size, | ||||
@@ -209,18 +209,73 @@ TEST(TestMemAlloc, Alloc) { | |||||
auto ptr = strm_alloc->alloc_shared(REQ); | auto ptr = strm_alloc->alloc_shared(REQ); | ||||
EXPECT_EQ(REQ, strm_alloc->get_used_memory()); | EXPECT_EQ(REQ, strm_alloc->get_used_memory()); | ||||
EXPECT_EQ(0u, strm_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(REQ, dev_alloc->get_used_memory()); | |||||
EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(TOT - REQ, strm_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(TOT, dev_alloc->get_used_memory()); | |||||
EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||||
auto addr = ptr.get(); | auto addr = ptr.get(); | ||||
ptr.reset(); | ptr.reset(); | ||||
EXPECT_EQ(0u, strm_alloc->get_used_memory()); | EXPECT_EQ(0u, strm_alloc->get_used_memory()); | ||||
EXPECT_EQ(REQ, strm_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(REQ, dev_alloc->get_used_memory()); | |||||
EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(TOT, strm_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(TOT, dev_alloc->get_used_memory()); | |||||
EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||||
EXPECT_EQ(addr, strm_alloc->alloc_shared(REQ).get()); | EXPECT_EQ(addr, strm_alloc->alloc_shared(REQ).get()); | ||||
} | } | ||||
// Single-stream scenario on a 7000-byte pool: allocate 2000 and release it,
// then allocate 3000 and expect the same address as the first allocation,
// then allocate 4000 while the 3000 block is still held.
// NOTE(review): judging by the test name, the point is that the released
// 2000-byte block is merged with the adjacent free space -- confirm.
TEST(TestMemAlloc, MergeFreeBlock) {
    using StreamKey = DevMemAlloc::StreamKey;
    constexpr size_t POOL_SIZE = 7000;
    constexpr size_t FIRST_REQ = 2000, SECOND_REQ = 3000, THIRD_REQ = 4000;

    auto raw = std::make_shared<DummyAllocator>(POOL_SIZE);
    auto policy = std::make_shared<DummyRuntimePolicy>(0);
    auto device_alloc = DevMemAlloc::make(0, POOL_SIZE, raw, policy);

    // only the address of this variable is used, as the stream key
    StreamKey key_storage = nullptr;
    auto stream_alloc =
            device_alloc->add_stream(static_cast<StreamKey>(&key_storage));

    auto held = stream_alloc->alloc_shared(FIRST_REQ);
    auto first_addr = held.get();
    held.reset();

    // a larger request is expected to start at the just-released address
    held = stream_alloc->alloc_shared(SECOND_REQ);
    EXPECT_EQ(first_addr, held.get());

    // result intentionally discarded; FIRST_REQ is free again, so
    // SECOND_REQ + THIRD_REQ equals the pool size
    stream_alloc->alloc_shared(THIRD_REQ);
}
// Two streams registered on one device allocator: checks the used/free
// counters reported by each stream allocator and by the device allocator
// after every allocation and release.
TEST(TestMemAlloc, AllocTwoStream) {
    using StreamKey = DevMemAlloc::StreamKey;
    constexpr size_t POOL_SIZE = 2048, SIZE_A = 1000, SIZE_B = 2000;

    auto raw = std::make_shared<DummyAllocator>(POOL_SIZE);
    auto policy = std::make_shared<DummyRuntimePolicy>(0);
    auto device_alloc = DevMemAlloc::make(0, POOL_SIZE, raw, policy);

    // only the addresses of these variables are used, as distinct stream keys
    StreamKey key_a, key_b;
    auto stream_a = device_alloc->add_stream(static_cast<StreamKey>(&key_a));
    auto stream_b = device_alloc->add_stream(static_cast<StreamKey>(&key_b));
    ASSERT_NE(stream_a, stream_b);

    // stream A allocates SIZE_A
    auto block_a = stream_a->alloc_shared(SIZE_A);
    EXPECT_EQ(SIZE_A, stream_a->get_used_memory());
    EXPECT_EQ(0u, stream_a->get_free_memory().tot);
    EXPECT_EQ(SIZE_A, device_alloc->get_used_memory());
    EXPECT_EQ(POOL_SIZE - SIZE_A, device_alloc->get_free_memory().tot);

    // stream A releases it: the block shows up as free in stream A while
    // the device-level counters are expected to stay unchanged
    block_a.reset();
    EXPECT_EQ(0u, stream_a->get_used_memory());
    EXPECT_EQ(SIZE_A, stream_a->get_free_memory().tot);
    EXPECT_EQ(SIZE_A, device_alloc->get_used_memory());
    EXPECT_EQ(POOL_SIZE - SIZE_A, device_alloc->get_free_memory().tot);

    // stream B allocates SIZE_B; stream A is expected to report no free
    // memory afterwards
    auto block_b = stream_b->alloc_shared(SIZE_B);
    EXPECT_EQ(0u, stream_a->get_free_memory().tot);
    EXPECT_EQ(SIZE_B, stream_b->get_used_memory());
    EXPECT_EQ(0u, stream_b->get_free_memory().tot);
    EXPECT_EQ(SIZE_B, device_alloc->get_used_memory());
    EXPECT_EQ(0u, device_alloc->get_free_memory().tot);

    // stream B releases it: the block shows up as free in stream B while
    // the device-level counters are expected to stay unchanged
    block_b.reset();
    EXPECT_EQ(0u, stream_b->get_used_memory());
    EXPECT_EQ(SIZE_B, stream_b->get_free_memory().tot);
    EXPECT_EQ(SIZE_B, device_alloc->get_used_memory());
    EXPECT_EQ(0u, device_alloc->get_free_memory().tot);
}
TEST(TestMemAlloc, AllocMoreThanReserve) { | TEST(TestMemAlloc, AllocMoreThanReserve) { | ||||
constexpr size_t RES = 1000, TOT = 2048, REQ = 2048; | constexpr size_t RES = 1000, TOT = 2048, REQ = 2048; | ||||