GitOrigin-RevId: 61874faa6d
tags/v1.0.0-rc1
@@ -267,45 +267,59 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { | |||
} | |||
size_t DevMemAllocImpl::gather_stream_free_blk_and_release_full() { | |||
size_t gathered_size = 0; | |||
MGB_LOCK_GUARD(m_mutex); | |||
for (auto &&pair: m_stream_alloc) { | |||
auto ch = pair.second.get(); | |||
auto &&chmtx = ch->m_mutex; | |||
MGB_LOCK_GUARD(chmtx); | |||
for (auto &&i: ch->m_free_blk_size) { | |||
merge_free_unsafe(i.first); | |||
gathered_size += i.first.size; | |||
} | |||
ch->m_free_blk_addr.clear(); | |||
ch->m_free_blk_size.clear(); | |||
} | |||
mgb_assert(gathered_size <= m_used_size.load()); | |||
m_used_size -= gathered_size; | |||
size_t free_size = 0; | |||
using Iter = decltype(m_free_blk_size.begin()); | |||
std::vector<void*> to_free_by_raw; | |||
for (Iter i = m_free_blk_size.begin(), inext; i != m_free_blk_size.end(); | |||
i = inext) { | |||
inext = i; | |||
++ inext; | |||
auto &&blk = i->first; | |||
if (blk.addr.is_head) { | |||
auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); | |||
mgb_assert(riter != m_alloc_from_raw.end() && | |||
blk.size <= riter->second); | |||
if (blk.size == riter->second) { | |||
to_free_by_raw.push_back(blk.addr.addr_ptr()); | |||
free_size += blk.size; | |||
auto j = i->second.aiter; | |||
m_free_blk_size.erase(i); | |||
m_free_blk_addr.erase(j); | |||
m_alloc_from_raw.erase(riter); | |||
MGB_LOCK_GUARD(m_mutex); | |||
auto return_full_free_blk_unsafe = [&](MemAllocImplHelper* alloc) { | |||
auto&& free_blk_size = alloc->m_free_blk_size; | |||
auto&& free_blk_addr = alloc->m_free_blk_addr; | |||
using Iter = decltype(m_free_blk_size.begin()); | |||
for (Iter i = free_blk_size.begin(), inext; i != free_blk_size.end(); | |||
i = inext) { | |||
inext = i; | |||
++ inext; | |||
auto &&blk = i->first; | |||
if (blk.addr.is_head) { | |||
auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); | |||
mgb_assert(riter != m_alloc_from_raw.end() && | |||
blk.size <= riter->second); | |||
if (blk.size == riter->second) { | |||
to_free_by_raw.push_back(blk.addr.addr_ptr()); | |||
free_size += blk.size; | |||
auto j = i->second.aiter; | |||
free_blk_size.erase(i); | |||
free_blk_addr.erase(j); | |||
m_alloc_from_raw.erase(riter); | |||
} | |||
} | |||
} | |||
}; | |||
if (auto child = get_single_child_stream_unsafe()) { | |||
MGB_LOCK_GUARD(child->m_mutex); | |||
return_full_free_blk_unsafe(child); | |||
mgb_assert(free_size <= m_used_size.load()); | |||
m_used_size -= free_size; | |||
} else { | |||
size_t gathered_size = 0; | |||
for (auto &&pair: m_stream_alloc) { | |||
auto ch = pair.second.get(); | |||
auto &&chmtx = ch->m_mutex; | |||
MGB_LOCK_GUARD(chmtx); | |||
for (auto &&i: ch->m_free_blk_size) { | |||
merge_free_unsafe(i.first); | |||
gathered_size += i.first.size; | |||
} | |||
ch->m_free_blk_addr.clear(); | |||
ch->m_free_blk_size.clear(); | |||
} | |||
mgb_assert(gathered_size <= m_used_size.load()); | |||
m_used_size -= gathered_size; | |||
} | |||
return_full_free_blk_unsafe(this); | |||
m_tot_allocated_from_raw -= free_size; | |||
// we have to sync to ensure no kernel on the child stream still uses | |||
@@ -359,6 +373,25 @@ FreeMemStat DevMemAllocImpl::get_free_memory_dev() { | |||
return ret; | |||
} | |||
void DevMemAllocImpl::insert_free_unsafe(const FreeBlock &block) { | |||
if (auto child = get_single_child_stream_unsafe()) { | |||
{ | |||
MGB_LOCK_GUARD(child->m_mutex); | |||
child->insert_free_unsafe(block); | |||
} | |||
m_used_size += block.size; | |||
} else { | |||
MemAllocImplHelper::insert_free_unsafe(block); | |||
} | |||
} | |||
/*!
 * Return the sole registered child stream allocator, or nullptr when the
 * number of registered stream allocators is not exactly one.
 */
StreamMemAllocImpl* DevMemAllocImpl::get_single_child_stream_unsafe() {
    return m_stream_alloc.size() == 1 ? m_stream_alloc.begin()->second.get()
                                      : nullptr;
}
DevMemAllocImpl::~DevMemAllocImpl() { | |||
for (auto &&i: m_alloc_from_raw) | |||
m_raw_allocator->free(i.first); | |||
@@ -94,7 +94,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { | |||
* \brief directly insert a free block into m_free_blk_size and | |||
* m_free_blk_addr, without merging | |||
*/ | |||
inline void insert_free_unsafe(const FreeBlock &block); | |||
virtual void insert_free_unsafe(const FreeBlock &block); | |||
/*! | |||
* \brief allocate from parent allocator; this method must either return | |||
@@ -153,6 +153,12 @@ class StreamMemAllocImpl final: public StreamMemAlloc, | |||
{} | |||
}; | |||
/*!
 * \note DevMemAlloc has a two-level structure. However, when only one stream
 * is registered with it, DevMemAlloc behaves like a single-level allocator
 * (i.e. only the FreeBlock pool of its child stream allocator is used) for
 * better performance.
 */
class DevMemAllocImpl final: public DevMemAlloc, | |||
public MemAllocImplHelper { | |||
friend class StreamMemAllocImpl; | |||
@@ -193,6 +199,14 @@ class DevMemAllocImpl final: public DevMemAlloc, | |||
/*!
 * \brief current value of the m_used_size counter
 */
size_t get_used_memory() override {
    return m_used_size.load();
}
void insert_free_unsafe(const FreeBlock &block) override; | |||
/*!
 * \brief return the child stream allocator if this DevMemAlloc has exactly
 *      one child stream; otherwise return nullptr
 */
StreamMemAllocImpl* get_single_child_stream_unsafe(); | |||
public: | |||
DevMemAllocImpl( | |||
int device, size_t reserve_size, | |||
@@ -209,18 +209,73 @@ TEST(TestMemAlloc, Alloc) { | |||
auto ptr = strm_alloc->alloc_shared(REQ); | |||
EXPECT_EQ(REQ, strm_alloc->get_used_memory()); | |||
EXPECT_EQ(0u, strm_alloc->get_free_memory().tot); | |||
EXPECT_EQ(REQ, dev_alloc->get_used_memory()); | |||
EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); | |||
EXPECT_EQ(TOT - REQ, strm_alloc->get_free_memory().tot); | |||
EXPECT_EQ(TOT, dev_alloc->get_used_memory()); | |||
EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||
auto addr = ptr.get(); | |||
ptr.reset(); | |||
EXPECT_EQ(0u, strm_alloc->get_used_memory()); | |||
EXPECT_EQ(REQ, strm_alloc->get_free_memory().tot); | |||
EXPECT_EQ(REQ, dev_alloc->get_used_memory()); | |||
EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); | |||
EXPECT_EQ(TOT, strm_alloc->get_free_memory().tot); | |||
EXPECT_EQ(TOT, dev_alloc->get_used_memory()); | |||
EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); | |||
EXPECT_EQ(addr, strm_alloc->alloc_shared(REQ).get()); | |||
} | |||
// With a single registered stream, a released block must be reusable by a
// larger request that starts at the same address.
TEST(TestMemAlloc, MergeFreeBlock) {
    using StreamKey = DevMemAlloc::StreamKey;
    constexpr size_t CAPACITY = 7000, FIRST = 2000, SECOND = 3000,
                     THIRD = 4000;

    auto raw = std::make_shared<DummyAllocator>(CAPACITY);
    auto policy = std::make_shared<DummyRuntimePolicy>(0);
    auto dev = DevMemAlloc::make(0, CAPACITY, raw, policy);

    // only the address of the key is handed to add_stream
    StreamKey key = nullptr;
    auto stream = dev->add_stream(static_cast<StreamKey>(&key));

    // allocate, remember the address, then release the block
    auto blk = stream->alloc_shared(FIRST);
    auto first_addr = blk.get();
    blk.reset();

    // a larger request is expected to land on the same address
    blk = stream->alloc_shared(SECOND);
    EXPECT_EQ(first_addr, blk.get());

    // FIRST is free again and SECOND is still held; the result of this
    // request is discarded immediately and nothing is asserted on it
    stream->alloc_shared(THIRD);
}
// Checks used/free accounting of the device allocator and of two stream
// allocators registered on it.
TEST(TestMemAlloc, AllocTwoStream) {
    constexpr size_t TOT = 2048, REQ0 = 1000, REQ1 = 2000;
    using StreamKey = DevMemAlloc::StreamKey;

    auto raw = std::make_shared<DummyAllocator>(TOT);
    auto policy = std::make_shared<DummyRuntimePolicy>(0);
    auto dev = DevMemAlloc::make(0, TOT, raw, policy);

    // only the addresses of the keys are used; their values are never read
    StreamKey key_a, key_b;
    auto stream_a = dev->add_stream(static_cast<StreamKey>(&key_a));
    auto stream_b = dev->add_stream(static_cast<StreamKey>(&key_b));
    ASSERT_NE(stream_a, stream_b);

    // phase 1: stream A allocates REQ0
    auto held_a = stream_a->alloc_shared(REQ0);
    EXPECT_EQ(REQ0, stream_a->get_used_memory());
    EXPECT_EQ(0u, stream_a->get_free_memory().tot);
    EXPECT_EQ(REQ0, dev->get_used_memory());
    EXPECT_EQ(TOT - REQ0, dev->get_free_memory().tot);

    // phase 2: stream A releases; the block is reported as free by stream A
    // and still as used by the device allocator
    held_a.reset();
    EXPECT_EQ(0u, stream_a->get_used_memory());
    EXPECT_EQ(REQ0, stream_a->get_free_memory().tot);
    EXPECT_EQ(REQ0, dev->get_used_memory());
    EXPECT_EQ(TOT - REQ0, dev->get_free_memory().tot);

    // phase 3: stream B allocates REQ1; stream A is expected to report no
    // free memory afterwards
    auto held_b = stream_b->alloc_shared(REQ1);
    EXPECT_EQ(0u, stream_a->get_free_memory().tot);
    EXPECT_EQ(REQ1, stream_b->get_used_memory());
    EXPECT_EQ(0u, stream_b->get_free_memory().tot);
    EXPECT_EQ(REQ1, dev->get_used_memory());
    EXPECT_EQ(0u, dev->get_free_memory().tot);

    // phase 4: stream B releases; the block is reported as free by stream B
    held_b.reset();
    EXPECT_EQ(0u, stream_b->get_used_memory());
    EXPECT_EQ(REQ1, stream_b->get_free_memory().tot);
    EXPECT_EQ(REQ1, dev->get_used_memory());
    EXPECT_EQ(0u, dev->get_free_memory().tot);
}
TEST(TestMemAlloc, AllocMoreThanReserve) { | |||
constexpr size_t RES = 1000, TOT = 2048, REQ = 2048; | |||