diff --git a/src/core/impl/comp_node/mem_alloc/impl.cpp b/src/core/impl/comp_node/mem_alloc/impl.cpp index 7fdebc38..2f9f0ba7 100644 --- a/src/core/impl/comp_node/mem_alloc/impl.cpp +++ b/src/core/impl/comp_node/mem_alloc/impl.cpp @@ -267,45 +267,59 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { } size_t DevMemAllocImpl::gather_stream_free_blk_and_release_full() { - size_t gathered_size = 0; - MGB_LOCK_GUARD(m_mutex); - for (auto &&pair: m_stream_alloc) { - auto ch = pair.second.get(); - auto &&chmtx = ch->m_mutex; - - MGB_LOCK_GUARD(chmtx); - for (auto &&i: ch->m_free_blk_size) { - merge_free_unsafe(i.first); - gathered_size += i.first.size; - } - ch->m_free_blk_addr.clear(); - ch->m_free_blk_size.clear(); - } - mgb_assert(gathered_size <= m_used_size.load()); - m_used_size -= gathered_size; - size_t free_size = 0; - using Iter = decltype(m_free_blk_size.begin()); std::vector to_free_by_raw; - for (Iter i = m_free_blk_size.begin(), inext; i != m_free_blk_size.end(); - i = inext) { - inext = i; - ++ inext; - auto &&blk = i->first; - if (blk.addr.is_head) { - auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); - mgb_assert(riter != m_alloc_from_raw.end() && - blk.size <= riter->second); - if (blk.size == riter->second) { - to_free_by_raw.push_back(blk.addr.addr_ptr()); - free_size += blk.size; - auto j = i->second.aiter; - m_free_blk_size.erase(i); - m_free_blk_addr.erase(j); - m_alloc_from_raw.erase(riter); + + MGB_LOCK_GUARD(m_mutex); + auto return_full_free_blk_unsafe = [&](MemAllocImplHelper* alloc) { + auto&& free_blk_size = alloc->m_free_blk_size; + auto&& free_blk_addr = alloc->m_free_blk_addr; + using Iter = decltype(m_free_blk_size.begin()); + for (Iter i = free_blk_size.begin(), inext; i != free_blk_size.end(); + i = inext) { + inext = i; + ++ inext; + auto &&blk = i->first; + if (blk.addr.is_head) { + auto riter = m_alloc_from_raw.find(blk.addr.addr_ptr()); + mgb_assert(riter != m_alloc_from_raw.end() && + blk.size 
<= riter->second); + if (blk.size == riter->second) { + to_free_by_raw.push_back(blk.addr.addr_ptr()); + free_size += blk.size; + auto j = i->second.aiter; + free_blk_size.erase(i); + free_blk_addr.erase(j); + m_alloc_from_raw.erase(riter); + } + } + } + }; + + if (auto child = get_single_child_stream_unsafe()) { + MGB_LOCK_GUARD(child->m_mutex); + return_full_free_blk_unsafe(child); + mgb_assert(free_size <= m_used_size.load()); + m_used_size -= free_size; + } else { + size_t gathered_size = 0; + for (auto &&pair: m_stream_alloc) { + auto ch = pair.second.get(); + auto &&chmtx = ch->m_mutex; + + MGB_LOCK_GUARD(chmtx); + for (auto &&i: ch->m_free_blk_size) { + merge_free_unsafe(i.first); + gathered_size += i.first.size; } + ch->m_free_blk_addr.clear(); + ch->m_free_blk_size.clear(); } + mgb_assert(gathered_size <= m_used_size.load()); + m_used_size -= gathered_size; } + + return_full_free_blk_unsafe(this); m_tot_allocated_from_raw -= free_size; // we have to sync to ensure no kernel on the child stream still uses @@ -359,6 +373,25 @@ FreeMemStat DevMemAllocImpl::get_free_memory_dev() { return ret; } +void DevMemAllocImpl::insert_free_unsafe(const FreeBlock &block) { + if (auto child = get_single_child_stream_unsafe()) { + { + MGB_LOCK_GUARD(child->m_mutex); + child->insert_free_unsafe(block); + } + m_used_size += block.size; + } else { + MemAllocImplHelper::insert_free_unsafe(block); + } +} + +StreamMemAllocImpl* DevMemAllocImpl::get_single_child_stream_unsafe() { + if (m_stream_alloc.size() == 1) { + return m_stream_alloc.begin()->second.get(); + } + return nullptr; +} + DevMemAllocImpl::~DevMemAllocImpl() { for (auto &&i: m_alloc_from_raw) m_raw_allocator->free(i.first); diff --git a/src/core/impl/comp_node/mem_alloc/impl.h b/src/core/impl/comp_node/mem_alloc/impl.h index 73c2c0b0..bb41d5d1 100644 --- a/src/core/impl/comp_node/mem_alloc/impl.h +++ b/src/core/impl/comp_node/mem_alloc/impl.h @@ -94,7 +94,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { 
* \brief directly insert a free block into m_free_blk_size and * m_free_blk_addr, without merging */ - inline void insert_free_unsafe(const FreeBlock &block); + virtual void insert_free_unsafe(const FreeBlock &block); /*! * \brief allocate from parent allocator; this method must either return @@ -153,6 +153,12 @@ class StreamMemAllocImpl final: public StreamMemAlloc, {} }; +/*! + * \note DevMemAlloc has a two-level structure, but when only one stream is + * registered into the DevMemAlloc, the DevMemAlloc behaves like a + * single-level allocator (i.e. only the FreeBlock pool in its child stream + * allocator will be used) for better performance. + */ class DevMemAllocImpl final: public DevMemAlloc, public MemAllocImplHelper { friend class StreamMemAllocImpl; @@ -193,6 +199,14 @@ class DevMemAllocImpl final: public DevMemAlloc, size_t get_used_memory() override { return m_used_size.load(); } + void insert_free_unsafe(const FreeBlock &block) override; + + /*! + * \brief return stream allocator if DevMemAlloc has a single child, + * otherwise return nullptr + */ + StreamMemAllocImpl* get_single_child_stream_unsafe(); + public: DevMemAllocImpl( int device, size_t reserve_size, diff --git a/src/core/test/mem_alloc.cpp b/src/core/test/mem_alloc.cpp index b9e87341..0896d6bb 100644 --- a/src/core/test/mem_alloc.cpp +++ b/src/core/test/mem_alloc.cpp @@ -209,18 +209,73 @@ TEST(TestMemAlloc, Alloc) { auto ptr = strm_alloc->alloc_shared(REQ); EXPECT_EQ(REQ, strm_alloc->get_used_memory()); - EXPECT_EQ(0u, strm_alloc->get_free_memory().tot); - EXPECT_EQ(REQ, dev_alloc->get_used_memory()); - EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); + EXPECT_EQ(TOT - REQ, strm_alloc->get_free_memory().tot); + EXPECT_EQ(TOT, dev_alloc->get_used_memory()); + EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); auto addr = ptr.get(); ptr.reset(); EXPECT_EQ(0u, strm_alloc->get_used_memory()); - EXPECT_EQ(REQ, strm_alloc->get_free_memory().tot); - EXPECT_EQ(REQ, 
dev_alloc->get_used_memory()); - EXPECT_EQ(TOT - REQ, dev_alloc->get_free_memory().tot); + EXPECT_EQ(TOT, strm_alloc->get_free_memory().tot); + EXPECT_EQ(TOT, dev_alloc->get_used_memory()); + EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); EXPECT_EQ(addr, strm_alloc->alloc_shared(REQ).get()); } +TEST(TestMemAlloc, MergeFreeBlock) { + using StreamKey = DevMemAlloc::StreamKey; + auto raw_alloc = std::make_shared(7000); + auto runtime_policy = std::make_shared(0); + auto dev_alloc = DevMemAlloc::make(0, 7000, raw_alloc, runtime_policy); + + StreamKey stream_key = nullptr; + auto strm_alloc = + dev_alloc->add_stream(static_cast(&stream_key)); + + auto ptr = strm_alloc->alloc_shared(2000); + auto addr = ptr.get(); + ptr.reset(); + ptr = strm_alloc->alloc_shared(3000); + EXPECT_EQ(addr, ptr.get()); + strm_alloc->alloc_shared(4000); +} + +TEST(TestMemAlloc, AllocTwoStream) { + constexpr size_t TOT = 2048, REQ0 = 1000, REQ1 = 2000; + using StreamKey = DevMemAlloc::StreamKey; + auto raw_alloc = std::make_shared(TOT); + auto runtime_policy = std::make_shared(0); + auto dev_alloc = DevMemAlloc::make(0, TOT, raw_alloc, runtime_policy); + + StreamKey stream_key0, stream_key1; + auto strm_alloc0 = + dev_alloc->add_stream(static_cast(&stream_key0)), + strm_alloc1 = + dev_alloc->add_stream(static_cast(&stream_key1)); + ASSERT_NE(strm_alloc0, strm_alloc1); + + auto ptr0 = strm_alloc0->alloc_shared(REQ0); + EXPECT_EQ(REQ0, strm_alloc0->get_used_memory()); + EXPECT_EQ(0u, strm_alloc0->get_free_memory().tot); + EXPECT_EQ(REQ0, dev_alloc->get_used_memory()); + EXPECT_EQ(TOT - REQ0, dev_alloc->get_free_memory().tot); + ptr0.reset(); + EXPECT_EQ(0u, strm_alloc0->get_used_memory()); + EXPECT_EQ(REQ0, strm_alloc0->get_free_memory().tot); + EXPECT_EQ(REQ0, dev_alloc->get_used_memory()); + EXPECT_EQ(TOT - REQ0, dev_alloc->get_free_memory().tot); + auto ptr1 = strm_alloc1->alloc_shared(REQ1); + EXPECT_EQ(0u, strm_alloc0->get_free_memory().tot); + EXPECT_EQ(REQ1, 
strm_alloc1->get_used_memory()); + EXPECT_EQ(0u, strm_alloc1->get_free_memory().tot); + EXPECT_EQ(REQ1, dev_alloc->get_used_memory()); + EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); + ptr1.reset(); + EXPECT_EQ(0u, strm_alloc1->get_used_memory()); + EXPECT_EQ(REQ1, strm_alloc1->get_free_memory().tot); + EXPECT_EQ(REQ1, dev_alloc->get_used_memory()); + EXPECT_EQ(0u, dev_alloc->get_free_memory().tot); +} + TEST(TestMemAlloc, AllocMoreThanReserve) { constexpr size_t RES = 1000, TOT = 2048, REQ = 2048;