From 330100ecc8b37ac04c520f2370e2a57f2382b0bc Mon Sep 17 00:00:00 2001
From: wuweikang
Date: Sat, 28 Aug 2021 14:55:42 +0800
Subject: [PATCH] release unused block memory when CheckAndReleaseMemory

Split the free-memory check out of ModelExecutor::CheckAndReleaseMemory into
ModelExecutor::CheckFreeMemory, which reports its result through the
is_enough output parameter. CheckAndReleaseMemory re-runs the check after
each loaded static shape model is released and returns as soon as enough
memory is free. After the loop it calls the new
CachingAllocator::TryFreeUnusedBlocks on the RT_MEMORY_HBM caching allocator,
which takes the allocator lock and calls FreeCachedBlocks, and checks again.

---
 ge/graph/execute/model_executor.cc          | 36 ++++++++++++++++++++++++++++++++---
 ge/graph/execute/model_executor.h           |  2 ++
 ge/graph/manager/graph_caching_allocator.cc |  7 ++++++
 ge/graph/manager/graph_caching_allocator.h  |  2 ++
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/ge/graph/execute/model_executor.cc b/ge/graph/execute/model_executor.cc
index 993ba8c3..fa09e8ff 100644
--- a/ge/graph/execute/model_executor.cc
+++ b/ge/graph/execute/model_executor.cc
@@ -418,7 +418,7 @@ void ModelExecutor::ReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr
   }
 }
 
-Status ModelExecutor::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) {
+Status ModelExecutor::CheckFreeMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node, bool &is_enough) {
   GELOGI("graph_id[%u]", graph_node->GetGraphId());
   int64_t free_memory = 0;
   Status result = GraphLoader::GetMemoryInfo(free_memory);
@@ -441,10 +441,24 @@ Status ModelExecutor::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gr
     return INTERNAL_ERROR;
   }
+  is_enough = false;
   if (free_memory >= (memory_size + weight_size)) {
+    is_enough = true;
+  }
+  return SUCCESS;
+}
+
+Status ModelExecutor::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) {
+  bool is_enough = false;
+  Status st = CheckFreeMemory(ge_model, graph_node, is_enough);
+  if (st != SUCCESS) {
+    GELOGE(FAILED, "[Check][Free][Memory] failed");
+  }
+  if (is_enough) {
     return SUCCESS;
   }
 
   std::lock_guard lock(mutex_);
+  bool is_unknown_model_exist = false;
   for (const auto &it : graph_nodes_) {
     auto graph_id = it.second->GetGraphId();
     auto model = it.second->GetGeRootModel();
@@ -457,15 +471,31 @@ Status ModelExecutor::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gr
     bool is_unknown_shape = false;
     GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape));
     if (is_unknown_shape) {
-      GELOGD("model_id[%u] graph_id[%u] is unknown model, not release memory", model_id, graph_id);
+      GELOGD("model_id[%u] graph_id[%u] is unknown model", model_id, graph_id);
       continue;
     }
-    // not loaded,no need unload
+    // not loaded, no need unload
     if (!it.second->GetLoadFlag()) {
       GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id);
       continue;
     }
+    // unload static shape model
+    int64_t value = 0;
+    int64_t session_id = AttrUtils::GetInt(ge_model, MODEL_ATTR_SESSION_ID, value) ? value : 0;
     ReleaseMemory(ge_model, it.second, model_ids, graph_id, static_cast<uint64_t>(session_id));
+    st = CheckFreeMemory(ge_model, graph_node, is_enough);
+    if (st != SUCCESS) {
+      GELOGE(FAILED, "[Check][Free][Memory] failed");
+    }
+    if (is_enough) {
+      return SUCCESS;
+    }
+  }
+  // unload unknown shape model
+  (void)MemManager::Instance().CachingInstance(RT_MEMORY_HBM).TryFreeUnusedBlocks();
+  st = CheckFreeMemory(ge_model, graph_node, is_enough);
+  if (st != SUCCESS) {
+    GELOGE(FAILED, "[Check][Free][Memory] failed, still out of memory after releasing all models.");
   }
 
   return SUCCESS;
diff --git a/ge/graph/execute/model_executor.h b/ge/graph/execute/model_executor.h
index f11441e9..fea56c62 100644
--- a/ge/graph/execute/model_executor.h
+++ b/ge/graph/execute/model_executor.h
@@ -107,6 +107,8 @@ class ModelExecutor : public Executor {
                      uint32_t graph_id, uint64_t session_id);
   Status CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node);
 
+  Status CheckFreeMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node, bool &is_enough);
+
   void UpdateLocalOmeContext(const GraphNodePtr &graph_node);
 
   void RunThread();
diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc
index 7b316fc3..44f0e1f4 100644
--- a/ge/graph/manager/graph_caching_allocator.cc
+++ b/ge/graph/manager/graph_caching_allocator.cc
@@ -376,6 +376,13 @@ void CachingAllocator::TryFreeBlocks() {
   if (allocated_blocks_.empty()) {
     (void) FreeCachedBlocks();
   }
+
+}
+
+void CachingAllocator::TryFreeUnusedBlocks() {
+  GELOGI("Try free blocks.");
+  std::lock_guard lock(mutex_);
+  (void) FreeCachedBlocks();
 }
 
 void CachingAllocator::FreeBlockBins() {
diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h
index d00858f3..2676e9a5 100644
--- a/ge/graph/manager/graph_caching_allocator.h
+++ b/ge/graph/manager/graph_caching_allocator.h
@@ -102,6 +102,8 @@ class CachingAllocator {
   ///
   void TryFreeBlocks();
 
+  void TryFreeUnusedBlocks();
+
  private:
   ///