GitOrigin-RevId: 0f910c34b6
tags/v1.9.0
@@ -14,6 +14,7 @@ | |||
#include "megbrain/imperative/backward_graph_opt.h" | |||
#include "megbrain/imperative/ops/autogen.h" | |||
#include "megbrain/imperative/proxy_graph_detail.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "megbrain/utils/mempool.h" | |||
#include "range/v3/all.hpp" | |||
@@ -1158,11 +1158,16 @@ void init_tensor(py::module m) { | |||
using Segment = TransformationManager::Segment; | |||
auto* channel = interpreter::Interpreter::inst().create_channel().release(); | |||
using Channel = interpreter::Interpreter::Channel; | |||
auto* channel = | |||
imperative::ResourceManager::create_global<std::unique_ptr<Channel>>( | |||
interpreter::Interpreter::inst().create_channel()) | |||
->get(); | |||
interpreter_for_py = channel; | |||
transformations.register_at<Segment::Eval>( | |||
std::make_shared<InterpreterTransformation>( | |||
std::unique_ptr<interpreter::Interpreter::Channel>(channel))); | |||
std::shared_ptr<Channel>(channel, [](Channel*) {}))); | |||
transformations.register_at<Segment::Scalar>( | |||
std::make_shared<ScalarTransformation>()); | |||
@@ -13,6 +13,7 @@ | |||
#include "megbrain/comp_node.h" | |||
#include "megbrain/imperative/blob_manager.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "megbrain/system.h" | |||
#include "./event_pool.h" | |||
@@ -61,8 +62,8 @@ protected: | |||
public: | |||
static AsyncReleaser* inst() { | |||
static AsyncReleaser releaser; | |||
return &releaser; | |||
static auto* releaser = ResourceManager::create_global<AsyncReleaser>(); | |||
return releaser; | |||
} | |||
~AsyncReleaser() { m_waiter.wait_task_queue_empty(); } | |||
@@ -10,6 +10,9 @@ | |||
*/ | |||
#include "./event_pool.h" | |||
#include <memory> | |||
#include "megbrain/imperative/resource_manager.h" | |||
namespace mgb { | |||
namespace imperative { | |||
@@ -17,22 +20,18 @@ namespace imperative { | |||
EventPool::EventPool(size_t flags) : m_flags{flags} {} | |||
EventPool& EventPool::with_timer() { | |||
static Spinlock lock; | |||
static std::unique_ptr<EventPool> ptr; | |||
MGB_LOCK_GUARD(lock); | |||
if (!ptr || ptr->is_finalized()) { | |||
ptr.reset(new EventPool(CompNode::Event::NEED_TIMER)); | |||
} | |||
return *ptr; | |||
static auto* sm_pool = | |||
ResourceManager::create_global<CompNodeDependentResource<EventPool>>([] { | |||
return std::unique_ptr<EventPool>( | |||
new EventPool(CompNode::Event::NEED_TIMER)); | |||
}); | |||
return **sm_pool; | |||
} | |||
EventPool& EventPool::without_timer() { | |||
static Spinlock lock; | |||
static std::unique_ptr<EventPool> ptr; | |||
MGB_LOCK_GUARD(lock); | |||
if (!ptr || ptr->is_finalized()) { | |||
ptr.reset(new EventPool()); | |||
} | |||
return *ptr; | |||
static auto* sm_pool = | |||
ResourceManager::create_global<CompNodeDependentResource<EventPool>>( | |||
[] { return std::unique_ptr<EventPool>(new EventPool()); }); | |||
return **sm_pool; | |||
} | |||
CompNode::Event* EventPool::alloc(CompNode cn) { | |||
CompNode::EventPool* pool; | |||
@@ -31,6 +31,8 @@ public: | |||
void free(CompNode::Event* event); | |||
std::shared_ptr<void> on_comp_node_finalize(); | |||
~EventPool(); | |||
using CompNodeDepedentObject::is_finalized; | |||
}; | |||
} // namespace imperative | |||
} // namespace mgb |
@@ -14,6 +14,7 @@ | |||
#include <sstream> | |||
#include "megbrain/imperative/ops/opr_attr.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "./op_trait.h" | |||
@@ -63,16 +64,16 @@ EncodedSubgraph OpDef::make_backward_graph( | |||
const SmallVector<bool>& output_has_grad) { | |||
using BackwardGraphCache = | |||
OpMethResultCache<EncodedSubgraph, SmallVector<bool>, SmallVector<bool>>; | |||
thread_local auto cache = std::make_unique<BackwardGraphCache>(); | |||
thread_local auto& cache = *ResourceManager::create_local<BackwardGraphCache>(); | |||
BackwardGraphCache::key_t cache_key{ | |||
const_cast<OpDef&>(def).shared_from_this(), | |||
inputs, | |||
{input_requires_grad, output_has_grad}}; | |||
auto iter = cache->find(cache_key); | |||
if (iter == cache->end()) { | |||
iter = cache->insert({cache_key, def.trait()->make_backward_graph( | |||
def, inputs, input_requires_grad, | |||
output_has_grad)}) | |||
auto iter = cache.find(cache_key); | |||
if (iter == cache.end()) { | |||
iter = cache.insert({cache_key, def.trait()->make_backward_graph( | |||
def, inputs, input_requires_grad, | |||
output_has_grad)}) | |||
.first; | |||
} | |||
return iter->second; | |||
@@ -86,12 +87,12 @@ EncodedSubgraph OpDef::make_forward_graph( | |||
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | |||
using ForwardGraphCache = | |||
OpMethResultCache<EncodedSubgraph, SmallVector<bool>, SmallVector<bool>>; | |||
thread_local auto cache = std::make_unique<ForwardGraphCache>(); | |||
thread_local auto& cache = *ResourceManager::create_local<ForwardGraphCache>(); | |||
ForwardGraphCache::key_t cache_key{ | |||
const_cast<OpDef&>(def).shared_from_this(), inputs}; | |||
auto iter = cache->find(cache_key); | |||
if (iter == cache->end()) { | |||
iter = cache->insert({cache_key, def.trait()->make_forward_graph(def, inputs)}) | |||
auto iter = cache.find(cache_key); | |||
if (iter == cache.end()) { | |||
iter = cache.insert({cache_key, def.trait()->make_forward_graph(def, inputs)}) | |||
.first; | |||
} | |||
return iter->second; | |||
@@ -9,6 +9,7 @@ | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include <atomic> | |||
#include <deque> | |||
#include "megbrain/imperative/graph_cache.h" | |||
@@ -16,6 +17,7 @@ | |||
#include "megbrain/imperative/ops/autogen.h" | |||
#include "megbrain/imperative/ops/opr_attr.h" | |||
#include "megbrain/imperative/ops/utility.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "megbrain/imperative/subgraph_detail.h" | |||
#include "megbrain/opr/io.h" | |||
#include "megbrain/opr/tensor_gen.h" | |||
@@ -510,16 +512,32 @@ struct ComputingGraphHolder { | |||
} | |||
}; | |||
static std::atomic<size_t> nr_cg_cache = 0; | |||
template <HolderKind Kind> | |||
ComputingGraphHolder<Kind>& get_computing_graph( | |||
std::shared_ptr<OpDef> compiled_op, | |||
const SmallVector<LogicalTensorDesc>& descs) { | |||
using ComputingGraphHolderCache = | |||
OpMethResultCache<std::deque<std::unique_ptr<ComputingGraphHolder<Kind>>>>; | |||
thread_local auto cache = std::make_unique<ComputingGraphHolderCache>(); | |||
thread_local auto& cache = ([]() -> auto& { | |||
mgb_assert( | |||
nr_cg_cache++ < 5, | |||
"using subgraph in too many threads, this causes resource leakage"); | |||
#if MGB_CUDA && defined(WIN32) | |||
        // FIXME: create as a global to skip resource finalization, because | |||
        // Windows with CUDA does not clean up global resources on exit | |||
return *ResourceManager::create_global<ComputingGraphHolderCache>(); | |||
#else | |||
// Otherwise this should be local because compnode may be unusable when global | |||
// resource finalizing. | |||
// For example, CpuCompNode.sync hang on because underlying thread died | |||
return *ResourceManager::create_local<ComputingGraphHolderCache>(); | |||
#endif | |||
})(); | |||
thread_local size_t nr_cg_holders = 0; | |||
typename ComputingGraphHolderCache::key_t cache_key = {compiled_op, descs}; | |||
auto& cg_holder_queue = (*cache)[cache_key]; | |||
auto& cg_holder_queue = cache[cache_key]; | |||
std::unique_ptr<ComputingGraphHolder<Kind>> holder; | |||
if (!cg_holder_queue.empty()) { | |||
// pick one | |||
@@ -12,6 +12,7 @@ | |||
#include "megbrain/imperative.h" | |||
#include "megbrain/imperative/blob_manager.h" | |||
#include "megbrain/imperative/profiler.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "./async_releaser.h" | |||
#include "./event_pool.h" | |||
@@ -30,13 +31,6 @@ class CompNodeSyncManager : public CompNodeDepedentObject { | |||
std::mutex m_mtx; | |||
public: | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround | |||
//! recovery resource by OS temporarily, may need remove this after | |||
//! upgrade cuda runtime | |||
static bool is_into_atexit; | |||
#endif | |||
std::shared_ptr<void> on_comp_node_finalize() override { | |||
MGB_LOCK_GUARD(m_mtx); | |||
m_blob2event.clear(); | |||
@@ -44,17 +38,7 @@ public: | |||
} | |||
static CompNodeSyncManager& inst() { | |||
static CompNodeSyncManager* sl_inst = new CompNodeSyncManager(); | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround | |||
//! recovery resource by OS temporarily, may need remove this after | |||
//! upgrade cuda runtime | |||
if (!is_into_atexit) { | |||
auto err = atexit([] { is_into_atexit = true; }); | |||
mgb_assert(!err, "failed to register atexit function"); | |||
} | |||
#endif | |||
static auto* sl_inst = ResourceManager::create_global<CompNodeSyncManager>(); | |||
return *sl_inst; | |||
} | |||
@@ -73,13 +57,6 @@ public: | |||
m_blob2event.erase(blob); | |||
} | |||
}; | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround | |||
//! recovery resource by OS temporarily, may need remove this after | |||
//! upgrade cuda runtime | |||
bool CompNodeSyncManager::is_into_atexit = false; | |||
#endif | |||
} // namespace | |||
@@ -106,15 +83,6 @@ Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} { | |||
Blob::~Blob() { | |||
BlobManager::inst()->unregister_blob(this); | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround | |||
//! recovery resource by OS temporarily, may need remove this after | |||
//! upgrade cuda runtime | |||
if (CompNodeSyncManager::is_into_atexit) | |||
return; | |||
#endif | |||
CompNodeSyncManager::inst().remove(this); | |||
} | |||
@@ -242,8 +210,6 @@ void Tensor::static_initialize() { | |||
AsyncReleaser::inst(); | |||
CompNodeSyncManager::inst(); | |||
MultiCNConstTensorCache::inst(); | |||
// clean all CompNodeDepedentObjects | |||
mgb_assert(!atexit(CompNode::finalize), "atexit register failed"); | |||
} | |||
} // namespace imperative | |||
@@ -0,0 +1,95 @@ | |||
/** | |||
* \file imperative/src/impl/resource_manager.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include <thread> | |||
#include <unordered_map> | |||
using namespace mgb; | |||
using namespace imperative; | |||
namespace { | |||
class LocalResourceManager; | |||
std::unordered_map<std::thread::id, std::shared_ptr<LocalResourceManager>> | |||
local_managers; | |||
std::mutex global_lock; | |||
bool throw_all_resources = false; | |||
//! Per-thread resource manager; remembers the id of the thread that
//! created it so it can be indexed in the global registry.
class LocalResourceManager final : public ResourceManager {
private:
    // Fixed at construction: identity of the owning thread.
    std::thread::id m_id = std::this_thread::get_id();

public:
    LocalResourceManager() = default;

    //! Thread id this manager belongs to (key in `local_managers`).
    std::thread::id id() const { return m_id; }
};
//! Process-wide resource manager. Its destructor runs at normal program
//! exit and also tears down every still-registered thread-local manager.
class GlobalResourceManager final : public ResourceManager {
public:
    ~GlobalResourceManager() {
#if MGB_CUDA && defined(WIN32)
        //! FIXME: the windows cuda driver shuts down before atexit handlers
        //! run, even when the handler is registered after cuda init! As a
        //! workaround, deliberately leak all resources and let the OS
        //! reclaim them; may be removable after a cuda runtime upgrade.
        // This flag makes ResourceManager::clear() leak the handles instead
        // of destroying them (see clear() below).
        throw_all_resources = true;
#endif
        // Clearing the map drops the owning shared_ptrs, destroying every
        // LocalResourceManager that is still registered.
        MGB_LOCK_GUARD(global_lock);
        local_managers.clear();
    }
};
class LocalResourceManagerRef : public NonCopyableObj { | |||
private: | |||
std::weak_ptr<LocalResourceManager> m_manager; | |||
public: | |||
LocalResourceManagerRef() { | |||
auto manager = std::make_shared<LocalResourceManager>(); | |||
mgb_assert( | |||
local_managers.insert({manager->id(), manager}).second, | |||
"duplicated local manager"); | |||
m_manager = manager; | |||
} | |||
~LocalResourceManagerRef() { | |||
if (auto manager = m_manager.lock()) { | |||
local_managers.erase(manager->id()); | |||
} | |||
} | |||
ResourceManager& operator*() { return *m_manager.lock(); } | |||
}; | |||
} // namespace | |||
// Destroy (or deliberately leak) every resource owned by this manager.
void ResourceManager::clear() {
    if (throw_all_resources) {
        // windows+cuda workaround (see ~GlobalResourceManager): move all
        // handles into a heap vector that is never freed, so their
        // destructors never run and the OS reclaims the memory at exit.
        // m_handles is left empty, making the loop below a no-op.
        new std::vector<std::any>(std::move(m_handles));
    }
    // Destroy in reverse creation order, mirroring normal C++ stack/static
    // teardown so later resources may depend on earlier ones.
    for (auto iter = m_handles.rbegin(); iter != m_handles.rend(); ++iter) {
        (*iter) = {};
    }
}
// Meyers singleton: constructed on first use, destroyed at program exit;
// its destructor also clears all registered thread-local managers.
ResourceManager& ResourceManager::get_global() {
    static GlobalResourceManager sl_manager;
    return sl_manager;
}
// One manager per thread: the thread_local ref registers it in the global
// map on first use and unregisters it when the thread exits.
ResourceManager& ResourceManager::get_local() {
    thread_local LocalResourceManagerRef tl_manager;
    return *tl_manager;
}
@@ -12,6 +12,7 @@ | |||
#include "megbrain/imperative/transformations/grad.h" | |||
#include "megbrain/imperative/graph_cache.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include <range/v3/all.hpp> | |||
@@ -24,7 +25,8 @@ static std::shared_ptr<OptimizedBackwardGraphResult> make_optimized_backward_gra | |||
// hash | |||
using OptimizedBackwardGraphCache = OpMethResultCache< | |||
std::shared_ptr<OptimizedBackwardGraphResult>, SmallVector<bool>>; | |||
thread_local auto cache = std::make_unique<OptimizedBackwardGraphCache>(); | |||
thread_local auto& cache = | |||
*ResourceManager::create_local<OptimizedBackwardGraphCache>(); | |||
OptimizedBackwardGraphCache::key_t cache_key{op}; | |||
SmallVector<LogicalTensorDesc>& input_descs = cache_key.inputs; | |||
std::get<0>(cache_key.extras) = inputs_require_grad.copy_into<SmallVector<bool>>(); | |||
@@ -34,8 +36,8 @@ static std::shared_ptr<OptimizedBackwardGraphResult> make_optimized_backward_gra | |||
input_descs[i].comp_node = inputs[i].device().cast<CompNodeValue>(); | |||
} | |||
auto iter = cache->find(cache_key); | |||
if (iter != cache->end()) { | |||
auto iter = cache.find(cache_key); | |||
if (iter != cache.end()) { | |||
return iter->second; | |||
} | |||
@@ -47,7 +49,7 @@ static std::shared_ptr<OptimizedBackwardGraphResult> make_optimized_backward_gra | |||
if (!bg.graph.empty()) { | |||
ret = std::make_shared<OptimizedBackwardGraphResult>(bg); | |||
} | |||
cache->emplace(cache_key, ret); | |||
cache.emplace(cache_key, ret); | |||
return ret; | |||
} | |||
@@ -14,6 +14,7 @@ | |||
#include <memory> | |||
#include <mutex> | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "megbrain/tensor.h" | |||
namespace mgb { | |||
@@ -278,8 +279,9 @@ struct MultiCNConstTensorCache : CompNodeDepedentObject { | |||
} | |||
static MultiCNConstTensorCache& inst() { | |||
static MultiCNConstTensorCache sl_inst; | |||
return sl_inst; | |||
static auto* sl_inst = | |||
ResourceManager::create_global<MultiCNConstTensorCache>(); | |||
return *sl_inst; | |||
} | |||
}; | |||
@@ -0,0 +1,87 @@ | |||
/** | |||
* \file imperative/src/include/megbrain/imperative/resource_manager.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#pragma once | |||
#include <any> | |||
#include <functional> | |||
#include <memory> | |||
#include <mutex> | |||
#include <vector> | |||
#include "megbrain/common.h" | |||
#include "megbrain/utils/metahelper.h" | |||
#include "megbrain/utils/thread.h" | |||
namespace mgb { | |||
namespace imperative { | |||
class ResourceManager : public NonCopyableObj { | |||
protected: | |||
std::vector<std::any> m_handles; | |||
std::mutex m_mutex; | |||
private: | |||
static ResourceManager& get_global(); | |||
static ResourceManager& get_local(); | |||
public: | |||
template <typename T, typename... TArgs> | |||
static T* create_global(TArgs&&... args) { | |||
mgb_log_debug("create global resource: %s", typeid(T).name()); | |||
auto instance = std::make_shared<T>(std::forward<TArgs&&>(args)...); | |||
auto& manager = get_global(); | |||
MGB_LOCK_GUARD(manager.m_mutex); | |||
manager.m_handles.push_back((std::any)instance); | |||
return instance.get(); | |||
} | |||
template <typename T, typename... TArgs> | |||
static T* create_local(TArgs&&... args) { | |||
mgb_log_debug("create local resource: %s", typeid(T).name()); | |||
auto instance = std::make_shared<T>(std::forward<TArgs&&>(args)...); | |||
get_local().m_handles.push_back((std::any)instance); | |||
return instance.get(); | |||
} | |||
void clear(); | |||
~ResourceManager() { clear(); } | |||
}; | |||
template <typename T> | |||
class CompNodeDependentResource : public NonCopyableObj { | |||
private: | |||
std::function<std::unique_ptr<T>()> m_ctor; | |||
std::unique_ptr<T> m_ptr; | |||
Spinlock m_spin; | |||
public: | |||
explicit CompNodeDependentResource(std::function<std::unique_ptr<T>()> ctor) | |||
: m_ctor(ctor) {} | |||
T& operator*() { | |||
if ((!m_ptr) || m_ptr->is_finalized()) { | |||
m_ptr = m_ctor(); | |||
} | |||
return *m_ptr; | |||
} | |||
T* operator->() { | |||
if ((!m_ptr) || m_ptr->is_finalized()) { | |||
m_ptr = m_ctor(); | |||
} | |||
return m_ptr.get(); | |||
} | |||
}; | |||
} // namespace imperative | |||
} // namespace mgb |
@@ -63,10 +63,10 @@ public: | |||
using Channel = Interpreter::Channel; | |||
private: | |||
std::unique_ptr<Channel> m_channel; | |||
std::shared_ptr<Channel> m_channel; | |||
public: | |||
explicit InterpreterTransformation(std::unique_ptr<Channel> channel) | |||
explicit InterpreterTransformation(std::shared_ptr<Channel> channel) | |||
: m_channel{std::move(channel)} {} | |||
Channel* channel() { return m_channel.get(); } | |||