code(127), as the Windows CUDA driver unloads before atexit functions run;
this workaround may be removed after upgrading the CUDA runtime
GitOrigin-RevId: cac37ca3dd
release-1.5
@@ -26,6 +26,13 @@ class CompNodeSyncManager : public CompNodeDepedentObject { | |||
ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event; | |||
std::mutex m_mtx; | |||
public: | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: on Windows the CUDA driver shuts down before atexit functions | |||
//! are called, even if they were registered after the driver was | |||
//! initialized. As a temporary workaround, leave resource recovery to the | |||
//! OS; this may be removed after upgrading the CUDA runtime. | |||
static bool is_into_atexit; | |||
#endif | |||
std::shared_ptr<void> on_comp_node_finalize() override { | |||
MGB_LOCK_GUARD(m_mtx); | |||
m_blob2event.clear(); | |||
@@ -34,6 +41,16 @@ public: | |||
//! Return the process-wide CompNodeSyncManager singleton.
//!
//! On Windows CUDA builds this also installs (exactly once) an atexit hook
//! that sets is_into_atexit, so code running during process teardown can
//! detect that exit handling has started.
static CompNodeSyncManager& inst() {
    static CompNodeSyncManager sl_inst;
#if MGB_CUDA && defined(WIN32)
    //! FIXME: on Windows the CUDA driver shuts down before atexit functions
    //! are called, even if they were registered after the driver was
    //! initialized. As a temporary workaround, leave resource recovery to
    //! the OS; this may be removed after upgrading the CUDA runtime.
    //!
    //! The hook is registered through a function-local static so that it is
    //! installed only once. Registering it on every call (inst() is invoked
    //! for every destroyed Blob) would add a new atexit entry each time.
    //! The static is declared after sl_inst so the hook is still registered
    //! after the singleton has been constructed.
    static const bool sl_atexit_registered = [] {
        auto err = atexit([] { is_into_atexit = true; });
        mgb_assert(!err, "failed to register atexit function");
        return true;
    }();
    (void)sl_atexit_registered;
#endif
    return sl_inst;
}
@@ -52,6 +69,13 @@ public: | |||
m_blob2event.erase(blob); | |||
} | |||
}; | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: on Windows the CUDA driver shuts down before atexit functions | |||
//! are called, even if they were registered after the driver was | |||
//! initialized. As a temporary workaround, leave resource recovery to the | |||
//! OS; this may be removed after upgrading the CUDA runtime. | |||
bool CompNodeSyncManager::is_into_atexit = false; | |||
#endif | |||
// Cache for small blobs | |||
// 1. A blob has to be seen twice (within a window) to be eligible for cache | |||
@@ -221,6 +245,15 @@ Blob::Blob(CompNode cn, size_t sz): | |||
//! Destructor: unregisters this blob from the BlobManager and then removes | |||
//! it from the CompNodeSyncManager. | |||
Blob::~Blob() { | |||
BlobManager::inst()->unregister_blob(this); | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: on Windows the CUDA driver shuts down before atexit functions | |||
//! are called, even if they were registered after the driver was | |||
//! initialized. As a temporary workaround, leave resource recovery to the | |||
//! OS; this may be removed after upgrading the CUDA runtime. | |||
//! Once the atexit flag is set, skip the CompNodeSyncManager removal below. | |||
if (CompNodeSyncManager::is_into_atexit) | |||
return; | |||
#endif | |||
CompNodeSyncManager::inst().remove(this); | |||
} | |||
@@ -556,6 +556,13 @@ CompNode CompNode::load(const Locator& locator_physical, | |||
} | |||
void CompNode::finalize() { | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround recovery | |||
//! resource by OS temporarily, may need remove this after upgrade cuda | |||
//! runtime | |||
return; | |||
#endif | |||
comp_node_detail::DepedentObjList::invoke_callback_and_clean(); | |||
CudaCompNode::finalize(); | |||
CpuCompNode::finalize(); | |||
@@ -614,6 +614,18 @@ bool CudaCompNodeImpl::check_global_finalized() { | |||
} | |||
return true; | |||
} | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround | |||
//! recovery resource by OS temporarily, may need remove this after | |||
//! upgrade cuda runtime | |||
if (CudaCompNode::is_into_atexit) { | |||
mgb_log_debug( | |||
"windows cudaErrorCudartUnloading happened!!, resource " | |||
"recovery by OS!!"); | |||
return true; | |||
} | |||
#endif | |||
return false; | |||
} | |||
@@ -733,11 +745,29 @@ void CudaCompNode::finalize() { | |||
} | |||
} | |||
CompNode::Impl* CudaCompNode::load_cuda( | |||
const Locator &locator, const Locator &locator_logical) { | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: on Windows the CUDA driver shuts down before atexit functions | |||
//! are called, even if they were registered after the driver was | |||
//! initialized. As a temporary workaround, leave resource recovery to the | |||
//! OS; this may be removed after upgrading the CUDA runtime. | |||
bool CudaCompNode::is_into_atexit = false; | |||
#endif | |||
CompNode::Impl* CudaCompNode::load_cuda(const Locator& locator, | |||
const Locator& locator_logical) { | |||
int nr_gpu = get_device_count(); | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: windows cuda driver shutdown before call atexit function even | |||
//! register atexit function after init cuda driver! as a workround | |||
//! recovery resource by OS temporarily, may need remove this after | |||
//! upgrade cuda runtime | |||
if (!is_into_atexit) { | |||
auto err = atexit([] { is_into_atexit = true; }); | |||
mgb_assert(!err, "failed to register atexit function"); | |||
} | |||
#endif | |||
mgb_assert(locator.device >= 0 && locator.device < nr_gpu, | |||
"request gpu%d out of valid range [0, %d)", locator.device, nr_gpu); | |||
"request gpu%d out of valid range [0, %d)", locator.device, | |||
nr_gpu); | |||
auto &&sdptr = CudaCompNodeImpl::sd; | |||
{ | |||
@@ -36,6 +36,13 @@ namespace mgb { | |||
static void set_prealloc_config(size_t alignment, size_t min_req, | |||
size_t max_overhead, double growth_factor); | |||
#if MGB_CUDA && defined(WIN32) | |||
//! FIXME: on Windows the CUDA driver shuts down before atexit functions | |||
//! are called, even if they were registered after the driver was | |||
//! initialized. As a temporary workaround, leave resource recovery to the | |||
//! OS; this may be removed after upgrading the CUDA runtime. | |||
static bool is_into_atexit; | |||
#endif | |||
}; | |||
} | |||