(os version >= sp2) build with bazel
* bazel build support(define __DEPLOY_ON_XP_SP2__ when deploy on xp sp2):
(dbg)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp'
--compiler='clang_cl' -c dbg --copt "-D__DEPLOY_ON_XP_SP2__=1"
(opt)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp'
--compiler='clang_cl' -c opt --copt "-D__DEPLOY_ON_XP_SP2__=1"
* internal behavior:
MGB_HAVE_THREAD=0 is defined when __DEPLOY_ON_XP_SP2__ is enabled
* refer to
https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
xp sp2 (x86) does not fully support the vc runtime, because KERNEL32.dll does not
implement some base apis needed by c++ std facilities, for example,
std::mutex/std::thread/std::condition_variable. As a workaround, we
disable some MegEngine features on the xp sp2 env, for example, multi-thread etc!
* about DNN_MUTEX/MGB_MUTEX: if your code will be built into inference
code (even CPU backends), please replace std::mutex with DNN_MUTEX/MGB_MUTEX
* about multi-thread: if your code needs multi-thread support, please
enable it only when MGB_HAVE_THREAD=1
* about test build env status
1: Visual Studio 2019(MSVC version <= 14.26.28801)---- pass
2: Visual Studio 2019(MSVC version > 14.26.28801) ---- failed
because this 'new' version makes VCR depend on the win7
KERNEL32.DLL; this may be fixed in a later Visual Studio 2019 version,
but we did not test that at this MR merge point
3: Visual Studio 2017 ---------- pass
4: Visual Studio 2014 ---------- pass
GitOrigin-RevId: 65ac48b95e
tags/v1.6.0-rc1
@@ -60,10 +60,10 @@ T deserialize_pod(const std::string& data, size_t& offset) { | |||||
ErrorHandler* ErrorHandler::sm_inst; | ErrorHandler* ErrorHandler::sm_inst; | ||||
ErrorHandler* ErrorHandler::inst() { | ErrorHandler* ErrorHandler::inst() { | ||||
static std::mutex mtx; | |||||
static DNN_MUTEX mtx; | |||||
static DefaultErrorHandler default_handler; | static DefaultErrorHandler default_handler; | ||||
if (megdnn_unlikely(!sm_inst)) { | if (megdnn_unlikely(!sm_inst)) { | ||||
std::lock_guard<std::mutex> lg{mtx}; | |||||
MEGDNN_LOCK_GUARD(mtx); | |||||
if (!sm_inst) { | if (!sm_inst) { | ||||
sm_inst = &default_handler; | sm_inst = &default_handler; | ||||
} | } | ||||
@@ -145,7 +145,7 @@ init_inter_tab_1d(InterpolationMode imode, float* tab, int tabsz) { | |||||
#if MEGDNN_X86 | #if MEGDNN_X86 | ||||
DEF_FUN(const int16_t*) get_linear_ic4_table() { | DEF_FUN(const int16_t*) get_linear_ic4_table() { | ||||
auto table_holder = &sm_tab_linear; | auto table_holder = &sm_tab_linear; | ||||
std::lock_guard<std::mutex> lg{table_holder->mtx}; | |||||
MEGDNN_LOCK_GUARD(table_holder->mtx); | |||||
float* tab = nullptr; | float* tab = nullptr; | ||||
short* itab = nullptr; | short* itab = nullptr; | ||||
MEGDNN_MARK_USED_VAR(tab); | MEGDNN_MARK_USED_VAR(tab); | ||||
@@ -175,7 +175,7 @@ DEF_FUN(const void*) get_table(InterpolationMode imode, bool fixpt) { | |||||
default: | default: | ||||
megdnn_throw(("unsupported interpolation mode")); | megdnn_throw(("unsupported interpolation mode")); | ||||
} | } | ||||
std::lock_guard<std::mutex> lg{table_holder->mtx}; | |||||
MEGDNN_LOCK_GUARD(table_holder->mtx); | |||||
float* tab = nullptr; | float* tab = nullptr; | ||||
short* itab = nullptr; | short* itab = nullptr; | ||||
@@ -134,7 +134,7 @@ private: | |||||
}; | }; | ||||
struct TableHolderBase { | struct TableHolderBase { | ||||
std::mutex mtx; | |||||
DNN_MUTEX mtx; | |||||
//! get table pointer; return whether already init | //! get table pointer; return whether already init | ||||
virtual bool get(float**, int16_t**) = 0; | virtual bool get(float**, int16_t**) = 0; | ||||
@@ -39,10 +39,10 @@ using Mode = param::Elemwise::Mode; | |||||
using ModeTrait = ElemwiseForward::ModeTrait; | using ModeTrait = ElemwiseForward::ModeTrait; | ||||
const ModeTrait& ModeTrait::from_mode(Mode mode) { | const ModeTrait& ModeTrait::from_mode(Mode mode) { | ||||
static std::mutex mtx; | |||||
static DNN_MUTEX mtx; | |||||
static std::vector<ModeTrait> traits; | static std::vector<ModeTrait> traits; | ||||
std::lock_guard<std::mutex> _lock(mtx); | |||||
MEGDNN_LOCK_GUARD(mtx); | |||||
if (traits.empty()) { | if (traits.empty()) { | ||||
auto get = [&](Mode m) -> ModeTrait& { | auto get = [&](Mode m) -> ModeTrait& { | ||||
@@ -28,10 +28,10 @@ void check_dtype(const ModeTrait& trait, size_t i, const TensorLayout& src) { | |||||
} // anonymous namespace | } // anonymous namespace | ||||
const ModeTrait& ModeTrait::from_mode(Mode mode) { | const ModeTrait& ModeTrait::from_mode(Mode mode) { | ||||
static std::mutex mtx; | |||||
static DNN_MUTEX mtx; | |||||
static std::vector<ModeTrait> traits; | static std::vector<ModeTrait> traits; | ||||
std::lock_guard<std::mutex> _lock(mtx); | |||||
MEGDNN_LOCK_GUARD(mtx); | |||||
auto make_check_dtype_func = [](DType expected) { | auto make_check_dtype_func = [](DType expected) { | ||||
auto func = [expected](DType dtype) { | auto func = [expected](DType dtype) { | ||||
@@ -70,7 +70,7 @@ protected: | |||||
MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { | MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { | ||||
static_assert(idx < NR_HELPER_OPRS, "invalid idx"); | static_assert(idx < NR_HELPER_OPRS, "invalid idx"); | ||||
if (!self->m_helper_oprs[idx]) { | if (!self->m_helper_oprs[idx]) { | ||||
std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx}; | |||||
MEGDNN_LOCK_GUARD(self->m_helper_oprs_mtx); | |||||
if (!self->m_helper_oprs[idx]) { | if (!self->m_helper_oprs[idx]) { | ||||
self->m_helper_oprs[idx] = | self->m_helper_oprs[idx] = | ||||
self->template create_operator<Opr>(); | self->template create_operator<Opr>(); | ||||
@@ -88,7 +88,7 @@ protected: | |||||
private: | private: | ||||
std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS> m_helper_oprs; | std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS> m_helper_oprs; | ||||
std::mutex m_helper_oprs_mtx; | |||||
DNN_MUTEX m_helper_oprs_mtx; | |||||
}; | }; | ||||
} // namespace megdnn | } // namespace megdnn | ||||
@@ -38,7 +38,7 @@ const std::shared_ptr<Handle>& inplace_cpu_handle(int debug_level = 0); | |||||
*/ | */ | ||||
template <int nr_opr = 1> | template <int nr_opr = 1> | ||||
class CpuOprDelegationStorage { | class CpuOprDelegationStorage { | ||||
std::mutex m_mtx; | |||||
DNN_MUTEX m_mtx; | |||||
std::shared_ptr<Handle> m_handle; | std::shared_ptr<Handle> m_handle; | ||||
std::unique_ptr<OperatorBase> m_oprs[nr_opr]; | std::unique_ptr<OperatorBase> m_oprs[nr_opr]; | ||||
@@ -604,7 +604,7 @@ TensorLayout LowbitsAlignedTensorFormatBase::collapse_contiguous_spec( | |||||
TensorFormat Image2DPack4TensorFormat::make_raw( | TensorFormat Image2DPack4TensorFormat::make_raw( | ||||
size_t align_axis, size_t align_size_in_elements, | size_t align_axis, size_t align_size_in_elements, | ||||
Handle::HandleVendorType vendor_type) { | Handle::HandleVendorType vendor_type) { | ||||
static std::mutex mtx; | |||||
static DNN_MUTEX mtx; | |||||
static std::unordered_map<uint64_t, | static std::unordered_map<uint64_t, | ||||
std::unique_ptr<Image2DPack4TensorFormat>> | std::unique_ptr<Image2DPack4TensorFormat>> | ||||
cache; | cache; | ||||
@@ -641,7 +641,7 @@ TensorFormat Image2DPack4TensorFormat::change_axis(size_t axis) const { | |||||
/* ===================== LowbitsitsAlignedToBytesTensorFormat | /* ===================== LowbitsitsAlignedToBytesTensorFormat | ||||
* ===================== */ | * ===================== */ | ||||
TensorFormat LowbitsAlignedToBytesTensorFormat::make(size_t size_nbits) { | TensorFormat LowbitsAlignedToBytesTensorFormat::make(size_t size_nbits) { | ||||
static std::mutex mtx; | |||||
static DNN_MUTEX mtx; | |||||
static std::unordered_map< | static std::unordered_map< | ||||
uint64_t, std::unique_ptr<LowbitsAlignedToBytesTensorFormat>> | uint64_t, std::unique_ptr<LowbitsAlignedToBytesTensorFormat>> | ||||
cache; | cache; | ||||
@@ -118,8 +118,17 @@ | |||||
#define megdnn_layout_msg(layout) \ | #define megdnn_layout_msg(layout) \ | ||||
std::string(#layout "=" + (layout).to_string()) | std::string(#layout "=" + (layout).to_string()) | ||||
#define MEGDNN_LOCK_GUARD(var) \ | |||||
std::lock_guard<std::remove_cv_t<decltype(var)>> _lock_guard_##var { var } | |||||
#if __DEPLOY_ON_XP_SP2__ | |||||
#define DNN_MUTEX size_t | |||||
#define MEGDNN_LOCK_GUARD(var) MEGDNN_MARK_USED_VAR(var) | |||||
#else | |||||
#define DNN_MUTEX std::mutex | |||||
#define DNN_TOKENPASTE(x, y) x##y | |||||
#define DNN_TOKENPASTE2(x, y) DNN_TOKENPASTE(x, y) | |||||
#define DNN_LOCK_GUARD_CTOR(mtx) DNN_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) | |||||
#define MEGDNN_LOCK_GUARD(mtx) \ | |||||
std::lock_guard<decltype(mtx)> DNN_LOCK_GUARD_CTOR(mtx) | |||||
#endif | |||||
namespace megdnn { | namespace megdnn { | ||||
@@ -487,7 +496,7 @@ struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | |||||
"implicit conversion disallowed in SafeMultiplies"); | "implicit conversion disallowed in SafeMultiplies"); | ||||
megdnn_trap(); | megdnn_trap(); | ||||
} | } | ||||
}; | |||||
}; // end of struct, not of namespace megdnn | |||||
template <> | template <> | ||||
struct SafeMultiplies<size_t> : public _SafeMultipliesImplUnsigned<size_t> {}; | struct SafeMultiplies<size_t> : public _SafeMultipliesImplUnsigned<size_t> {}; | ||||
@@ -81,7 +81,7 @@ public: | |||||
} | } | ||||
private: | private: | ||||
std::mutex m_mtx; | |||||
DNN_MUTEX m_mtx; | |||||
std::unordered_map<StrategyHashKey, std::unique_ptr<T>, StrategyHasher, | std::unordered_map<StrategyHashKey, std::unique_ptr<T>, StrategyHasher, | ||||
StrategyHashKeyEqual> | StrategyHashKeyEqual> | ||||
m_map_strategies; | m_map_strategies; | ||||
@@ -99,4 +99,4 @@ MatrixMulImpl::KernSizeParam get_matmul_kern_param( | |||||
} // namespace fallback | } // namespace fallback | ||||
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen | |||||
// vim: syntax=cpp.doxygen |
@@ -110,7 +110,7 @@ struct StrategyHashParamEqual { | |||||
}; | }; | ||||
class StrategyDelegationStorage { | class StrategyDelegationStorage { | ||||
std::mutex m_mtx; | |||||
DNN_MUTEX m_mtx; | |||||
std::unordered_map<StrategyHashParam, std::unique_ptr<StrategyBase>, | std::unordered_map<StrategyHashParam, std::unique_ptr<StrategyBase>, | ||||
StrategyHashParamHash, StrategyHashParamEqual> | StrategyHashParamHash, StrategyHashParamEqual> | ||||
map_strategys; | map_strategys; | ||||
@@ -11,6 +11,10 @@ | |||||
#include "./opr_impl.h" | #include "./opr_impl.h" | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
#define MEGDNN_NO_THREAD 1 | |||||
#endif | |||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
#if !MEGDNN_NO_THREAD | #if !MEGDNN_NO_THREAD | ||||
#include <thread> | #include <thread> | ||||
@@ -20,10 +24,10 @@ namespace megdnn { | |||||
namespace naive { | namespace naive { | ||||
void SleepForwardImpl::exec() { | void SleepForwardImpl::exec() { | ||||
double seconds = m_param.time; | |||||
#if MEGDNN_NO_THREAD | #if MEGDNN_NO_THREAD | ||||
megdnn_trap(); | megdnn_trap(); | ||||
#else | #else | ||||
double seconds = m_param.time; | |||||
MEGDNN_DISPATCH_CPU_KERN_OPR( | MEGDNN_DISPATCH_CPU_KERN_OPR( | ||||
std::this_thread::sleep_for(std::chrono::microseconds( | std::this_thread::sleep_for(std::chrono::microseconds( | ||||
static_cast<uint64_t>(seconds * 1e6)));); | static_cast<uint64_t>(seconds * 1e6)));); | ||||
@@ -52,7 +52,7 @@ class InFilePersistentCache final : public PersistentCache { | |||||
std::unordered_map<std::string, std::unordered_map<BlobStorage, BlobStorage, | std::unordered_map<std::string, std::unordered_map<BlobStorage, BlobStorage, | ||||
BlobStorage::Hash>> | BlobStorage::Hash>> | ||||
m_cache; | m_cache; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
template <typename Input> | template <typename Input> | ||||
void read_cache(Input& inp); | void read_cache(Input& inp); | ||||
@@ -32,7 +32,7 @@ namespace { | |||||
std::atomic_flag | std::atomic_flag | ||||
g_default_cpu_initialized, | g_default_cpu_initialized, | ||||
g_exit_handler_registered[CompNode::NR_DEVICE_TYPE]; | g_exit_handler_registered[CompNode::NR_DEVICE_TYPE]; | ||||
std::mutex g_device_map_mtx; | |||||
MGB_MUTEX g_device_map_mtx; | |||||
ThinHashMap<CompNode::DeviceType, ThinHashMap<int, int>> g_device_map; | ThinHashMap<CompNode::DeviceType, ThinHashMap<int, int>> g_device_map; | ||||
CompNode::DeviceType g_unspec_locator_type; | CompNode::DeviceType g_unspec_locator_type; | ||||
@@ -60,7 +60,11 @@ class CpuCompNode::WorkerQueue final | |||||
sys::set_cpu_affinity({m_locator.device}); | sys::set_cpu_affinity({m_locator.device}); | ||||
#endif | #endif | ||||
} | } | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
__builtin_trap(); | |||||
#else | |||||
sys::set_thread_name(m_locator.to_string()); | sys::set_thread_name(m_locator.to_string()); | ||||
#endif | |||||
} | } | ||||
void on_sync_all_task_finish() override { | void on_sync_all_task_finish() override { | ||||
@@ -830,7 +834,9 @@ struct CpuCompNode::Pool { | |||||
void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); } | void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); } | ||||
}; | }; | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::recursive_mutex mtx; | std::recursive_mutex mtx; | ||||
#endif | |||||
// use global memory pool to ensuare object memory accessible even after | // use global memory pool to ensuare object memory accessible even after | ||||
// global finalize | // global finalize | ||||
std::aligned_storage_t<sizeof(CompNodeRecorderImpl), | std::aligned_storage_t<sizeof(CompNodeRecorderImpl), | ||||
@@ -862,7 +868,9 @@ void CpuCompNode::foreach (thin_function<void(CompNode)> callback) { | |||||
for (size_t i = 0;; ++i) { | for (size_t i = 0;; ++i) { | ||||
CompNode cur; | CompNode cur; | ||||
{ | { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
MGB_LOCK_GUARD(sm_pool->mtx); | MGB_LOCK_GUARD(sm_pool->mtx); | ||||
#endif | |||||
if (i >= sm_pool->nr_used_impl_storage) | if (i >= sm_pool->nr_used_impl_storage) | ||||
return; | return; | ||||
cur = make_comp_node_from_impl( | cur = make_comp_node_from_impl( | ||||
@@ -909,7 +917,9 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator, | |||||
locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT, | locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT, | ||||
"failed to load cpu for device:%d stream:%d", locator.device, | "failed to load cpu for device:%d stream:%d", locator.device, | ||||
locator.stream); | locator.stream); | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
MGB_LOCK_GUARD(sm_pool->mtx); | MGB_LOCK_GUARD(sm_pool->mtx); | ||||
#endif | |||||
// encode both device ID and type into a int | // encode both device ID and type into a int | ||||
mgb_assert(locator_logical.device >= -1 || | mgb_assert(locator_logical.device >= -1 || | ||||
@@ -967,7 +977,9 @@ void CpuCompNode::sync_all() { | |||||
if (!sm_pool) | if (!sm_pool) | ||||
return; | return; | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
MGB_LOCK_GUARD(sm_pool->mtx); | MGB_LOCK_GUARD(sm_pool->mtx); | ||||
#endif | |||||
for (auto&& i : sm_pool->locator2impl) | for (auto&& i : sm_pool->locator2impl) | ||||
i.second->sync(); | i.second->sync(); | ||||
for (auto&& i : sm_pool->locator2impl_multi_thread) | for (auto&& i : sm_pool->locator2impl_multi_thread) | ||||
@@ -1049,7 +1061,9 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::do_device_wait_by( | |||||
auto waiter = [this, version]() { | auto waiter = [this, version]() { | ||||
while (m_record_nr_finish.load(std::memory_order_acquire) < version) { | while (m_record_nr_finish.load(std::memory_order_acquire) < version) { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::unique_lock<std::mutex> lk{m_dev_wait_mtx}; | std::unique_lock<std::mutex> lk{m_dev_wait_mtx}; | ||||
#endif | |||||
if (m_record_nr_finish.load(std::memory_order_acquire) >= version) { | if (m_record_nr_finish.load(std::memory_order_acquire) >= version) { | ||||
break; | break; | ||||
} | } | ||||
@@ -1078,10 +1092,12 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::on_finish() { | |||||
} | } | ||||
m_record_nr_finish.fetch_add(1, std::memory_order_release); | m_record_nr_finish.fetch_add(1, std::memory_order_release); | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) { | if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) { | ||||
MGB_LOCK_GUARD(m_dev_wait_mtx); | MGB_LOCK_GUARD(m_dev_wait_mtx); | ||||
m_dev_wait_cv.notify_all(); | m_dev_wait_cv.notify_all(); | ||||
} | } | ||||
#endif | |||||
} | } | ||||
bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() { | bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() { | ||||
@@ -1100,11 +1116,15 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::host_wait_cv() { | |||||
m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release); | m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release); | ||||
for (;;) { | for (;;) { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::unique_lock<std::mutex> lock{m_dev_wait_mtx}; | std::unique_lock<std::mutex> lock{m_dev_wait_mtx}; | ||||
#endif | |||||
if (finished()) { | if (finished()) { | ||||
break; | break; | ||||
} | } | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_dev_wait_cv.wait(lock); | m_dev_wait_cv.wait(lock); | ||||
#endif | |||||
} | } | ||||
m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release); | m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release); | ||||
} | } | ||||
@@ -45,9 +45,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() { | |||||
return; | return; | ||||
} | } | ||||
if (sm_cpu_sync_level >= 1) { | if (sm_cpu_sync_level >= 1) { | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
#if MGB_HAVE_THREAD | |||||
__builtin_trap(); | |||||
#else | |||||
return; | |||||
#endif | |||||
#else | |||||
while (!finished()) { | while (!finished()) { | ||||
std::this_thread::yield(); | std::this_thread::yield(); | ||||
} | } | ||||
#endif | |||||
return; | return; | ||||
} | } | ||||
mgb_assert(!sm_cpu_sync_level, "invalid cpu sync level: %d", | mgb_assert(!sm_cpu_sync_level, "invalid cpu sync level: %d", | ||||
@@ -57,9 +65,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() { | |||||
} | } | ||||
void CompNodeImplHelper::EventImplHelper::host_wait_cv() { | void CompNodeImplHelper::EventImplHelper::host_wait_cv() { | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
#if MGB_HAVE_THREAD | |||||
__builtin_trap(); | |||||
#else | |||||
return; | |||||
#endif | |||||
#else | |||||
while (!finished()) { | while (!finished()) { | ||||
std::this_thread::yield(); | std::this_thread::yield(); | ||||
} | } | ||||
#endif | |||||
} | } | ||||
double CompNodeImplHelper::EventImplHelper::elapsed_time_until(Event& end_) { | double CompNodeImplHelper::EventImplHelper::elapsed_time_until(Event& end_) { | ||||
@@ -49,7 +49,7 @@ namespace mgb { | |||||
* been performed. | * been performed. | ||||
*/ | */ | ||||
class CompNodeImplHelper::EventImplHelper: public Event { | class CompNodeImplHelper::EventImplHelper: public Event { | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
bool m_recorded = false, m_finished = false; | bool m_recorded = false, m_finished = false; | ||||
@@ -59,11 +59,15 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc( | |||||
size_t size, bool allow_from_parent, bool log_stat_on_error) { | size_t size, bool allow_from_parent, bool log_stat_on_error) { | ||||
mgb_assert(size); | mgb_assert(size); | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_mutex.lock(); | m_mutex.lock(); | ||||
#endif | |||||
auto iter = m_free_blk_size.lower_bound(FreeBlock{MemAddr{0, 0}, size}); | auto iter = m_free_blk_size.lower_bound(FreeBlock{MemAddr{0, 0}, size}); | ||||
if (iter == m_free_blk_size.end()) { | if (iter == m_free_blk_size.end()) { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_mutex.unlock(); | m_mutex.unlock(); | ||||
#endif | |||||
if (!allow_from_parent) { | if (!allow_from_parent) { | ||||
if (log_stat_on_error) { | if (log_stat_on_error) { | ||||
print_memory_state(); | print_memory_state(); | ||||
@@ -87,7 +91,9 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc( | |||||
if (remain) | if (remain) | ||||
insert_free_unsafe({alloc_addr + size, remain}); | insert_free_unsafe({alloc_addr + size, remain}); | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_mutex.unlock(); | m_mutex.unlock(); | ||||
#endif | |||||
return alloc_addr; | return alloc_addr; | ||||
} | } | ||||
@@ -267,7 +273,9 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { | |||||
{ | { | ||||
// sleep to wait for async dealloc | // sleep to wait for async dealloc | ||||
using namespace std::literals; | using namespace std::literals; | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::this_thread::sleep_for(0.2s); | std::this_thread::sleep_for(0.2s); | ||||
#endif | |||||
} | } | ||||
get = gather_stream_free_blk_and_release_full(); | get = gather_stream_free_blk_and_release_full(); | ||||
mgb_log("device %d: sync all device and try to " | mgb_log("device %d: sync all device and try to " | ||||
@@ -73,7 +73,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { | |||||
//! map from address to size and size iter | //! map from address to size and size iter | ||||
std::map<size_t, FreeBlockAddrInfo> m_free_blk_addr; | std::map<size_t, FreeBlockAddrInfo> m_free_blk_addr; | ||||
std::mutex m_mutex; | |||||
MGB_MUTEX m_mutex; | |||||
struct BlkByAddrIter { | struct BlkByAddrIter { | ||||
decltype(m_free_blk_addr.begin()) aiter; | decltype(m_free_blk_addr.begin()) aiter; | ||||
@@ -48,7 +48,11 @@ class ComputingGraphImpl::ComputingSequence::ExecContext { | |||||
std::unique_ptr<CompNodeSeqRecorder> m_recorder; | std::unique_ptr<CompNodeSeqRecorder> m_recorder; | ||||
bool has_var_sanity_check() const { | bool has_var_sanity_check() const { | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
return false; | |||||
#else | |||||
return static_cast<bool>(m_comp_seq->m_var_sanity_check); | return static_cast<bool>(m_comp_seq->m_var_sanity_check); | ||||
#endif | |||||
} | } | ||||
void try_reset_recorder() { | void try_reset_recorder() { | ||||
@@ -305,10 +309,12 @@ void ComputingGraphImpl::ComputingSequence::preprocess(ExecContext* ctx) { | |||||
m_owner_graph->var_node_mem_manager().alloc_var_node_mem_static(); | m_owner_graph->var_node_mem_manager().alloc_var_node_mem_static(); | ||||
bool first_exec = m_first_exec; | bool first_exec = m_first_exec; | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
if (!first_exec) { | if (!first_exec) { | ||||
// var sanity check only for first run | // var sanity check only for first run | ||||
m_var_sanity_check.reset(); | m_var_sanity_check.reset(); | ||||
} | } | ||||
#endif | |||||
m_owner_graph->event().signal_inplace<event::CompSeqExecBeforeStart>( | m_owner_graph->event().signal_inplace<event::CompSeqExecBeforeStart>( | ||||
m_owner_graph, this, &ctx->m_cleanup_callback, &m_used_comp_node, | m_owner_graph, this, &ctx->m_cleanup_callback, &m_used_comp_node, | ||||
@@ -342,9 +348,13 @@ void ComputingGraphImpl::ComputingSequence::attach_to_graph() { | |||||
static_cast<ComputingSequence*>(gimpl->m_current_comp_seq); | static_cast<ComputingSequence*>(gimpl->m_current_comp_seq); | ||||
prev_seq->cleanup(); | prev_seq->cleanup(); | ||||
} | } | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
//! disable VarSanityCheck when __DEPLOY_ON_XP_SP2__=1. caused by | |||||
//! VarSanityCheck depends on std::thread | |||||
if (gimpl->options().var_sanity_check_first_run) { | if (gimpl->options().var_sanity_check_first_run) { | ||||
m_var_sanity_check = std::make_unique<VarSanityCheck>(gimpl); | m_var_sanity_check = std::make_unique<VarSanityCheck>(gimpl); | ||||
} | } | ||||
#endif | |||||
gimpl->m_current_comp_seq = this; | gimpl->m_current_comp_seq = this; | ||||
} | } | ||||
@@ -403,7 +413,9 @@ void ComputingGraphImpl::ComputingSequence::do_wait(bool explicit_user_wait) { | |||||
} | } | ||||
void ComputingGraphImpl::ComputingSequence::cleanup() { | void ComputingGraphImpl::ComputingSequence::cleanup() { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_var_sanity_check.reset(); | m_var_sanity_check.reset(); | ||||
#endif | |||||
if (has_uncaught_exception()) { | if (has_uncaught_exception()) { | ||||
mgb_log_warn( | mgb_log_warn( | ||||
"fallback to simple graph waiting in dtor due to uncaught " | "fallback to simple graph waiting in dtor due to uncaught " | ||||
@@ -30,7 +30,9 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { | |||||
size_t m_run_id = 0; | size_t m_run_id = 0; | ||||
size_t m_cg_event_version = 0; | size_t m_cg_event_version = 0; | ||||
mutable Maybe<double> m_prev_exec_time; | mutable Maybe<double> m_prev_exec_time; | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::unique_ptr<VarSanityCheck> m_var_sanity_check; | std::unique_ptr<VarSanityCheck> m_var_sanity_check; | ||||
#endif | |||||
std::unique_ptr<CompNodeSeqRecorder> m_comp_node_seq_recorder; | std::unique_ptr<CompNodeSeqRecorder> m_comp_node_seq_recorder; | ||||
NormalExecEnv m_exec_env; | NormalExecEnv m_exec_env; | ||||
@@ -46,7 +48,7 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { | |||||
class ExecContext; | class ExecContext; | ||||
std::unique_ptr<MegBrainError> m_async_exc; | std::unique_ptr<MegBrainError> m_async_exc; | ||||
std::mutex m_async_exc_mutex; | |||||
MGB_MUTEX m_async_exc_mutex; | |||||
/*! | /*! | ||||
* \brief check whether recording comp seq is enabled | * \brief check whether recording comp seq is enabled | ||||
@@ -713,7 +713,9 @@ void PostExecActions::perform() { | |||||
for (auto&& i : m_items) { | for (auto&& i : m_items) { | ||||
if (enable) { | if (enable) { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
VarSanityCheck::check_var_after_exec(i.var, *i.recv_info); | VarSanityCheck::check_var_after_exec(i.var, *i.recv_info); | ||||
#endif | |||||
if (i.shape_sync_hdl) | if (i.shape_sync_hdl) | ||||
i.shape_sync_hdl->sync_from_var(); | i.shape_sync_hdl->sync_from_var(); | ||||
@@ -141,7 +141,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final, | |||||
TagTraitBase) // { | TagTraitBase) // { | ||||
struct InferResultCache { | struct InferResultCache { | ||||
Spinlock mtx; | Spinlock mtx; | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
ThinHashMap<size_t, InpElement> storage; | |||||
#else | |||||
ThinHashMap<std::thread::id, InpElement> storage; | ThinHashMap<std::thread::id, InpElement> storage; | ||||
#endif | |||||
}; | }; | ||||
static TagTraitArray sm_empty_deps; | static TagTraitArray sm_empty_deps; | ||||
static InferResultCache sm_result_cache; | static InferResultCache sm_result_cache; | ||||
@@ -167,7 +171,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final, | |||||
{ | { | ||||
// thread_local not supported on ios; so we us a manual impl | // thread_local not supported on ios; so we us a manual impl | ||||
MGB_LOCK_GUARD(sm_result_cache.mtx); | MGB_LOCK_GUARD(sm_result_cache.mtx); | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
ret = &sm_result_cache.storage[0]; | |||||
#else | |||||
ret = &sm_result_cache.storage[std::this_thread::get_id()]; | ret = &sm_result_cache.storage[std::this_thread::get_id()]; | ||||
#endif | |||||
} | } | ||||
ret->m_shape = &tag()->shape(); | ret->m_shape = &tag()->shape(); | ||||
return ret; | return ret; | ||||
@@ -122,7 +122,7 @@ class StaticInferManagerImpl final: public StaticInferManager { | |||||
struct TagTraitContainer; | struct TagTraitContainer; | ||||
ComputingGraph * const m_owner_graph; | ComputingGraph * const m_owner_graph; | ||||
std::recursive_mutex m_mtx; | |||||
MGB_RECURSIVE_MUTEX m_mtx; | |||||
//! callbacks to be invoked in destructor | //! callbacks to be invoked in destructor | ||||
ThinHashMap<void*, thin_function<void()>> m_dtor_callbacks; | ThinHashMap<void*, thin_function<void()>> m_dtor_callbacks; | ||||
@@ -20,7 +20,7 @@ using namespace cg; | |||||
/* ===================== MemAllocPlan ===================== */ | /* ===================== MemAllocPlan ===================== */ | ||||
std::mutex MemAllocPlan::ReadonlyFwdList::list_mutex; | |||||
MGB_MUTEX MemAllocPlan::ReadonlyFwdList::list_mutex; | |||||
void MemAllocPlan::ReadonlyFwdList::reset() { | void MemAllocPlan::ReadonlyFwdList::reset() { | ||||
MGB_LOCK_GUARD(list_mutex); | MGB_LOCK_GUARD(list_mutex); | ||||
@@ -440,7 +440,7 @@ class VarNodeMemManager { | |||||
ImpureMemPlanManager m_impure_mem_plan_mgr; | ImpureMemPlanManager m_impure_mem_plan_mgr; | ||||
std::mutex m_dynamic_alloc_mtx; | |||||
MGB_MUTEX m_dynamic_alloc_mtx; | |||||
const size_t* m_run_id_ptr = nullptr; | const size_t* m_run_id_ptr = nullptr; | ||||
SyncableCounter m_cpu_async_release_barrier; | SyncableCounter m_cpu_async_release_barrier; | ||||
@@ -19,7 +19,13 @@ using namespace mgb; | |||||
using namespace sys; | using namespace sys; | ||||
int sys::get_cpu_count() { | int sys::get_cpu_count() { | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
//! when deploy on xp sp2, we only support single thread | |||||
//! so just return 1 even cpu number greater than 1 | |||||
return 1; | |||||
#else | |||||
return std::max(std::thread::hardware_concurrency(), 1u); | return std::max(std::thread::hardware_concurrency(), 1u); | ||||
#endif | |||||
} | } | ||||
#if defined(WIN32) | #if defined(WIN32) | ||||
@@ -153,9 +159,11 @@ bool sys::stderr_ansi_color() { | |||||
void sys::set_thread_name(const std::string &) { | void sys::set_thread_name(const std::string &) { | ||||
} | } | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::string sys::get_thread_name(Maybe<std::thread::id>) { | std::string sys::get_thread_name(Maybe<std::thread::id>) { | ||||
return "@"; | return "@"; | ||||
} | } | ||||
#endif | |||||
namespace { | namespace { | ||||
class FakeTimedFuncInvoker final: public TimedFuncInvoker { | class FakeTimedFuncInvoker final: public TimedFuncInvoker { | ||||
@@ -254,6 +262,7 @@ void sys::set_thread_name(const std::string &name) { | |||||
#endif | #endif | ||||
} | } | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::string sys::get_thread_name(Maybe<std::thread::id> tid_) { | std::string sys::get_thread_name(Maybe<std::thread::id> tid_) { | ||||
#if MGB_ENABLE_DEBUG_UTIL | #if MGB_ENABLE_DEBUG_UTIL | ||||
MGB_LOCK_GUARD(thread_name_map_lock); | MGB_LOCK_GUARD(thread_name_map_lock); | ||||
@@ -269,10 +278,11 @@ std::string sys::get_thread_name(Maybe<std::thread::id> tid_) { | |||||
return ""; | return ""; | ||||
#endif | #endif | ||||
} | } | ||||
#endif | |||||
namespace { | namespace { | ||||
class TimedFuncInvokerImpl final: public TimedFuncInvoker { | |||||
class TimedFuncInvokerImpl final : public TimedFuncInvoker { | |||||
/* | /* | ||||
* server-client protocol: | * server-client protocol: | ||||
* | * | ||||
@@ -308,7 +318,7 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { | |||||
bool m_watcher_should_stop = false; | bool m_watcher_should_stop = false; | ||||
std::condition_variable m_watcher_stop_cv; | std::condition_variable m_watcher_stop_cv; | ||||
std::mutex m_watcher_stop_mtx, m_global_mtx; | |||||
MGB_MUTEX m_watcher_stop_mtx, m_global_mtx; | |||||
void clear_sock_fd() { | void clear_sock_fd() { | ||||
if (m_peer_fd) | if (m_peer_fd) | ||||
@@ -567,8 +577,10 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { | |||||
auto start = high_resolution_clock::now(), | auto start = high_resolution_clock::now(), | ||||
end = start + timeout_due; | end = start + timeout_due; | ||||
for (; ; ) { | for (; ; ) { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::unique_lock<std::mutex> lk(m_watcher_stop_mtx); | std::unique_lock<std::mutex> lk(m_watcher_stop_mtx); | ||||
m_watcher_stop_cv.wait_until(lk, end); | m_watcher_stop_cv.wait_until(lk, end); | ||||
#endif | |||||
if (m_watcher_should_stop) | if (m_watcher_should_stop) | ||||
return false; | return false; | ||||
@@ -603,10 +615,9 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { | |||||
} MGB_CATCH(..., {}); | } MGB_CATCH(..., {}); | ||||
clear_sock_fd(); | clear_sock_fd(); | ||||
} | } | ||||
}; | }; | ||||
} // anonymous namespace | |||||
} // anonymous namespace | |||||
TimedFuncInvoker& TimedFuncInvoker::ins() { | TimedFuncInvoker& TimedFuncInvoker::ins() { | ||||
static TimedFuncInvokerImpl impl; | static TimedFuncInvokerImpl impl; | ||||
@@ -205,6 +205,21 @@ void __log__(LogLevel level, const char *file, const char *func, int line, | |||||
#define MGB_TOKENPASTE2(x, y) MGB_TOKENPASTE(x, y) | #define MGB_TOKENPASTE2(x, y) MGB_TOKENPASTE(x, y) | ||||
#define MGB_LOCK_GUARD_CTOR(mtx) MGB_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) | #define MGB_LOCK_GUARD_CTOR(mtx) MGB_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
//! refer to
//! https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
//! xp sp2 does not fully support the vc runtime, because KERNEL32.dll does not
//! implement some base apis needed by c++ std features, for example
//! std::mutex/std::thread/std::condition_variable; as a workaround, we
//! disable some MegEngine features on xp sp2 env, for example multi-thread etc!
//! placeholder "mutex" types for this branch: a plain size_t stands in for
//! the std mutex classes, so declarations such as `MGB_MUTEX m_mtx;` still
//! compile
#define MGB_MUTEX size_t
#define MGB_RECURSIVE_MUTEX size_t
//! lock guards in this branch take no lock; they only forward the given
//! variable to MGB_MARK_USED_VAR (defined elsewhere; presumably it just marks
//! the variable as used to silence unused-variable warnings)
#define MGB_LOCK_GUARD(mtx) MGB_MARK_USED_VAR(mtx)
#define MGB_LOCK_GUARD_UNIQUE(mtx) MGB_MARK_USED_VAR(mtx)
//! must forward \p mtx like the two guards above: passing the bare macro name
//! MGB_MARK_USED_VAR would not be re-expanded and would fail to compile at
//! the first use of this macro
#define MGB_LOCK_GUARD_SHARED(mtx) MGB_MARK_USED_VAR(mtx)
#else | |||||
#define MGB_MUTEX std::mutex | |||||
#define MGB_RECURSIVE_MUTEX std::recursive_mutex | |||||
#define MGB_LOCK_GUARD(mtx) \ | #define MGB_LOCK_GUARD(mtx) \ | ||||
std::lock_guard<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx) | std::lock_guard<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx) | ||||
@@ -212,7 +227,8 @@ void __log__(LogLevel level, const char *file, const char *func, int line, | |||||
std::unique_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx) | std::unique_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx) | ||||
#define MGB_LOCK_GUARD_SHARED(mtx) \ | #define MGB_LOCK_GUARD_SHARED(mtx) \ | ||||
std::shared_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx) | |||||
std::shared_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx) | |||||
#endif | |||||
/*! | /*! | ||||
* \brief printf-like std::string constructor | * \brief printf-like std::string constructor | ||||
@@ -222,7 +222,7 @@ class MemAllocPlan final: public json::Serializable, public NonCopyableObj { | |||||
private: | private: | ||||
class ReadonlyFwdList { | class ReadonlyFwdList { | ||||
MemAllocPlan *m_prev = nullptr, *m_next = nullptr; | MemAllocPlan *m_prev = nullptr, *m_next = nullptr; | ||||
static std::mutex list_mutex; | |||||
static MGB_MUTEX list_mutex; | |||||
public: | public: | ||||
MemAllocPlan* next() const { return m_next; } | MemAllocPlan* next() const { return m_next; } | ||||
void reset(); | void reset(); | ||||
@@ -27,11 +27,13 @@ namespace sys { | |||||
//! set name of caller thread | //! set name of caller thread | ||||
void set_thread_name(const std::string &name); | void set_thread_name(const std::string &name); | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
/*!
 * \brief get name of given thread
 * \param tid thread id, or None for the caller thread
 */
std::string get_thread_name(Maybe<std::thread::id> tid = None); | std::string get_thread_name(Maybe<std::thread::id> tid = None); | ||||
#endif | |||||
//! get number of CPU cores on this system | //! get number of CPU cores on this system | ||||
int get_cpu_count(); | int get_cpu_count(); | ||||
@@ -35,14 +35,20 @@ class AsyncWorkerSet final: public NonCopyableObj { | |||||
}; | }; | ||||
class FutureThreadPoolBase : public NonCopyableObj { | class FutureThreadPoolBase : public NonCopyableObj { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::vector<std::thread::id> m_ids; | std::vector<std::thread::id> m_ids; | ||||
#endif | |||||
public: | public: | ||||
FutureThreadPoolBase(const Maybe<std::string>& = None) {} | FutureThreadPoolBase(const Maybe<std::string>& = None) {} | ||||
#if __DEPLOY_ON_XP_SP2__ | |||||
size_t start(size_t concurrency) { return concurrency; } | |||||
#else | |||||
const std::vector<std::thread::id>& start(size_t concurrency) { | const std::vector<std::thread::id>& start(size_t concurrency) { | ||||
m_ids.resize(concurrency, std::this_thread::get_id()); | m_ids.resize(concurrency, std::this_thread::get_id()); | ||||
return m_ids; | return m_ids; | ||||
} | } | ||||
#endif | |||||
void stop() { | void stop() { | ||||
} | } | ||||
@@ -53,7 +53,7 @@ class SyncEventConnecter: public NonCopyableObj { | |||||
using ReceiverMap = ThinHashMap<Typeinfo*, ReceiverList>; | using ReceiverMap = ThinHashMap<Typeinfo*, ReceiverList>; | ||||
bool m_is_empty = true; | bool m_is_empty = true; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
//! map from type to receiver; use shared_ptr because it would be kept by | //! map from type to receiver; use shared_ptr because it would be kept by | ||||
//! handlers | //! handlers | ||||
std::shared_ptr<ReceiverMap> m_receiver_map = | std::shared_ptr<ReceiverMap> m_receiver_map = | ||||
@@ -83,7 +83,7 @@ namespace mgb { | |||||
std::string, | std::string, | ||||
std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>> | std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>> | ||||
m_cache; | m_cache; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
}; | }; | ||||
/*! | /*! | ||||
@@ -33,7 +33,7 @@ namespace { | |||||
template<class Opr> | template<class Opr> | ||||
class StaticInferOpr { | class StaticInferOpr { | ||||
intl::UniqPtrWithCN<Opr> m_opr; | intl::UniqPtrWithCN<Opr> m_opr; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
public: | public: | ||||
class Lock { | class Lock { | ||||
@@ -43,7 +43,9 @@ namespace { | |||||
explicit Lock(StaticInferOpr *owner): | explicit Lock(StaticInferOpr *owner): | ||||
m_owner{owner} | m_owner{owner} | ||||
{ | { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_owner->m_mtx.lock(); | m_owner->m_mtx.lock(); | ||||
#endif | |||||
} | } | ||||
public: | public: | ||||
@@ -54,8 +56,10 @@ namespace { | |||||
} | } | ||||
~Lock() { | ~Lock() { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
if (m_owner) | if (m_owner) | ||||
m_owner->m_mtx.unlock(); | m_owner->m_mtx.unlock(); | ||||
#endif | |||||
} | } | ||||
Lock& operator = (const Lock &) = delete; | Lock& operator = (const Lock &) = delete; | ||||
@@ -277,7 +277,7 @@ SubTensorSpec FancyIndexingHelper::fancy_indexing_make_sub_spec( | |||||
mgb_assert(m_require_scalar_index || !fake_single_idx); | mgb_assert(m_require_scalar_index || !fake_single_idx); | ||||
static DeviceTensorND fake_val; | static DeviceTensorND fake_val; | ||||
static std::mutex fake_val_mtx; | |||||
static MGB_MUTEX fake_val_mtx; | |||||
if (mgb_unlikely(fake_val.empty())) { | if (mgb_unlikely(fake_val.empty())) { | ||||
MGB_LOCK_GUARD(fake_val_mtx); | MGB_LOCK_GUARD(fake_val_mtx); | ||||
@@ -53,7 +53,7 @@ namespace { | |||||
MGB_TYPEINFO_OBJ_DECL; | MGB_TYPEINFO_OBJ_DECL; | ||||
public: | public: | ||||
std::mutex mtx; | |||||
MGB_MUTEX mtx; | |||||
CompNode::UnorderedMap<DeviceTensorStorage> cn2storage; | CompNode::UnorderedMap<DeviceTensorStorage> cn2storage; | ||||
}; | }; | ||||
MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer); | MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer); | ||||
@@ -377,7 +377,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(SharedDeviceTensorWithFormat); | |||||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ImmutableTensor); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(ImmutableTensor); | ||||
class ImmutableTensor::Value { | class ImmutableTensor::Value { | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
DeviceTensorND m_dev, m_static_infer; | DeviceTensorND m_dev, m_static_infer; | ||||
std::string m_summary; | std::string m_summary; | ||||
@@ -527,7 +527,7 @@ class ImmutableTensor::DevValueCache final: public UserDataContainer::UserData { | |||||
std::unordered_map<TensorKey, Value, Hash> m_tensor2val; | std::unordered_map<TensorKey, Value, Hash> m_tensor2val; | ||||
std::unordered_map<ScalarKey, Value, Hash> m_scalar2val; | std::unordered_map<ScalarKey, Value, Hash> m_scalar2val; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
void setup_value(Value &dest, const HostTensorND &val) { | void setup_value(Value &dest, const HostTensorND &val) { | ||||
dest.setup(m_comp_node, val); | dest.setup(m_comp_node, val); | ||||
@@ -888,7 +888,7 @@ class LoopImpl::MutableStateSaver::Recorder final: public NonCopyableObj { | |||||
//! mutex for m_saved_buckets, used between copy_bucket_to_host() and the | //! mutex for m_saved_buckets, used between copy_bucket_to_host() and the | ||||
//! async copy task in m_copy_threadpool | //! async copy task in m_copy_threadpool | ||||
std::mutex m_saved_buckets_mtx; | |||||
MGB_MUTEX m_saved_buckets_mtx; | |||||
//! see on_fwd_finish() | //! see on_fwd_finish() | ||||
TensorShape m_var_shape; | TensorShape m_var_shape; | ||||
bool m_enabled = false; | bool m_enabled = false; | ||||
@@ -356,7 +356,9 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | |||||
next_report_time = timer.get_secs() + 1; | next_report_time = timer.get_secs() + 1; | ||||
} | } | ||||
using namespace std::literals; | using namespace std::literals; | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
std::this_thread::sleep_for(1000us); | std::this_thread::sleep_for(1000us); | ||||
#endif | |||||
} | } | ||||
// release all free blocks owned by child process, | // release all free blocks owned by child process, | ||||
// in order to avoid main process running out of memory | // in order to avoid main process running out of memory | ||||
@@ -731,7 +731,7 @@ class PersistentOutputStorage::StorageHolder final | |||||
key.second); | key.second); | ||||
} | } | ||||
}; | }; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
std::unordered_map<Key, DeviceTensorStorage, KeyHash> m_storage; | std::unordered_map<Key, DeviceTensorStorage, KeyHash> m_storage; | ||||
public: | public: | ||||
@@ -125,9 +125,13 @@ void VarValueChecker::on_var_computed(VarNode *var) { | |||||
} | } | ||||
if (!m_init_val_dumped) { | if (!m_init_val_dumped) { | ||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_var2val_mtx.lock(); | m_var2val_mtx.lock(); | ||||
auto &&val = m_var2val[var]; | |||||
#endif | |||||
auto&& val = m_var2val[var]; | |||||
#if !__DEPLOY_ON_XP_SP2__ | |||||
m_var2val_mtx.unlock(); | m_var2val_mtx.unlock(); | ||||
#endif | |||||
mgb_assert(!val); | mgb_assert(!val); | ||||
val = std::make_shared<DeviceTensorND>(); | val = std::make_shared<DeviceTensorND>(); | ||||
@@ -22,7 +22,7 @@ namespace mgb { | |||||
* This is intended to find potential bugs in megdnn. | * This is intended to find potential bugs in megdnn. | ||||
*/ | */ | ||||
class CPUDispatchChecker final: public PluginBase { | class CPUDispatchChecker final: public PluginBase { | ||||
std::mutex m_cn2nr_task_mtx, | |||||
MGB_MUTEX m_cn2nr_task_mtx, | |||||
m_failed_oprs_mtx_storage, | m_failed_oprs_mtx_storage, | ||||
*m_failed_oprs_mtx = &m_failed_oprs_mtx_storage; | *m_failed_oprs_mtx = &m_failed_oprs_mtx_storage; | ||||
CompNode::UnorderedMap<size_t> m_cn2nr_task; | CompNode::UnorderedMap<size_t> m_cn2nr_task; | ||||
@@ -60,7 +60,7 @@ class TextOprIODump final : public OprIODumpBase { | |||||
bool m_print_addr = true; | bool m_print_addr = true; | ||||
std::shared_ptr<FILE> m_fout; | std::shared_ptr<FILE> m_fout; | ||||
size_t m_max_size = 5; | size_t m_max_size = 5; | ||||
std::mutex m_mtx; | |||||
MGB_MUTEX m_mtx; | |||||
std::unique_ptr<LazyValueRecorder> m_lazy_value; | std::unique_ptr<LazyValueRecorder> m_lazy_value; | ||||
void dump_var(VarNode* var, bool lazy_sync) override; | void dump_var(VarNode* var, bool lazy_sync) override; | ||||
@@ -64,7 +64,7 @@ class VarSanityCheck final : public PluginBase { | |||||
//! map from caller thread to workspace map | //! map from caller thread to workspace map | ||||
ThinHashMap<std::thread::id, WorkspaceCache> m_workspace; | ThinHashMap<std::thread::id, WorkspaceCache> m_workspace; | ||||
std::mutex m_workspace_mtx; | |||||
MGB_MUTEX m_workspace_mtx; | |||||
ThinHashMap<VarNode*, ChecksumResult> m_var2chksum; | ThinHashMap<VarNode*, ChecksumResult> m_var2chksum; | ||||
/*! the ids of varnodes that have been modified by recv_opr | /*! the ids of varnodes that have been modified by recv_opr | ||||
@@ -72,7 +72,7 @@ class VarSanityCheck final : public PluginBase { | |||||
* cg::OperatorNodeBase::NodeProp::Flag:: FORCE_UPDATE_INPUT_VAR. | * cg::OperatorNodeBase::NodeProp::Flag:: FORCE_UPDATE_INPUT_VAR. | ||||
*/ | */ | ||||
ThinHashSet<VarNode*> m_modified_vars; | ThinHashSet<VarNode*> m_modified_vars; | ||||
std::mutex m_id2chksum_mtx; | |||||
MGB_MUTEX m_id2chksum_mtx; | |||||
typedef void (VarSanityCheck::*input_checker_fn)(cg::OperatorNodeBase*, | typedef void (VarSanityCheck::*input_checker_fn)(cg::OperatorNodeBase*, | ||||
VarNode*); | VarNode*); | ||||
@@ -50,7 +50,7 @@ namespace mgb { | |||||
size_t m_cur_var_idx, m_nr_exec; | size_t m_cur_var_idx, m_nr_exec; | ||||
VarNodeArray m_vars; | VarNodeArray m_vars; | ||||
std::mutex m_var2val_mtx; | |||||
MGB_MUTEX m_var2val_mtx; | |||||
ThinHashMap<VarNode*, std::shared_ptr<DeviceTensorND>> m_var2val; | ThinHashMap<VarNode*, std::shared_ptr<DeviceTensorND>> m_var2val; | ||||
Checker m_checker; | Checker m_checker; | ||||