diff --git a/dnn/src/common/basic_types.cpp b/dnn/src/common/basic_types.cpp index 96de41d3..eeee2479 100644 --- a/dnn/src/common/basic_types.cpp +++ b/dnn/src/common/basic_types.cpp @@ -60,10 +60,10 @@ T deserialize_pod(const std::string& data, size_t& offset) { ErrorHandler* ErrorHandler::sm_inst; ErrorHandler* ErrorHandler::inst() { - static std::mutex mtx; + static DNN_MUTEX mtx; static DefaultErrorHandler default_handler; if (megdnn_unlikely(!sm_inst)) { - std::lock_guard lg{mtx}; + MEGDNN_LOCK_GUARD(mtx); if (!sm_inst) { sm_inst = &default_handler; } diff --git a/dnn/src/common/cv/interp_helper.cpp b/dnn/src/common/cv/interp_helper.cpp index 9506a3f6..d2cc60cd 100644 --- a/dnn/src/common/cv/interp_helper.cpp +++ b/dnn/src/common/cv/interp_helper.cpp @@ -145,7 +145,7 @@ init_inter_tab_1d(InterpolationMode imode, float* tab, int tabsz) { #if MEGDNN_X86 DEF_FUN(const int16_t*) get_linear_ic4_table() { auto table_holder = &sm_tab_linear; - std::lock_guard lg{table_holder->mtx}; + MEGDNN_LOCK_GUARD(table_holder->mtx); float* tab = nullptr; short* itab = nullptr; MEGDNN_MARK_USED_VAR(tab); @@ -175,7 +175,7 @@ DEF_FUN(const void*) get_table(InterpolationMode imode, bool fixpt) { default: megdnn_throw(("unsupported interpolation mode")); } - std::lock_guard lg{table_holder->mtx}; + MEGDNN_LOCK_GUARD(table_holder->mtx); float* tab = nullptr; short* itab = nullptr; diff --git a/dnn/src/common/cv/interp_helper.h b/dnn/src/common/cv/interp_helper.h index c1cf68f0..922a14a4 100644 --- a/dnn/src/common/cv/interp_helper.h +++ b/dnn/src/common/cv/interp_helper.h @@ -134,7 +134,7 @@ private: }; struct TableHolderBase { - std::mutex mtx; + DNN_MUTEX mtx; //! 
get table pointer; return whether already init virtual bool get(float**, int16_t**) = 0; diff --git a/dnn/src/common/elemwise/opr_impl.cpp b/dnn/src/common/elemwise/opr_impl.cpp index 52c01490..96eb820d 100644 --- a/dnn/src/common/elemwise/opr_impl.cpp +++ b/dnn/src/common/elemwise/opr_impl.cpp @@ -39,10 +39,10 @@ using Mode = param::Elemwise::Mode; using ModeTrait = ElemwiseForward::ModeTrait; const ModeTrait& ModeTrait::from_mode(Mode mode) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::vector traits; - std::lock_guard _lock(mtx); + MEGDNN_LOCK_GUARD(mtx); if (traits.empty()) { auto get = [&](Mode m) -> ModeTrait& { diff --git a/dnn/src/common/elemwise_multi_type/opr_impl.cpp b/dnn/src/common/elemwise_multi_type/opr_impl.cpp index ef1ec392..dd2046a1 100644 --- a/dnn/src/common/elemwise_multi_type/opr_impl.cpp +++ b/dnn/src/common/elemwise_multi_type/opr_impl.cpp @@ -28,10 +28,10 @@ void check_dtype(const ModeTrait& trait, size_t i, const TensorLayout& src) { } // anonymous namespace const ModeTrait& ModeTrait::from_mode(Mode mode) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::vector traits; - std::lock_guard _lock(mtx); + MEGDNN_LOCK_GUARD(mtx); auto make_check_dtype_func = [](DType expected) { auto func = [expected](DType dtype) { diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 34cf79c0..1f6431f2 100644 --- a/dnn/src/common/handle_impl.h +++ b/dnn/src/common/handle_impl.h @@ -70,7 +70,7 @@ protected: MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { static_assert(idx < NR_HELPER_OPRS, "invalid idx"); if (!self->m_helper_oprs[idx]) { - std::lock_guard lg{self->m_helper_oprs_mtx}; + MEGDNN_LOCK_GUARD(self->m_helper_oprs_mtx); if (!self->m_helper_oprs[idx]) { self->m_helper_oprs[idx] = self->template create_operator(); @@ -88,7 +88,7 @@ protected: private: std::array, NR_HELPER_OPRS> m_helper_oprs; - std::mutex m_helper_oprs_mtx; + DNN_MUTEX m_helper_oprs_mtx; }; } // namespace megdnn diff --git 
a/dnn/src/common/opr_delegate.h b/dnn/src/common/opr_delegate.h index 0be9f4d4..d5e96b4b 100644 --- a/dnn/src/common/opr_delegate.h +++ b/dnn/src/common/opr_delegate.h @@ -38,7 +38,7 @@ const std::shared_ptr& inplace_cpu_handle(int debug_level = 0); */ template class CpuOprDelegationStorage { - std::mutex m_mtx; + DNN_MUTEX m_mtx; std::shared_ptr m_handle; std::unique_ptr m_oprs[nr_opr]; diff --git a/dnn/src/common/tensor_format.cpp b/dnn/src/common/tensor_format.cpp index ac4736ad..1b700e94 100644 --- a/dnn/src/common/tensor_format.cpp +++ b/dnn/src/common/tensor_format.cpp @@ -604,7 +604,7 @@ TensorLayout LowbitsAlignedTensorFormatBase::collapse_contiguous_spec( TensorFormat Image2DPack4TensorFormat::make_raw( size_t align_axis, size_t align_size_in_elements, Handle::HandleVendorType vendor_type) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::unordered_map> cache; @@ -641,7 +641,7 @@ TensorFormat Image2DPack4TensorFormat::change_axis(size_t axis) const { /* ===================== LowbitsitsAlignedToBytesTensorFormat * ===================== */ TensorFormat LowbitsAlignedToBytesTensorFormat::make(size_t size_nbits) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::unordered_map< uint64_t, std::unique_ptr> cache; diff --git a/dnn/src/common/utils.h b/dnn/src/common/utils.h index 11077216..452477d9 100644 --- a/dnn/src/common/utils.h +++ b/dnn/src/common/utils.h @@ -118,8 +118,17 @@ #define megdnn_layout_msg(layout) \ std::string(#layout "=" + (layout).to_string()) -#define MEGDNN_LOCK_GUARD(var) \ - std::lock_guard> _lock_guard_##var { var } +#if __DEPLOY_ON_XP_SP2__ +#define DNN_MUTEX size_t +#define MEGDNN_LOCK_GUARD(var) MEGDNN_MARK_USED_VAR(var) +#else +#define DNN_MUTEX std::mutex +#define DNN_TOKENPASTE(x, y) x##y +#define DNN_TOKENPASTE2(x, y) DNN_TOKENPASTE(x, y) +#define DNN_LOCK_GUARD_CTOR(mtx) DNN_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) +#define MEGDNN_LOCK_GUARD(mtx) \ + std::lock_guard DNN_LOCK_GUARD_CTOR(mtx) +#endif 
namespace megdnn { @@ -487,7 +496,7 @@ struct _SafeMultipliesImplUnsigned : public std::binary_function { "implicit conversion disallowed in SafeMultiplies"); megdnn_trap(); } -}; +}; // struct _SafeMultipliesImplUnsigned template <> struct SafeMultiplies : public _SafeMultipliesImplUnsigned {}; diff --git a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h index 11ab21aa..8e52a7ec 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h +++ b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h @@ -81,7 +81,7 @@ public: } private: - std::mutex m_mtx; + DNN_MUTEX m_mtx; std::unordered_map, StrategyHasher, StrategyHashKeyEqual> m_map_strategies; @@ -99,4 +99,4 @@ MatrixMulImpl::KernSizeParam get_matmul_kern_param( } // namespace fallback } // namespace megdnn -// vim: syntax=cpp.doxygen \ No newline at end of file +// vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/conv_bias/im2col/factory.h b/dnn/src/fallback/conv_bias/im2col/factory.h index f7daef77..bbaa0de2 100644 --- a/dnn/src/fallback/conv_bias/im2col/factory.h +++ b/dnn/src/fallback/conv_bias/im2col/factory.h @@ -110,7 +110,7 @@ struct StrategyHashParamEqual { }; class StrategyDelegationStorage { - std::mutex m_mtx; + DNN_MUTEX m_mtx; std::unordered_map, StrategyHashParamHash, StrategyHashParamEqual> map_strategys; diff --git a/dnn/src/naive/sleep/opr_impl.cpp b/dnn/src/naive/sleep/opr_impl.cpp index db1c167d..ca1ebb60 100644 --- a/dnn/src/naive/sleep/opr_impl.cpp +++ b/dnn/src/naive/sleep/opr_impl.cpp @@ -11,6 +11,10 @@ #include "./opr_impl.h" +#if __DEPLOY_ON_XP_SP2__ +#define MEGDNN_NO_THREAD 1 +#endif + #include "src/naive/handle.h" #if !MEGDNN_NO_THREAD #include @@ -20,10 +24,10 @@ namespace megdnn { namespace naive { void SleepForwardImpl::exec() { - double seconds = m_param.time; #if MEGDNN_NO_THREAD megdnn_trap(); #else + double seconds = m_param.time; MEGDNN_DISPATCH_CPU_KERN_OPR( std::this_thread::sleep_for(std::chrono::microseconds( static_cast(seconds 
* 1e6)));); diff --git a/sdk/load-and-run/src/infile_persistent_cache.h b/sdk/load-and-run/src/infile_persistent_cache.h index 33fcc2c2..d9dc5bf0 100644 --- a/sdk/load-and-run/src/infile_persistent_cache.h +++ b/sdk/load-and-run/src/infile_persistent_cache.h @@ -52,7 +52,7 @@ class InFilePersistentCache final : public PersistentCache { std::unordered_map> m_cache; - std::mutex m_mtx; + MGB_MUTEX m_mtx; template void read_cache(Input& inp); diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp index d4bad933..cca03b65 100644 --- a/src/core/impl/comp_node/comp_node.cpp +++ b/src/core/impl/comp_node/comp_node.cpp @@ -32,7 +32,7 @@ namespace { std::atomic_flag g_default_cpu_initialized, g_exit_handler_registered[CompNode::NR_DEVICE_TYPE]; - std::mutex g_device_map_mtx; + MGB_MUTEX g_device_map_mtx; ThinHashMap> g_device_map; CompNode::DeviceType g_unspec_locator_type; diff --git a/src/core/impl/comp_node/cpu/comp_node.cpp b/src/core/impl/comp_node/cpu/comp_node.cpp index 92633e15..0ac86e7d 100644 --- a/src/core/impl/comp_node/cpu/comp_node.cpp +++ b/src/core/impl/comp_node/cpu/comp_node.cpp @@ -60,7 +60,11 @@ class CpuCompNode::WorkerQueue final sys::set_cpu_affinity({m_locator.device}); #endif } +#if __DEPLOY_ON_XP_SP2__ + __builtin_trap(); +#else sys::set_thread_name(m_locator.to_string()); +#endif } void on_sync_all_task_finish() override { @@ -830,7 +834,9 @@ struct CpuCompNode::Pool { void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); } }; +#if !__DEPLOY_ON_XP_SP2__ std::recursive_mutex mtx; +#endif // use global memory pool to ensuare object memory accessible even after // global finalize std::aligned_storage_t callback) { for (size_t i = 0;; ++i) { CompNode cur; { +#if !__DEPLOY_ON_XP_SP2__ MGB_LOCK_GUARD(sm_pool->mtx); +#endif if (i >= sm_pool->nr_used_impl_storage) return; cur = make_comp_node_from_impl( @@ -909,7 +917,9 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator, locator.device == 
Locator::DEVICE_MULTITHREAD_DEFAULT, "failed to load cpu for device:%d stream:%d", locator.device, locator.stream); +#if !__DEPLOY_ON_XP_SP2__ MGB_LOCK_GUARD(sm_pool->mtx); +#endif // encode both device ID and type into a int mgb_assert(locator_logical.device >= -1 || @@ -967,7 +977,9 @@ void CpuCompNode::sync_all() { if (!sm_pool) return; +#if !__DEPLOY_ON_XP_SP2__ MGB_LOCK_GUARD(sm_pool->mtx); +#endif for (auto&& i : sm_pool->locator2impl) i.second->sync(); for (auto&& i : sm_pool->locator2impl_multi_thread) @@ -1049,7 +1061,9 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::do_device_wait_by( auto waiter = [this, version]() { while (m_record_nr_finish.load(std::memory_order_acquire) < version) { +#if !__DEPLOY_ON_XP_SP2__ std::unique_lock lk{m_dev_wait_mtx}; +#endif if (m_record_nr_finish.load(std::memory_order_acquire) >= version) { break; } @@ -1078,10 +1092,12 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::on_finish() { } m_record_nr_finish.fetch_add(1, std::memory_order_release); +#if !__DEPLOY_ON_XP_SP2__ if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) { MGB_LOCK_GUARD(m_dev_wait_mtx); m_dev_wait_cv.notify_all(); } +#endif } bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() { @@ -1100,11 +1116,15 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::host_wait_cv() { m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release); for (;;) { +#if !__DEPLOY_ON_XP_SP2__ std::unique_lock lock{m_dev_wait_mtx}; +#endif if (finished()) { break; } +#if !__DEPLOY_ON_XP_SP2__ m_dev_wait_cv.wait(lock); +#endif } m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release); } diff --git a/src/core/impl/comp_node/impl_helper.cpp b/src/core/impl/comp_node/impl_helper.cpp index 8101c97b..585a83e1 100644 --- a/src/core/impl/comp_node/impl_helper.cpp +++ b/src/core/impl/comp_node/impl_helper.cpp @@ -45,9 +45,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() { return; } if (sm_cpu_sync_level >= 1) { +#if __DEPLOY_ON_XP_SP2__ +#if 
MGB_HAVE_THREAD + __builtin_trap(); +#else + return; +#endif +#else while (!finished()) { std::this_thread::yield(); } +#endif return; } mgb_assert(!sm_cpu_sync_level, "invalid cpu sync level: %d", @@ -57,9 +65,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() { } void CompNodeImplHelper::EventImplHelper::host_wait_cv() { +#if __DEPLOY_ON_XP_SP2__ +#if MGB_HAVE_THREAD + __builtin_trap(); +#else + return; +#endif +#else while (!finished()) { std::this_thread::yield(); } +#endif } double CompNodeImplHelper::EventImplHelper::elapsed_time_until(Event& end_) { diff --git a/src/core/impl/comp_node/impl_helper.h b/src/core/impl/comp_node/impl_helper.h index a1d4f1e6..62df1da9 100644 --- a/src/core/impl/comp_node/impl_helper.h +++ b/src/core/impl/comp_node/impl_helper.h @@ -49,7 +49,7 @@ namespace mgb { * been performed. */ class CompNodeImplHelper::EventImplHelper: public Event { - std::mutex m_mtx; + MGB_MUTEX m_mtx; bool m_recorded = false, m_finished = false; diff --git a/src/core/impl/comp_node/mem_alloc/impl.cpp b/src/core/impl/comp_node/mem_alloc/impl.cpp index 88c21225..75a8cfad 100644 --- a/src/core/impl/comp_node/mem_alloc/impl.cpp +++ b/src/core/impl/comp_node/mem_alloc/impl.cpp @@ -59,11 +59,15 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc( size_t size, bool allow_from_parent, bool log_stat_on_error) { mgb_assert(size); +#if !__DEPLOY_ON_XP_SP2__ m_mutex.lock(); +#endif auto iter = m_free_blk_size.lower_bound(FreeBlock{MemAddr{0, 0}, size}); if (iter == m_free_blk_size.end()) { +#if !__DEPLOY_ON_XP_SP2__ m_mutex.unlock(); +#endif if (!allow_from_parent) { if (log_stat_on_error) { print_memory_state(); @@ -87,7 +91,9 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc( if (remain) insert_free_unsafe({alloc_addr + size, remain}); +#if !__DEPLOY_ON_XP_SP2__ m_mutex.unlock(); +#endif return alloc_addr; } @@ -267,7 +273,9 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { { // sleep to wait for async 
dealloc using namespace std::literals; +#if !__DEPLOY_ON_XP_SP2__ std::this_thread::sleep_for(0.2s); +#endif } get = gather_stream_free_blk_and_release_full(); mgb_log("device %d: sync all device and try to " diff --git a/src/core/impl/comp_node/mem_alloc/impl.h b/src/core/impl/comp_node/mem_alloc/impl.h index 11e1de00..e5c08abe 100644 --- a/src/core/impl/comp_node/mem_alloc/impl.h +++ b/src/core/impl/comp_node/mem_alloc/impl.h @@ -73,7 +73,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { //! map from address to size and size iter std::map m_free_blk_addr; - std::mutex m_mutex; + MGB_MUTEX m_mutex; struct BlkByAddrIter { decltype(m_free_blk_addr.begin()) aiter; diff --git a/src/core/impl/graph/cg_impl_seq.cpp b/src/core/impl/graph/cg_impl_seq.cpp index c59604f4..1d5444d6 100644 --- a/src/core/impl/graph/cg_impl_seq.cpp +++ b/src/core/impl/graph/cg_impl_seq.cpp @@ -48,7 +48,11 @@ class ComputingGraphImpl::ComputingSequence::ExecContext { std::unique_ptr m_recorder; bool has_var_sanity_check() const { +#if __DEPLOY_ON_XP_SP2__ + return false; +#else return static_cast(m_comp_seq->m_var_sanity_check); +#endif } void try_reset_recorder() { @@ -305,10 +309,12 @@ void ComputingGraphImpl::ComputingSequence::preprocess(ExecContext* ctx) { m_owner_graph->var_node_mem_manager().alloc_var_node_mem_static(); bool first_exec = m_first_exec; +#if !__DEPLOY_ON_XP_SP2__ if (!first_exec) { // var sanity check only for first run m_var_sanity_check.reset(); } +#endif m_owner_graph->event().signal_inplace( m_owner_graph, this, &ctx->m_cleanup_callback, &m_used_comp_node, @@ -342,9 +348,13 @@ void ComputingGraphImpl::ComputingSequence::attach_to_graph() { static_cast(gimpl->m_current_comp_seq); prev_seq->cleanup(); } +#if !__DEPLOY_ON_XP_SP2__ + //! disable VarSanityCheck when __DEPLOY_ON_XP_SP2__=1. caused by + //! 
VarSanityCheck depends on std::thread if (gimpl->options().var_sanity_check_first_run) { m_var_sanity_check = std::make_unique(gimpl); } +#endif gimpl->m_current_comp_seq = this; } @@ -403,7 +413,9 @@ void ComputingGraphImpl::ComputingSequence::do_wait(bool explicit_user_wait) { } void ComputingGraphImpl::ComputingSequence::cleanup() { +#if !__DEPLOY_ON_XP_SP2__ m_var_sanity_check.reset(); +#endif if (has_uncaught_exception()) { mgb_log_warn( "fallback to simple graph waiting in dtor due to uncaught " diff --git a/src/core/impl/graph/cg_impl_seq.h b/src/core/impl/graph/cg_impl_seq.h index 47818a6f..f13e50e9 100644 --- a/src/core/impl/graph/cg_impl_seq.h +++ b/src/core/impl/graph/cg_impl_seq.h @@ -30,7 +30,9 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { size_t m_run_id = 0; size_t m_cg_event_version = 0; mutable Maybe m_prev_exec_time; +#if !__DEPLOY_ON_XP_SP2__ std::unique_ptr m_var_sanity_check; +#endif std::unique_ptr m_comp_node_seq_recorder; NormalExecEnv m_exec_env; @@ -46,7 +48,7 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { class ExecContext; std::unique_ptr m_async_exc; - std::mutex m_async_exc_mutex; + MGB_MUTEX m_async_exc_mutex; /*! 
* \brief check whether recording comp seq is enabled diff --git a/src/core/impl/graph/operator_node.cpp b/src/core/impl/graph/operator_node.cpp index 202e5876..d5bc5502 100644 --- a/src/core/impl/graph/operator_node.cpp +++ b/src/core/impl/graph/operator_node.cpp @@ -713,7 +713,9 @@ void PostExecActions::perform() { for (auto&& i : m_items) { if (enable) { +#if !__DEPLOY_ON_XP_SP2__ VarSanityCheck::check_var_after_exec(i.var, *i.recv_info); +#endif if (i.shape_sync_hdl) i.shape_sync_hdl->sync_from_var(); diff --git a/src/core/impl/graph/static_infer_impl.cpp b/src/core/impl/graph/static_infer_impl.cpp index 908e9318..9562b58c 100644 --- a/src/core/impl/graph/static_infer_impl.cpp +++ b/src/core/impl/graph/static_infer_impl.cpp @@ -141,7 +141,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final, TagTraitBase) // { struct InferResultCache { Spinlock mtx; +#if __DEPLOY_ON_XP_SP2__ + ThinHashMap storage; +#else ThinHashMap storage; +#endif }; static TagTraitArray sm_empty_deps; static InferResultCache sm_result_cache; @@ -167,7 +171,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final, { // thread_local not supported on ios; so we us a manual impl MGB_LOCK_GUARD(sm_result_cache.mtx); +#if __DEPLOY_ON_XP_SP2__ + ret = &sm_result_cache.storage[0]; +#else ret = &sm_result_cache.storage[std::this_thread::get_id()]; +#endif } ret->m_shape = &tag()->shape(); return ret; diff --git a/src/core/impl/graph/static_infer_impl.h b/src/core/impl/graph/static_infer_impl.h index 7594851f..f2af0e2c 100644 --- a/src/core/impl/graph/static_infer_impl.h +++ b/src/core/impl/graph/static_infer_impl.h @@ -122,7 +122,7 @@ class StaticInferManagerImpl final: public StaticInferManager { struct TagTraitContainer; ComputingGraph * const m_owner_graph; - std::recursive_mutex m_mtx; + MGB_RECURSIVE_MUTEX m_mtx; //! 
callbacks to be invoked in destructor ThinHashMap> m_dtor_callbacks; diff --git a/src/core/impl/graph/var_node.cpp b/src/core/impl/graph/var_node.cpp index 4a2ab692..596313c6 100644 --- a/src/core/impl/graph/var_node.cpp +++ b/src/core/impl/graph/var_node.cpp @@ -20,7 +20,7 @@ using namespace cg; /* ===================== MemAllocPlan ===================== */ -std::mutex MemAllocPlan::ReadonlyFwdList::list_mutex; +MGB_MUTEX MemAllocPlan::ReadonlyFwdList::list_mutex; void MemAllocPlan::ReadonlyFwdList::reset() { MGB_LOCK_GUARD(list_mutex); diff --git a/src/core/impl/graph/var_node_mem_mgr.h b/src/core/impl/graph/var_node_mem_mgr.h index be69da17..d657a0b9 100644 --- a/src/core/impl/graph/var_node_mem_mgr.h +++ b/src/core/impl/graph/var_node_mem_mgr.h @@ -440,7 +440,7 @@ class VarNodeMemManager { ImpureMemPlanManager m_impure_mem_plan_mgr; - std::mutex m_dynamic_alloc_mtx; + MGB_MUTEX m_dynamic_alloc_mtx; const size_t* m_run_id_ptr = nullptr; SyncableCounter m_cpu_async_release_barrier; diff --git a/src/core/impl/system.cpp b/src/core/impl/system.cpp index a9e695d8..8673d6a7 100644 --- a/src/core/impl/system.cpp +++ b/src/core/impl/system.cpp @@ -19,7 +19,13 @@ using namespace mgb; using namespace sys; int sys::get_cpu_count() { +#if __DEPLOY_ON_XP_SP2__ + //! when deploy on xp sp2, we only support single thread + //! 
so just return 1 even cpu number greater than 1 + return 1; +#else return std::max(std::thread::hardware_concurrency(), 1u); +#endif } #if defined(WIN32) @@ -153,9 +159,11 @@ bool sys::stderr_ansi_color() { void sys::set_thread_name(const std::string &) { } +#if !__DEPLOY_ON_XP_SP2__ std::string sys::get_thread_name(Maybe) { return "@"; } +#endif namespace { class FakeTimedFuncInvoker final: public TimedFuncInvoker { @@ -254,6 +262,7 @@ void sys::set_thread_name(const std::string &name) { #endif } +#if !__DEPLOY_ON_XP_SP2__ std::string sys::get_thread_name(Maybe tid_) { #if MGB_ENABLE_DEBUG_UTIL MGB_LOCK_GUARD(thread_name_map_lock); @@ -269,10 +278,11 @@ std::string sys::get_thread_name(Maybe tid_) { return ""; #endif } +#endif namespace { -class TimedFuncInvokerImpl final: public TimedFuncInvoker { +class TimedFuncInvokerImpl final : public TimedFuncInvoker { /* * server-client protocol: * @@ -308,7 +318,7 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { bool m_watcher_should_stop = false; std::condition_variable m_watcher_stop_cv; - std::mutex m_watcher_stop_mtx, m_global_mtx; + MGB_MUTEX m_watcher_stop_mtx, m_global_mtx; void clear_sock_fd() { if (m_peer_fd) @@ -567,8 +577,10 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { auto start = high_resolution_clock::now(), end = start + timeout_due; for (; ; ) { +#if !__DEPLOY_ON_XP_SP2__ std::unique_lock lk(m_watcher_stop_mtx); m_watcher_stop_cv.wait_until(lk, end); +#endif if (m_watcher_should_stop) return false; @@ -603,10 +615,9 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { } MGB_CATCH(..., {}); clear_sock_fd(); } - }; -} // anonymous namespace +} // anonymous namespace TimedFuncInvoker& TimedFuncInvoker::ins() { static TimedFuncInvokerImpl impl; diff --git a/src/core/include/megbrain/common.h b/src/core/include/megbrain/common.h index 5283e1b8..1972c14c 100644 --- a/src/core/include/megbrain/common.h +++ b/src/core/include/megbrain/common.h @@ -205,6 +205,21 @@ void 
__log__(LogLevel level, const char *file, const char *func, int line, #define MGB_TOKENPASTE2(x, y) MGB_TOKENPASTE(x, y) #define MGB_LOCK_GUARD_CTOR(mtx) MGB_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) +#if __DEPLOY_ON_XP_SP2__ +//! refer to +//! https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160 +//! XP SP2 does not fully support the VC runtime, because KERNEL32.dll does not +//! implement some base APIs needed by C++ std facilities, for example +//! std::mutex/std::thread/std::condition_variable. As a workaround, we +//! disable some MegEngine features on XP SP2, for example multi-threading. +#define MGB_MUTEX size_t +#define MGB_RECURSIVE_MUTEX size_t +#define MGB_LOCK_GUARD(mtx) MGB_MARK_USED_VAR(mtx) +#define MGB_LOCK_GUARD_UNIQUE(mtx) MGB_MARK_USED_VAR(mtx) +#define MGB_LOCK_GUARD_SHARED(mtx) MGB_MARK_USED_VAR(mtx) +#else +#define MGB_MUTEX std::mutex +#define MGB_RECURSIVE_MUTEX std::recursive_mutex #define MGB_LOCK_GUARD(mtx) \ std::lock_guard MGB_LOCK_GUARD_CTOR(mtx) @@ -212,7 +227,8 @@ void __log__(LogLevel level, const char *file, const char *func, int line, std::unique_lock MGB_LOCK_GUARD_CTOR(mtx) #define MGB_LOCK_GUARD_SHARED(mtx) \ - std::shared_lock MGB_LOCK_GUARD_CTOR(mtx) + std::shared_lock MGB_LOCK_GUARD_CTOR(mtx) +#endif /*! 
* \brief printf-like std::string constructor diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h index 0ef2fdb6..fedb7fae 100644 --- a/src/core/include/megbrain/graph/var_node.h +++ b/src/core/include/megbrain/graph/var_node.h @@ -222,7 +222,7 @@ class MemAllocPlan final: public json::Serializable, public NonCopyableObj { private: class ReadonlyFwdList { MemAllocPlan *m_prev = nullptr, *m_next = nullptr; - static std::mutex list_mutex; + static MGB_MUTEX list_mutex; public: MemAllocPlan* next() const { return m_next; } void reset(); diff --git a/src/core/include/megbrain/system.h b/src/core/include/megbrain/system.h index 66b83690..1b2cd47e 100644 --- a/src/core/include/megbrain/system.h +++ b/src/core/include/megbrain/system.h @@ -27,11 +27,13 @@ namespace sys { //! set name of caller thread void set_thread_name(const std::string &name); +#if !__DEPLOY_ON_XP_SP2__ /*! * \brief get name of of given thread * \param tid thread id, or None to for the caller thread */ std::string get_thread_name(Maybe tid = None); +#endif //! 
get number of CPU cores on this system int get_cpu_count(); diff --git a/src/core/include/megbrain/utils/async_worker_impl_0.h b/src/core/include/megbrain/utils/async_worker_impl_0.h index 2801a188..6dd96437 100644 --- a/src/core/include/megbrain/utils/async_worker_impl_0.h +++ b/src/core/include/megbrain/utils/async_worker_impl_0.h @@ -35,14 +35,20 @@ class AsyncWorkerSet final: public NonCopyableObj { }; class FutureThreadPoolBase : public NonCopyableObj { +#if !__DEPLOY_ON_XP_SP2__ std::vector m_ids; +#endif public: FutureThreadPoolBase(const Maybe& = None) {} +#if __DEPLOY_ON_XP_SP2__ + size_t start(size_t concurrency) { return concurrency; } +#else const std::vector& start(size_t concurrency) { m_ids.resize(concurrency, std::this_thread::get_id()); return m_ids; } +#endif void stop() { } diff --git a/src/core/include/megbrain/utils/event.h b/src/core/include/megbrain/utils/event.h index a6ce7c19..66a33cff 100644 --- a/src/core/include/megbrain/utils/event.h +++ b/src/core/include/megbrain/utils/event.h @@ -53,7 +53,7 @@ class SyncEventConnecter: public NonCopyableObj { using ReceiverMap = ThinHashMap; bool m_is_empty = true; - std::mutex m_mtx; + MGB_MUTEX m_mtx; //! map from type to receiver; use shared_ptr because it would be kept by //! handlers std::shared_ptr m_receiver_map = diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h index 523a4ad6..4871dcce 100644 --- a/src/core/include/megbrain/utils/persistent_cache.h +++ b/src/core/include/megbrain/utils/persistent_cache.h @@ -83,7 +83,7 @@ namespace mgb { std::string, std::unordered_map> m_cache; - std::mutex m_mtx; + MGB_MUTEX m_mtx; }; /*! 
diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp index d7554fb6..ee63cafb 100644 --- a/src/opr/impl/basic_arith.cpp +++ b/src/opr/impl/basic_arith.cpp @@ -33,7 +33,7 @@ namespace { template class StaticInferOpr { intl::UniqPtrWithCN m_opr; - std::mutex m_mtx; + MGB_MUTEX m_mtx; public: class Lock { @@ -43,7 +43,9 @@ namespace { explicit Lock(StaticInferOpr *owner): m_owner{owner} { +#if !__DEPLOY_ON_XP_SP2__ m_owner->m_mtx.lock(); +#endif } public: @@ -54,8 +56,10 @@ namespace { } ~Lock() { +#if !__DEPLOY_ON_XP_SP2__ if (m_owner) m_owner->m_mtx.unlock(); +#endif } Lock& operator = (const Lock &) = delete; diff --git a/src/opr/impl/internal/indexing_helper.cpp b/src/opr/impl/internal/indexing_helper.cpp index db4f9087..29ccf9b6 100644 --- a/src/opr/impl/internal/indexing_helper.cpp +++ b/src/opr/impl/internal/indexing_helper.cpp @@ -277,7 +277,7 @@ SubTensorSpec FancyIndexingHelper::fancy_indexing_make_sub_spec( mgb_assert(m_require_scalar_index || !fake_single_idx); static DeviceTensorND fake_val; - static std::mutex fake_val_mtx; + static MGB_MUTEX fake_val_mtx; if (mgb_unlikely(fake_val.empty())) { MGB_LOCK_GUARD(fake_val_mtx); diff --git a/src/opr/impl/internal/megdnn_opr_wrapper.cpp b/src/opr/impl/internal/megdnn_opr_wrapper.cpp index a920dcbe..ae34d63d 100644 --- a/src/opr/impl/internal/megdnn_opr_wrapper.cpp +++ b/src/opr/impl/internal/megdnn_opr_wrapper.cpp @@ -53,7 +53,7 @@ namespace { MGB_TYPEINFO_OBJ_DECL; public: - std::mutex mtx; + MGB_MUTEX mtx; CompNode::UnorderedMap cn2storage; }; MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer); diff --git a/src/opr/impl/io.cpp b/src/opr/impl/io.cpp index 316e4969..4a691213 100644 --- a/src/opr/impl/io.cpp +++ b/src/opr/impl/io.cpp @@ -377,7 +377,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(SharedDeviceTensorWithFormat); MGB_DYN_TYPE_OBJ_FINAL_IMPL(ImmutableTensor); class ImmutableTensor::Value { - std::mutex m_mtx; + MGB_MUTEX m_mtx; DeviceTensorND m_dev, m_static_infer; std::string m_summary; @@ -527,7 
+527,7 @@ class ImmutableTensor::DevValueCache final: public UserDataContainer::UserData { std::unordered_map m_tensor2val; std::unordered_map m_scalar2val; - std::mutex m_mtx; + MGB_MUTEX m_mtx; void setup_value(Value &dest, const HostTensorND &val) { dest.setup(m_comp_node, val); diff --git a/src/opr/impl/loop/impl.cpp b/src/opr/impl/loop/impl.cpp index d2f6b61b..3207104c 100644 --- a/src/opr/impl/loop/impl.cpp +++ b/src/opr/impl/loop/impl.cpp @@ -888,7 +888,7 @@ class LoopImpl::MutableStateSaver::Recorder final: public NonCopyableObj { //! mutex for m_saved_buckets, used between copy_bucket_to_host() and the //! async copy task in m_copy_threadpool - std::mutex m_saved_buckets_mtx; + MGB_MUTEX m_saved_buckets_mtx; //! see on_fwd_finish() TensorShape m_var_shape; bool m_enabled = false; diff --git a/src/opr/impl/search_policy/profiler.cpp b/src/opr/impl/search_policy/profiler.cpp index d2272abe..379c208e 100644 --- a/src/opr/impl/search_policy/profiler.cpp +++ b/src/opr/impl/search_policy/profiler.cpp @@ -356,7 +356,9 @@ typename TimedProfiler::TResult TimedProfiler::prof_impl( next_report_time = timer.get_secs() + 1; } using namespace std::literals; +#if !__DEPLOY_ON_XP_SP2__ std::this_thread::sleep_for(1000us); +#endif } // release all free blocks owned by child process, // in order to avoid main process running out of memory diff --git a/src/opr/impl/utility.cpp b/src/opr/impl/utility.cpp index 808e858f..2aa82fd2 100644 --- a/src/opr/impl/utility.cpp +++ b/src/opr/impl/utility.cpp @@ -731,7 +731,7 @@ class PersistentOutputStorage::StorageHolder final key.second); } }; - std::mutex m_mtx; + MGB_MUTEX m_mtx; std::unordered_map m_storage; public: diff --git a/src/plugin/impl/var_value_checker.cpp b/src/plugin/impl/var_value_checker.cpp index 04ae4c7d..6bf07464 100644 --- a/src/plugin/impl/var_value_checker.cpp +++ b/src/plugin/impl/var_value_checker.cpp @@ -125,9 +125,13 @@ void VarValueChecker::on_var_computed(VarNode *var) { } if (!m_init_val_dumped) { +#if 
!__DEPLOY_ON_XP_SP2__ m_var2val_mtx.lock(); - auto &&val = m_var2val[var]; +#endif + auto&& val = m_var2val[var]; +#if !__DEPLOY_ON_XP_SP2__ m_var2val_mtx.unlock(); +#endif mgb_assert(!val); val = std::make_shared(); diff --git a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h index cacca348..fb30563c 100644 --- a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h +++ b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h @@ -22,7 +22,7 @@ namespace mgb { * This is intended to find potential bugs in megdnn. */ class CPUDispatchChecker final: public PluginBase { - std::mutex m_cn2nr_task_mtx, + MGB_MUTEX m_cn2nr_task_mtx, m_failed_oprs_mtx_storage, *m_failed_oprs_mtx = &m_failed_oprs_mtx_storage; CompNode::UnorderedMap m_cn2nr_task; diff --git a/src/plugin/include/megbrain/plugin/opr_io_dump.h b/src/plugin/include/megbrain/plugin/opr_io_dump.h index 84bc55de..00dfe0cc 100644 --- a/src/plugin/include/megbrain/plugin/opr_io_dump.h +++ b/src/plugin/include/megbrain/plugin/opr_io_dump.h @@ -60,7 +60,7 @@ class TextOprIODump final : public OprIODumpBase { bool m_print_addr = true; std::shared_ptr m_fout; size_t m_max_size = 5; - std::mutex m_mtx; + MGB_MUTEX m_mtx; std::unique_ptr m_lazy_value; void dump_var(VarNode* var, bool lazy_sync) override; diff --git a/src/plugin/include/megbrain/plugin/var_sanity_check.h b/src/plugin/include/megbrain/plugin/var_sanity_check.h index 7f32cc8e..f18b0dde 100644 --- a/src/plugin/include/megbrain/plugin/var_sanity_check.h +++ b/src/plugin/include/megbrain/plugin/var_sanity_check.h @@ -64,7 +64,7 @@ class VarSanityCheck final : public PluginBase { //! map from caller thread to workspace map ThinHashMap m_workspace; - std::mutex m_workspace_mtx; + MGB_MUTEX m_workspace_mtx; ThinHashMap m_var2chksum; /*! 
the ids of varnodes that have been modified by recv_opr @@ -72,7 +72,7 @@ class VarSanityCheck final : public PluginBase { * cg::OperatorNodeBase::NodeProp::Flag:: FORCE_UPDATE_INPUT_VAR. */ ThinHashSet m_modified_vars; - std::mutex m_id2chksum_mtx; + MGB_MUTEX m_id2chksum_mtx; typedef void (VarSanityCheck::*input_checker_fn)(cg::OperatorNodeBase*, VarNode*); diff --git a/src/plugin/include/megbrain/plugin/var_value_checker.h b/src/plugin/include/megbrain/plugin/var_value_checker.h index b9b356f7..2041a337 100644 --- a/src/plugin/include/megbrain/plugin/var_value_checker.h +++ b/src/plugin/include/megbrain/plugin/var_value_checker.h @@ -50,7 +50,7 @@ namespace mgb { size_t m_cur_var_idx, m_nr_exec; VarNodeArray m_vars; - std::mutex m_var2val_mtx; + MGB_MUTEX m_var2val_mtx; ThinHashMap> m_var2val; Checker m_checker;