From c68e669530ee557fdebf7e7d2742f9351914bcc6 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 2 Aug 2021 14:30:30 +0800 Subject: [PATCH] feat(bazel/windows/xp/sp2/inference): implement inference on windows xp (os version >= sp2) build with bazel * bazel build support(define __DEPLOY_ON_XP_SP2__ when deploy on xp sp2): (dbg)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp' --compiler='clang_cl' -c dbg --copt "-D__DEPLOY_ON_XP_SP2__=1" (opt)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp' --compiler='clang_cl' -c opt --copt "-D__DEPLOY_ON_XP_SP2__=1" * internal behavior: will define MGB_HAVE_THREAD=0 when __DEPLOY_ON_XP_SP2__ is enabled * refer to https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160 xp sp2(x86) does not support the vc runtime fully, caused by KERNEL32.dll not implementing some base apis for c++ std functions, for example, std::mutex/std::thread/std::condition_variable. As a workaround, we will disable some MegEngine features on xp sp2 env, for example, multi-thread etc! 
* about DNN_MUTEX/MGB_MUTEX, if your code will build in inference code (even CPU backends), please replace std::mutex to DNN_MUTEX/MGB_MUTEX, * about multi-thread, if you code need multi-thread support, please enable it when MGB_HAVE_THREAD=1 * about test build env status 1: Visual Studio 2019(MSVC version <= 14.26.28801)---- pass 2: Visual Studio 2019(MSVC version > 14.26.28801) ---- failed caused by this 'new' version will put VCR depends on win7 KERNEL32.DLL, this may be fixed at Visual Studio 2019 later version but we do not test at this MR merge point 3: Visual Studio 2017 ---------- pass 4: Visual Studio 2014 ---------- pass GitOrigin-RevId: 65ac48b95e99f2c510fe5db449cc8182d682e113 --- dnn/src/common/basic_types.cpp | 4 ++-- dnn/src/common/cv/interp_helper.cpp | 4 ++-- dnn/src/common/cv/interp_helper.h | 2 +- dnn/src/common/elemwise/opr_impl.cpp | 4 ++-- dnn/src/common/elemwise_multi_type/opr_impl.cpp | 4 ++-- dnn/src/common/handle_impl.h | 4 ++-- dnn/src/common/opr_delegate.h | 2 +- dnn/src/common/tensor_format.cpp | 4 ++-- dnn/src/common/utils.h | 15 ++++++++++++--- dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h | 4 ++-- dnn/src/fallback/conv_bias/im2col/factory.h | 2 +- dnn/src/naive/sleep/opr_impl.cpp | 6 +++++- sdk/load-and-run/src/infile_persistent_cache.h | 2 +- src/core/impl/comp_node/comp_node.cpp | 2 +- src/core/impl/comp_node/cpu/comp_node.cpp | 20 ++++++++++++++++++++ src/core/impl/comp_node/impl_helper.cpp | 16 ++++++++++++++++ src/core/impl/comp_node/impl_helper.h | 2 +- src/core/impl/comp_node/mem_alloc/impl.cpp | 8 ++++++++ src/core/impl/comp_node/mem_alloc/impl.h | 2 +- src/core/impl/graph/cg_impl_seq.cpp | 12 ++++++++++++ src/core/impl/graph/cg_impl_seq.h | 4 +++- src/core/impl/graph/operator_node.cpp | 2 ++ src/core/impl/graph/static_infer_impl.cpp | 8 ++++++++ src/core/impl/graph/static_infer_impl.h | 2 +- src/core/impl/graph/var_node.cpp | 2 +- src/core/impl/graph/var_node_mem_mgr.h | 2 +- src/core/impl/system.cpp | 19 
+++++++++++++++---- src/core/include/megbrain/common.h | 18 +++++++++++++++++- src/core/include/megbrain/graph/var_node.h | 2 +- src/core/include/megbrain/system.h | 2 ++ .../include/megbrain/utils/async_worker_impl_0.h | 6 ++++++ src/core/include/megbrain/utils/event.h | 2 +- src/core/include/megbrain/utils/persistent_cache.h | 2 +- src/opr/impl/basic_arith.cpp | 6 +++++- src/opr/impl/internal/indexing_helper.cpp | 2 +- src/opr/impl/internal/megdnn_opr_wrapper.cpp | 2 +- src/opr/impl/io.cpp | 4 ++-- src/opr/impl/loop/impl.cpp | 2 +- src/opr/impl/search_policy/profiler.cpp | 2 ++ src/opr/impl/utility.cpp | 2 +- src/plugin/impl/var_value_checker.cpp | 6 +++++- .../include/megbrain/plugin/cpu_dispatch_checker.h | 2 +- src/plugin/include/megbrain/plugin/opr_io_dump.h | 2 +- .../include/megbrain/plugin/var_sanity_check.h | 4 ++-- .../include/megbrain/plugin/var_value_checker.h | 2 +- 45 files changed, 176 insertions(+), 50 deletions(-) diff --git a/dnn/src/common/basic_types.cpp b/dnn/src/common/basic_types.cpp index 96de41d3..eeee2479 100644 --- a/dnn/src/common/basic_types.cpp +++ b/dnn/src/common/basic_types.cpp @@ -60,10 +60,10 @@ T deserialize_pod(const std::string& data, size_t& offset) { ErrorHandler* ErrorHandler::sm_inst; ErrorHandler* ErrorHandler::inst() { - static std::mutex mtx; + static DNN_MUTEX mtx; static DefaultErrorHandler default_handler; if (megdnn_unlikely(!sm_inst)) { - std::lock_guard lg{mtx}; + MEGDNN_LOCK_GUARD(mtx); if (!sm_inst) { sm_inst = &default_handler; } diff --git a/dnn/src/common/cv/interp_helper.cpp b/dnn/src/common/cv/interp_helper.cpp index 9506a3f6..d2cc60cd 100644 --- a/dnn/src/common/cv/interp_helper.cpp +++ b/dnn/src/common/cv/interp_helper.cpp @@ -145,7 +145,7 @@ init_inter_tab_1d(InterpolationMode imode, float* tab, int tabsz) { #if MEGDNN_X86 DEF_FUN(const int16_t*) get_linear_ic4_table() { auto table_holder = &sm_tab_linear; - std::lock_guard lg{table_holder->mtx}; + MEGDNN_LOCK_GUARD(table_holder->mtx); float* tab = 
nullptr; short* itab = nullptr; MEGDNN_MARK_USED_VAR(tab); @@ -175,7 +175,7 @@ DEF_FUN(const void*) get_table(InterpolationMode imode, bool fixpt) { default: megdnn_throw(("unsupported interpolation mode")); } - std::lock_guard lg{table_holder->mtx}; + MEGDNN_LOCK_GUARD(table_holder->mtx); float* tab = nullptr; short* itab = nullptr; diff --git a/dnn/src/common/cv/interp_helper.h b/dnn/src/common/cv/interp_helper.h index c1cf68f0..922a14a4 100644 --- a/dnn/src/common/cv/interp_helper.h +++ b/dnn/src/common/cv/interp_helper.h @@ -134,7 +134,7 @@ private: }; struct TableHolderBase { - std::mutex mtx; + DNN_MUTEX mtx; //! get table pointer; return whether already init virtual bool get(float**, int16_t**) = 0; diff --git a/dnn/src/common/elemwise/opr_impl.cpp b/dnn/src/common/elemwise/opr_impl.cpp index 52c01490..96eb820d 100644 --- a/dnn/src/common/elemwise/opr_impl.cpp +++ b/dnn/src/common/elemwise/opr_impl.cpp @@ -39,10 +39,10 @@ using Mode = param::Elemwise::Mode; using ModeTrait = ElemwiseForward::ModeTrait; const ModeTrait& ModeTrait::from_mode(Mode mode) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::vector traits; - std::lock_guard _lock(mtx); + MEGDNN_LOCK_GUARD(mtx); if (traits.empty()) { auto get = [&](Mode m) -> ModeTrait& { diff --git a/dnn/src/common/elemwise_multi_type/opr_impl.cpp b/dnn/src/common/elemwise_multi_type/opr_impl.cpp index ef1ec392..dd2046a1 100644 --- a/dnn/src/common/elemwise_multi_type/opr_impl.cpp +++ b/dnn/src/common/elemwise_multi_type/opr_impl.cpp @@ -28,10 +28,10 @@ void check_dtype(const ModeTrait& trait, size_t i, const TensorLayout& src) { } // anonymous namespace const ModeTrait& ModeTrait::from_mode(Mode mode) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::vector traits; - std::lock_guard _lock(mtx); + MEGDNN_LOCK_GUARD(mtx); auto make_check_dtype_func = [](DType expected) { auto func = [expected](DType dtype) { diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 
34cf79c0..1f6431f2 100644 --- a/dnn/src/common/handle_impl.h +++ b/dnn/src/common/handle_impl.h @@ -70,7 +70,7 @@ protected: MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { static_assert(idx < NR_HELPER_OPRS, "invalid idx"); if (!self->m_helper_oprs[idx]) { - std::lock_guard lg{self->m_helper_oprs_mtx}; + MEGDNN_LOCK_GUARD(self->m_helper_oprs_mtx); if (!self->m_helper_oprs[idx]) { self->m_helper_oprs[idx] = self->template create_operator(); @@ -88,7 +88,7 @@ protected: private: std::array, NR_HELPER_OPRS> m_helper_oprs; - std::mutex m_helper_oprs_mtx; + DNN_MUTEX m_helper_oprs_mtx; }; } // namespace megdnn diff --git a/dnn/src/common/opr_delegate.h b/dnn/src/common/opr_delegate.h index 0be9f4d4..d5e96b4b 100644 --- a/dnn/src/common/opr_delegate.h +++ b/dnn/src/common/opr_delegate.h @@ -38,7 +38,7 @@ const std::shared_ptr& inplace_cpu_handle(int debug_level = 0); */ template class CpuOprDelegationStorage { - std::mutex m_mtx; + DNN_MUTEX m_mtx; std::shared_ptr m_handle; std::unique_ptr m_oprs[nr_opr]; diff --git a/dnn/src/common/tensor_format.cpp b/dnn/src/common/tensor_format.cpp index ac4736ad..1b700e94 100644 --- a/dnn/src/common/tensor_format.cpp +++ b/dnn/src/common/tensor_format.cpp @@ -604,7 +604,7 @@ TensorLayout LowbitsAlignedTensorFormatBase::collapse_contiguous_spec( TensorFormat Image2DPack4TensorFormat::make_raw( size_t align_axis, size_t align_size_in_elements, Handle::HandleVendorType vendor_type) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::unordered_map> cache; @@ -641,7 +641,7 @@ TensorFormat Image2DPack4TensorFormat::change_axis(size_t axis) const { /* ===================== LowbitsitsAlignedToBytesTensorFormat * ===================== */ TensorFormat LowbitsAlignedToBytesTensorFormat::make(size_t size_nbits) { - static std::mutex mtx; + static DNN_MUTEX mtx; static std::unordered_map< uint64_t, std::unique_ptr> cache; diff --git a/dnn/src/common/utils.h b/dnn/src/common/utils.h index 11077216..452477d9 100644 --- 
a/dnn/src/common/utils.h +++ b/dnn/src/common/utils.h @@ -118,8 +118,17 @@ #define megdnn_layout_msg(layout) \ std::string(#layout "=" + (layout).to_string()) -#define MEGDNN_LOCK_GUARD(var) \ - std::lock_guard> _lock_guard_##var { var } +#if __DEPLOY_ON_XP_SP2__ +#define DNN_MUTEX size_t +#define MEGDNN_LOCK_GUARD(var) MEGDNN_MARK_USED_VAR(var) +#else +#define DNN_MUTEX std::mutex +#define DNN_TOKENPASTE(x, y) x##y +#define DNN_TOKENPASTE2(x, y) DNN_TOKENPASTE(x, y) +#define DNN_LOCK_GUARD_CTOR(mtx) DNN_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) +#define MEGDNN_LOCK_GUARD(mtx) \ + std::lock_guard DNN_LOCK_GUARD_CTOR(mtx) +#endif namespace megdnn { @@ -487,7 +496,7 @@ struct _SafeMultipliesImplUnsigned : public std::binary_function { "implicit conversion disallowed in SafeMultiplies"); megdnn_trap(); } -}; +}; // namespace megdnn template <> struct SafeMultiplies : public _SafeMultipliesImplUnsigned {}; diff --git a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h index 11ab21aa..8e52a7ec 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h +++ b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h @@ -81,7 +81,7 @@ public: } private: - std::mutex m_mtx; + DNN_MUTEX m_mtx; std::unordered_map, StrategyHasher, StrategyHashKeyEqual> m_map_strategies; @@ -99,4 +99,4 @@ MatrixMulImpl::KernSizeParam get_matmul_kern_param( } // namespace fallback } // namespace megdnn -// vim: syntax=cpp.doxygen \ No newline at end of file +// vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/conv_bias/im2col/factory.h b/dnn/src/fallback/conv_bias/im2col/factory.h index f7daef77..bbaa0de2 100644 --- a/dnn/src/fallback/conv_bias/im2col/factory.h +++ b/dnn/src/fallback/conv_bias/im2col/factory.h @@ -110,7 +110,7 @@ struct StrategyHashParamEqual { }; class StrategyDelegationStorage { - std::mutex m_mtx; + DNN_MUTEX m_mtx; std::unordered_map, StrategyHashParamHash, StrategyHashParamEqual> map_strategys; diff --git 
a/dnn/src/naive/sleep/opr_impl.cpp b/dnn/src/naive/sleep/opr_impl.cpp index db1c167d..ca1ebb60 100644 --- a/dnn/src/naive/sleep/opr_impl.cpp +++ b/dnn/src/naive/sleep/opr_impl.cpp @@ -11,6 +11,10 @@ #include "./opr_impl.h" +#if __DEPLOY_ON_XP_SP2__ +#define MEGDNN_NO_THREAD 1 +#endif + #include "src/naive/handle.h" #if !MEGDNN_NO_THREAD #include @@ -20,10 +24,10 @@ namespace megdnn { namespace naive { void SleepForwardImpl::exec() { - double seconds = m_param.time; #if MEGDNN_NO_THREAD megdnn_trap(); #else + double seconds = m_param.time; MEGDNN_DISPATCH_CPU_KERN_OPR( std::this_thread::sleep_for(std::chrono::microseconds( static_cast(seconds * 1e6)));); diff --git a/sdk/load-and-run/src/infile_persistent_cache.h b/sdk/load-and-run/src/infile_persistent_cache.h index 33fcc2c2..d9dc5bf0 100644 --- a/sdk/load-and-run/src/infile_persistent_cache.h +++ b/sdk/load-and-run/src/infile_persistent_cache.h @@ -52,7 +52,7 @@ class InFilePersistentCache final : public PersistentCache { std::unordered_map> m_cache; - std::mutex m_mtx; + MGB_MUTEX m_mtx; template void read_cache(Input& inp); diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp index d4bad933..cca03b65 100644 --- a/src/core/impl/comp_node/comp_node.cpp +++ b/src/core/impl/comp_node/comp_node.cpp @@ -32,7 +32,7 @@ namespace { std::atomic_flag g_default_cpu_initialized, g_exit_handler_registered[CompNode::NR_DEVICE_TYPE]; - std::mutex g_device_map_mtx; + MGB_MUTEX g_device_map_mtx; ThinHashMap> g_device_map; CompNode::DeviceType g_unspec_locator_type; diff --git a/src/core/impl/comp_node/cpu/comp_node.cpp b/src/core/impl/comp_node/cpu/comp_node.cpp index 92633e15..0ac86e7d 100644 --- a/src/core/impl/comp_node/cpu/comp_node.cpp +++ b/src/core/impl/comp_node/cpu/comp_node.cpp @@ -60,7 +60,11 @@ class CpuCompNode::WorkerQueue final sys::set_cpu_affinity({m_locator.device}); #endif } +#if __DEPLOY_ON_XP_SP2__ + __builtin_trap(); +#else sys::set_thread_name(m_locator.to_string()); 
+#endif } void on_sync_all_task_finish() override { @@ -830,7 +834,9 @@ struct CpuCompNode::Pool { void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); } }; +#if !__DEPLOY_ON_XP_SP2__ std::recursive_mutex mtx; +#endif // use global memory pool to ensuare object memory accessible even after // global finalize std::aligned_storage_t callback) { for (size_t i = 0;; ++i) { CompNode cur; { +#if !__DEPLOY_ON_XP_SP2__ MGB_LOCK_GUARD(sm_pool->mtx); +#endif if (i >= sm_pool->nr_used_impl_storage) return; cur = make_comp_node_from_impl( @@ -909,7 +917,9 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator, locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT, "failed to load cpu for device:%d stream:%d", locator.device, locator.stream); +#if !__DEPLOY_ON_XP_SP2__ MGB_LOCK_GUARD(sm_pool->mtx); +#endif // encode both device ID and type into a int mgb_assert(locator_logical.device >= -1 || @@ -967,7 +977,9 @@ void CpuCompNode::sync_all() { if (!sm_pool) return; +#if !__DEPLOY_ON_XP_SP2__ MGB_LOCK_GUARD(sm_pool->mtx); +#endif for (auto&& i : sm_pool->locator2impl) i.second->sync(); for (auto&& i : sm_pool->locator2impl_multi_thread) @@ -1049,7 +1061,9 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::do_device_wait_by( auto waiter = [this, version]() { while (m_record_nr_finish.load(std::memory_order_acquire) < version) { +#if !__DEPLOY_ON_XP_SP2__ std::unique_lock lk{m_dev_wait_mtx}; +#endif if (m_record_nr_finish.load(std::memory_order_acquire) >= version) { break; } @@ -1078,10 +1092,12 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::on_finish() { } m_record_nr_finish.fetch_add(1, std::memory_order_release); +#if !__DEPLOY_ON_XP_SP2__ if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) { MGB_LOCK_GUARD(m_dev_wait_mtx); m_dev_wait_cv.notify_all(); } +#endif } bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() { @@ -1100,11 +1116,15 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::host_wait_cv() { 
m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release); for (;;) { +#if !__DEPLOY_ON_XP_SP2__ std::unique_lock lock{m_dev_wait_mtx}; +#endif if (finished()) { break; } +#if !__DEPLOY_ON_XP_SP2__ m_dev_wait_cv.wait(lock); +#endif } m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release); } diff --git a/src/core/impl/comp_node/impl_helper.cpp b/src/core/impl/comp_node/impl_helper.cpp index 8101c97b..585a83e1 100644 --- a/src/core/impl/comp_node/impl_helper.cpp +++ b/src/core/impl/comp_node/impl_helper.cpp @@ -45,9 +45,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() { return; } if (sm_cpu_sync_level >= 1) { +#if __DEPLOY_ON_XP_SP2__ +#if MGB_HAVE_THREAD + __builtin_trap(); +#else + return; +#endif +#else while (!finished()) { std::this_thread::yield(); } +#endif return; } mgb_assert(!sm_cpu_sync_level, "invalid cpu sync level: %d", @@ -57,9 +65,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() { } void CompNodeImplHelper::EventImplHelper::host_wait_cv() { +#if __DEPLOY_ON_XP_SP2__ +#if MGB_HAVE_THREAD + __builtin_trap(); +#else + return; +#endif +#else while (!finished()) { std::this_thread::yield(); } +#endif } double CompNodeImplHelper::EventImplHelper::elapsed_time_until(Event& end_) { diff --git a/src/core/impl/comp_node/impl_helper.h b/src/core/impl/comp_node/impl_helper.h index a1d4f1e6..62df1da9 100644 --- a/src/core/impl/comp_node/impl_helper.h +++ b/src/core/impl/comp_node/impl_helper.h @@ -49,7 +49,7 @@ namespace mgb { * been performed. 
*/ class CompNodeImplHelper::EventImplHelper: public Event { - std::mutex m_mtx; + MGB_MUTEX m_mtx; bool m_recorded = false, m_finished = false; diff --git a/src/core/impl/comp_node/mem_alloc/impl.cpp b/src/core/impl/comp_node/mem_alloc/impl.cpp index 88c21225..75a8cfad 100644 --- a/src/core/impl/comp_node/mem_alloc/impl.cpp +++ b/src/core/impl/comp_node/mem_alloc/impl.cpp @@ -59,11 +59,15 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc( size_t size, bool allow_from_parent, bool log_stat_on_error) { mgb_assert(size); +#if !__DEPLOY_ON_XP_SP2__ m_mutex.lock(); +#endif auto iter = m_free_blk_size.lower_bound(FreeBlock{MemAddr{0, 0}, size}); if (iter == m_free_blk_size.end()) { +#if !__DEPLOY_ON_XP_SP2__ m_mutex.unlock(); +#endif if (!allow_from_parent) { if (log_stat_on_error) { print_memory_state(); @@ -87,7 +91,9 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc( if (remain) insert_free_unsafe({alloc_addr + size, remain}); +#if !__DEPLOY_ON_XP_SP2__ m_mutex.unlock(); +#endif return alloc_addr; } @@ -267,7 +273,9 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) { { // sleep to wait for async dealloc using namespace std::literals; +#if !__DEPLOY_ON_XP_SP2__ std::this_thread::sleep_for(0.2s); +#endif } get = gather_stream_free_blk_and_release_full(); mgb_log("device %d: sync all device and try to " diff --git a/src/core/impl/comp_node/mem_alloc/impl.h b/src/core/impl/comp_node/mem_alloc/impl.h index 11e1de00..e5c08abe 100644 --- a/src/core/impl/comp_node/mem_alloc/impl.h +++ b/src/core/impl/comp_node/mem_alloc/impl.h @@ -73,7 +73,7 @@ class MemAllocImplHelper: virtual public MemAllocBase { //! 
map from address to size and size iter std::map m_free_blk_addr; - std::mutex m_mutex; + MGB_MUTEX m_mutex; struct BlkByAddrIter { decltype(m_free_blk_addr.begin()) aiter; diff --git a/src/core/impl/graph/cg_impl_seq.cpp b/src/core/impl/graph/cg_impl_seq.cpp index c59604f4..1d5444d6 100644 --- a/src/core/impl/graph/cg_impl_seq.cpp +++ b/src/core/impl/graph/cg_impl_seq.cpp @@ -48,7 +48,11 @@ class ComputingGraphImpl::ComputingSequence::ExecContext { std::unique_ptr m_recorder; bool has_var_sanity_check() const { +#if __DEPLOY_ON_XP_SP2__ + return false; +#else return static_cast(m_comp_seq->m_var_sanity_check); +#endif } void try_reset_recorder() { @@ -305,10 +309,12 @@ void ComputingGraphImpl::ComputingSequence::preprocess(ExecContext* ctx) { m_owner_graph->var_node_mem_manager().alloc_var_node_mem_static(); bool first_exec = m_first_exec; +#if !__DEPLOY_ON_XP_SP2__ if (!first_exec) { // var sanity check only for first run m_var_sanity_check.reset(); } +#endif m_owner_graph->event().signal_inplace( m_owner_graph, this, &ctx->m_cleanup_callback, &m_used_comp_node, @@ -342,9 +348,13 @@ void ComputingGraphImpl::ComputingSequence::attach_to_graph() { static_cast(gimpl->m_current_comp_seq); prev_seq->cleanup(); } +#if !__DEPLOY_ON_XP_SP2__ + //! disable VarSanityCheck when __DEPLOY_ON_XP_SP2__=1. caused by + //! 
VarSanityCheck depends on std::thread if (gimpl->options().var_sanity_check_first_run) { m_var_sanity_check = std::make_unique(gimpl); } +#endif gimpl->m_current_comp_seq = this; } @@ -403,7 +413,9 @@ void ComputingGraphImpl::ComputingSequence::do_wait(bool explicit_user_wait) { } void ComputingGraphImpl::ComputingSequence::cleanup() { +#if !__DEPLOY_ON_XP_SP2__ m_var_sanity_check.reset(); +#endif if (has_uncaught_exception()) { mgb_log_warn( "fallback to simple graph waiting in dtor due to uncaught " diff --git a/src/core/impl/graph/cg_impl_seq.h b/src/core/impl/graph/cg_impl_seq.h index 47818a6f..f13e50e9 100644 --- a/src/core/impl/graph/cg_impl_seq.h +++ b/src/core/impl/graph/cg_impl_seq.h @@ -30,7 +30,9 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { size_t m_run_id = 0; size_t m_cg_event_version = 0; mutable Maybe m_prev_exec_time; +#if !__DEPLOY_ON_XP_SP2__ std::unique_ptr m_var_sanity_check; +#endif std::unique_ptr m_comp_node_seq_recorder; NormalExecEnv m_exec_env; @@ -46,7 +48,7 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable { class ExecContext; std::unique_ptr m_async_exc; - std::mutex m_async_exc_mutex; + MGB_MUTEX m_async_exc_mutex; /*! 
* \brief check whether recording comp seq is enabled diff --git a/src/core/impl/graph/operator_node.cpp b/src/core/impl/graph/operator_node.cpp index 202e5876..d5bc5502 100644 --- a/src/core/impl/graph/operator_node.cpp +++ b/src/core/impl/graph/operator_node.cpp @@ -713,7 +713,9 @@ void PostExecActions::perform() { for (auto&& i : m_items) { if (enable) { +#if !__DEPLOY_ON_XP_SP2__ VarSanityCheck::check_var_after_exec(i.var, *i.recv_info); +#endif if (i.shape_sync_hdl) i.shape_sync_hdl->sync_from_var(); diff --git a/src/core/impl/graph/static_infer_impl.cpp b/src/core/impl/graph/static_infer_impl.cpp index 908e9318..9562b58c 100644 --- a/src/core/impl/graph/static_infer_impl.cpp +++ b/src/core/impl/graph/static_infer_impl.cpp @@ -141,7 +141,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final, TagTraitBase) // { struct InferResultCache { Spinlock mtx; +#if __DEPLOY_ON_XP_SP2__ + ThinHashMap storage; +#else ThinHashMap storage; +#endif }; static TagTraitArray sm_empty_deps; static InferResultCache sm_result_cache; @@ -167,7 +171,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final, { // thread_local not supported on ios; so we us a manual impl MGB_LOCK_GUARD(sm_result_cache.mtx); +#if __DEPLOY_ON_XP_SP2__ + ret = &sm_result_cache.storage[0]; +#else ret = &sm_result_cache.storage[std::this_thread::get_id()]; +#endif } ret->m_shape = &tag()->shape(); return ret; diff --git a/src/core/impl/graph/static_infer_impl.h b/src/core/impl/graph/static_infer_impl.h index 7594851f..f2af0e2c 100644 --- a/src/core/impl/graph/static_infer_impl.h +++ b/src/core/impl/graph/static_infer_impl.h @@ -122,7 +122,7 @@ class StaticInferManagerImpl final: public StaticInferManager { struct TagTraitContainer; ComputingGraph * const m_owner_graph; - std::recursive_mutex m_mtx; + MGB_RECURSIVE_MUTEX m_mtx; //! 
callbacks to be invoked in destructor ThinHashMap> m_dtor_callbacks; diff --git a/src/core/impl/graph/var_node.cpp b/src/core/impl/graph/var_node.cpp index 4a2ab692..596313c6 100644 --- a/src/core/impl/graph/var_node.cpp +++ b/src/core/impl/graph/var_node.cpp @@ -20,7 +20,7 @@ using namespace cg; /* ===================== MemAllocPlan ===================== */ -std::mutex MemAllocPlan::ReadonlyFwdList::list_mutex; +MGB_MUTEX MemAllocPlan::ReadonlyFwdList::list_mutex; void MemAllocPlan::ReadonlyFwdList::reset() { MGB_LOCK_GUARD(list_mutex); diff --git a/src/core/impl/graph/var_node_mem_mgr.h b/src/core/impl/graph/var_node_mem_mgr.h index be69da17..d657a0b9 100644 --- a/src/core/impl/graph/var_node_mem_mgr.h +++ b/src/core/impl/graph/var_node_mem_mgr.h @@ -440,7 +440,7 @@ class VarNodeMemManager { ImpureMemPlanManager m_impure_mem_plan_mgr; - std::mutex m_dynamic_alloc_mtx; + MGB_MUTEX m_dynamic_alloc_mtx; const size_t* m_run_id_ptr = nullptr; SyncableCounter m_cpu_async_release_barrier; diff --git a/src/core/impl/system.cpp b/src/core/impl/system.cpp index a9e695d8..8673d6a7 100644 --- a/src/core/impl/system.cpp +++ b/src/core/impl/system.cpp @@ -19,7 +19,13 @@ using namespace mgb; using namespace sys; int sys::get_cpu_count() { +#if __DEPLOY_ON_XP_SP2__ + //! when deploy on xp sp2, we only support single thread + //! 
so just return 1 even cpu number greater than 1 + return 1; +#else return std::max(std::thread::hardware_concurrency(), 1u); +#endif } #if defined(WIN32) @@ -153,9 +159,11 @@ bool sys::stderr_ansi_color() { void sys::set_thread_name(const std::string &) { } +#if !__DEPLOY_ON_XP_SP2__ std::string sys::get_thread_name(Maybe) { return "@"; } +#endif namespace { class FakeTimedFuncInvoker final: public TimedFuncInvoker { @@ -254,6 +262,7 @@ void sys::set_thread_name(const std::string &name) { #endif } +#if !__DEPLOY_ON_XP_SP2__ std::string sys::get_thread_name(Maybe tid_) { #if MGB_ENABLE_DEBUG_UTIL MGB_LOCK_GUARD(thread_name_map_lock); @@ -269,10 +278,11 @@ std::string sys::get_thread_name(Maybe tid_) { return ""; #endif } +#endif namespace { -class TimedFuncInvokerImpl final: public TimedFuncInvoker { +class TimedFuncInvokerImpl final : public TimedFuncInvoker { /* * server-client protocol: * @@ -308,7 +318,7 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { bool m_watcher_should_stop = false; std::condition_variable m_watcher_stop_cv; - std::mutex m_watcher_stop_mtx, m_global_mtx; + MGB_MUTEX m_watcher_stop_mtx, m_global_mtx; void clear_sock_fd() { if (m_peer_fd) @@ -567,8 +577,10 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { auto start = high_resolution_clock::now(), end = start + timeout_due; for (; ; ) { +#if !__DEPLOY_ON_XP_SP2__ std::unique_lock lk(m_watcher_stop_mtx); m_watcher_stop_cv.wait_until(lk, end); +#endif if (m_watcher_should_stop) return false; @@ -603,10 +615,9 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker { } MGB_CATCH(..., {}); clear_sock_fd(); } - }; -} // anonymous namespace +} // anonymous namespace TimedFuncInvoker& TimedFuncInvoker::ins() { static TimedFuncInvokerImpl impl; diff --git a/src/core/include/megbrain/common.h b/src/core/include/megbrain/common.h index 5283e1b8..1972c14c 100644 --- a/src/core/include/megbrain/common.h +++ b/src/core/include/megbrain/common.h @@ -205,6 +205,21 @@ void 
__log__(LogLevel level, const char *file, const char *func, int line, #define MGB_TOKENPASTE2(x, y) MGB_TOKENPASTE(x, y) #define MGB_LOCK_GUARD_CTOR(mtx) MGB_TOKENPASTE2(__lock_guard_, __LINE__)(mtx) +#if __DEPLOY_ON_XP_SP2__ +//! refer to +//! https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160 +//! xp sp2 does not support the vc runtime fully, caused by KERNEL32.dll not +//! implementing some base apis for c++ std functions, for example, +//! std::mutex/std::thread/std::condition_variable. As a workaround, we will +//! disable some MegEngine features on xp sp2 env, for example, multi-thread etc! +#define MGB_MUTEX size_t +#define MGB_RECURSIVE_MUTEX size_t +#define MGB_LOCK_GUARD(mtx) MGB_MARK_USED_VAR(mtx) +#define MGB_LOCK_GUARD_UNIQUE(mtx) MGB_MARK_USED_VAR(mtx) +#define MGB_LOCK_GUARD_SHARED(mtx) MGB_MARK_USED_VAR(mtx) +#else +#define MGB_MUTEX std::mutex +#define MGB_RECURSIVE_MUTEX std::recursive_mutex #define MGB_LOCK_GUARD(mtx) \ std::lock_guard MGB_LOCK_GUARD_CTOR(mtx) @@ -212,7 +227,8 @@ void __log__(LogLevel level, const char *file, const char *func, int line, std::unique_lock MGB_LOCK_GUARD_CTOR(mtx) #define MGB_LOCK_GUARD_SHARED(mtx) \ - std::shared_lock MGB_LOCK_GUARD_CTOR(mtx) + std::shared_lock MGB_LOCK_GUARD_CTOR(mtx) +#endif /*! 
* \brief printf-like std::string constructor diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h index 0ef2fdb6..fedb7fae 100644 --- a/src/core/include/megbrain/graph/var_node.h +++ b/src/core/include/megbrain/graph/var_node.h @@ -222,7 +222,7 @@ class MemAllocPlan final: public json::Serializable, public NonCopyableObj { private: class ReadonlyFwdList { MemAllocPlan *m_prev = nullptr, *m_next = nullptr; - static std::mutex list_mutex; + static MGB_MUTEX list_mutex; public: MemAllocPlan* next() const { return m_next; } void reset(); diff --git a/src/core/include/megbrain/system.h b/src/core/include/megbrain/system.h index 66b83690..1b2cd47e 100644 --- a/src/core/include/megbrain/system.h +++ b/src/core/include/megbrain/system.h @@ -27,11 +27,13 @@ namespace sys { //! set name of caller thread void set_thread_name(const std::string &name); +#if !__DEPLOY_ON_XP_SP2__ /*! * \brief get name of of given thread * \param tid thread id, or None to for the caller thread */ std::string get_thread_name(Maybe tid = None); +#endif //! 
get number of CPU cores on this system int get_cpu_count(); diff --git a/src/core/include/megbrain/utils/async_worker_impl_0.h b/src/core/include/megbrain/utils/async_worker_impl_0.h index 2801a188..6dd96437 100644 --- a/src/core/include/megbrain/utils/async_worker_impl_0.h +++ b/src/core/include/megbrain/utils/async_worker_impl_0.h @@ -35,14 +35,20 @@ class AsyncWorkerSet final: public NonCopyableObj { }; class FutureThreadPoolBase : public NonCopyableObj { +#if !__DEPLOY_ON_XP_SP2__ std::vector m_ids; +#endif public: FutureThreadPoolBase(const Maybe& = None) {} +#if __DEPLOY_ON_XP_SP2__ + size_t start(size_t concurrency) { return concurrency; } +#else const std::vector& start(size_t concurrency) { m_ids.resize(concurrency, std::this_thread::get_id()); return m_ids; } +#endif void stop() { } diff --git a/src/core/include/megbrain/utils/event.h b/src/core/include/megbrain/utils/event.h index a6ce7c19..66a33cff 100644 --- a/src/core/include/megbrain/utils/event.h +++ b/src/core/include/megbrain/utils/event.h @@ -53,7 +53,7 @@ class SyncEventConnecter: public NonCopyableObj { using ReceiverMap = ThinHashMap; bool m_is_empty = true; - std::mutex m_mtx; + MGB_MUTEX m_mtx; //! map from type to receiver; use shared_ptr because it would be kept by //! handlers std::shared_ptr m_receiver_map = diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h index 523a4ad6..4871dcce 100644 --- a/src/core/include/megbrain/utils/persistent_cache.h +++ b/src/core/include/megbrain/utils/persistent_cache.h @@ -83,7 +83,7 @@ namespace mgb { std::string, std::unordered_map> m_cache; - std::mutex m_mtx; + MGB_MUTEX m_mtx; }; /*! 
diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp index d7554fb6..ee63cafb 100644 --- a/src/opr/impl/basic_arith.cpp +++ b/src/opr/impl/basic_arith.cpp @@ -33,7 +33,7 @@ namespace { template class StaticInferOpr { intl::UniqPtrWithCN m_opr; - std::mutex m_mtx; + MGB_MUTEX m_mtx; public: class Lock { @@ -43,7 +43,9 @@ namespace { explicit Lock(StaticInferOpr *owner): m_owner{owner} { +#if !__DEPLOY_ON_XP_SP2__ m_owner->m_mtx.lock(); +#endif } public: @@ -54,8 +56,10 @@ namespace { } ~Lock() { +#if !__DEPLOY_ON_XP_SP2__ if (m_owner) m_owner->m_mtx.unlock(); +#endif } Lock& operator = (const Lock &) = delete; diff --git a/src/opr/impl/internal/indexing_helper.cpp b/src/opr/impl/internal/indexing_helper.cpp index db4f9087..29ccf9b6 100644 --- a/src/opr/impl/internal/indexing_helper.cpp +++ b/src/opr/impl/internal/indexing_helper.cpp @@ -277,7 +277,7 @@ SubTensorSpec FancyIndexingHelper::fancy_indexing_make_sub_spec( mgb_assert(m_require_scalar_index || !fake_single_idx); static DeviceTensorND fake_val; - static std::mutex fake_val_mtx; + static MGB_MUTEX fake_val_mtx; if (mgb_unlikely(fake_val.empty())) { MGB_LOCK_GUARD(fake_val_mtx); diff --git a/src/opr/impl/internal/megdnn_opr_wrapper.cpp b/src/opr/impl/internal/megdnn_opr_wrapper.cpp index a920dcbe..ae34d63d 100644 --- a/src/opr/impl/internal/megdnn_opr_wrapper.cpp +++ b/src/opr/impl/internal/megdnn_opr_wrapper.cpp @@ -53,7 +53,7 @@ namespace { MGB_TYPEINFO_OBJ_DECL; public: - std::mutex mtx; + MGB_MUTEX mtx; CompNode::UnorderedMap cn2storage; }; MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer); diff --git a/src/opr/impl/io.cpp b/src/opr/impl/io.cpp index 316e4969..4a691213 100644 --- a/src/opr/impl/io.cpp +++ b/src/opr/impl/io.cpp @@ -377,7 +377,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(SharedDeviceTensorWithFormat); MGB_DYN_TYPE_OBJ_FINAL_IMPL(ImmutableTensor); class ImmutableTensor::Value { - std::mutex m_mtx; + MGB_MUTEX m_mtx; DeviceTensorND m_dev, m_static_infer; std::string m_summary; @@ -527,7 
+527,7 @@ class ImmutableTensor::DevValueCache final: public UserDataContainer::UserData { std::unordered_map m_tensor2val; std::unordered_map m_scalar2val; - std::mutex m_mtx; + MGB_MUTEX m_mtx; void setup_value(Value &dest, const HostTensorND &val) { dest.setup(m_comp_node, val); diff --git a/src/opr/impl/loop/impl.cpp b/src/opr/impl/loop/impl.cpp index d2f6b61b..3207104c 100644 --- a/src/opr/impl/loop/impl.cpp +++ b/src/opr/impl/loop/impl.cpp @@ -888,7 +888,7 @@ class LoopImpl::MutableStateSaver::Recorder final: public NonCopyableObj { //! mutex for m_saved_buckets, used between copy_bucket_to_host() and the //! async copy task in m_copy_threadpool - std::mutex m_saved_buckets_mtx; + MGB_MUTEX m_saved_buckets_mtx; //! see on_fwd_finish() TensorShape m_var_shape; bool m_enabled = false; diff --git a/src/opr/impl/search_policy/profiler.cpp b/src/opr/impl/search_policy/profiler.cpp index d2272abe..379c208e 100644 --- a/src/opr/impl/search_policy/profiler.cpp +++ b/src/opr/impl/search_policy/profiler.cpp @@ -356,7 +356,9 @@ typename TimedProfiler::TResult TimedProfiler::prof_impl( next_report_time = timer.get_secs() + 1; } using namespace std::literals; +#if !__DEPLOY_ON_XP_SP2__ std::this_thread::sleep_for(1000us); +#endif } // release all free blocks owned by child process, // in order to avoid main process running out of memory diff --git a/src/opr/impl/utility.cpp b/src/opr/impl/utility.cpp index 808e858f..2aa82fd2 100644 --- a/src/opr/impl/utility.cpp +++ b/src/opr/impl/utility.cpp @@ -731,7 +731,7 @@ class PersistentOutputStorage::StorageHolder final key.second); } }; - std::mutex m_mtx; + MGB_MUTEX m_mtx; std::unordered_map m_storage; public: diff --git a/src/plugin/impl/var_value_checker.cpp b/src/plugin/impl/var_value_checker.cpp index 04ae4c7d..6bf07464 100644 --- a/src/plugin/impl/var_value_checker.cpp +++ b/src/plugin/impl/var_value_checker.cpp @@ -125,9 +125,13 @@ void VarValueChecker::on_var_computed(VarNode *var) { } if (!m_init_val_dumped) { +#if 
!__DEPLOY_ON_XP_SP2__ m_var2val_mtx.lock(); - auto &&val = m_var2val[var]; +#endif + auto&& val = m_var2val[var]; +#if !__DEPLOY_ON_XP_SP2__ m_var2val_mtx.unlock(); +#endif mgb_assert(!val); val = std::make_shared(); diff --git a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h index cacca348..fb30563c 100644 --- a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h +++ b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h @@ -22,7 +22,7 @@ namespace mgb { * This is intended to find potential bugs in megdnn. */ class CPUDispatchChecker final: public PluginBase { - std::mutex m_cn2nr_task_mtx, + MGB_MUTEX m_cn2nr_task_mtx, m_failed_oprs_mtx_storage, *m_failed_oprs_mtx = &m_failed_oprs_mtx_storage; CompNode::UnorderedMap m_cn2nr_task; diff --git a/src/plugin/include/megbrain/plugin/opr_io_dump.h b/src/plugin/include/megbrain/plugin/opr_io_dump.h index 84bc55de..00dfe0cc 100644 --- a/src/plugin/include/megbrain/plugin/opr_io_dump.h +++ b/src/plugin/include/megbrain/plugin/opr_io_dump.h @@ -60,7 +60,7 @@ class TextOprIODump final : public OprIODumpBase { bool m_print_addr = true; std::shared_ptr m_fout; size_t m_max_size = 5; - std::mutex m_mtx; + MGB_MUTEX m_mtx; std::unique_ptr m_lazy_value; void dump_var(VarNode* var, bool lazy_sync) override; diff --git a/src/plugin/include/megbrain/plugin/var_sanity_check.h b/src/plugin/include/megbrain/plugin/var_sanity_check.h index 7f32cc8e..f18b0dde 100644 --- a/src/plugin/include/megbrain/plugin/var_sanity_check.h +++ b/src/plugin/include/megbrain/plugin/var_sanity_check.h @@ -64,7 +64,7 @@ class VarSanityCheck final : public PluginBase { //! map from caller thread to workspace map ThinHashMap m_workspace; - std::mutex m_workspace_mtx; + MGB_MUTEX m_workspace_mtx; ThinHashMap m_var2chksum; /*! 
the ids of varnodes that have been modified by recv_opr @@ -72,7 +72,7 @@ class VarSanityCheck final : public PluginBase { * cg::OperatorNodeBase::NodeProp::Flag:: FORCE_UPDATE_INPUT_VAR. */ ThinHashSet m_modified_vars; - std::mutex m_id2chksum_mtx; + MGB_MUTEX m_id2chksum_mtx; typedef void (VarSanityCheck::*input_checker_fn)(cg::OperatorNodeBase*, VarNode*); diff --git a/src/plugin/include/megbrain/plugin/var_value_checker.h b/src/plugin/include/megbrain/plugin/var_value_checker.h index b9b356f7..2041a337 100644 --- a/src/plugin/include/megbrain/plugin/var_value_checker.h +++ b/src/plugin/include/megbrain/plugin/var_value_checker.h @@ -50,7 +50,7 @@ namespace mgb { size_t m_cur_var_idx, m_nr_exec; VarNodeArray m_vars; - std::mutex m_var2val_mtx; + MGB_MUTEX m_var2val_mtx; ThinHashMap> m_var2val; Checker m_checker;