From c68e669530ee557fdebf7e7d2742f9351914bcc6 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Mon, 2 Aug 2021 14:30:30 +0800
Subject: [PATCH] feat(bazel/windows/xp/sp2/inference): implement inference on
 windows xp (os version >= sp2) build with bazel

* bazel build support(define __DEPLOY_ON_XP_SP2__ when deploy on xp sp2):
(dbg)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp'
--compiler='clang_cl' -c dbg --copt "-D__DEPLOY_ON_XP_SP2__=1"

(opt)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp'
--compiler='clang_cl' -c opt --copt "-D__DEPLOY_ON_XP_SP2__=1"

* internal behavior:
MGB_HAVE_THREAD=0 will be defined when __DEPLOY_ON_XP_SP2__ is enabled

* refer to
https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
xp sp2 (x86) does not fully support the vc runtime, because its KERNEL32.dll
does not implement some base apis needed by c++ std facilities, for example
std::mutex/std::thread/std::condition_variable. As a workaround, we
disable some MegEngine features on xp sp2, for example multi-threading.

* about DNN_MUTEX/MGB_MUTEX, if your code will be built into inference
code (even CPU backends), please replace std::mutex with DNN_MUTEX/MGB_MUTEX.

* about multi-thread, if your code needs multi-thread support, please
enable it only when MGB_HAVE_THREAD=1

* about test build env status
1: Visual Studio 2019(MSVC version <= 14.26.28801)---- pass
2: Visual Studio 2019(MSVC version > 14.26.28801) ---- failed
   because these newer versions make the VC runtime depend on the win7
   KERNEL32.DLL; this may be fixed in a later Visual Studio 2019 version,
   but that was not tested when this MR was merged
3: Visual Studio 2017   ---------- pass
4: Visual Studio 2014   ---------- pass
GitOrigin-RevId: 65ac48b95e99f2c510fe5db449cc8182d682e113
---
 dnn/src/common/basic_types.cpp                       |  4 ++--
 dnn/src/common/cv/interp_helper.cpp                  |  4 ++--
 dnn/src/common/cv/interp_helper.h                    |  2 +-
 dnn/src/common/elemwise/opr_impl.cpp                 |  4 ++--
 dnn/src/common/elemwise_multi_type/opr_impl.cpp      |  4 ++--
 dnn/src/common/handle_impl.h                         |  4 ++--
 dnn/src/common/opr_delegate.h                        |  2 +-
 dnn/src/common/tensor_format.cpp                     |  4 ++--
 dnn/src/common/utils.h                               | 15 ++++++++++++---
 dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h   |  4 ++--
 dnn/src/fallback/conv_bias/im2col/factory.h          |  2 +-
 dnn/src/naive/sleep/opr_impl.cpp                     |  6 +++++-
 sdk/load-and-run/src/infile_persistent_cache.h       |  2 +-
 src/core/impl/comp_node/comp_node.cpp                |  2 +-
 src/core/impl/comp_node/cpu/comp_node.cpp            | 20 ++++++++++++++++++++
 src/core/impl/comp_node/impl_helper.cpp              | 16 ++++++++++++++++
 src/core/impl/comp_node/impl_helper.h                |  2 +-
 src/core/impl/comp_node/mem_alloc/impl.cpp           |  8 ++++++++
 src/core/impl/comp_node/mem_alloc/impl.h             |  2 +-
 src/core/impl/graph/cg_impl_seq.cpp                  | 12 ++++++++++++
 src/core/impl/graph/cg_impl_seq.h                    |  4 +++-
 src/core/impl/graph/operator_node.cpp                |  2 ++
 src/core/impl/graph/static_infer_impl.cpp            |  8 ++++++++
 src/core/impl/graph/static_infer_impl.h              |  2 +-
 src/core/impl/graph/var_node.cpp                     |  2 +-
 src/core/impl/graph/var_node_mem_mgr.h               |  2 +-
 src/core/impl/system.cpp                             | 19 +++++++++++++++----
 src/core/include/megbrain/common.h                   | 18 +++++++++++++++++-
 src/core/include/megbrain/graph/var_node.h           |  2 +-
 src/core/include/megbrain/system.h                   |  2 ++
 .../include/megbrain/utils/async_worker_impl_0.h     |  6 ++++++
 src/core/include/megbrain/utils/event.h              |  2 +-
 src/core/include/megbrain/utils/persistent_cache.h   |  2 +-
 src/opr/impl/basic_arith.cpp                         |  6 +++++-
 src/opr/impl/internal/indexing_helper.cpp            |  2 +-
 src/opr/impl/internal/megdnn_opr_wrapper.cpp         |  2 +-
 src/opr/impl/io.cpp                                  |  4 ++--
 src/opr/impl/loop/impl.cpp                           |  2 +-
 src/opr/impl/search_policy/profiler.cpp              |  2 ++
 src/opr/impl/utility.cpp                             |  2 +-
 src/plugin/impl/var_value_checker.cpp                |  6 +++++-
 .../include/megbrain/plugin/cpu_dispatch_checker.h   |  2 +-
 src/plugin/include/megbrain/plugin/opr_io_dump.h     |  2 +-
 .../include/megbrain/plugin/var_sanity_check.h       |  4 ++--
 .../include/megbrain/plugin/var_value_checker.h      |  2 +-
 45 files changed, 176 insertions(+), 50 deletions(-)

diff --git a/dnn/src/common/basic_types.cpp b/dnn/src/common/basic_types.cpp
index 96de41d3..eeee2479 100644
--- a/dnn/src/common/basic_types.cpp
+++ b/dnn/src/common/basic_types.cpp
@@ -60,10 +60,10 @@ T deserialize_pod(const std::string& data, size_t& offset) {
 ErrorHandler* ErrorHandler::sm_inst;
 
 ErrorHandler* ErrorHandler::inst() {
-    static std::mutex mtx;
+    static DNN_MUTEX mtx;
     static DefaultErrorHandler default_handler;
     if (megdnn_unlikely(!sm_inst)) {
-        std::lock_guard<std::mutex> lg{mtx};
+        MEGDNN_LOCK_GUARD(mtx);
         if (!sm_inst) {
             sm_inst = &default_handler;
         }
diff --git a/dnn/src/common/cv/interp_helper.cpp b/dnn/src/common/cv/interp_helper.cpp
index 9506a3f6..d2cc60cd 100644
--- a/dnn/src/common/cv/interp_helper.cpp
+++ b/dnn/src/common/cv/interp_helper.cpp
@@ -145,7 +145,7 @@ init_inter_tab_1d(InterpolationMode imode, float* tab, int tabsz) {
 #if MEGDNN_X86
 DEF_FUN(const int16_t*) get_linear_ic4_table() {
     auto table_holder = &sm_tab_linear;
-    std::lock_guard<std::mutex> lg{table_holder->mtx};
+    MEGDNN_LOCK_GUARD(table_holder->mtx);
     float* tab = nullptr;
     short* itab = nullptr;
     MEGDNN_MARK_USED_VAR(tab);
@@ -175,7 +175,7 @@ DEF_FUN(const void*) get_table(InterpolationMode imode, bool fixpt) {
         default:
             megdnn_throw(("unsupported interpolation mode"));
     }
-    std::lock_guard<std::mutex> lg{table_holder->mtx};
+    MEGDNN_LOCK_GUARD(table_holder->mtx);
 
     float* tab = nullptr;
     short* itab = nullptr;
diff --git a/dnn/src/common/cv/interp_helper.h b/dnn/src/common/cv/interp_helper.h
index c1cf68f0..922a14a4 100644
--- a/dnn/src/common/cv/interp_helper.h
+++ b/dnn/src/common/cv/interp_helper.h
@@ -134,7 +134,7 @@ private:
     };
 
     struct TableHolderBase {
-        std::mutex mtx;
+        DNN_MUTEX mtx;
 
         //! get table pointer; return whether already init
         virtual bool get(float**, int16_t**) = 0;
diff --git a/dnn/src/common/elemwise/opr_impl.cpp b/dnn/src/common/elemwise/opr_impl.cpp
index 52c01490..96eb820d 100644
--- a/dnn/src/common/elemwise/opr_impl.cpp
+++ b/dnn/src/common/elemwise/opr_impl.cpp
@@ -39,10 +39,10 @@ using Mode = param::Elemwise::Mode;
 using ModeTrait = ElemwiseForward::ModeTrait;
 
 const ModeTrait& ModeTrait::from_mode(Mode mode) {
-    static std::mutex mtx;
+    static DNN_MUTEX mtx;
     static std::vector<ModeTrait> traits;
 
-    std::lock_guard<std::mutex> _lock(mtx);
+    MEGDNN_LOCK_GUARD(mtx);
 
     if (traits.empty()) {
         auto get = [&](Mode m) -> ModeTrait& {
diff --git a/dnn/src/common/elemwise_multi_type/opr_impl.cpp b/dnn/src/common/elemwise_multi_type/opr_impl.cpp
index ef1ec392..dd2046a1 100644
--- a/dnn/src/common/elemwise_multi_type/opr_impl.cpp
+++ b/dnn/src/common/elemwise_multi_type/opr_impl.cpp
@@ -28,10 +28,10 @@ void check_dtype(const ModeTrait& trait, size_t i, const TensorLayout& src) {
 }  // anonymous namespace
 
 const ModeTrait& ModeTrait::from_mode(Mode mode) {
-    static std::mutex mtx;
+    static DNN_MUTEX mtx;
     static std::vector<ModeTrait> traits;
 
-    std::lock_guard<std::mutex> _lock(mtx);
+    MEGDNN_LOCK_GUARD(mtx);
 
     auto make_check_dtype_func = [](DType expected) {
         auto func = [expected](DType dtype) {
diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h
index 34cf79c0..1f6431f2 100644
--- a/dnn/src/common/handle_impl.h
+++ b/dnn/src/common/handle_impl.h
@@ -70,7 +70,7 @@ protected:
         MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
             static_assert(idx < NR_HELPER_OPRS, "invalid idx");
             if (!self->m_helper_oprs[idx]) {
-                std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
+                MEGDNN_LOCK_GUARD(self->m_helper_oprs_mtx);
                 if (!self->m_helper_oprs[idx]) {
                     self->m_helper_oprs[idx] =
                             self->template create_operator<Opr>();
@@ -88,7 +88,7 @@ protected:
 
 private:
     std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS> m_helper_oprs;
-    std::mutex m_helper_oprs_mtx;
+    DNN_MUTEX m_helper_oprs_mtx;
 };
 
 }  // namespace megdnn
diff --git a/dnn/src/common/opr_delegate.h b/dnn/src/common/opr_delegate.h
index 0be9f4d4..d5e96b4b 100644
--- a/dnn/src/common/opr_delegate.h
+++ b/dnn/src/common/opr_delegate.h
@@ -38,7 +38,7 @@ const std::shared_ptr<Handle>& inplace_cpu_handle(int debug_level = 0);
  */
 template <int nr_opr = 1>
 class CpuOprDelegationStorage {
-    std::mutex m_mtx;
+    DNN_MUTEX m_mtx;
     std::shared_ptr<Handle> m_handle;
     std::unique_ptr<OperatorBase> m_oprs[nr_opr];
 
diff --git a/dnn/src/common/tensor_format.cpp b/dnn/src/common/tensor_format.cpp
index ac4736ad..1b700e94 100644
--- a/dnn/src/common/tensor_format.cpp
+++ b/dnn/src/common/tensor_format.cpp
@@ -604,7 +604,7 @@ TensorLayout LowbitsAlignedTensorFormatBase::collapse_contiguous_spec(
 TensorFormat Image2DPack4TensorFormat::make_raw(
         size_t align_axis, size_t align_size_in_elements,
         Handle::HandleVendorType vendor_type) {
-    static std::mutex mtx;
+    static DNN_MUTEX mtx;
     static std::unordered_map<uint64_t,
                               std::unique_ptr<Image2DPack4TensorFormat>>
             cache;
@@ -641,7 +641,7 @@ TensorFormat Image2DPack4TensorFormat::change_axis(size_t axis) const {
 /* ===================== LowbitsitsAlignedToBytesTensorFormat
  * ===================== */
 TensorFormat LowbitsAlignedToBytesTensorFormat::make(size_t size_nbits) {
-    static std::mutex mtx;
+    static DNN_MUTEX mtx;
     static std::unordered_map<
             uint64_t, std::unique_ptr<LowbitsAlignedToBytesTensorFormat>>
             cache;
diff --git a/dnn/src/common/utils.h b/dnn/src/common/utils.h
index 11077216..452477d9 100644
--- a/dnn/src/common/utils.h
+++ b/dnn/src/common/utils.h
@@ -118,8 +118,17 @@
 #define megdnn_layout_msg(layout) \
     std::string(#layout "=" + (layout).to_string())
 
-#define MEGDNN_LOCK_GUARD(var) \
-    std::lock_guard<std::remove_cv_t<decltype(var)>> _lock_guard_##var { var }
+#if __DEPLOY_ON_XP_SP2__
+#define DNN_MUTEX size_t
+#define MEGDNN_LOCK_GUARD(var) MEGDNN_MARK_USED_VAR(var)
+#else
+#define DNN_MUTEX std::mutex
+#define DNN_TOKENPASTE(x, y) x##y
+#define DNN_TOKENPASTE2(x, y) DNN_TOKENPASTE(x, y)
+#define DNN_LOCK_GUARD_CTOR(mtx) DNN_TOKENPASTE2(__lock_guard_, __LINE__)(mtx)
+#define MEGDNN_LOCK_GUARD(mtx) \
+    std::lock_guard<decltype(mtx)> DNN_LOCK_GUARD_CTOR(mtx)
+#endif
 
 namespace megdnn {
 
@@ -487,7 +496,7 @@ struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> {
                 "implicit conversion disallowed in SafeMultiplies");
         megdnn_trap();
     }
-};
+};  // struct _SafeMultipliesImplUnsigned
 
 template <>
 struct SafeMultiplies<size_t> : public _SafeMultipliesImplUnsigned<size_t> {};
diff --git a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h
index 11ab21aa..8e52a7ec 100644
--- a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h
+++ b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_utils.h
@@ -81,7 +81,7 @@ public:
     }
 
 private:
-    std::mutex m_mtx;
+    DNN_MUTEX m_mtx;
     std::unordered_map<StrategyHashKey, std::unique_ptr<T>, StrategyHasher,
                        StrategyHashKeyEqual>
             m_map_strategies;
@@ -99,4 +99,4 @@ MatrixMulImpl::KernSizeParam get_matmul_kern_param(
 }  // namespace fallback
 }  // namespace megdnn
 
-// vim: syntax=cpp.doxygen
\ No newline at end of file
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/fallback/conv_bias/im2col/factory.h b/dnn/src/fallback/conv_bias/im2col/factory.h
index f7daef77..bbaa0de2 100644
--- a/dnn/src/fallback/conv_bias/im2col/factory.h
+++ b/dnn/src/fallback/conv_bias/im2col/factory.h
@@ -110,7 +110,7 @@ struct StrategyHashParamEqual {
 };
 
 class StrategyDelegationStorage {
-    std::mutex m_mtx;
+    DNN_MUTEX m_mtx;
     std::unordered_map<StrategyHashParam, std::unique_ptr<StrategyBase>,
                        StrategyHashParamHash, StrategyHashParamEqual>
             map_strategys;
diff --git a/dnn/src/naive/sleep/opr_impl.cpp b/dnn/src/naive/sleep/opr_impl.cpp
index db1c167d..ca1ebb60 100644
--- a/dnn/src/naive/sleep/opr_impl.cpp
+++ b/dnn/src/naive/sleep/opr_impl.cpp
@@ -11,6 +11,10 @@
 
 #include "./opr_impl.h"
 
+#if __DEPLOY_ON_XP_SP2__
+#define MEGDNN_NO_THREAD 1
+#endif
+
 #include "src/naive/handle.h"
 #if !MEGDNN_NO_THREAD
 #include <thread>
@@ -20,10 +24,10 @@ namespace megdnn {
 namespace naive {
 
 void SleepForwardImpl::exec() {
-    double seconds = m_param.time;
 #if MEGDNN_NO_THREAD
     megdnn_trap();
 #else
+    double seconds = m_param.time;
     MEGDNN_DISPATCH_CPU_KERN_OPR(
             std::this_thread::sleep_for(std::chrono::microseconds(
                     static_cast<uint64_t>(seconds * 1e6))););
diff --git a/sdk/load-and-run/src/infile_persistent_cache.h b/sdk/load-and-run/src/infile_persistent_cache.h
index 33fcc2c2..d9dc5bf0 100644
--- a/sdk/load-and-run/src/infile_persistent_cache.h
+++ b/sdk/load-and-run/src/infile_persistent_cache.h
@@ -52,7 +52,7 @@ class InFilePersistentCache final : public PersistentCache {
     std::unordered_map<std::string, std::unordered_map<BlobStorage, BlobStorage,
                                                        BlobStorage::Hash>>
             m_cache;
-    std::mutex m_mtx;
+    MGB_MUTEX m_mtx;
 
     template <typename Input>
     void read_cache(Input& inp);
diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp
index d4bad933..cca03b65 100644
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -32,7 +32,7 @@ namespace {
     std::atomic_flag
         g_default_cpu_initialized,
         g_exit_handler_registered[CompNode::NR_DEVICE_TYPE];
-    std::mutex g_device_map_mtx;
+    MGB_MUTEX g_device_map_mtx;
     ThinHashMap<CompNode::DeviceType, ThinHashMap<int, int>> g_device_map;
     CompNode::DeviceType g_unspec_locator_type;
 
diff --git a/src/core/impl/comp_node/cpu/comp_node.cpp b/src/core/impl/comp_node/cpu/comp_node.cpp
index 92633e15..0ac86e7d 100644
--- a/src/core/impl/comp_node/cpu/comp_node.cpp
+++ b/src/core/impl/comp_node/cpu/comp_node.cpp
@@ -60,7 +60,11 @@ class CpuCompNode::WorkerQueue final
             sys::set_cpu_affinity({m_locator.device});
 #endif
         }
+#if __DEPLOY_ON_XP_SP2__
+        __builtin_trap();
+#else
         sys::set_thread_name(m_locator.to_string());
+#endif
     }
 
     void on_sync_all_task_finish() override {
@@ -830,7 +834,9 @@ struct CpuCompNode::Pool {
         void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); }
     };
 
+#if !__DEPLOY_ON_XP_SP2__
     std::recursive_mutex mtx;
+#endif
     // use global memory pool to ensuare object memory accessible even after
     // global finalize
     std::aligned_storage_t<sizeof(CompNodeRecorderImpl),
@@ -862,7 +868,9 @@ void CpuCompNode::foreach (thin_function<void(CompNode)> callback) {
     for (size_t i = 0;; ++i) {
         CompNode cur;
         {
+#if !__DEPLOY_ON_XP_SP2__
             MGB_LOCK_GUARD(sm_pool->mtx);
+#endif
             if (i >= sm_pool->nr_used_impl_storage)
                 return;
             cur = make_comp_node_from_impl(
@@ -909,7 +917,9 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator,
                        locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT,
                "failed to load cpu for device:%d stream:%d", locator.device,
                locator.stream);
+#if !__DEPLOY_ON_XP_SP2__
     MGB_LOCK_GUARD(sm_pool->mtx);
+#endif
 
     // encode both device ID and type into a int
     mgb_assert(locator_logical.device >= -1 ||
@@ -967,7 +977,9 @@ void CpuCompNode::sync_all() {
     if (!sm_pool)
         return;
 
+#if !__DEPLOY_ON_XP_SP2__
     MGB_LOCK_GUARD(sm_pool->mtx);
+#endif
     for (auto&& i : sm_pool->locator2impl)
         i.second->sync();
     for (auto&& i : sm_pool->locator2impl_multi_thread)
@@ -1049,7 +1061,9 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::do_device_wait_by(
 
     auto waiter = [this, version]() {
         while (m_record_nr_finish.load(std::memory_order_acquire) < version) {
+#if !__DEPLOY_ON_XP_SP2__
             std::unique_lock<std::mutex> lk{m_dev_wait_mtx};
+#endif
             if (m_record_nr_finish.load(std::memory_order_acquire) >= version) {
                 break;
             }
@@ -1078,10 +1092,12 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::on_finish() {
     }
 
     m_record_nr_finish.fetch_add(1, std::memory_order_release);
+#if !__DEPLOY_ON_XP_SP2__
     if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) {
         MGB_LOCK_GUARD(m_dev_wait_mtx);
         m_dev_wait_cv.notify_all();
     }
+#endif
 }
 
 bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() {
@@ -1100,11 +1116,15 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::host_wait_cv() {
 
     m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release);
     for (;;) {
+#if !__DEPLOY_ON_XP_SP2__
         std::unique_lock<std::mutex> lock{m_dev_wait_mtx};
+#endif
         if (finished()) {
             break;
         }
+#if !__DEPLOY_ON_XP_SP2__
         m_dev_wait_cv.wait(lock);
+#endif
     }
     m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release);
 }
diff --git a/src/core/impl/comp_node/impl_helper.cpp b/src/core/impl/comp_node/impl_helper.cpp
index 8101c97b..585a83e1 100644
--- a/src/core/impl/comp_node/impl_helper.cpp
+++ b/src/core/impl/comp_node/impl_helper.cpp
@@ -45,9 +45,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() {
         return;
     }
     if (sm_cpu_sync_level >= 1) {
+#if __DEPLOY_ON_XP_SP2__
+#if MGB_HAVE_THREAD
+        __builtin_trap();
+#else
+        return;
+#endif
+#else
         while (!finished()) {
             std::this_thread::yield();
         }
+#endif
         return;
     }
     mgb_assert(!sm_cpu_sync_level, "invalid cpu sync level: %d",
@@ -57,9 +65,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() {
 }
 
 void CompNodeImplHelper::EventImplHelper::host_wait_cv() {
+#if __DEPLOY_ON_XP_SP2__
+#if MGB_HAVE_THREAD
+    __builtin_trap();
+#else
+    return;
+#endif
+#else
     while (!finished()) {
         std::this_thread::yield();
     }
+#endif
 }
 
 double CompNodeImplHelper::EventImplHelper::elapsed_time_until(Event& end_) {
diff --git a/src/core/impl/comp_node/impl_helper.h b/src/core/impl/comp_node/impl_helper.h
index a1d4f1e6..62df1da9 100644
--- a/src/core/impl/comp_node/impl_helper.h
+++ b/src/core/impl/comp_node/impl_helper.h
@@ -49,7 +49,7 @@ namespace mgb {
      * been performed.
      */
     class CompNodeImplHelper::EventImplHelper: public Event {
-        std::mutex m_mtx;
+        MGB_MUTEX m_mtx;
 
         bool m_recorded = false, m_finished = false;
 
diff --git a/src/core/impl/comp_node/mem_alloc/impl.cpp b/src/core/impl/comp_node/mem_alloc/impl.cpp
index 88c21225..75a8cfad 100644
--- a/src/core/impl/comp_node/mem_alloc/impl.cpp
+++ b/src/core/impl/comp_node/mem_alloc/impl.cpp
@@ -59,11 +59,15 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc(
         size_t size, bool allow_from_parent, bool log_stat_on_error) {
 
     mgb_assert(size);
+#if !__DEPLOY_ON_XP_SP2__
     m_mutex.lock();
+#endif
 
     auto iter = m_free_blk_size.lower_bound(FreeBlock{MemAddr{0, 0}, size});
     if (iter == m_free_blk_size.end()) {
+#if !__DEPLOY_ON_XP_SP2__
         m_mutex.unlock();
+#endif
         if (!allow_from_parent) {
             if (log_stat_on_error) {
                 print_memory_state();
@@ -87,7 +91,9 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc(
     if (remain)
         insert_free_unsafe({alloc_addr + size, remain});
 
+#if !__DEPLOY_ON_XP_SP2__
     m_mutex.unlock();
+#endif
     return alloc_addr;
 }
 
@@ -267,7 +273,9 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) {
             {
                 // sleep to wait for async dealloc
                 using namespace std::literals;
+#if !__DEPLOY_ON_XP_SP2__
                 std::this_thread::sleep_for(0.2s);
+#endif
             }
             get = gather_stream_free_blk_and_release_full();
             mgb_log("device %d: sync all device and try to "
diff --git a/src/core/impl/comp_node/mem_alloc/impl.h b/src/core/impl/comp_node/mem_alloc/impl.h
index 11e1de00..e5c08abe 100644
--- a/src/core/impl/comp_node/mem_alloc/impl.h
+++ b/src/core/impl/comp_node/mem_alloc/impl.h
@@ -73,7 +73,7 @@ class MemAllocImplHelper: virtual public MemAllocBase {
         //! map from address to size and size iter
         std::map<size_t, FreeBlockAddrInfo> m_free_blk_addr;
 
-        std::mutex m_mutex;
+        MGB_MUTEX m_mutex;
 
         struct BlkByAddrIter {
             decltype(m_free_blk_addr.begin()) aiter;
diff --git a/src/core/impl/graph/cg_impl_seq.cpp b/src/core/impl/graph/cg_impl_seq.cpp
index c59604f4..1d5444d6 100644
--- a/src/core/impl/graph/cg_impl_seq.cpp
+++ b/src/core/impl/graph/cg_impl_seq.cpp
@@ -48,7 +48,11 @@ class ComputingGraphImpl::ComputingSequence::ExecContext {
     std::unique_ptr<CompNodeSeqRecorder> m_recorder;
 
     bool has_var_sanity_check() const {
+#if __DEPLOY_ON_XP_SP2__
+        return false;
+#else
         return static_cast<bool>(m_comp_seq->m_var_sanity_check);
+#endif
     }
 
     void try_reset_recorder() {
@@ -305,10 +309,12 @@ void ComputingGraphImpl::ComputingSequence::preprocess(ExecContext* ctx) {
             m_owner_graph->var_node_mem_manager().alloc_var_node_mem_static();
 
     bool first_exec = m_first_exec;
+#if !__DEPLOY_ON_XP_SP2__
     if (!first_exec) {
         // var sanity check only for first run
         m_var_sanity_check.reset();
     }
+#endif
 
     m_owner_graph->event().signal_inplace<event::CompSeqExecBeforeStart>(
             m_owner_graph, this, &ctx->m_cleanup_callback, &m_used_comp_node,
@@ -342,9 +348,13 @@ void ComputingGraphImpl::ComputingSequence::attach_to_graph() {
                 static_cast<ComputingSequence*>(gimpl->m_current_comp_seq);
         prev_seq->cleanup();
     }
+#if !__DEPLOY_ON_XP_SP2__
+    //! disable VarSanityCheck when __DEPLOY_ON_XP_SP2__=1, because
+    //! VarSanityCheck depends on std::thread
     if (gimpl->options().var_sanity_check_first_run) {
         m_var_sanity_check = std::make_unique<VarSanityCheck>(gimpl);
     }
+#endif
     gimpl->m_current_comp_seq = this;
 }
 
@@ -403,7 +413,9 @@ void ComputingGraphImpl::ComputingSequence::do_wait(bool explicit_user_wait) {
 }
 
 void ComputingGraphImpl::ComputingSequence::cleanup() {
+#if !__DEPLOY_ON_XP_SP2__
     m_var_sanity_check.reset();
+#endif
     if (has_uncaught_exception()) {
         mgb_log_warn(
                 "fallback to simple graph waiting in dtor due to uncaught "
diff --git a/src/core/impl/graph/cg_impl_seq.h b/src/core/impl/graph/cg_impl_seq.h
index 47818a6f..f13e50e9 100644
--- a/src/core/impl/graph/cg_impl_seq.h
+++ b/src/core/impl/graph/cg_impl_seq.h
@@ -30,7 +30,9 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable {
     size_t m_run_id = 0;
     size_t m_cg_event_version = 0;
     mutable Maybe<double> m_prev_exec_time;
+#if !__DEPLOY_ON_XP_SP2__
     std::unique_ptr<VarSanityCheck> m_var_sanity_check;
+#endif
     std::unique_ptr<CompNodeSeqRecorder> m_comp_node_seq_recorder;
 
     NormalExecEnv m_exec_env;
@@ -46,7 +48,7 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable {
     class ExecContext;
 
     std::unique_ptr<MegBrainError> m_async_exc;
-    std::mutex m_async_exc_mutex;
+    MGB_MUTEX m_async_exc_mutex;
 
     /*!
      * \brief check whether recording comp seq is enabled
diff --git a/src/core/impl/graph/operator_node.cpp b/src/core/impl/graph/operator_node.cpp
index 202e5876..d5bc5502 100644
--- a/src/core/impl/graph/operator_node.cpp
+++ b/src/core/impl/graph/operator_node.cpp
@@ -713,7 +713,9 @@ void PostExecActions::perform() {
 
     for (auto&& i : m_items) {
         if (enable) {
+#if !__DEPLOY_ON_XP_SP2__
             VarSanityCheck::check_var_after_exec(i.var, *i.recv_info);
+#endif
 
             if (i.shape_sync_hdl)
                 i.shape_sync_hdl->sync_from_var();
diff --git a/src/core/impl/graph/static_infer_impl.cpp b/src/core/impl/graph/static_infer_impl.cpp
index 908e9318..9562b58c 100644
--- a/src/core/impl/graph/static_infer_impl.cpp
+++ b/src/core/impl/graph/static_infer_impl.cpp
@@ -141,7 +141,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final,
             TagTraitBase) //  {
     struct InferResultCache {
         Spinlock mtx;
+#if __DEPLOY_ON_XP_SP2__
+        ThinHashMap<size_t, InpElement> storage;
+#else
         ThinHashMap<std::thread::id, InpElement> storage;
+#endif
     };
     static TagTraitArray sm_empty_deps;
     static InferResultCache sm_result_cache;
@@ -167,7 +171,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final,
             {
                 // thread_local not supported on ios; so we us a manual impl
                 MGB_LOCK_GUARD(sm_result_cache.mtx);
+#if __DEPLOY_ON_XP_SP2__
+                ret = &sm_result_cache.storage[0];
+#else
                 ret = &sm_result_cache.storage[std::this_thread::get_id()];
+#endif
             }
             ret->m_shape = &tag()->shape();
             return ret;
diff --git a/src/core/impl/graph/static_infer_impl.h b/src/core/impl/graph/static_infer_impl.h
index 7594851f..f2af0e2c 100644
--- a/src/core/impl/graph/static_infer_impl.h
+++ b/src/core/impl/graph/static_infer_impl.h
@@ -122,7 +122,7 @@ class StaticInferManagerImpl final: public StaticInferManager {
         struct TagTraitContainer;
 
         ComputingGraph * const m_owner_graph;
-        std::recursive_mutex m_mtx;
+        MGB_RECURSIVE_MUTEX m_mtx;
 
         //! callbacks to be invoked in destructor
         ThinHashMap<void*, thin_function<void()>> m_dtor_callbacks;
diff --git a/src/core/impl/graph/var_node.cpp b/src/core/impl/graph/var_node.cpp
index 4a2ab692..596313c6 100644
--- a/src/core/impl/graph/var_node.cpp
+++ b/src/core/impl/graph/var_node.cpp
@@ -20,7 +20,7 @@ using namespace cg;
 
 /* ===================== MemAllocPlan =====================  */
 
-std::mutex MemAllocPlan::ReadonlyFwdList::list_mutex;
+MGB_MUTEX MemAllocPlan::ReadonlyFwdList::list_mutex;
 
 void MemAllocPlan::ReadonlyFwdList::reset() {
     MGB_LOCK_GUARD(list_mutex);
diff --git a/src/core/impl/graph/var_node_mem_mgr.h b/src/core/impl/graph/var_node_mem_mgr.h
index be69da17..d657a0b9 100644
--- a/src/core/impl/graph/var_node_mem_mgr.h
+++ b/src/core/impl/graph/var_node_mem_mgr.h
@@ -440,7 +440,7 @@ class VarNodeMemManager {
 
         ImpureMemPlanManager m_impure_mem_plan_mgr;
 
-        std::mutex m_dynamic_alloc_mtx;
+        MGB_MUTEX m_dynamic_alloc_mtx;
         const size_t* m_run_id_ptr = nullptr;
 
         SyncableCounter m_cpu_async_release_barrier;
diff --git a/src/core/impl/system.cpp b/src/core/impl/system.cpp
index a9e695d8..8673d6a7 100644
--- a/src/core/impl/system.cpp
+++ b/src/core/impl/system.cpp
@@ -19,7 +19,13 @@ using namespace mgb;
 using namespace sys;
 
 int sys::get_cpu_count() {
+#if __DEPLOY_ON_XP_SP2__
+    //! when deployed on xp sp2 only a single thread is supported,
+    //! so return 1 even if the machine has more than one cpu
+    return 1;
+#else
     return std::max(std::thread::hardware_concurrency(), 1u);
+#endif
 }
 
 #if defined(WIN32)
@@ -153,9 +159,11 @@ bool sys::stderr_ansi_color() {
 void sys::set_thread_name(const std::string &) {
 }
 
+#if !__DEPLOY_ON_XP_SP2__
 std::string sys::get_thread_name(Maybe<std::thread::id>) {
     return "@";
 }
+#endif
 
 namespace {
     class FakeTimedFuncInvoker final: public TimedFuncInvoker {
@@ -254,6 +262,7 @@ void sys::set_thread_name(const std::string &name) {
 #endif
 }
 
+#if !__DEPLOY_ON_XP_SP2__
 std::string sys::get_thread_name(Maybe<std::thread::id> tid_) {
 #if MGB_ENABLE_DEBUG_UTIL
     MGB_LOCK_GUARD(thread_name_map_lock);
@@ -269,10 +278,11 @@ std::string sys::get_thread_name(Maybe<std::thread::id> tid_) {
     return "";
 #endif
 }
+#endif
 
 namespace {
 
-class TimedFuncInvokerImpl final: public TimedFuncInvoker {
+class TimedFuncInvokerImpl final : public TimedFuncInvoker {
     /*
      * server-client protocol:
      *
@@ -308,7 +318,7 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker {
 
     bool m_watcher_should_stop = false;
     std::condition_variable m_watcher_stop_cv;
-    std::mutex m_watcher_stop_mtx, m_global_mtx;
+    MGB_MUTEX m_watcher_stop_mtx, m_global_mtx;
 
     void clear_sock_fd() {
         if (m_peer_fd)
@@ -567,8 +577,10 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker {
         auto start = high_resolution_clock::now(),
              end = start + timeout_due;
         for (; ; ) {
+#if !__DEPLOY_ON_XP_SP2__
             std::unique_lock<std::mutex> lk(m_watcher_stop_mtx);
             m_watcher_stop_cv.wait_until(lk, end);
+#endif
 
             if (m_watcher_should_stop)
                 return false;
@@ -603,10 +615,9 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker {
             } MGB_CATCH(..., {});
             clear_sock_fd();
         }
-
 };
 
-} // anonymous namespace
+}  // anonymous namespace
 
 TimedFuncInvoker& TimedFuncInvoker::ins() {
     static TimedFuncInvokerImpl impl;
diff --git a/src/core/include/megbrain/common.h b/src/core/include/megbrain/common.h
index 5283e1b8..1972c14c 100644
--- a/src/core/include/megbrain/common.h
+++ b/src/core/include/megbrain/common.h
@@ -205,6 +205,21 @@ void __log__(LogLevel level, const char *file, const char *func, int line,
 #define MGB_TOKENPASTE2(x, y) MGB_TOKENPASTE(x, y)
 #define MGB_LOCK_GUARD_CTOR(mtx) MGB_TOKENPASTE2(__lock_guard_, __LINE__)(mtx)
 
+#if __DEPLOY_ON_XP_SP2__
+//! refer to
+//! https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
+//! xp sp2 does not fully support the vc runtime, because its KERNEL32.dll does
+//! not implement some base apis needed by c++ std facilities, for example
+//! std::mutex/std::thread/std::condition_variable. As a workaround, we
+//! disable some MegEngine features on xp sp2, for example multi-threading.
+#define MGB_MUTEX size_t
+#define MGB_RECURSIVE_MUTEX size_t
+#define MGB_LOCK_GUARD(mtx) MGB_MARK_USED_VAR(mtx)
+#define MGB_LOCK_GUARD_UNIQUE(mtx) MGB_MARK_USED_VAR(mtx)
+#define MGB_LOCK_GUARD_SHARED(mtx) MGB_MARK_USED_VAR(mtx)
+#else
+#define MGB_MUTEX std::mutex
+#define MGB_RECURSIVE_MUTEX std::recursive_mutex
 #define MGB_LOCK_GUARD(mtx) \
     std::lock_guard<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
 
@@ -212,7 +227,8 @@ void __log__(LogLevel level, const char *file, const char *func, int line,
     std::unique_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
 
 #define MGB_LOCK_GUARD_SHARED(mtx) \
-	std::shared_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
+    std::shared_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
+#endif
 
 /*!
  * \brief printf-like std::string constructor
diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h
index 0ef2fdb6..fedb7fae 100644
--- a/src/core/include/megbrain/graph/var_node.h
+++ b/src/core/include/megbrain/graph/var_node.h
@@ -222,7 +222,7 @@ class MemAllocPlan final: public json::Serializable, public NonCopyableObj {
     private:
         class ReadonlyFwdList {
             MemAllocPlan *m_prev = nullptr, *m_next = nullptr;
-            static std::mutex list_mutex;
+            static MGB_MUTEX list_mutex;
         public:
             MemAllocPlan* next() const { return m_next; }
             void reset();
diff --git a/src/core/include/megbrain/system.h b/src/core/include/megbrain/system.h
index 66b83690..1b2cd47e 100644
--- a/src/core/include/megbrain/system.h
+++ b/src/core/include/megbrain/system.h
@@ -27,11 +27,13 @@ namespace sys {
     //! set name of caller thread
     void set_thread_name(const std::string &name);
 
+#if !__DEPLOY_ON_XP_SP2__
     /*!
      * \brief get name of of given thread
      * \param tid thread id, or None to for the caller thread
      */
     std::string get_thread_name(Maybe<std::thread::id> tid = None);
+#endif
 
     //! get number of CPU cores on this system
     int get_cpu_count();
diff --git a/src/core/include/megbrain/utils/async_worker_impl_0.h b/src/core/include/megbrain/utils/async_worker_impl_0.h
index 2801a188..6dd96437 100644
--- a/src/core/include/megbrain/utils/async_worker_impl_0.h
+++ b/src/core/include/megbrain/utils/async_worker_impl_0.h
@@ -35,14 +35,20 @@ class AsyncWorkerSet final: public NonCopyableObj {
 };
 
 class FutureThreadPoolBase : public NonCopyableObj {
+#if !__DEPLOY_ON_XP_SP2__
     std::vector<std::thread::id> m_ids;
+#endif
     public:
         FutureThreadPoolBase(const Maybe<std::string>& = None) {}
 
+#if __DEPLOY_ON_XP_SP2__  // m_ids is compiled out: just echo concurrency back
+        size_t start(size_t concurrency) { return concurrency; }
+#else  // report the calling thread's id once per requested worker
         const std::vector<std::thread::id>& start(size_t concurrency) {
             m_ids.resize(concurrency, std::this_thread::get_id());
             return m_ids;
         }
+#endif  // __DEPLOY_ON_XP_SP2__
 
         void stop() {
         }
diff --git a/src/core/include/megbrain/utils/event.h b/src/core/include/megbrain/utils/event.h
index a6ce7c19..66a33cff 100644
--- a/src/core/include/megbrain/utils/event.h
+++ b/src/core/include/megbrain/utils/event.h
@@ -53,7 +53,7 @@ class SyncEventConnecter: public NonCopyableObj {
     using ReceiverMap = ThinHashMap<Typeinfo*, ReceiverList>;
 
     bool m_is_empty = true;
-    std::mutex m_mtx;
+    MGB_MUTEX m_mtx;
     //! map from type to receiver; use shared_ptr because it would be kept by
     //! handlers
     std::shared_ptr<ReceiverMap> m_receiver_map =
diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h
index 523a4ad6..4871dcce 100644
--- a/src/core/include/megbrain/utils/persistent_cache.h
+++ b/src/core/include/megbrain/utils/persistent_cache.h
@@ -83,7 +83,7 @@ namespace mgb {
                 std::string,
                 std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>>
                 m_cache;
-        std::mutex m_mtx;
+        MGB_MUTEX m_mtx;
     };
 
     /*!
diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp
index d7554fb6..ee63cafb 100644
--- a/src/opr/impl/basic_arith.cpp
+++ b/src/opr/impl/basic_arith.cpp
@@ -33,7 +33,7 @@ namespace {
     template<class Opr>
     class StaticInferOpr {
         intl::UniqPtrWithCN<Opr> m_opr;
-        std::mutex m_mtx;
+        MGB_MUTEX m_mtx;
 
         public:
             class Lock {
@@ -43,7 +43,9 @@ namespace {
                 explicit Lock(StaticInferOpr *owner):
                     m_owner{owner}
                 {
+#if !__DEPLOY_ON_XP_SP2__  // MGB_MUTEX is a plain size_t on xp sp2: no lock()
                     m_owner->m_mtx.lock();
+#endif  // !__DEPLOY_ON_XP_SP2__
                 }
 
                 public:
@@ -54,8 +56,10 @@ namespace {
                     }
 
                     ~Lock() {
+#if !__DEPLOY_ON_XP_SP2__  // MGB_MUTEX is a plain size_t on xp sp2: no unlock()
                         if (m_owner)
                             m_owner->m_mtx.unlock();
+#endif  // !__DEPLOY_ON_XP_SP2__
                     }
 
                     Lock& operator = (const Lock &) = delete;
diff --git a/src/opr/impl/internal/indexing_helper.cpp b/src/opr/impl/internal/indexing_helper.cpp
index db4f9087..29ccf9b6 100644
--- a/src/opr/impl/internal/indexing_helper.cpp
+++ b/src/opr/impl/internal/indexing_helper.cpp
@@ -277,7 +277,7 @@ SubTensorSpec FancyIndexingHelper::fancy_indexing_make_sub_spec(
     mgb_assert(m_require_scalar_index || !fake_single_idx);
 
     static DeviceTensorND fake_val;
-    static std::mutex fake_val_mtx;
+    static MGB_MUTEX fake_val_mtx;
 
     if (mgb_unlikely(fake_val.empty())) {
         MGB_LOCK_GUARD(fake_val_mtx);
diff --git a/src/opr/impl/internal/megdnn_opr_wrapper.cpp b/src/opr/impl/internal/megdnn_opr_wrapper.cpp
index a920dcbe..ae34d63d 100644
--- a/src/opr/impl/internal/megdnn_opr_wrapper.cpp
+++ b/src/opr/impl/internal/megdnn_opr_wrapper.cpp
@@ -53,7 +53,7 @@ namespace {
         MGB_TYPEINFO_OBJ_DECL;
 
         public:
-            std::mutex mtx;
+            MGB_MUTEX mtx;
             CompNode::UnorderedMap<DeviceTensorStorage> cn2storage;
     };
     MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer);
diff --git a/src/opr/impl/io.cpp b/src/opr/impl/io.cpp
index 316e4969..4a691213 100644
--- a/src/opr/impl/io.cpp
+++ b/src/opr/impl/io.cpp
@@ -377,7 +377,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(SharedDeviceTensorWithFormat);
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(ImmutableTensor);
 
 class ImmutableTensor::Value {
-    std::mutex m_mtx;
+    MGB_MUTEX m_mtx;
     DeviceTensorND m_dev, m_static_infer;
     std::string m_summary;
 
@@ -527,7 +527,7 @@ class ImmutableTensor::DevValueCache final: public UserDataContainer::UserData {
     std::unordered_map<TensorKey, Value, Hash> m_tensor2val;
     std::unordered_map<ScalarKey, Value, Hash> m_scalar2val;
 
-    std::mutex m_mtx;
+    MGB_MUTEX m_mtx;
 
     void setup_value(Value &dest, const HostTensorND &val) {
         dest.setup(m_comp_node, val);
diff --git a/src/opr/impl/loop/impl.cpp b/src/opr/impl/loop/impl.cpp
index d2f6b61b..3207104c 100644
--- a/src/opr/impl/loop/impl.cpp
+++ b/src/opr/impl/loop/impl.cpp
@@ -888,7 +888,7 @@ class LoopImpl::MutableStateSaver::Recorder final: public NonCopyableObj {
 
     //! mutex for m_saved_buckets, used between copy_bucket_to_host() and the
     //! async copy task in m_copy_threadpool
-    std::mutex m_saved_buckets_mtx;
+    MGB_MUTEX m_saved_buckets_mtx;
     //! see on_fwd_finish()
     TensorShape m_var_shape;
     bool m_enabled = false;
diff --git a/src/opr/impl/search_policy/profiler.cpp b/src/opr/impl/search_policy/profiler.cpp
index d2272abe..379c208e 100644
--- a/src/opr/impl/search_policy/profiler.cpp
+++ b/src/opr/impl/search_policy/profiler.cpp
@@ -356,7 +356,9 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
             next_report_time = timer.get_secs() + 1;
         }
         using namespace std::literals;
+#if !__DEPLOY_ON_XP_SP2__  // NOTE(review): no pause between polls on xp sp2? confirm
         std::this_thread::sleep_for(1000us);
+#endif  // !__DEPLOY_ON_XP_SP2__
     }
     // release all free blocks owned by child process,
     // in order to avoid main process running out of memory
diff --git a/src/opr/impl/utility.cpp b/src/opr/impl/utility.cpp
index 808e858f..2aa82fd2 100644
--- a/src/opr/impl/utility.cpp
+++ b/src/opr/impl/utility.cpp
@@ -731,7 +731,7 @@ class PersistentOutputStorage::StorageHolder final
                                      key.second);
         }
     };
-    std::mutex m_mtx;
+    MGB_MUTEX m_mtx;
     std::unordered_map<Key, DeviceTensorStorage, KeyHash> m_storage;
 
 public:
diff --git a/src/plugin/impl/var_value_checker.cpp b/src/plugin/impl/var_value_checker.cpp
index 04ae4c7d..6bf07464 100644
--- a/src/plugin/impl/var_value_checker.cpp
+++ b/src/plugin/impl/var_value_checker.cpp
@@ -125,9 +125,13 @@ void VarValueChecker::on_var_computed(VarNode *var) {
     }
 
     if (!m_init_val_dumped) {
+#if !__DEPLOY_ON_XP_SP2__  // m_var2val_mtx is a plain size_t on xp sp2: no lock()
         m_var2val_mtx.lock();
-        auto &&val = m_var2val[var];
+#endif  // !__DEPLOY_ON_XP_SP2__
+        auto&& val = m_var2val[var];
+#if !__DEPLOY_ON_XP_SP2__  // same guard for the matching unlock()
         m_var2val_mtx.unlock();
+#endif  // !__DEPLOY_ON_XP_SP2__
 
         mgb_assert(!val);
         val = std::make_shared<DeviceTensorND>();
diff --git a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h
index cacca348..fb30563c 100644
--- a/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h
+++ b/src/plugin/include/megbrain/plugin/cpu_dispatch_checker.h
@@ -22,7 +22,7 @@ namespace mgb {
      * This is intended to find potential bugs in megdnn.
      */
     class CPUDispatchChecker final: public PluginBase {
-        std::mutex m_cn2nr_task_mtx,
+        MGB_MUTEX m_cn2nr_task_mtx,
             m_failed_oprs_mtx_storage,
             *m_failed_oprs_mtx = &m_failed_oprs_mtx_storage;
         CompNode::UnorderedMap<size_t> m_cn2nr_task;
diff --git a/src/plugin/include/megbrain/plugin/opr_io_dump.h b/src/plugin/include/megbrain/plugin/opr_io_dump.h
index 84bc55de..00dfe0cc 100644
--- a/src/plugin/include/megbrain/plugin/opr_io_dump.h
+++ b/src/plugin/include/megbrain/plugin/opr_io_dump.h
@@ -60,7 +60,7 @@ class TextOprIODump final : public OprIODumpBase {
     bool m_print_addr = true;
     std::shared_ptr<FILE> m_fout;
     size_t m_max_size = 5;
-    std::mutex m_mtx;
+    MGB_MUTEX m_mtx;
     std::unique_ptr<LazyValueRecorder> m_lazy_value;
 
     void dump_var(VarNode* var, bool lazy_sync) override;
diff --git a/src/plugin/include/megbrain/plugin/var_sanity_check.h b/src/plugin/include/megbrain/plugin/var_sanity_check.h
index 7f32cc8e..f18b0dde 100644
--- a/src/plugin/include/megbrain/plugin/var_sanity_check.h
+++ b/src/plugin/include/megbrain/plugin/var_sanity_check.h
@@ -64,7 +64,7 @@ class VarSanityCheck final : public PluginBase {
 
     //! map from caller thread to workspace map
     ThinHashMap<std::thread::id, WorkspaceCache> m_workspace;
-    std::mutex m_workspace_mtx;
+    MGB_MUTEX m_workspace_mtx;
 
     ThinHashMap<VarNode*, ChecksumResult> m_var2chksum;
     /*! the ids of varnodes that have been modified by recv_opr
@@ -72,7 +72,7 @@ class VarSanityCheck final : public PluginBase {
      * cg::OperatorNodeBase::NodeProp::Flag:: FORCE_UPDATE_INPUT_VAR.
      */
     ThinHashSet<VarNode*> m_modified_vars;
-    std::mutex m_id2chksum_mtx;
+    MGB_MUTEX m_id2chksum_mtx;
 
     typedef void (VarSanityCheck::*input_checker_fn)(cg::OperatorNodeBase*,
                                                      VarNode*);
diff --git a/src/plugin/include/megbrain/plugin/var_value_checker.h b/src/plugin/include/megbrain/plugin/var_value_checker.h
index b9b356f7..2041a337 100644
--- a/src/plugin/include/megbrain/plugin/var_value_checker.h
+++ b/src/plugin/include/megbrain/plugin/var_value_checker.h
@@ -50,7 +50,7 @@ namespace mgb {
         size_t m_cur_var_idx, m_nr_exec;
 
         VarNodeArray m_vars;
-        std::mutex m_var2val_mtx;
+        MGB_MUTEX m_var2val_mtx;
         ThinHashMap<VarNode*, std::shared_ptr<DeviceTensorND>> m_var2val;
         Checker m_checker;