chore(format): fix compile bugs after code format

GitOrigin-RevId: 11a4b06f6f
3 years ago · bfb30dcb81
--- a/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h
+++ b/dnn/src/aarch64/matrix_mul/int8x8x16/kernel_mk4_4x4x8_a72.h
@@ -53,7 +53,9 @@ static inline void kern_4x4(const int8_t* packA, const int8_t* packB, int K,
    const int8_t* b_ptr = packB;

    LDC = LDC * sizeof(int8_t);
 // clang-format off

    // clang-format off

    #define STORE_LINE(reg0)                 \
    "cmp w10, #0 \n"                         \
    "beq 101f\n"                             \
--- a/dnn/src/arm_common/elemwise/opr_impl.h
+++ b/dnn/src/arm_common/elemwise/opr_impl.h
@@ -10,7 +10,6 @@
 * implied.
 */
 #pragma once

 #include "src/fallback/elemwise/opr_impl.h"

 #include "src/arm_common/elemwise_op.h"
--- a/dnn/src/arm_common/matrix_mul/int8/gemv.cpp
+++ b/dnn/src/arm_common/matrix_mul/int8/gemv.cpp
@@ -10,6 +10,7 @@
 */

 #include "src/arm_common/simd_macro/marm_neon.h"

 #include "src/arm_common/matrix_mul/int8/gemv.h"
 #include "src/common/utils.h"
 #include "megdnn/oprs.h"
--- a/dnn/src/common/cv/interp_helper.cpp
+++ b/dnn/src/common/cv/interp_helper.cpp
@@ -60,11 +60,8 @@
 #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
 // TableHolderBase has no problem; ignore the warning for old clang versions

 #include "./helper.h"
 #include "./interp_helper.h"

 #include "src/common/utils.h"

 using namespace megdnn;
 using namespace megdnn::megcv;

--- a/dnn/src/common/cv/interp_helper.h
+++ b/dnn/src/common/cv/interp_helper.h
@@ -62,7 +62,9 @@
 #pragma once

 #include "src/common/cv/aligned_allocator.h"
 #include "src/common/utils.h"

 #include "./helper.h"
 #include "megdnn/opr_param_defs.h"

 #include <cstdint>
--- a/dnn/src/cuda/batch_conv_bias/helper.cuh
+++ b/dnn/src/cuda/batch_conv_bias/helper.cuh
@@ -10,6 +10,7 @@
 */
 #pragma once
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/cuda/concat/concat.cuh
+++ b/dnn/src/cuda/concat/concat.cuh
@@ -10,6 +10,7 @@
 */
 #pragma once
 #include <stdint.h>
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/cuda/dot/dot.cuh
+++ b/dnn/src/cuda/dot/dot.cuh
@@ -10,6 +10,7 @@
 */
 #pragma once
 #include "megdnn/dtype.h"
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/cuda/repeat/repeat.cuh
+++ b/dnn/src/cuda/repeat/repeat.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/cuda/tile/tile.cuh
+++ b/dnn/src/cuda/tile/tile.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/cuda/warp_affine/common.cuh
+++ b/dnn/src/cuda/warp_affine/common.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/cuda/warp_perspective/common.cuh
+++ b/dnn/src/cuda/warp_perspective/common.cuh
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
 #include "src/cuda/utils.cuh"

 namespace megdnn {
 namespace cuda {
--- a/dnn/src/rocm/handle.h
+++ b/dnn/src/rocm/handle.h
@@ -9,6 +9,8 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
 #include "src/rocm/miopen_wrapper.h"

 #include "megcore_rocm.h"
 #include "megdnn/basic_types.h"
 #include "megdnn/handle.h"
@@ -16,7 +18,6 @@

 #include "src/common/handle_impl.h"
 #include "src/common/utils.h"
 #include "src/rocm/miopen_with_check.h"

 #include <rocblas.h>
 #include <atomic>
--- a/dnn/src/x86/avx_helper.h
+++ b/dnn/src/x86/avx_helper.h
@@ -13,9 +13,11 @@
 #include "megdnn/arch.h"

 #include <immintrin.h>
 #ifdef WIN32
 #include <avxintrin.h>
 #include <avx2intrin.h>
 #include <fmaintrin.h>
 #endif

 #if !defined (__clang__)
 #pragma GCC target ("avx")
--- a/dnn/src/x86/conv_bias/f32/do_conv_stride2.h
+++ b/dnn/src/x86/conv_bias/f32/do_conv_stride2.h
@@ -9,7 +9,8 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once

 // clang-format off
 #include "src/x86/simd_macro/sse_helper.h"
 #include "src/fallback/convolution/do_conv_stride2_decl.inl"
 #include "src/x86/simd_macro/sse_helper_epilogue.h"
 // clang-format on
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh1_avx.cpp
@@ -801,8 +801,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh2_avx.cpp
@@ -896,8 +896,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh3_avx.cpp
@@ -943,8 +943,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh4_avx.cpp
@@ -948,8 +948,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh5_avx.cpp
@@ -917,8 +917,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh6_avx.cpp
@@ -856,8 +856,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_conv_fh7_avx.cpp
@@ -771,8 +771,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh1_avx.cpp
@@ -788,8 +788,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh2_avx.cpp
@@ -872,8 +872,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh3_avx.cpp
@@ -910,8 +910,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh4_avx.cpp
@@ -908,8 +908,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh5_avx.cpp
@@ -872,8 +872,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh6_avx.cpp
@@ -808,8 +808,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp
+++ b/dnn/src/x86/convolution/avx/convolution_xcorr_fh7_avx.cpp
@@ -722,8 +722,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh1_fma.cpp
@@ -785,9 +785,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh2_fma.cpp
@@ -827,9 +827,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh3_fma.cpp
@@ -842,9 +842,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh4_fma.cpp
@@ -833,9 +833,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh5_fma.cpp
@@ -803,9 +803,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh6_fma.cpp
@@ -755,9 +755,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_conv_fh7_fma.cpp
@@ -692,9 +692,7 @@
        }                                                     \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh1_fma.cpp
@@ -771,9 +771,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh2_fma.cpp
@@ -801,9 +801,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh3_fma.cpp
@@ -806,9 +806,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh4_fma.cpp
@@ -789,9 +789,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh5_fma.cpp
@@ -753,9 +753,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh6_fma.cpp
@@ -701,9 +701,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp
+++ b/dnn/src/x86/convolution/fma/convolution_xcorr_fh7_fma.cpp
@@ -636,9 +636,7 @@
        }                                                                  \
    } while (0)

 #include <immintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #include "src/x86/avx_helper.h"
 #include <algorithm>

 #include "../convolution_direct_special_cases.h"
--- a/dnn/src/x86/local/local_avx.cpp
+++ b/dnn/src/x86/local/local_avx.cpp
@@ -8,6 +8,8 @@
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 // clang-format off
 #include "src/x86/simd_helper.h"
 #include "src/x86/simd_macro/avx_helper.h"
 #include "src/common/local/local_def.inl"
 // clang-format on
--- a/dnn/src/x86/local/local_fma.cpp
+++ b/dnn/src/x86/local/local_fma.cpp
@@ -8,6 +8,8 @@
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 // clang-format off
 #include "src/x86/simd_helper.h"
 #include "src/x86/simd_macro/fma_helper.h"
 #include "src/common/local/local_def.inl"
 // clang-format on
--- a/dnn/src/x86/local/local_simd.h
+++ b/dnn/src/x86/local/local_simd.h
@@ -10,6 +10,7 @@
 */
 #pragma once

 // clang-format off
 #include "src/x86/simd_macro/sse_helper.h"
 #include "src/common/local/local_decl.inl"
 #include "src/x86/simd_macro/sse_helper_epilogue.h"
@@ -21,3 +22,4 @@
 #include "src/x86/simd_macro/fma_helper.h"
 #include "src/common/local/local_decl.inl"
 #include "src/x86/simd_macro/fma_helper_epilogue.h"
 // clang-format on
--- a/dnn/src/x86/local/local_sse.cpp
+++ b/dnn/src/x86/local/local_sse.cpp
@@ -8,6 +8,8 @@
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
 // clang-format off
 #include "src/x86/simd_helper.h"
 #include "src/x86/simd_macro/sse_helper.h"
 #include "src/common/local/local_def.inl"
 // clang-format on
--- a/dnn/src/x86/matrix_mul/common/common.h
+++ b/dnn/src/x86/matrix_mul/common/common.h
@@ -11,7 +11,6 @@
 */
 #pragma once
 #include <x86intrin.h>

 #ifdef WIN32
 #include <avx2intrin.h>
 #include <avxintrin.h>
--- a/dnn/src/x86/simd_helper.h
+++ b/dnn/src/x86/simd_helper.h
@@ -13,9 +13,11 @@
 #include "megdnn/arch.h"

 #include <immintrin.h>
 #ifdef WIN32
 #include <xmmintrin.h>
 #include <avxintrin.h>
 #include <fmaintrin.h>
 #endif
 #include <cmath>
 #include <algorithm>

--- a/imperative/tablegen/emitter.h
+++ b/imperative/tablegen/emitter.h
@@ -17,6 +17,7 @@
 #include "llvm/Support/raw_ostream.h"

 namespace mlir::tblgen {
 using llvm::raw_ostream;

 struct Environment {
    std::unordered_map<unsigned int, std::pair<llvm::StringRef, llvm::StringRef>> enumAlias;
@@ -37,4 +38,4 @@ protected:
    Environment* env_p = nullptr;
 };

 } // namespace mlir::tblgen
 } // namespace mlir::tblgen
--- a/imperative/tablegen/targets/macros.cpp
+++ b/imperative/tablegen/targets/macros.cpp
@@ -9,6 +9,7 @@
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

 #include "./macros.h"
 #include "./cpp_class.h"
 #include "../emitter.h"

--- a/src/core/impl/graph/var_node_mem_mgr.cpp
+++ b/src/core/impl/graph/var_node_mem_mgr.cpp
@@ -125,7 +125,7 @@ StaticDeviceMemoryManager::make_default_impl() {
 #endif  // MGB_THREAD_SAFE

 /* ==================== AsyncVarReleaser ==================== */
 #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
 #if MGB_COMMON_ASYNC_COMPNODE
 class VarNodeMemManager::AsyncVarReleaser {
    struct WaiterParam {
        CompNode cn;
@@ -248,7 +248,7 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() {
 VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph)
        : m_owner_graph(graph),
          m_seq_mem_opt(graph)
 #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
 #if MGB_COMMON_ASYNC_COMPNODE
          ,m_asyn_var_releaser(new AsyncVarReleaser)
 #endif
 {
@@ -256,7 +256,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph)
        MGB_MARK_USED_VAR(ev);
        // async release is only used for sync between multiple comp nodes, and
        // does not wait for device to finish
 #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
 #if MGB_COMMON_ASYNC_COMPNODE
        m_asyn_var_releaser->wait_release_finish();
 #endif
        m_cpu_async_release_barrier.wait_zero();
@@ -297,8 +297,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph)
    graph->event().register_receiver_permanent<event::CompSeqExecError>(
            on_comp_seq_error);

 #if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER &&                                   \
        (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM)
 #if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && MGB_COMMON_ASYNC_COMPNODE
    auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) {
        m_asyn_var_releaser->wait_release_finish();
    };
--- a/src/core/impl/graph/var_node_mem_mgr.h
+++ b/src/core/impl/graph/var_node_mem_mgr.h
@@ -445,7 +445,12 @@ class VarNodeMemManager {

        SyncableCounter m_cpu_async_release_barrier;

 #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM
 // clang-format off
 #define MGB_COMMON_ASYNC_COMPNODE \
    (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON  || MGB_ROCM)
    // clang-format on

 #if MGB_COMMON_ASYNC_COMPNODE
        //! release dynamic var on after compnode event finishes
        class AsyncVarReleaser;
        std::unique_ptr<AsyncVarReleaser> m_asyn_var_releaser;
--- a/src/core/include/megbrain/utils/thread_impl_spinlock.h
+++ b/src/core/include/megbrain/utils/thread_impl_spinlock.h
@@ -14,6 +14,7 @@
 #include "megbrain/common.h"
 #include <thread>
 #include <atomic>
 #include "megbrain/utils/metahelper.h"

 namespace mgb {

@@ -24,7 +25,7 @@ class Spinlock final: public NonCopyableObj {
    public:

        void lock() {
            while (m_state.test_and_set(std::memory_order_acquire));
            while (m_state.test_and_set(std::memory_order_acquire)) {};
        }

        void unlock() {
--- a/src/opr/include/megbrain/opr/basic_arith.h
+++ b/src/opr/include/megbrain/opr/basic_arith.h
@@ -281,8 +281,8 @@ MGB_DEFINE_OPR_CLASS(AddUpdate,
 * Mode specifies the actual arithmetic; and exactly one of *axis* and
 * *target_shape* must be provided, to specify output shape.
 */
 MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic<
        intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolder>>) //  {
 MGB_DEFINE_OPR_CLASS(Reduce,
        intl::DynamicOutputIfInputDynamic<intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolder>>) // {

    public:
        using Param = megdnn::param::Reduce;
@@ -350,16 +350,17 @@ MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic<
 * the optimizer.
 */
 MGB_DEFINE_OPR_CLASS(PowC, intl::MegDNNOprWrapperFwd<megdnn::PowC>) // {
 public:
    PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar inp, const Param& param = {},
                          const OperatorNodeConfig& config = {});

 private:
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void mem_plan_fwd_in2out_writable() override;
    NodeProp* do_make_node_prop() const override;
    void scn_do_execute() override;

 public:
    PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar inp, const Param& param = {},
                          const OperatorNodeConfig& config = {});
 };

 } // namespace opr
--- a/src/opr/test/atlas_models.h
+++ b/src/opr/test/atlas_models.h
@@ -1,4 +1,5 @@
 //generated by tools/atlas/embed.py
 // generated by tools/atlas/embed.py
 // clang-format off
 #pragma once
 #include <map>
 #include <string>