diff --git a/dnn/src/fallback/general_intrinsic/gi_common.h b/dnn/src/fallback/general_intrinsic/gi_common.h
index 20660524..c108ad20 100644
--- a/dnn/src/fallback/general_intrinsic/gi_common.h
+++ b/dnn/src/fallback/general_intrinsic/gi_common.h
@@ -221,10 +221,64 @@ typedef __m128i GI_INT64_t;
         return NAME(a b, 0); \
     }
 #if !defined(__SSE3__)
-GI_FORCEINLINE __m128i _sse2_mm_alignr_epi8(__m128i b, __m128i a, int imm8) {
-    int imm2 = sizeof(__m128i) - imm8;
-    return _mm_or_si128(_mm_srli_si128(a, imm8), _mm_slli_si128(b, imm2));
-}
+/*
+ * SSE2 stand-in for _mm_alignr_epi8(b, a, imm8); evaluates to
+ *     (a >> imm8 bytes) | (b << (16 - imm8) bytes).
+ *
+ * Both variants copy `b` and `a` into prefixed locals before use, so a
+ * caller variable named like a macro-internal temporary cannot be captured
+ * and each argument is expanded exactly once.
+ */
+#ifdef __cplusplus
+/* C++: the left-shift count is derived from imm8 via a const int. */
+#define _sse2_mm_alignr_epi8(b, a, imm8)                              \
+    __extension__({                                                   \
+        const __m128i gi_alignr_hi_ = (b);                            \
+        const __m128i gi_alignr_lo_ = (a);                            \
+        const int gi_alignr_shl_ = (int)sizeof(__m128i) - (imm8);     \
+        _mm_or_si128(                                                 \
+                _mm_srli_si128(gi_alignr_lo_, (imm8)),                \
+                _mm_slli_si128(gi_alignr_hi_, gi_alignr_shl_));       \
+    })
+#else
+/*
+ * C: shift counts are literal immediates, one case per supported imm8
+ * (0, 4, 8, 12); any other value calls gi_trap(). The result starts out as
+ * `a` so it is never indeterminate should gi_trap() return.
+ */
+#define _sse2_mm_alignr_epi8(b, a, imm8)                              \
+    __extension__({                                                   \
+        const __m128i gi_alignr_hi_ = (b);                            \
+        const __m128i gi_alignr_lo_ = (a);                            \
+        __m128i gi_alignr_ret_ = gi_alignr_lo_;                       \
+        switch (imm8) {                                               \
+            case 0:                                                   \
+                gi_alignr_ret_ = _mm_or_si128(                        \
+                        _mm_srli_si128(gi_alignr_lo_, 0),             \
+                        _mm_slli_si128(gi_alignr_hi_, 16));           \
+                break;                                                \
+            case 4:                                                   \
+                gi_alignr_ret_ = _mm_or_si128(                        \
+                        _mm_srli_si128(gi_alignr_lo_, 4),             \
+                        _mm_slli_si128(gi_alignr_hi_, 12));           \
+                break;                                                \
+            case 8:                                                   \
+                gi_alignr_ret_ = _mm_or_si128(                        \
+                        _mm_srli_si128(gi_alignr_lo_, 8),             \
+                        _mm_slli_si128(gi_alignr_hi_, 8));            \
+                break;                                                \
+            case 12:                                                  \
+                gi_alignr_ret_ = _mm_or_si128(                        \
+                        _mm_srli_si128(gi_alignr_lo_, 12),            \
+                        _mm_slli_si128(gi_alignr_hi_, 4));            \
+                break;                                                \
+            default:                                                  \
+                gi_trap();                                            \
+                break;                                                \
+        }                                                             \
+        gi_alignr_ret_;                                               \
+    })
+#endif
 #endif
 
 #define _SSE_COMMA ,