Browse Source

fix(dnn/fallback): fix naive shift multiple-definition error and optimize GiCvtFromInt32V4ToUint8

GitOrigin-RevId: 6660c35214
HuaHua404-patch-4
Megvii Engine Team 2 years ago
parent
commit
8395a459b5
2 changed files with 5 additions and 13 deletions
  1. +4
    -12
      dnn/src/fallback/general_intrinsic/gi_int.h
  2. +1
    -1
      dnn/test/fallback/gi.cpp

+ 4
- 12
dnn/src/fallback/general_intrinsic/gi_int.h View File

@@ -2055,6 +2055,7 @@ void GiStoreZipInt8V3(void* Buffer, GI_INT8_t a, GI_INT8_t b, GI_INT8_t c) {
#define GiShiftRightInt32(Vector, n) \
vsra_vx_i32m1(Vector, n, GI_SIMD_LEN_BYTE / sizeof(int32_t))
#else
GI_FORCEINLINE
GI_INT32_t ShiftRightNaive(GI_INT32_t src, const size_t shift) {
GI_INT32_t ret;
for (size_t idx = 0; idx < GI_SIMD_LEN_BYTE / sizeof(int32_t); ++idx) {
@@ -2074,6 +2075,7 @@ GI_INT32_t ShiftRightNaive(GI_INT32_t src, const size_t shift) {
#define GiShiftLeftInt32(Vector, n) \
vsll_vx_i32m1(Vector, n, GI_SIMD_LEN_BYTE / sizeof(int32_t))
#else
GI_FORCEINLINE
GI_INT32_t ShiftLeftNaive(GI_INT32_t src, const size_t shift) {
GI_INT32_t ret;
for (size_t idx = 0; idx < GI_SIMD_LEN_BYTE / sizeof(int32_t); ++idx) {
@@ -2306,19 +2308,9 @@ GI_UINT8_t GiCvtFromInt32V4ToUint8(
GI_INT32_t Vector0, GI_INT32_t Vector1, GI_INT32_t Vector2,
GI_INT32_t Vector3) {
#if defined(GI_NEON_INTRINSICS)
int16x8_t mid1 = vmaxq_s16(
vdupq_n_s16(0),
vminq_s16(
vcombine_s16(vqmovn_s32(Vector0), vqmovn_s32(Vector1)),
vdupq_n_s16(UINT8_MAX)));
int16x8_t mid2 = vmaxq_s16(
vdupq_n_s16(0),
vminq_s16(
vcombine_s16(vqmovn_s32(Vector2), vqmovn_s32(Vector3)),
vdupq_n_s16(UINT8_MAX)));
return vcombine_u8(
vqmovn_u16(vreinterpretq_u16_s16(mid1)),
vqmovn_u16(vreinterpretq_u16_s16(mid2)));
vqmovun_s16(vcombine_s16(vqmovn_s32(Vector0), vqmovn_s32(Vector1))),
vqmovun_s16(vcombine_s16(vqmovn_s32(Vector2), vqmovn_s32(Vector3))));
#elif defined(GI_SSE2_INTRINSICS)
__m128i vepi16_0 = _mm_packs_epi32(Vector0, Vector1);
__m128i vepi16_1 = _mm_packs_epi32(Vector2, Vector3);


+ 1
- 1
dnn/test/fallback/gi.cpp View File

@@ -4954,7 +4954,7 @@ TEST_F(FALLBACK, GiMultiplyAddInt16LongHigh) {
assert_eq((int32_t*)&ret, naive, SIMD_LEN);
}
TEST_F(FALLBACK, GiCvtFromInt32V4ToUint8) {
std::vector<int32_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};
std::vector<int32_t> s0{INT16_MAX, INT16_MIN, INT32_MAX, INT32_MIN};
GI_INT32_t src0, src1, src2, src3;
force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);


Loading…
Cancel
Save